/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef _KERNEL_OPT
#include "opt_multiprocessor.h"
#endif

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/filedesc.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/select.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/ttycom.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#include <sys/exec.h>
#include <sys/lkm.h>

#include <machine/stdarg.h>
/*
 * Yes, not <machine/isa_machdep.h>, this is i386 only, and we need it
 * for sysbeep().
 */
#include <arch/i386/include/isa_machdep.h>

#if __NetBSD_Version__ > 105009900
#include <uvm/uvm_extern.h>
#include <uvm/uvm_param.h>
#else
#include <vm/vm.h>
#endif

/*
 * Kernel-version compatibility shims.  Each macro below is selected by
 * comparing __NetBSD_Version__ with the version number at which the
 * corresponding kernel interface changed.
 */

/* use curproc for pre-nathanw-sa world, curlwp post */
#if __NetBSD_Version__ >= 106130000
#define	CURLWP		curlwp		/* new world order */
#else
#define	CURLWP		curproc		/* old world order */
#endif

/*
 * change to pass lwp rather than proc to driver entry points in 1.6V
 *
 * ENTRYARG is the struct tag of the last argument of the driver entry
 * points; LWP2PROC() turns that argument into the owning struct proc.
 *
 * NOTE(review): this is an exact-match test, unlike the ">=" tests around
 * it, so only that single kernel version gets the lwp variant.  That looks
 * deliberate (DUPFD() below reaches for curlwp instead of using its
 * argument on newer kernels) -- confirm against the kernel history before
 * changing it.
 */
#if __NetBSD_Version__ == 106220000
#define ENTRYARG	lwp
#define LWP2PROC(l)	(l->l_proc)
#else
#define ENTRYARG	proc
#define LWP2PROC(l)	(l)
#endif

/*
 * dupfd moved from struct proc to struct lwp in 1.6ZA
 *
 * Note that the newer variant ignores its argument and always uses curlwp.
 */
#if __NetBSD_Version__ >= 106270000
#define DUPFD(p)	(curlwp)->l_dupfd
#else
#define	DUPFD(p)	(p)->p_dupfd
#endif

#define FILECODE "F(300)"

#include "include/x86.h"
#include "include/vm_types.h"
#include "include/iocontrols.h"
#include "include/vm_assert.h"
#include "include/modulecall.h"
#include "include/vm_asm.h"
#include "include/vm_time.h"
#include "common/vmx86.h"
#include "include/initblock.h"
#include "common/task.h"
#include "common/cpuid.h"
#include "common/hostif.h"
#include "driver.h"
#include "include/speaker_reg.h"
#include "include/vtrace.h"
#include "common/memtrack.h"

#ifdef VMX86_DEVEL
#include "private.h"
#endif

/*
 * Debug output, enabled by setting vmmon_debug non-zero.
 *
 * This expands to a bare "if (...) printf", so a call must never be the
 * unbraced body of an if that has an else: the else would bind to the
 * hidden if.  Always wrap such uses in braces.
 */
#define VMDEBUG  if (vmmon_debug) printf

/* Attach hook for a statically configured kernel, or the LKM entry points. */
#ifndef _LKM
void vmmonattach(int);
#else
int vmmon_lkmentry(struct lkm_table *, int, int);
static int vmmon_handle(struct lkm_table *, int);
#endif /* _LKM */

/* Character device entry points referenced from vmmon_cdevsw. */
static int vmmon_open(dev_t dev, int oflags, int devtype, struct ENTRYARG *);
static int vmmon_close(dev_t dev, int cflags, int devtype, struct ENTRYARG*);
static int vmmon_ioctl(dev_t dev, u_long cmd, caddr_t data, int flags,
		 struct ENTRYARG *);
static int vmmon_poll(dev_t, int, struct ENTRYARG *);

/* Gives the opener a new descriptor for a per-VM device number. */
static int vmmon_fake_clonedev(dev_t, int, struct ENTRYARG *);

/* Per-VM softc slot management. */
static int vm_create(struct vmmon_softc *, struct vmx86_softc **);
static struct vmx86_softc * vm_allocate(struct vmmon_softc *);
static void vm_destroy(struct vmmon_softc *, int);
static void vm_deallocate(struct vmx86_softc *);

/* Physically contiguous "low" page ranges, tracked per VM as MemRange lists. */
static int vmmon_alloc_low_pages(struct vmx86_softc *, unsigned long, PA *);
static MemRange * vmmon_remove_low_pages(MemRange *, PA);
static int vmmon_free_low_pages(struct vmx86_softc *, PA);
static void vmmon_destroy_low_pages(MemRange *);

/* The single driver-wide softc; holds the table of per-VM softcs. */
struct vmmon_softc vmmon_sc;

/*
 * Character device switch.  Only open, close, ioctl and poll are
 * implemented; the read, write, stop and mmap slots are filled with enodev.
 */
struct cdevsw vmmon_cdevsw = {
	vmmon_open, vmmon_close, 
	(dev_type_read((*)))enodev, (dev_type_write((*)))enodev,
	vmmon_ioctl, (dev_type_stop((*))) enodev, 0,
	vmmon_poll, (dev_type_mmap((*))) enodev, 0
};

/* Number of successful opens not yet closed; a non-zero value blocks unload. */
static int vmmon_refcnt = 0;
/* Non-zero enables VMDEBUG() output. */
static int vmmon_debug = 0;

#ifdef _LKM
/*
 * Declare the LKM character-device module.  The MOD_DEV() argument list
 * differs between kernel versions, hence the two variants.
 */
#if __NetBSD_Version__ >= 106080000
MOD_DEV("vmmon", "vmmon", NULL, -1, &vmmon_cdevsw, -1)
#else
MOD_DEV("vmmon", LM_DT_CHAR, -1, &vmmon_cdevsw)
#endif

/*
 * LKM entry point.  All three handler slots of DISPATCH() are given
 * vmmon_handle(), which tells the requests apart by its cmd argument.
 */
int
vmmon_lkmentry(struct lkm_table *lkmtp, int cmd, int ver)
{
	DISPATCH(lkmtp, cmd, ver, vmmon_handle, vmmon_handle, vmmon_handle)
}

/*
 * Handle an LKM load, unload or stat request.
 *
 * Load refuses a second copy of the module (EEXIST) and otherwise runs the
 * CPU probing routines.  Unload is refused with EBUSY while any open is
 * outstanding; otherwise every slot up to sc_maxvm is destroyed.  Stat is
 * a no-op.  Any other command yields EIO.
 */
static int
vmmon_handle(struct lkm_table *lkmtp, int cmd)
{
	struct vmmon_softc *softc = &vmmon_sc;
	int slot;

	if (cmd == LKM_E_LOAD) {
		if (lkmexists(lkmtp))
			return EEXIST;
		CPUID_Init();
		Task_ProbeSysenterMSR();
		return 0;
	}

	if (cmd == LKM_E_UNLOAD) {
		if (vmmon_refcnt > 0)
			return (EBUSY);
		/*
		 * vm_destroy() may lower sc_maxvm, so the bound is
		 * re-read on every iteration.
		 */
		for (slot = 0; slot <= softc->sc_maxvm; slot++)
			vm_destroy(softc, slot);
		return 0;
	}

	if (cmd == LKM_E_STAT)
		return 0;

	return EIO;
}
#else /* !_LKM */
/*
 * Attach hook for a kernel with the driver compiled in: runs the same CPU
 * probing routines as the LKM load path.  The ndevs argument is unused.
 */
void
vmmonattach(int ndevs)
{
	CPUID_Init();
	Task_ProbeSysenterMSR();
}
#endif /* _LKM */

/*
 * Open entry point: allocate a VM slot for the caller and attach a freshly
 * created descriptor for that slot's device number to the opening process.
 *
 * Only the superuser may open the device.  On the normal path the value
 * returned is whatever vmmon_fake_clonedev() returned (ENXIO once it has
 * recorded the new descriptor in DUPFD()); that is intentional and is not
 * treated as a failure here.
 *
 * Returns ENODEV if DUPFD() is already non-negative on entry, EPERM for
 * non-root callers, the vm_create() error if no slot could be set up, or
 * the vmmon_fake_clonedev() result.
 */
static int
vmmon_open(dev_t dev, int flag, int mode, struct ENTRYARG *l)
{
	struct vmmon_softc *vmmonsc;
	struct vmx86_softc *vmxsc;
	int error;

	if (DUPFD(LWP2PROC(l)) >= 0)
		return ENODEV;

	VMDEBUG("vmmon: %d opened device\n", LWP2PROC(l)->p_pid);

	if (suser(LWP2PROC(l)->p_ucred, &LWP2PROC(l)->p_acflag) != 0)
		return (EPERM);

	vmmonsc = &vmmon_sc;
	/* Remember the device number of the first open as the clone base. */
	if (vmmonsc->sc_dev == 0)
		vmmonsc->sc_dev = dev;

	error = vm_create(vmmonsc, &vmxsc);
	if (error != 0)
		return error;

	vmmon_refcnt++;

	VMDEBUG("vmmon: pid %d new vm: num %d major %d\n",
	    LWP2PROC(l)->p_pid, VMNUM(vmxsc->vm_dev), major(vmxsc->vm_dev));

	error = vmmon_fake_clonedev(vmxsc->vm_dev, flag, l);
	if (error != 0 && DUPFD(LWP2PROC(l)) < 0) {
		/*
		 * No descriptor was handed out, so close will never run
		 * for this slot: drop the reference taken above as well,
		 * otherwise the module could never be unloaded again.
		 */
		vmmon_refcnt--;
		vm_destroy(vmmonsc, VMNUM(vmxsc->vm_dev));
		return error;
	}

	/*
	 * Snap shot the time stamp counter and the real time so we
	 * can later compute an estimate of the cycle time.
	 */
	Vmx86_SetStartTime(&vmxsc->startTime);
	callout_init(&vmxsc->vm_callout);

	return error;
}


/*
 * Close entry point: tear down the VM slot selected by the minor number.
 *
 * Stops the poll callout, destroys every low-page range still linked to
 * the VM, releases the VM driver object if one is attached, frees the
 * slot and drops one open reference.  Returns ENXIO when the slot number
 * is out of range or empty, 0 otherwise.
 */
static int
vmmon_close(dev_t dev, int flags, int mode, struct ENTRYARG *l)
{
	struct vmmon_softc *softc = &vmmon_sc;
	struct vmx86_softc *vmxsc;
	MemRange *anchor, *range;
	int slot;

	VMDEBUG("vmmon: close vm %d by pid %d\n", VMNUM(dev), LWP2PROC(l)->p_pid);

	slot = VMNUM(dev);
	vmxsc = (slot >= MAXVMS) ? NULL : softc->sc_vms[slot];
	if (vmxsc == NULL) {
		VMDEBUG("vmmon: close: illegal vm %d??\n", slot);
		return ENXIO;
	}

	callout_stop(&vmxsc->vm_callout);

	/*
	 * Pop ranges off the front of the circular list until only the
	 * anchor itself is left, destroying each one.
	 */
	anchor = &vmxsc->vm_low_pages;
	while ((range = anchor->next) != anchor) {
		anchor->next = range->next;
		vmmon_destroy_low_pages(range);
	}

	if (vmxsc->vm_vm != NULL)
		Vmx86_ReleaseVM(vmxsc->vm_vm);

	vm_destroy(softc, slot);

	/* Drop the reference, clamping (and complaining) on underflow. */
	if (--vmmon_refcnt < 0) {
		vmmon_refcnt = 0;
		printf("vmmon: refcnt < 0 ??\n");
	}

	VMDEBUG("vmmon: vm %d closed by %d\n", slot, LWP2PROC(l)->p_pid);

	return (0);
}


/*
 * XXXX - poor man's device cloning.
 *
 * Allocate a new file structure and descriptor for the calling process,
 * back it with a character-device vnode for `dev', and record the
 * descriptor number in DUPFD().
 *
 * Returns EINVAL if O_EXLOCK or O_SHLOCK was requested, the falloc() or
 * cdevvp() error on failure, and ENXIO once the descriptor has been set
 * up; vmmon_open() passes that ENXIO straight back to its caller.
 * Nothing is left allocated on any failure path.
 */
static int
vmmon_fake_clonedev(dev_t dev, int flag, struct ENTRYARG *l)
{
	struct file *fp;
	int error, fd;
	struct vnode *vp;

	if (flag & (O_EXLOCK | O_SHLOCK))
		/* XXX */
		return EINVAL;

	error = falloc(LWP2PROC(l), &fp, &fd);
	if (error != 0)
		return error;
	error = cdevvp(dev, &vp);
	if (error != 0) {
		/*
		 * Give back the file structure and descriptor slot that
		 * falloc() just handed out; without this they would stay
		 * allocated in the process forever.
		 */
		FILE_UNUSE(fp, l);
		ffree(fp);
		fdremove(LWP2PROC(l)->p_fd, fd);
		return error;
	}

	if (flag & FWRITE)
		vp->v_writecount++;

	fp->f_flag = flag & FMASK;
	fp->f_type = DTYPE_VNODE;
	fp->f_ops = &vnops;
	fp->f_data = (caddr_t)vp;
#if __NetBSD_Version__ >= 105230000
#ifdef FILE_SET_MATURE
	FILE_SET_MATURE(fp);
#endif
#endif
	FILE_UNUSE(fp, l);

	DUPFD(LWP2PROC(l)) = fd;

	/* Deliberately non-zero: see the function comment. */
	return ENXIO;
}

/*
 * Allocate and lock numPages of memory at physically contiguous addresses
 * below 4GB.
 * The requirement should be < 4GB but there is code in the vmware binaries
 * which relies on < 16MB to fix a bug. c.f. ../linux/driver.c
 *
 * On success the range is mapped wired into kernel virtual memory, linked
 * onto vmxsc->vm_low_pages, and its physical base address is stored in
 * *addr.
 *
 * Returns 0 on success, EINVAL if numPages is zero or the byte count would
 * overflow, the uvm_pglistalloc() error if the pages cannot be obtained,
 * or ENOMEM if they are not contiguous or cannot be mapped.  Every failure
 * path releases the MemRange record and any pages already obtained.
 */
static int
vmmon_alloc_low_pages(struct vmx86_softc *vmxsc, unsigned long numPages, PA *addr)
{
	MemRange *r;
	struct pglist l;
	struct vm_page *m;
	paddr_t high;
	paddr_t curaddr, lastaddr;
	vaddr_t va;
	int error, n;
	extern paddr_t avail_end;
#if 0
#define ISA_DMA_BOUNCE_THRESHOLD (16UL * 1024 * 1024)
#else
	/*
	 * Unfortunately, all the low memory seems to be taken in multiuser
	 * mode and uvm_pglistalloc doesn't try to free some for us.
	 * So just use any pages we can get.
	 */
#define ISA_DMA_BOUNCE_THRESHOLD (3072UL * 1024 * 1024)
#endif

	if (numPages == 0)
		return EINVAL;
	/*
	 * The count comes from user space; refuse values for which
	 * numPages * PAGE_SIZE would wrap around.
	 */
	if (numPages > ~0UL / PAGE_SIZE)
		return EINVAL;

	r = malloc(sizeof *r, M_DEVBUF, M_WAITOK);
	if (r == NULL) {
		VMDEBUG("vmmon_alloc_low_pages: malloc MemRange failed\n");
		return ENOMEM;
	}
	/* Upper bound for the allocation: the threshold or end of memory. */
	if (avail_end > ISA_DMA_BOUNCE_THRESHOLD)
		high = trunc_page(ISA_DMA_BOUNCE_THRESHOLD);
	else
		high = trunc_page(avail_end);
	/*
	 * Allocate pages from the VM system.
	 */
	TAILQ_INIT(&l);
	VMDEBUG("vmmon_alloc_low_pages: pglistalloc: sz %lu hi %lu\n",
	    numPages * PAGE_SIZE, high);
	error = uvm_pglistalloc(numPages * PAGE_SIZE, 0, high, PAGE_SIZE, 0,
		    &l, 1, 1);
	if (error != 0) {
		VMDEBUG("vmmon_alloc_low_pages: pglistalloc failed: %d\n", error);
		/* No pages were obtained, but the record must not leak. */
		free(r, M_DEVBUF);
		return error;
	}
	/*
	 * Check that we got a single physically contiguous set of pages.
	 */
	m = l.tqh_first;
	lastaddr = r->pAddr = VM_PAGE_TO_PHYS(m);
	r->numPages = 1;
	m = m->pageq.tqe_next;

	for (; m != NULL; m = m->pageq.tqe_next) {
		curaddr = VM_PAGE_TO_PHYS(m);
#ifdef DIAGNOSTIC
		if (curaddr >= high) {
			VMDEBUG("vmmon_alloc_low_pages: uvm_pglistalloc returned non-sensical"
			    " address 0x%lx\n", curaddr);
			goto bad;
		}
#endif
		if (curaddr == (lastaddr + PAGE_SIZE))
			r->numPages++;
		else {
			VMDEBUG("vmmon_alloc_low_pages: non-continguous pages\n");
			goto bad;
		}
		lastaddr = curaddr;
	}

	/*
	 * Map the segment into wired kernel virtual memory.
	 */
	va = uvm_km_valloc(kernel_map, numPages * PAGE_SIZE);
	if (va == 0) {
		VMDEBUG("vmmon_alloc_low_pages: km_valloc failed\n");
		goto bad;
	}
	r->kAddr = va;

	for (curaddr = r->pAddr, n = r->numPages;
	     n != 0;
	     curaddr += PAGE_SIZE, va += PAGE_SIZE, n--) {
		pmap_enter(pmap_kernel(), va, curaddr,
		    VM_PROT_READ | VM_PROT_WRITE,
		    PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE);
	}
	pmap_update(pmap_kernel());

	*addr = r->pAddr;

	/* XXX should lock list insertion. */
	r->next = vmxsc->vm_low_pages.next;
	vmxsc->vm_low_pages.next = r;

	return 0;

bad:
	/* Pages were obtained but cannot be used: give everything back. */
	uvm_pglistfree(&l);
	free(r, M_DEVBUF);
	return ENOMEM;
}

/*
 * Unlink and return the range whose physical base address equals `addr'
 * from the circular, singly linked list anchored at `head'.  The anchor
 * itself is never considered.  Returns NULL when no range matches.
 */
static MemRange *
vmmon_remove_low_pages(MemRange *head, PA addr)
{
	MemRange *prev = head;
	MemRange *node;

	while ((node = prev->next) != head) {
		if (node->pAddr == addr) {
			/* Splice the match out of the list. */
			prev->next = node->next;
			return node;
		}
		prev = node;
	}

	return NULL;
}
/*
 * Release the low-page range of `vmxsc' that starts at physical address
 * `addr'.  Returns EINVAL if the VM owns no such range, 0 otherwise.
 */
static int
vmmon_free_low_pages(struct vmx86_softc *vmxsc, PA addr)
{
	MemRange *range;

	/* XXX should protect this call by lock */
	range = vmmon_remove_low_pages(&vmxsc->vm_low_pages, addr);
	if (range != NULL) {
		vmmon_destroy_low_pages(range);
		return 0;
	}

	return EINVAL;
}

/*
 * Tear down one (already unlinked) low-page range: free its kernel
 * virtual mapping, return its physical pages to the VM system and free
 * the MemRange record itself.
 */
static void
vmmon_destroy_low_pages(MemRange *r)
{
	struct pglist pages;
	struct vm_page *pg;
	paddr_t pa;
	int left;

	/* Drop the kernel virtual range that maps these pages. */
	uvm_km_free(kernel_map, (vaddr_t)r->kAddr, r->numPages * PAGE_SIZE);

	/*
	 * The range is physically contiguous, so the page list can be
	 * rebuilt by walking the physical addresses one page at a time.
	 */
	TAILQ_INIT(&pages);
	pa = r->pAddr;
	left = r->numPages;
	while (left != 0) {
		pg = PHYS_TO_VM_PAGE(pa);
		TAILQ_INSERT_TAIL(&pages, pg, pageq);
		pa += PAGE_SIZE;
		left--;
	}

	uvm_pglistfree(&pages);

	free(r, M_DEVBUF);
}

/*
 * Ioctl entry point.  Only PTIOCLINUX is accepted; the real command and
 * its argument are carried in the struct ioctl_pt that `data' points to.
 *
 * This function uses the hack to make PTIOCLINUX ioctls (passthroughs
 * from the emulation) return EJUSTRETURN to be able to have them
 * set syscall return values for the benefit of Linux emulation.
 * Commands that produce a value store it in pt->data and return
 * EJUSTRETURN; commands that only succeed or fail return 0 or an errno.
 *
 * Returns ENXIO if the minor number does not name an existing VM slot,
 * ENOTTY for anything but PTIOCLINUX and for unknown or unsupported
 * commands, EINVAL when a command needs a VM that is not (or already is)
 * attached, and copyin()/copyout() errors as they occur.
 */
static int
vmmon_ioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct ENTRYARG*l)
{
	struct vmx86_softc *vmxsc;
	struct vmmon_softc *sc;
	int error, num;
	unsigned long numPages;
	struct ioctl_pt *pt;
	WslimitsInfo wslimitsInfo;
	InitBlock initParams;
	MA ma;
	PA pa;
	MPN mpn;
	int32_t limit;
	VMSetMemInfoArgs memargs;
	VMDriver *vm;
	Bool setVMPtr, probe;
#ifdef USE_PERFCOUNTERS
	Bool forceEnable;
#endif

	VMDEBUG("vmmon: ioctl %lx on vm %d by pid %d\n",
	    cmd, VMNUM(dev), LWP2PROC(l)->p_pid);

	sc = &vmmon_sc;

	/* Validate the slot number before indexing, as vmmon_close() does. */
	num = VMNUM(dev);
	if (num < 0 || num >= MAXVMS)
		return ENXIO;
	vmxsc = sc->sc_vms[num];
	if (vmxsc == NULL)
		return ENXIO;

	if (cmd != PTIOCLINUX)
		return ENOTTY;

	pt = (struct ioctl_pt *)data;

	vm = vmxsc->vm_vm;

	VMDEBUG("vmmon: ioctl cmd %lu\n", pt->com);

	switch (pt->com) {
	case IOCTLCMD_VERSION:
		pt->data = (void *)VMMON_VERSION;
		return EJUSTRETURN;
	case IOCTLCMD_CREATE_VM:
		if (vm != NULL)
			return EINVAL;
		error = copyin(pt->data, &wslimitsInfo, sizeof wslimitsInfo);
		if (error != 0)
			return error;
		/*
		 * Pass the softc itself as the handle.  The address of the
		 * local variable holding it would be the same for every VM
		 * and dangles as soon as this function returns.
		 */
		vm = Vmx86_CreateVM((void *)vmxsc, (void*)LWP2PROC(l)->p_pid,
		    &wslimitsInfo);
		if (vm == NULL)
			return ENOMEM;
		vmxsc->vm_vm = vm;
		pt->data = (void *) vm->id;
		return EJUSTRETURN;
	case IOCTLCMD_BIND_VM:
		if (vm != NULL)
			return EINVAL;
		vm = Vmx86_BindVM((int)pt->data);
		if (vm == NULL)
			return EINVAL;
		vmxsc->vm_vm = vm;
		break;
	case IOCTLCMD_RELEASE_VM:
		if (vm == NULL)
			return EINVAL;
		vmxsc->vm_vm = NULL;
		Vmx86_ReleaseVM(vm);
		break;
	case IOCTLCMD_INIT_VM:
		if (vm == NULL)
			return EINVAL;
		error = copyin(pt->data, &initParams, sizeof initParams);
		if (error != 0)
			return error;
		if (Vmx86_InitVM(vm, &initParams) != 0)
			return EINVAL;
		error = copyout(&initParams, pt->data, sizeof initParams);
		if (error != 0)
			return error;
		break;
	case IOCTLCMD_LATE_INIT_VM:
		if (vm == NULL)
			return EINVAL;
		if (Vmx86_LateInitVM(vm) != 0 )
			return EINVAL;
		break;
	case IOCTLCMD_RUN_VM:
		if (vm == NULL)
			return EINVAL;
		pt->data = (void *)Vmx86_RunVM(vm);
		return EJUSTRETURN;
	case IOCTLCMD_LOOK_UP_MPN:
		/* NOTE(review): no vm == NULL check here, unlike its neighbours. */
		mpn = HostIF_LookupUserMPN(vm, pt->data);
		pt->data = (void *)mpn;
		return EJUSTRETURN;
	case IOCTLCMD_LOCK_PAGE:
		if (vm == NULL)
			return EINVAL;
		pt->data = (void *)Vmx86_LockPage(vm, pt->data, TRUE);
		return EJUSTRETURN;
	case IOCTLCMD_UNLOCK_PAGE:
		if (vm == NULL)
			return EINVAL;
		pt->data = (void *)Vmx86_UnlockPage(vm, pt->data, TRUE);
		return EJUSTRETURN;
	case IOCTLCMD_GET_NUM_VMS:
		pt->data = (void *)Vmx86_GetNumVMs();
		return EJUSTRETURN;
	case IOCTLCMD_APIC_BASE:
		if (vm == NULL)
			return EINVAL;
		/* The argument is a bit mask of APIC_FLAG_* values. */
		setVMPtr = ((int)pt->data & APIC_FLAG_DISABLE_NMI) != 0;
		probe = ((int)pt->data & APIC_FLAG_PROBE) != 0;
#ifdef USE_PERFCOUNTERS
		forceEnable = ((int)pt->data & APIC_FLAG_FORCE_ENABLE) != 0;
#endif
		ma = HostIF_APICBase(vm, setVMPtr, probe);
#ifdef USE_PERFCOUNTERS
		if ((ma == 0) && forceEnable) {
			ma = HostIF_APICEnable(vm);
		}
#endif
		pt->data = (void *)ma;
		return EJUSTRETURN;
	case IOCTLCMD_IOAPIC_BASE:
		if (vm == NULL)
			return EINVAL;
		pt->data = (void *)HostIF_IOAPICBase(vm);
		return EJUSTRETURN;
	case IOCTLCMD_GET_STATS:
		if (vm == NULL)
			return EINVAL;
		error = copyout(&vm->stats, pt->data, sizeof vm->stats);
		if (error != 0)
			return error;
		break;
	case IOCTLCMD_SET_STATS:
		if (vm == NULL)
			return EINVAL;
		error = copyin(pt->data, &vm->stats, sizeof vm->stats);
		if (error != 0)
			return error;
		break;
	case IOCTLCMD_GET_HARD_LIMIT:
		pt->data = (void *)Vmx86_GetLockedPagesLimit();
		return EJUSTRETURN;
	case IOCTLCMD_SET_HARD_LIMIT:
		error = copyin(pt->data, &limit, sizeof limit);
		if (error != 0)
			return error;
		if (!Vmx86_SetLockedPagesLimit(limit))
			return EINVAL;
		break;
	case IOCTLCMD_GET_MEM_INFO:
		if (vm == NULL)
			return EINVAL;
		/*
		 * The user pointer is handed over as is; presumably the
		 * callee does its own user-space copying -- TODO confirm.
		 */
		if (!Vmx86_GetMemInfoCopy(vm,
		    (VMGetMemInfoArgs *)pt->data))
			return EINVAL;
		break;
	case IOCTLCMD_SET_MEM_INFO:
		if (vm == NULL)
			return EINVAL;
		error = copyin(pt->data, &memargs, sizeof memargs);
		if (error != 0)
			return error;
		Vmx86_SetMemInfo(vm, &memargs);
		break;
	case IOCTLCMD_PAE_ENABLED:
		pt->data = (void *)(int)Vmx86_PAEEnabled();
		return EJUSTRETURN;
	case IOCTLCMD_GET_TOTAL_MEM_USAGE:
		pt->data = (void *)Vmx86_GetTotalMemUsage();
		return EJUSTRETURN;
	case IOCTLCMD_SET_UID:
		break;
	case IOCTLCMD_IS_MP_SAFE:
#ifdef MULTIPROCESSOR
		pt->data = (void *)TRUE;
#else
		pt->data = (void *)FALSE;
#endif
		return EJUSTRETURN;
	case IOCTLCMD_GET_MHZ_ESTIMATE:
		pt->data = (void *)Vmx86_GetMHzEstimate(&vmxsc->startTime);
		return EJUSTRETURN;
	case IOCTLCMD_ALLOW_CORE_DUMP:
		break;

	case IOCTLCMD_CHECK_MEMORY:
		if (vm == NULL)
			return EINVAL;
		pt->data = (void *)HostIF_CheckMemory(vm);
		return EJUSTRETURN;

	case IOCTLCMD_BROADCAST_IPI:	// SMP 2.2.8+ only
		return ENOTTY;

#ifdef SUPPORT_PASSTHROUGH
	case IOCTLCMD_REGISTER_PASSTHROUGH_IO:
	case IOCTLCMD_REGISTER_PASSTHROUGH_IRQ:
	case IOCTLCMD_FREE_PASSTHROUGH_IO:
	case IOCTLCMD_FREE_PASSTHROUGH_IRQ:
	case IOCTLCMD_START_PASSTHROUGH:
	case IOCTLCMD_STOP_PASSTHROUGH:
	case IOCTLCMD_QUERY_PASSTHROUGH:
		/* not yet supported */
		return ENOTTY;
#endif /* SUPPORT_PASSTHROUGH */

#ifdef USE_PERFCOUNTERS
	case IOCTLCMD_REGISTER_PERFCTR:
	case IOCTLCMD_START_PERFCTR:
	case IOCTLCMD_STOP_PERFCTR:
	case IOCTLCMD_RELEASE_PERFCTR:
		/* not yet supported */
		return ENOTTY;
#endif /* USE_PERFCOUNTERS */

	case IOCTLCMD_ALLOC_LOW_PAGES:
		/* The page count is read from, and the address written to, user memory. */
		numPages = (unsigned long)fuword(pt->data);
		VMDEBUG("vmmon: alloc_low_pages %lu\n", numPages);
		if ((int)numPages == -1)
			return EINVAL;
		error = vmmon_alloc_low_pages(vmxsc, numPages, &pa);
		if (error != 0) {
			VMDEBUG("vmmon: cannot alloc_low_pages: %d\n", error);
			return error;
		}
		if (suword(pt->data, pa) != 0) {
			/*
			 * suword() reports failure with a non-zero value
			 * that is not an errno, so translate it.  The
			 * caller never learnt the address, so the range
			 * must be released again rather than leaked.
			 */
			error = EFAULT;
			VMDEBUG("vmmon: cannot suword: %d\n", error);
			(void)vmmon_free_low_pages(vmxsc, pa);
			return error;
		}
		break;

	case IOCTLCMD_FREE_LOW_PAGES:
		VMDEBUG("vmmon: free_low_pages 0x%08lx\n", (unsigned long)pt->data);
		error = vmmon_free_low_pages(vmxsc, (PA)pt->data);
		if (error != 0)
			return error;
		/*
		 * Plain success.  pt->data still holds the address that
		 * was passed in, so EJUSTRETURN must not be used here.
		 */
		break;

#ifdef VMX86_SERVER
	case IOCTLCMD_ALLOC_MACHINE_MEMORY:
	case IOCTLCMD_POWERON_SLACK_MEMORY:
	case IOCTLCMD_ASYNC_ACTIONS:
	case IOCTLCMD_FILE_OPEN:
	case IOCTLCMD_FILE_CLOSE:
	case IOCTLCMD_SET_MMAP_LAST:
#endif
#ifdef VMX86_SERVER
	/* bora-root/fileutils/ls needs to be recompiled if the following
	case definition ever changes */
	case IOCTLCMD_VMFS_GET_LENGTH:
	case IOCTLCMD_VMFS_PHYS_MEM_IO:
#endif
	default:
		return ENOTTY;
	}
	return (0);
}

/*
 * Callout handler armed by vmmon_poll(): wake up whoever is recorded on
 * the VM's select record and flip the state flags from "waiting" to
 * "timed out" so that the next poll reports the device as ready.
 */
static void
vm_select_timo(void *arg)
{
	struct vmx86_softc *vmxsc = arg;

	selwakeup(&vmxsc->vm_rsel);
	vmxsc->vm_flags = (vmxsc->vm_flags | VMFL_SELTIMO) & ~VMFL_SELWAIT;
}


/*
 * Poll entry point.
 *
 * If the timeout flag is set (by vm_select_timo()), all requested events
 * are reported as ready and the flag is cleared.  Otherwise the caller is
 * recorded on the VM's select record and a one-tick callout is (re)armed
 * that will set the flag and wake the caller up.
 *
 * Returns the ready event mask.  For a minor number that does not name an
 * existing VM slot the value ENXIO is returned, as before.
 * NOTE(review): that value is returned in the place of an event mask --
 * confirm that callers cope with it.
 */
static int
vmmon_poll(dev_t dev, int events, struct ENTRYARG *l)
{
	struct vmmon_softc *sc;
	struct vmx86_softc *vmxsc;
	int num, revents = 0, s;

	/* sc is the address of a global and therefore never NULL. */
	sc = &vmmon_sc;

	/* Validate the slot number before indexing, as vmmon_close() does. */
	num = VMNUM(dev);
	if (num < 0 || num >= MAXVMS)
		return ENXIO;
	vmxsc = sc->sc_vms[num];
	if (vmxsc == NULL)
		return ENXIO;

	VMDEBUG("vmmon: poll on vm %d by pid %d\n",
	    VMNUM(dev), LWP2PROC(l)->p_pid);

	/* Block the callout handler while the flags are examined and changed. */
	s = splsoftclock();
	if (vmxsc->vm_flags & VMFL_SELTIMO) {
		revents = events;
		vmxsc->vm_flags &= ~VMFL_SELTIMO;
	} else {
		/* Cancel a still pending callout before arming a new one. */
		if (vmxsc->vm_flags & VMFL_SELWAIT)
			callout_stop(&vmxsc->vm_callout);
		selrecord(l, &vmxsc->vm_rsel);
		vmxsc->vm_flags |= VMFL_SELWAIT;
		callout_reset(&vmxsc->vm_callout, 1, vm_select_timo, vmxsc);
	}
	splx(s);

	return (revents);
}

/*
 * Claim the lowest free slot in sc->sc_vms[], allocate a per-VM softc for
 * it and initialise its device number and (empty) low-page list.
 *
 * On success the new softc is stored in *vmxpp and 0 is returned.
 * Returns EBUSY when all MAXVMS slots are taken and ENOMEM when the softc
 * cannot be allocated.
 */
static int
vm_create(struct vmmon_softc *sc, struct vmx86_softc **vmxpp)
{
	struct vmx86_softc *fresh;
	int slot = 0;

	/* Find the first unused slot. */
	while (slot < MAXVMS && sc->sc_vms[slot] != NULL)
		slot++;
	if (slot == MAXVMS)
		return EBUSY;

	fresh = vm_allocate(sc);
	if (fresh == NULL)
		return ENOMEM;

	sc->sc_vms[slot] = fresh;
	/* Track the highest slot number handed out so far. */
	if (slot >= sc->sc_maxvm)
		sc->sc_maxvm = slot;

	fresh->vm_dev = MAKEVMDEV(sc->sc_dev, slot);
	/* An empty circular list is an anchor that points at itself. */
	fresh->vm_low_pages.next = &fresh->vm_low_pages;

	*vmxpp = fresh;
	return 0;
}

/*
 * Allocate a zero-filled per-VM softc whose back pointer refers to `sc'.
 * Returns NULL if the allocation fails.
 */
static struct vmx86_softc *
vm_allocate(struct vmmon_softc *sc)
{
	struct vmx86_softc *fresh;

	fresh = malloc(sizeof *fresh, M_DEVBUF, M_WAITOK);
	if (fresh != NULL) {
		memset(fresh, 0, sizeof *fresh);
		fresh->vm_monsc = sc;
	}

	return fresh;
}

/*
 * Empty slot `num' of sc->sc_vms[] and free the softc that occupied it.
 * An already empty slot is left alone.  When the slot was the highest one
 * recorded in sc_maxvm, that mark is lowered by one.
 */
static void
vm_destroy(struct vmmon_softc *sc, int num)
{
	struct vmx86_softc *victim = sc->sc_vms[num];

	if (victim != NULL) {
		sc->sc_vms[num] = NULL;
		if (sc->sc_maxvm == num)
			sc->sc_maxvm--;

		vm_deallocate(victim);
	}
}

/*
 * Free a per-VM softc.  Counterpart of vm_allocate(), using the same
 * M_DEVBUF malloc type.
 */
static void
vm_deallocate(struct vmx86_softc *vmxsc)
{
	FREE(vmxsc, M_DEVBUF);
}

static void
vLog(int fd)
{
	struct vmmon_softc *sc = &vmmon_sc;

	log(LOG_DEBUG, "vmmon: %s", sc->buf);
}
   
/*
 * Print the message currently held in the driver's shared buffer with the
 * kernel printf.  The vm argument is not used.
 */
static void
vWarning(VMDriver *vm)
{
	printf("vmmon: %s", vmmon_sc.buf);
}

/*
 * Warning --
 *
 *      Warning messages from the kernel module: formatted into the
 *      driver's shared buffer, then printed with the kernel printf and,
 *      when the current process owns a VM, also sent to the system log.
 *
 *      The message is truncated to the size of the shared buffer.
 */
void 
Warning(char *fmt,...)
{
	va_list args;
	VMDriver *vm;
	struct vmmon_softc *sc = &vmmon_sc;
   
	vm = Vmx86_GetVMforProcess((void *)(curproc->p_pid));

	va_start(args, fmt);
	/* Bounded, like Log(): a long message must not overrun sc->buf. */
	vsnprintf(sc->buf, sizeof sc->buf, fmt, args); 
	va_end(args);
   
	if (vm != NULL)
		vLog(vm->logFD);

	vWarning(vm);
}

/*
 *----------------------------------------------------------------------
 *
 * Log --
 *
 *      Log messages from the kernel module.  The message is formatted
 *      (and truncated if necessary) into the driver's shared buffer and
 *      sent to the system log at debug priority, whether or not the
 *      current process owns a VM.
 *
 *----------------------------------------------------------------------
 */
void 
Log(char *fmt,...)
{
	struct vmmon_softc *softc = &vmmon_sc;
	VMDriver *owner;
	va_list ap;

	owner = Vmx86_GetVMforProcess((void *)(curproc->p_pid));

	va_start(ap, fmt);
	vsnprintf(softc->buf, sizeof softc->buf, fmt, ap);
	va_end(ap);

	if (owner == NULL) {
		log(LOG_DEBUG, "vmmon: %s", softc->buf);
		return;
	}

	vLog(owner->logFD);
}


/*
 *----------------------------------------------------------------------
 *
 * Panic --
 *
 *      ASSERTION failures and Panics from kernel module get here.
 *      The message is formatted (and truncated if necessary) into the
 *      driver's shared buffer.  When the current process owns a VM the
 *      message is logged and printed, followed by a line naming the pid.
 *      The current process is then terminated with exit1().
 *
 *      Without a current process the kernel itself is panicked.
 *
 * Side effects:
 *      Never returns
 *
 *----------------------------------------------------------------------
 */
void
Panic(char *fmt, ...)
{
	VMDriver *vm;
	va_list args;
	struct vmmon_softc *sc = &vmmon_sc;

	va_start(args, fmt);
	/* Bounded, like Log(): a long message must not overrun sc->buf. */
	vsnprintf(sc->buf, sizeof sc->buf, fmt, args); 
	va_end(args);

	/*
	 * XXX 
	 * XXX We cannot exit() the process since we are not running it
	 * XXX
	 *
	 * This check has to come before anything dereferences curproc.
	 */
	if (curproc == NULL) {
		printf("vmmon: Panic in interrupt\n");
		panic("Assertion failure in interrupt handling in VMX86\n");
	}

	vm = Vmx86_GetVMforProcess((void *)(curproc->p_pid));
   
	if (vm != NULL) { 
		vLog(vm->logFD);
		vWarning(vm);
		snprintf(sc->buf, sizeof sc->buf,
		    "VMX86 driver panic. pid=%d\n\r", curproc->p_pid);  
		vLog(vm->logFD);
		vWarning(vm);
	}
  
	exit1(CURLWP, 0);
	/* NOTREACHED */
	for(;;); /* to suppress gcc warning */
}
