1 files changed, 132 insertions, 85 deletions
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 5eea4356d70..0bf1e4edf04 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -1,6 +1,8 @@
-/*P:400 This contains run_guest() which actually calls into the Host<->Guest
+/*P:400
+ * This contains run_guest() which actually calls into the Host<->Guest
  * Switcher and analyzes the return, such as determining if the Guest wants the
- * Host to do something.  This file also contains useful helper routines. :*/
+ * Host to do something.  This file also contains useful helper routines.
+:*/
 #include <linux/module.h>
 #include <linux/stringify.h>
 #include <linux/stddef.h>
@@ -10,6 +12,7 @@
 #include <linux/cpu.h>
 #include <linux/freezer.h>
 #include <linux/highmem.h>
+#include <linux/slab.h>
 #include <asm/paravirt.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -17,14 +20,15 @@
 #include <asm/asm-offsets.h>
 #include "lg.h"
 
-
+unsigned long switcher_addr;
+struct page **lg_switcher_pages;
 static struct vm_struct *switcher_vma;
-static struct page **switcher_page;
 
 /* This One Big lock protects all inter-guest data structures. */
 DEFINE_MUTEX(lguest_lock);
 
-/*H:010 We need to set up the Switcher at a high virtual address.  Remember the
+/*H:010
+ * We need to set up the Switcher at a high virtual address.  Remember the
  * Switcher is a few hundred bytes of assembler code which actually changes the
  * CPU to run the Guest, and then changes back to the Host when a trap or
  * interrupt happens.
@@ -33,7 +37,8 @@ DEFINE_MUTEX(lguest_lock);
  * Host since it will be running as the switchover occurs.
  *
  * Trying to map memory at a particular address is an unusual thing to do, so
- * it's not a simple one-liner. */
+ * it's not a simple one-liner.
+ */
 static __init int map_switcher(void)
 {
 	int i, err;
@@ -47,41 +52,53 @@ static __init int map_switcher(void)
 	 * easy.
 	 */
 
-	/* We allocate an array of struct page pointers.  map_vm_area() wants
-	 * this, rather than just an array of pages. */
-	switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES,
-				GFP_KERNEL);
-	if (!switcher_page) {
+	/* We assume Switcher text fits into a single page. */
+	if (end_switcher_text - start_switcher_text > PAGE_SIZE) {
+		printk(KERN_ERR "lguest: switcher text too large (%zu)\n",
+		       end_switcher_text - start_switcher_text);
+		return -EINVAL;
+	}
+
+	/*
+	 * We allocate an array of struct page pointers.  map_vm_area() wants
+	 * this, rather than just an array of pages.
+	 */
+	lg_switcher_pages = kmalloc(sizeof(lg_switcher_pages[0])
+				    * TOTAL_SWITCHER_PAGES,
+				    GFP_KERNEL);
+	if (!lg_switcher_pages) {
 		err = -ENOMEM;
 		goto out;
 	}
 
-	/* Now we actually allocate the pages.  The Guest will see these pages,
-	 * so we make sure they're zeroed. */
+	/*
+	 * Now we actually allocate the pages.  The Guest will see these pages,
+	 * so we make sure they're zeroed.
+	 */
 	for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {
-		unsigned long addr = get_zeroed_page(GFP_KERNEL);
-		if (!addr) {
+		lg_switcher_pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO);
+		if (!lg_switcher_pages[i]) {
 			err = -ENOMEM;
 			goto free_some_pages;
 		}
-		switcher_page[i] = virt_to_page(addr);
 	}
 
-	/* First we check that the Switcher won't overlap the fixmap area at
-	 * the top of memory.  It's currently nowhere near, but it could have
-	 * very strange effects if it ever happened. */
-	if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){
-		err = -ENOMEM;
-		printk("lguest: mapping switcher would thwack fixmap\n");
-		goto free_pages;
-	}
+	/*
+	 * We place the Switcher underneath the fixmap area, which is the
+	 * highest virtual address we can get.  This is important, since we
+	 * tell the Guest it can't access this memory, so we want its ceiling
+	 * as high as possible.
+	 */
+	switcher_addr = FIXADDR_START - (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE;
 
-	/* Now we reserve the "virtual memory area" we want: 0xFFC00000
-	 * (SWITCHER_ADDR).  We might not get it in theory, but in practice
-	 * it's worked so far.  The end address needs +1 because __get_vm_area
-	 * allocates an extra guard page, so we need space for that. */
+	/*
+	 * Now we reserve the "virtual memory area" we want.  We might
+	 * not get it in theory, but in practice it's worked so far.
+	 * The end address needs +1 because __get_vm_area allocates an
+	 * extra guard page, so we need space for that.
+	 */
 	switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE,
-				     VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR
+				     VM_ALLOC, switcher_addr, switcher_addr
 				     + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE);
 	if (!switcher_vma) {
 		err = -ENOMEM;
@@ -89,20 +106,24 @@ static __init int map_switcher(void)
 		goto free_pages;
 	}
 
-	/* This code actually sets up the pages we've allocated to appear at
-	 * SWITCHER_ADDR.  map_vm_area() takes the vma we allocated above, the
+	/*
+	 * This code actually sets up the pages we've allocated to appear at
+	 * switcher_addr.  map_vm_area() takes the vma we allocated above, the
 	 * kind of pages we're mapping (kernel pages), and a pointer to our
 	 * array of struct pages.  It increments that pointer, but we don't
-	 * care. */
-	pagep = switcher_page;
-	err = map_vm_area(switcher_vma, PAGE_KERNEL, &pagep);
+	 * care.
+	 */
+	pagep = lg_switcher_pages;
+	err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
 	if (err) {
 		printk("lguest: map_vm_area failed: %i\n", err);
 		goto free_vma;
 	}
 
-	/* Now the Switcher is mapped at the right address, we can't fail!
-	 * Copy in the compiled-in Switcher code (from <arch>_switcher.S). */
+	/*
+	 * Now the Switcher is mapped at the right address, we can't fail!
+	 * Copy in the compiled-in Switcher code (from x86/switcher_32.S).
+	 */
 	memcpy(switcher_vma->addr, start_switcher_text,
 	       end_switcher_text - start_switcher_text);
 
@@ -117,15 +138,14 @@ free_pages:
 	i = TOTAL_SWITCHER_PAGES;
 free_some_pages:
 	for (--i; i >= 0; i--)
-		__free_pages(switcher_page[i], 0);
-	kfree(switcher_page);
+		__free_pages(lg_switcher_pages[i], 0);
+	kfree(lg_switcher_pages);
 out:
 	return err;
 }
 /*:*/
 
-/* Cleaning up the mapping when the module is unloaded is almost...
- * too easy. */
+/* Cleaning up the mapping when the module is unloaded is almost... too easy. */
 static void unmap_switcher(void)
 {
 	unsigned int i;
@@ -134,7 +154,8 @@ static void unmap_switcher(void)
 	vunmap(switcher_vma->addr);
 	/* Now we just need to free the pages we copied the switcher into */
 	for (i = 0; i < TOTAL_SWITCHER_PAGES; i++)
-		__free_pages(switcher_page[i], 0);
+		__free_pages(lg_switcher_pages[i], 0);
+	kfree(lg_switcher_pages);
 }
 
 /*H:032
@@ -150,16 +171,19 @@ static void unmap_switcher(void)
  * But we can't trust the Guest: it might be trying to access the Launcher
  * code.  We have to check that the range is below the pfn_limit the Launcher
  * gave us.  We have to make sure that addr + len doesn't give us a false
- * positive by overflowing, too. */
-int lguest_address_ok(const struct lguest *lg,
-		      unsigned long addr, unsigned long len)
+ * positive by overflowing, too.
+ */
+bool lguest_address_ok(const struct lguest *lg,
+		       unsigned long addr, unsigned long len)
 {
 	return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr);
 }
 
-/* This routine copies memory from the Guest.  Here we can see how useful the
+/*
+ * This routine copies memory from the Guest.  Here we can see how useful the
  * kill_lguest() routine we met in the Launcher can be: we return a random
- * value (all zeroes) instead of needing to return an error. */
+ * value (all zeroes) instead of needing to return an error.
+ */
 void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes)
 {
 	if (!lguest_address_ok(cpu->lg, addr, bytes)
@@ -180,58 +204,87 @@ void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b,
 }
 /*:*/
 
-/*H:030 Let's jump straight to the the main loop which runs the Guest.
+/*H:030
+ * Let's jump straight to the main loop which runs the Guest.
  * Remember, this is called by the Launcher reading /dev/lguest, and we keep
- * going around and around until something interesting happens. */
+ * going around and around until something interesting happens.
+ */
 int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 {
 	/* We stop running once the Guest is dead. */
 	while (!cpu->lg->dead) {
+		unsigned int irq;
+		bool more;
+
 		/* First we run any hypercalls the Guest wants done. */
 		if (cpu->hcall)
 			do_hypercalls(cpu);
 
-		/* It's possible the Guest did a NOTIFY hypercall to the
-		 * Launcher, in which case we return from the read() now. */
+		/*
+		 * It's possible the Guest did a NOTIFY hypercall to the
+		 * Launcher.
+		 */
 		if (cpu->pending_notify) {
-			if (put_user(cpu->pending_notify, user))
-				return -EFAULT;
-			return sizeof(cpu->pending_notify);
+			/*
+			 * Does it just need to write to a registered
+			 * eventfd (ie. the appropriate virtqueue thread)?
+			 */
+			if (!send_notify_to_eventfd(cpu)) {
+				/* OK, we tell the main Launcher. */
+				if (put_user(cpu->pending_notify, user))
+					return -EFAULT;
+				return sizeof(cpu->pending_notify);
+			}
 		}
 
+		/*
+		 * All long-lived kernel loops need to check with this horrible
+		 * thing called the freezer.  If the Host is trying to suspend,
+		 * it stops us.
+		 */
+		try_to_freeze();
+
 		/* Check for signals */
 		if (signal_pending(current))
 			return -ERESTARTSYS;
 
-		/* If Waker set break_out, return to Launcher. */
-		if (cpu->break_out)
-			return -EAGAIN;
-
-		/* Check if there are any interrupts which can be delivered now:
+		/*
+		 * Check if there are any interrupts which can be delivered now:
 		 * if so, this sets up the hander to be executed when we next
-		 * run the Guest. */
-		maybe_do_interrupt(cpu);
-
-		/* All long-lived kernel loops need to check with this horrible
-		 * thing called the freezer.  If the Host is trying to suspend,
-		 * it stops us. */
-		try_to_freeze();
-
-		/* Just make absolutely sure the Guest is still alive.  One of
-		 * those hypercalls could have been fatal, for example. */
+		 * run the Guest.
+		 */
+		irq = interrupt_pending(cpu, &more);
+		if (irq < LGUEST_IRQS)
+			try_deliver_interrupt(cpu, irq, more);
+
+		/*
+		 * Just make absolutely sure the Guest is still alive.  One of
+		 * those hypercalls could have been fatal, for example.
+		 */
 		if (cpu->lg->dead)
 			break;
 
-		/* If the Guest asked to be stopped, we sleep.  The Guest's
-		 * clock timer or LHCALL_BREAK from the Waker will wake us. */
+		/*
+		 * If the Guest asked to be stopped, we sleep.  The Guest's
+		 * clock timer will wake us.
+		 */
 		if (cpu->halted) {
 			set_current_state(TASK_INTERRUPTIBLE);
-			schedule();
+			/*
+			 * Just before we sleep, make sure no interrupt snuck in
+			 * which we should be doing.
+			 */
+			if (interrupt_pending(cpu, &more) < LGUEST_IRQS)
+				set_current_state(TASK_RUNNING);
+			else
+				schedule();
 			continue;
 		}
 
-		/* OK, now we're ready to jump into the Guest.  First we put up
-		 * the "Do Not Disturb" sign: */
+		/*
+		 * OK, now we're ready to jump into the Guest.  First we put up
+		 * the "Do Not Disturb" sign:
+		 */
 		local_irq_disable();
 
 		/* Actually run the Guest until something happens. */
@@ -265,7 +318,7 @@ static int __init init(void)
 	int err;
 
 	/* Lguest can't run under Xen, VMI or itself.  It does Tricky Stuff. */
-	if (paravirt_enabled()) {
+	if (get_kernel_rpl() != 0) {
 		printk("lguest is afraid of being a guest\n");
 		return -EPERM;
 	}
@@ -275,15 +328,10 @@ static int __init init(void)
 	if (err)
 		goto out;
 
-	/* Now we set up the pagetable implementation for the Guests. */
-	err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES);
-	if (err)
-		goto unmap;
-
 	/* We might need to reserve an interrupt vector. */
 	err = init_interrupts();
 	if (err)
-		goto free_pgtables;
+		goto unmap;
 
 	/* /dev/lguest needs to be registered. */
 	err = lguest_device_init();
@@ -298,8 +346,6 @@ static int __init init(void)
 
 free_interrupts:
 	free_interrupts();
-free_pgtables:
-	free_pagetables();
 unmap:
 	unmap_switcher();
 out:
@@ -311,15 +357,16 @@ static void __exit fini(void)
 {
 	lguest_device_remove();
 	free_interrupts();
-	free_pagetables();
 	unmap_switcher();
 
 	lguest_arch_host_fini();
 }
 /*:*/
 
-/* The Host side of lguest can be a module.  This is a nice way for people to
- * play with it.  */
+/*
+ * The Host side of lguest can be a module.  This is a nice way for people to
+ * play with it.
+ */
 module_init(init);
 module_exit(fini);
 MODULE_LICENSE("GPL");