Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus

* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus: (27 commits) lguest: use __PAGE_KERNEL instead of _PAGE_KERNEL lguest: Use explicit includes rateher than indirect lguest: get rid of lg variable assignments lguest: change gpte_addr header lguest: move changed bitmap to lg_cpu lguest: move last_pages to lg_cpu lguest: change last_guest to last_cpu lguest: change spte_addr header lguest: per-vcpu lguest pgdir management lguest: make pending notifications per-vcpu lguest: makes special fields be per-vcpu lguest: per-vcpu lguest task management lguest: replace lguest_arch with lg_cpu_arch. lguest: make registers per-vcpu lguest: make emulate_insn receive a vcpu struct. lguest: map_switcher_in_guest() per-vcpu lguest: per-vcpu interrupt processing. lguest: per-vcpu lguest timers lguest: make hypercalls use the vcpu struct lguest: make write() operation smp aware ... Manual conflict resolved (maybe even correctly, who knows) in drivers/lguest/x86/core.c
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-01-31 09:35:32 +1100
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-01-31 09:35:32 +1100
commit: d145c7253c8cb2ed8a75a8839621b0bb8f778820 (patch)
tree: fac21920d149a2cddfdfbde65066ff98935a9c57 /drivers
parent: 44c3b59102e3ecc7a01e9811862633e670595e51 (diff)
parent: 84f12e39c856a8b1ab407f8216ecebaf4204b94d (diff)
9 files changed, 513 insertions, 445 deletions
diff --git a/drivers/Makefile b/drivers/Makefile
index 9e1f808e43c..0ee9a8a4095 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -72,7 +72,7 @@ obj-$(CONFIG_ISDN)		+= isdn/
 obj-$(CONFIG_EDAC)		+= edac/
 obj-$(CONFIG_MCA)		+= mca/
 obj-$(CONFIG_EISA)		+= eisa/
-obj-$(CONFIG_LGUEST_GUEST)	+= lguest/
+obj-y				+= lguest/
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle/
 obj-$(CONFIG_MMC)		+= mmc/
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index cb4c67025d5..7743d73768d 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -151,43 +151,43 @@ int lguest_address_ok(const struct lguest *lg,
 /* This routine copies memory from the Guest.  Here we can see how useful the
  * kill_lguest() routine we met in the Launcher can be: we return a random
  * value (all zeroes) instead of needing to return an error. */
-void __lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
+void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes)
 {
-	if (!lguest_address_ok(lg, addr, bytes)
-	    || copy_from_user(b, lg->mem_base + addr, bytes) != 0) {
+	if (!lguest_address_ok(cpu->lg, addr, bytes)
+	    || copy_from_user(b, cpu->lg->mem_base + addr, bytes) != 0) {
 		/* copy_from_user should do this, but as we rely on it... */
 		memset(b, 0, bytes);
-		kill_guest(lg, "bad read address %#lx len %u", addr, bytes);
+		kill_guest(cpu, "bad read address %#lx len %u", addr, bytes);
 	}
 }
 
 /* This is the write (copy into guest) version. */
-void __lgwrite(struct lguest *lg, unsigned long addr, const void *b,
+void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b,
 	       unsigned bytes)
 {
-	if (!lguest_address_ok(lg, addr, bytes)
-	    || copy_to_user(lg->mem_base + addr, b, bytes) != 0)
-		kill_guest(lg, "bad write address %#lx len %u", addr, bytes);
+	if (!lguest_address_ok(cpu->lg, addr, bytes)
+	    || copy_to_user(cpu->lg->mem_base + addr, b, bytes) != 0)
+		kill_guest(cpu, "bad write address %#lx len %u", addr, bytes);
 }
 /*:*/
 
 /*H:030 Let's jump straight to the the main loop which runs the Guest.
  * Remember, this is called by the Launcher reading /dev/lguest, and we keep
  * going around and around until something interesting happens. */
-int run_guest(struct lguest *lg, unsigned long __user *user)
+int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 {
 	/* We stop running once the Guest is dead. */
-	while (!lg->dead) {
+	while (!cpu->lg->dead) {
 		/* First we run any hypercalls the Guest wants done. */
-		if (lg->hcall)
-			do_hypercalls(lg);
+		if (cpu->hcall)
+			do_hypercalls(cpu);
 
 		/* It's possible the Guest did a NOTIFY hypercall to the
 		 * Launcher, in which case we return from the read() now. */
-		if (lg->pending_notify) {
-			if (put_user(lg->pending_notify, user))
+		if (cpu->pending_notify) {
+			if (put_user(cpu->pending_notify, user))
 				return -EFAULT;
-			return sizeof(lg->pending_notify);
+			return sizeof(cpu->pending_notify);
 		}
 
 		/* Check for signals */
@@ -195,13 +195,13 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
 			return -ERESTARTSYS;
 
 		/* If Waker set break_out, return to Launcher. */
-		if (lg->break_out)
+		if (cpu->break_out)
 			return -EAGAIN;
 
 		/* Check if there are any interrupts which can be delivered
 		 * now: if so, this sets up the hander to be executed when we
 		 * next run the Guest. */
-		maybe_do_interrupt(lg);
+		maybe_do_interrupt(cpu);
 
 		/* All long-lived kernel loops need to check with this horrible
 		 * thing called the freezer.  If the Host is trying to suspend,
@@ -210,12 +210,12 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
 
 		/* Just make absolutely sure the Guest is still alive.  One of
 		 * those hypercalls could have been fatal, for example. */
-		if (lg->dead)
+		if (cpu->lg->dead)
 			break;
 
 		/* If the Guest asked to be stopped, we sleep.  The Guest's
 		 * clock timer or LHCALL_BREAK from the Waker will wake us. */
-		if (lg->halted) {
+		if (cpu->halted) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			schedule();
 			continue;
@@ -226,15 +226,17 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
 		local_irq_disable();
 
 		/* Actually run the Guest until something happens. */
-		lguest_arch_run_guest(lg);
+		lguest_arch_run_guest(cpu);
 
 		/* Now we're ready to be interrupted or moved to other CPUs */
 		local_irq_enable();
 
 		/* Now we deal with whatever happened to the Guest. */
-		lguest_arch_handle_trap(lg);
+		lguest_arch_handle_trap(cpu);
 	}
 
+	if (cpu->lg->dead == ERR_PTR(-ERESTART))
+		return -ERESTART;
 	/* The Guest is dead => "No such file or directory" */
 	return -ENOENT;
 }
@@ -253,7 +255,7 @@ static int __init init(void)
 
 	/* Lguest can't run under Xen, VMI or itself.  It does Tricky Stuff. */
 	if (paravirt_enabled()) {
-		printk("lguest is afraid of %s\n", pv_info.name);
+		printk("lguest is afraid of being a guest\n");
 		return -EPERM;
 	}
 
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index b478affe8f9..0f2cb4fd7c6 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -23,13 +23,14 @@
 #include <linux/uaccess.h>
 #include <linux/syscalls.h>
 #include <linux/mm.h>
+#include <linux/ktime.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include "lg.h"
 
 /*H:120 This is the core hypercall routine: where the Guest gets what it wants.
  * Or gets killed.  Or, in the case of LHCALL_CRASH, both. */
-static void do_hcall(struct lguest *lg, struct hcall_args *args)
+static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
 {
 	switch (args->arg0) {
 	case LHCALL_FLUSH_ASYNC:
@@ -39,60 +40,62 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args)
 	case LHCALL_LGUEST_INIT:
 		/* You can't get here unless you're already initialized.  Don't
 		 * do that. */
-		kill_guest(lg, "already have lguest_data");
+		kill_guest(cpu, "already have lguest_data");
 		break;
-	case LHCALL_CRASH: {
-		/* Crash is such a trivial hypercall that we do it in four
+	case LHCALL_SHUTDOWN: {
+		/* Shutdown is such a trivial hypercall that we do it in four
 		 * lines right here. */
 		char msg[128];
 		/* If the lgread fails, it will call kill_guest() itself; the
 		 * kill_guest() with the message will be ignored. */
-		__lgread(lg, msg, args->arg1, sizeof(msg));
+		__lgread(cpu, msg, args->arg1, sizeof(msg));
 		msg[sizeof(msg)-1] = '\0';
-		kill_guest(lg, "CRASH: %s", msg);
+		kill_guest(cpu, "CRASH: %s", msg);
+		if (args->arg2 == LGUEST_SHUTDOWN_RESTART)
+			cpu->lg->dead = ERR_PTR(-ERESTART);
 		break;
 	}
 	case LHCALL_FLUSH_TLB:
 		/* FLUSH_TLB comes in two flavors, depending on the
 		 * argument: */
 		if (args->arg1)
-			guest_pagetable_clear_all(lg);
+			guest_pagetable_clear_all(cpu);
 		else
-			guest_pagetable_flush_user(lg);
+			guest_pagetable_flush_user(cpu);
 		break;
 
 	/* All these calls simply pass the arguments through to the right
 	 * routines. */
 	case LHCALL_NEW_PGTABLE:
-		guest_new_pagetable(lg, args->arg1);
+		guest_new_pagetable(cpu, args->arg1);
 		break;
 	case LHCALL_SET_STACK:
-		guest_set_stack(lg, args->arg1, args->arg2, args->arg3);
+		guest_set_stack(cpu, args->arg1, args->arg2, args->arg3);
 		break;
 	case LHCALL_SET_PTE:
-		guest_set_pte(lg, args->arg1, args->arg2, __pte(args->arg3));
+		guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3));
 		break;
 	case LHCALL_SET_PMD:
-		guest_set_pmd(lg, args->arg1, args->arg2);
+		guest_set_pmd(cpu->lg, args->arg1, args->arg2);
 		break;
 	case LHCALL_SET_CLOCKEVENT:
-		guest_set_clockevent(lg, args->arg1);
+		guest_set_clockevent(cpu, args->arg1);
 		break;
 	case LHCALL_TS:
 		/* This sets the TS flag, as we saw used in run_guest(). */
-		lg->ts = args->arg1;
+		cpu->ts = args->arg1;
 		break;
 	case LHCALL_HALT:
 		/* Similarly, this sets the halted flag for run_guest(). */
-		lg->halted = 1;
+		cpu->halted = 1;
 		break;
 	case LHCALL_NOTIFY:
-		lg->pending_notify = args->arg1;
+		cpu->pending_notify = args->arg1;
 		break;
 	default:
 		/* It should be an architecture-specific hypercall. */
-		if (lguest_arch_do_hcall(lg, args))
-			kill_guest(lg, "Bad hypercall %li\n", args->arg0);
+		if (lguest_arch_do_hcall(cpu, args))
+			kill_guest(cpu, "Bad hypercall %li\n", args->arg0);
 	}
 }
 /*:*/
@@ -104,13 +107,13 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args)
  * Guest put them in the ring, but we also promise the Guest that they will
  * happen before any normal hypercall (which is why we check this before
  * checking for a normal hcall). */
-static void do_async_hcalls(struct lguest *lg)
+static void do_async_hcalls(struct lg_cpu *cpu)
 {
 	unsigned int i;
 	u8 st[LHCALL_RING_SIZE];
 
 	/* For simplicity, we copy the entire call status array in at once. */
-	if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st)))
+	if (copy_from_user(&st, &cpu->lg->lguest_data->hcall_status, sizeof(st)))
 		return;
 
 	/* We process "struct lguest_data"s hcalls[] ring once. */
@@ -119,7 +122,7 @@ static void do_async_hcalls(struct lguest *lg)
 		/* We remember where we were up to from last time.  This makes
 		 * sure that the hypercalls are done in the order the Guest
 		 * places them in the ring. */
-		unsigned int n = lg->next_hcall;
+		unsigned int n = cpu->next_hcall;
 
 		/* 0xFF means there's no call here (yet). */
 		if (st[n] == 0xFF)
@@ -127,65 +130,65 @@ static void do_async_hcalls(struct lguest *lg)
 
 		/* OK, we have hypercall.  Increment the "next_hcall" cursor,
 		 * and wrap back to 0 if we reach the end. */
-		if (++lg->next_hcall == LHCALL_RING_SIZE)
-			lg->next_hcall = 0;
+		if (++cpu->next_hcall == LHCALL_RING_SIZE)
+			cpu->next_hcall = 0;
 
 		/* Copy the hypercall arguments into a local copy of
 		 * the hcall_args struct. */
-		if (copy_from_user(&args, &lg->lguest_data->hcalls[n],
+		if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n],
 				   sizeof(struct hcall_args))) {
-			kill_guest(lg, "Fetching async hypercalls");
+			kill_guest(cpu, "Fetching async hypercalls");
 			break;
 		}
 
 		/* Do the hypercall, same as a normal one. */
-		do_hcall(lg, &args);
+		do_hcall(cpu, &args);
 
 		/* Mark the hypercall done. */
-		if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) {
-			kill_guest(lg, "Writing result for async hypercall");
+		if (put_user(0xFF, &cpu->lg->lguest_data->hcall_status[n])) {
+			kill_guest(cpu, "Writing result for async hypercall");
 			break;
 		}
 
 		/* Stop doing hypercalls if they want to notify the Launcher:
 		 * it needs to service this first. */
-		if (lg->pending_notify)
+		if (cpu->pending_notify)
 			break;
 	}
 }
 
 /* Last of all, we look at what happens first of all.  The very first time the
  * Guest makes a hypercall, we end up here to set things up: */
-static void initialize(struct lguest *lg)
+static void initialize(struct lg_cpu *cpu)
 {
 	/* You can't do anything until you're initialized.  The Guest knows the
 	 * rules, so we're unforgiving here. */
-	if (lg->hcall->arg0 != LHCALL_LGUEST_INIT) {
-		kill_guest(lg, "hypercall %li before INIT", lg->hcall->arg0);
+	if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) {
+		kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0);
 		return;
 	}
 
-	if (lguest_arch_init_hypercalls(lg))
-		kill_guest(lg, "bad guest page %p", lg->lguest_data);
+	if (lguest_arch_init_hypercalls(cpu))
+		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
 
 	/* The Guest tells us where we're not to deliver interrupts by putting
 	 * the range of addresses into "struct lguest_data". */
-	if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
-	    || get_user(lg->noirq_end, &lg->lguest_data->noirq_end))
-		kill_guest(lg, "bad guest page %p", lg->lguest_data);
+	if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start)
+	    || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end))
+		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
 
 	/* We write the current time into the Guest's data page once so it can
 	 * set its clock. */
-	write_timestamp(lg);
+	write_timestamp(cpu);
 
 	/* page_tables.c will also do some setup. */
-	page_table_guest_data_init(lg);
+	page_table_guest_data_init(cpu);
 
 	/* This is the one case where the above accesses might have been the
 	 * first write to a Guest page.  This may have caused a copy-on-write
 	 * fault, but the old page might be (read-only) in the Guest
 	 * pagetable. */
-	guest_pagetable_clear_all(lg);
+	guest_pagetable_clear_all(cpu);
 }
 
 /*H:100
@@ -194,27 +197,27 @@ static void initialize(struct lguest *lg)
  * Remember from the Guest, hypercalls come in two flavors: normal and
  * asynchronous.  This file handles both of types.
  */
-void do_hypercalls(struct lguest *lg)
+void do_hypercalls(struct lg_cpu *cpu)
 {
 	/* Not initialized yet?  This hypercall must do it. */
-	if (unlikely(!lg->lguest_data)) {
+	if (unlikely(!cpu->lg->lguest_data)) {
 		/* Set up the "struct lguest_data" */
-		initialize(lg);
+		initialize(cpu);
 		/* Hcall is done. */
-		lg->hcall = NULL;
+		cpu->hcall = NULL;
 		return;
 	}
 
 	/* The Guest has initialized.
 	 *
 	 * Look in the hypercall ring for the async hypercalls: */
-	do_async_hcalls(lg);
+	do_async_hcalls(cpu);
 
 	/* If we stopped reading the hypercall ring because the Guest did a
 	 * NOTIFY to the Launcher, we want to return now.  Otherwise we do
 	 * the hypercall. */
-	if (!lg->pending_notify) {
-		do_hcall(lg, lg->hcall);
+	if (!cpu->pending_notify) {
+		do_hcall(cpu, cpu->hcall);
 		/* Tricky point: we reset the hcall pointer to mark the
 		 * hypercall as "done".  We use the hcall pointer rather than
 		 * the trap number to indicate a hypercall is pending.
@@ -225,16 +228,17 @@ void do_hypercalls(struct lguest *lg)
 		 * Launcher, the run_guest() loop will exit without running the
 		 * Guest.  When it comes back it would try to re-run the
 		 * hypercall. */
-		lg->hcall = NULL;
+		cpu->hcall = NULL;
 	}
 }
 
 /* This routine supplies the Guest with time: it's used for wallclock time at
  * initial boot and as a rough time source if the TSC isn't available. */
-void write_timestamp(struct lguest *lg)
+void write_timestamp(struct lg_cpu *cpu)
 {
 	struct timespec now;
 	ktime_get_real_ts(&now);
-	if (copy_to_user(&lg->lguest_data->time, &now, sizeof(struct timespec)))
-		kill_guest(lg, "Writing timestamp");
+	if (copy_to_user(&cpu->lg->lguest_data->time,
+			 &now, sizeof(struct timespec)))
+		kill_guest(cpu, "Writing timestamp");
 }
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 2b66f79c208..32e97c1858e 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -41,11 +41,11 @@ static int idt_present(u32 lo, u32 hi)
 
 /* We need a helper to "push" a value onto the Guest's stack, since that's a
  * big part of what delivering an interrupt does. */
-static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
+static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val)
 {
 	/* Stack grows upwards: move stack then write value. */
 	*gstack -= 4;
-	lgwrite(lg, *gstack, u32, val);
+	lgwrite(cpu, *gstack, u32, val);
 }
 
 /*H:210 The set_guest_interrupt() routine actually delivers the interrupt or
@@ -60,7 +60,7 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
  * We set up the stack just like the CPU does for a real interrupt, so it's
  * identical for the Guest (and the standard "iret" instruction will undo
  * it). */
-static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
+static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err)
 {
 	unsigned long gstack, origstack;
 	u32 eflags, ss, irq_enable;
@@ -69,59 +69,59 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
 	/* There are two cases for interrupts: one where the Guest is already
 	 * in the kernel, and a more complex one where the Guest is in
 	 * userspace.  We check the privilege level to find out. */
-	if ((lg->regs->ss&0x3) != GUEST_PL) {
+	if ((cpu->regs->ss&0x3) != GUEST_PL) {
 		/* The Guest told us their kernel stack with the SET_STACK
 		 * hypercall: both the virtual address and the segment */
-		virtstack = lg->esp1;
-		ss = lg->ss1;
+		virtstack = cpu->esp1;
+		ss = cpu->ss1;
 
-		origstack = gstack = guest_pa(lg, virtstack);
+		origstack = gstack = guest_pa(cpu, virtstack);
 		/* We push the old stack segment and pointer onto the new
 		 * stack: when the Guest does an "iret" back from the interrupt
 		 * handler the CPU will notice they're dropping privilege
 		 * levels and expect these here. */
-		push_guest_stack(lg, &gstack, lg->regs->ss);
-		push_guest_stack(lg, &gstack, lg->regs->esp);
+		push_guest_stack(cpu, &gstack, cpu->regs->ss);
+		push_guest_stack(cpu, &gstack, cpu->regs->esp);
 	} else {
 		/* We're staying on the same Guest (kernel) stack. */
-		virtstack = lg->regs->esp;
-		ss = lg->regs->ss;
+		virtstack = cpu->regs->esp;
+		ss = cpu->regs->ss;
 
-		origstack = gstack = guest_pa(lg, virtstack);
+		origstack = gstack = guest_pa(cpu, virtstack);
 	}
 
 	/* Remember that we never let the Guest actually disable interrupts, so
 	 * the "Interrupt Flag" bit is always set.  We copy that bit from the
 	 * Guest's "irq_enabled" field into the eflags word: we saw the Guest
 	 * copy it back in "lguest_iret". */
-	eflags = lg->regs->eflags;
-	if (get_user(irq_enable, &lg->lguest_data->irq_enabled) == 0
+	eflags = cpu->regs->eflags;
+	if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0
 	    && !(irq_enable & X86_EFLAGS_IF))
 		eflags &= ~X86_EFLAGS_IF;
 
 	/* An interrupt is expected to push three things on the stack: the old
 	 * "eflags" word, the old code segment, and the old instruction
 	 * pointer. */
-	push_guest_stack(lg, &gstack, eflags);
-	push_guest_stack(lg, &gstack, lg->regs->cs);
-	push_guest_stack(lg, &gstack, lg->regs->eip);
+	push_guest_stack(cpu, &gstack, eflags);
+	push_guest_stack(cpu, &gstack, cpu->regs->cs);
+	push_guest_stack(cpu, &gstack, cpu->regs->eip);
 
 	/* For the six traps which supply an error code, we push that, too. */
 	if (has_err)
-		push_guest_stack(lg, &gstack, lg->regs->errcode);
+		push_guest_stack(cpu, &gstack, cpu->regs->errcode);
 
 	/* Now we've pushed all the old state, we change the stack, the code
 	 * segment and the address to execute. */
-	lg->regs->ss = ss;
-	lg->regs->esp = virtstack + (gstack - origstack);
-	lg->regs->cs = (__KERNEL_CS|GUEST_PL);
-	lg->regs->eip = idt_address(lo, hi);
+	cpu->regs->ss = ss;
+	cpu->regs->esp = virtstack + (gstack - origstack);
+	cpu->regs->cs = (__KERNEL_CS|GUEST_PL);
+	cpu->regs->eip = idt_address(lo, hi);
 
 	/* There are two kinds of interrupt handlers: 0xE is an "interrupt
 	 * gate" which expects interrupts to be disabled on entry. */
 	if (idt_type(lo, hi) == 0xE)
-		if (put_user(0, &lg->lguest_data->irq_enabled))
-			kill_guest(lg, "Disabling interrupts");
+		if (put_user(0, &cpu->lg->lguest_data->irq_enabled))
+			kill_guest(cpu, "Disabling interrupts");
 }
 
 /*H:205
@@ -129,23 +129,23 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
  *
  * maybe_do_interrupt() gets called before every entry to the Guest, to see if
  * we should divert the Guest to running an interrupt handler. */
-void maybe_do_interrupt(struct lguest *lg)
+void maybe_do_interrupt(struct lg_cpu *cpu)
 {
 	unsigned int irq;
 	DECLARE_BITMAP(blk, LGUEST_IRQS);
 	struct desc_struct *idt;
 
 	/* If the Guest hasn't even initialized yet, we can do nothing. */
-	if (!lg->lguest_data)
+	if (!cpu->lg->lguest_data)
 		return;
 
 	/* Take our "irqs_pending" array and remove any interrupts the Guest
 	 * wants blocked: the result ends up in "blk". */
-	if (copy_from_user(&blk, lg->lguest_data->blocked_interrupts,
+	if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts,
 			   sizeof(blk)))
 		return;
 
-	bitmap_andnot(blk, lg->irqs_pending, blk, LGUEST_IRQS);
+	bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS);
 
 	/* Find the first interrupt. */
 	irq = find_first_bit(blk, LGUEST_IRQS);
@@ -155,19 +155,20 @@ void maybe_do_interrupt(struct lguest *lg)
 
 	/* They may be in the middle of an iret, where they asked us never to
 	 * deliver interrupts. */
-	if (lg->regs->eip >= lg->noirq_start && lg->regs->eip < lg->noirq_end)
+	if (cpu->regs->eip >= cpu->lg->noirq_start &&
+	   (cpu->regs->eip < cpu->lg->noirq_end))
 		return;
 
 	/* If they're halted, interrupts restart them. */
-	if (lg->halted) {
+	if (cpu->halted) {
 		/* Re-enable interrupts. */
-		if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled))
-			kill_guest(lg, "Re-enabling interrupts");
-		lg->halted = 0;
+		if (put_user(X86_EFLAGS_IF, &cpu->lg->lguest_data->irq_enabled))
+			kill_guest(cpu, "Re-enabling interrupts");
+		cpu->halted = 0;
 	} else {
 		/* Otherwise we check if they have interrupts disabled. */
 		u32 irq_enabled;
-		if (get_user(irq_enabled, &lg->lguest_data->irq_enabled))
+		if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled))
 			irq_enabled = 0;
 		if (!irq_enabled)
 			return;
@@ -176,15 +177,15 @@ void maybe_do_interrupt(struct lguest *lg)
 	/* Look at the IDT entry the Guest gave us for this interrupt.  The
 	 * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip
 	 * over them. */
-	idt = &lg->arch.idt[FIRST_EXTERNAL_VECTOR+irq];
+	idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq];
 	/* If they don't have a handler (yet?), we just ignore it */
 	if (idt_present(idt->a, idt->b)) {
 		/* OK, mark it no longer pending and deliver it. */
-		clear_bit(irq, lg->irqs_pending);
+		clear_bit(irq, cpu->irqs_pending);
 		/* set_guest_interrupt() takes the interrupt descriptor and a
 		 * flag to say whether this interrupt pushes an error code onto
 		 * the stack as well: virtual interrupts never do. */
-		set_guest_interrupt(lg, idt->a, idt->b, 0);
+		set_guest_interrupt(cpu, idt->a, idt->b, 0);
 	}
 
 	/* Every time we deliver an interrupt, we update the timestamp in the
@@ -192,7 +193,7 @@ void maybe_do_interrupt(struct lguest *lg)
 	 * did this more often, but it can actually be quite slow: doing it
 	 * here is a compromise which means at least it gets updated every
 	 * timer interrupt. */
-	write_timestamp(lg);
+	write_timestamp(cpu);
 }
 /*:*/
 
@@ -245,19 +246,19 @@ static int has_err(unsigned int trap)
 }
 
 /* deliver_trap() returns true if it could deliver the trap. */
-int deliver_trap(struct lguest *lg, unsigned int num)
+int deliver_trap(struct lg_cpu *cpu, unsigned int num)
 {
 	/* Trap numbers are always 8 bit, but we set an impossible trap number
 	 * for traps inside the Switcher, so check that here. */
-	if (num >= ARRAY_SIZE(lg->arch.idt))
+	if (num >= ARRAY_SIZE(cpu->arch.idt))
 		return 0;
 
 	/* Early on the Guest hasn't set the IDT entries (or maybe it put a
 	 * bogus one in): if we fail here, the Guest will be killed. */
-	if (!idt_present(lg->arch.idt[num].a, lg->arch.idt[num].b))
+	if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b))
 		return 0;
-	set_guest_interrupt(lg, lg->arch.idt[num].a, lg->arch.idt[num].b,
-			    has_err(num));
+	set_guest_interrupt(cpu, cpu->arch.idt[num].a,
+			    cpu->arch.idt[num].b, has_err(num));
 	return 1;
 }
 
@@ -309,18 +310,18 @@ static int direct_trap(unsigned int num)
  * the Guest.
  *
  * Which is deeply unfair, because (literally!) it wasn't the Guests' fault. */
-void pin_stack_pages(struct lguest *lg)
+void pin_stack_pages(struct lg_cpu *cpu)
 {
 	unsigned int i;
 
 	/* Depending on the CONFIG_4KSTACKS option, the Guest can have one or
 	 * two pages of stack space. */
-	for (i = 0; i < lg->stack_pages; i++)
+	for (i = 0; i < cpu->lg->stack_pages; i++)
 		/* The stack grows *upwards*, so the address we're given is the
 		 * start of the page after the kernel stack.  Subtract one to
 		 * get back onto the first stack page, and keep subtracting to
 		 * get to the rest of the stack pages. */
-		pin_page(lg, lg->esp1 - 1 - i * PAGE_SIZE);
+		pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE);
 }
 
 /* Direct traps also mean that we need to know whenever the Guest wants to use
@@ -331,21 +332,21 @@ void pin_stack_pages(struct lguest *lg)
  *
  * In Linux each process has its own kernel stack, so this happens a lot: we
  * change stacks on each context switch. */
-void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages)
+void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages)
 {
 	/* You are not allowed have a stack segment with privilege level 0: bad
 	 * Guest! */
 	if ((seg & 0x3) != GUEST_PL)
-		kill_guest(lg, "bad stack segment %i", seg);
+		kill_guest(cpu, "bad stack segment %i", seg);
 	/* We only expect one or two stack pages. */
 	if (pages > 2)
-		kill_guest(lg, "bad stack pages %u", pages);
+		kill_guest(cpu, "bad stack pages %u", pages);
 	/* Save where the stack is, and how many pages */
-	lg->ss1 = seg;
-	lg->esp1 = esp;
-	lg->stack_pages = pages;
+	cpu->ss1 = seg;
+	cpu->esp1 = esp;
+	cpu->lg->stack_pages = pages;
 	/* Make sure the new stack pages are mapped */
-	pin_stack_pages(lg);
+	pin_stack_pages(cpu);
 }
 
 /* All this reference to mapping stacks leads us neatly into the other complex
@@ -353,7 +354,7 @@ void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages)
 
 /*H:235 This is the routine which actually checks the Guest's IDT entry and
  * transfers it into the entry in "struct lguest": */
-static void set_trap(struct lguest *lg, struct desc_struct *trap,
+static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap,
 		     unsigned int num, u32 lo, u32 hi)
 {
 	u8 type = idt_type(lo, hi);
@@ -366,7 +367,7 @@ static void set_trap(struct lguest *lg, struct desc_struct *trap,
 
 	/* We only support interrupt and trap gates. */
 	if (type != 0xE && type != 0xF)
-		kill_guest(lg, "bad IDT type %i", type);
+		kill_guest(cpu, "bad IDT type %i", type);
 
 	/* We only copy the handler address, present bit, privilege level and
 	 * type.  The privilege level controls where the trap can be triggered
@@ -383,7 +384,7 @@ static void set_trap(struct lguest *lg, struct desc_struct *trap,
  *
  * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the
  * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. */
-void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi)
+void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi)
 {
 	/* Guest never handles: NMI, doublefault, spurious interrupt or
 	 * hypercall.  We ignore when it tries to set them. */
@@ -392,13 +393,13 @@ void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi)
 
 	/* Mark the IDT as changed: next time the Guest runs we'll know we have
 	 * to copy this again. */
-	lg->changed |= CHANGED_IDT;
+	cpu->changed |= CHANGED_IDT;
 
 	/* Check that the Guest doesn't try to step outside the bounds. */
-	if (num >= ARRAY_SIZE(lg->arch.idt))
-		kill_guest(lg, "Setting idt entry %u", num);
+	if (num >= ARRAY_SIZE(cpu->arch.idt))
+		kill_guest(cpu, "Setting idt entry %u", num);
 	else
-		set_trap(lg, &lg->arch.idt[num], num, lo, hi);
+		set_trap(cpu, &cpu->arch.idt[num], num, lo, hi);
 }
 
 /* The default entry for each interrupt points into the Switcher routines which
@@ -434,14 +435,14 @@ void setup_default_idt_entries(struct lguest_ro_state *state,
 /*H:240 We don't use the IDT entries in the "struct lguest" directly, instead
  * we copy them into the IDT which we've set up for Guests on this CPU, just
  * before we run the Guest.  This routine does that copy. */
-void copy_traps(const struct lguest *lg, struct desc_struct *idt,
+void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt,
 		const unsigned long *def)
 {
 	unsigned int i;
 
 	/* We can simply copy the direct traps, otherwise we use the default
 	 * ones in the Switcher: they will return to the Host. */
-	for (i = 0; i < ARRAY_SIZE(lg->arch.idt); i++) {
+	for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) {
 		/* If no Guest can ever override this trap, leave it alone. */
 		if (!direct_trap(i))
 			continue;
@@ -450,8 +451,8 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt,
 		 * Interrupt gates (type 14) disable interrupts as they are
 		 * entered, which we never let the Guest do.  Not present
 		 * entries (type 0x0) also can't go direct, of course. */
-		if (idt_type(lg->arch.idt[i].a, lg->arch.idt[i].b) == 0xF)
-			idt[i] = lg->arch.idt[i];
+		if (idt_type(cpu->arch.idt[i].a, cpu->arch.idt[i].b) == 0xF)
+			idt[i] = cpu->arch.idt[i];
 		else
 			/* Reset it to the default. */
 			default_idt_entry(&idt[i], i, def[i]);
@@ -470,13 +471,13 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt,
  * infrastructure to set a callback at that time.
  *
  * 0 means "turn off the clock". */
-void guest_set_clockevent(struct lguest *lg, unsigned long delta)
+void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta)
 {
 	ktime_t expires;
 
 	if (unlikely(delta == 0)) {
 		/* Clock event device is shutting down. */
-		hrtimer_cancel(&lg->hrt);
+		hrtimer_cancel(&cpu->hrt);
 		return;
 	}
 
@@ -484,25 +485,25 @@ void guest_set_clockevent(struct lguest *lg, unsigned long delta)
 	 * all the time between now and the timer interrupt it asked for.  This
 	 * is almost always the right thing to do. */
 	expires = ktime_add_ns(ktime_get_real(), delta);
-	hrtimer_start(&lg->hrt, expires, HRTIMER_MODE_ABS);
+	hrtimer_start(&cpu->hrt, expires, HRTIMER_MODE_ABS);
 }
 
 /* This is the function called when the Guest's timer expires. */
 static enum hrtimer_restart clockdev_fn(struct hrtimer *timer)
 {
-	struct lguest *lg = container_of(timer, struct lguest, hrt);
+	struct lg_cpu *cpu = container_of(timer, struct lg_cpu, hrt);
 
 	/* Remember the first interrupt is the timer interrupt. */
-	set_bit(0, lg->irqs_pending);
+	set_bit(0, cpu->irqs_pending);
 	/* If the Guest is actually stopped, we need to wake it up. */
-	if (lg->halted)
-		wake_up_process(lg->tsk);
+	if (cpu->halted)
+		wake_up_process(cpu->tsk);
 	return HRTIMER_NORESTART;
 }
 
 /* This sets up the timer for this Guest. */
-void init_clockdev(struct lguest *lg)
+void init_clockdev(struct lg_cpu *cpu)
 {
-	hrtimer_init(&lg->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS);
-	lg->hrt.function = clockdev_fn;
+	hrtimer_init(&cpu->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+	cpu->hrt.function = clockdev_fn;
 }
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 86924891b5e..2337e1a06f0 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -8,6 +8,7 @@
 #include <linux/lguest.h>
 #include <linux/lguest_launcher.h>
 #include <linux/wait.h>
+#include <linux/hrtimer.h>
 #include <linux/err.h>
 #include <asm/semaphore.h>
 
@@ -38,58 +39,72 @@ struct lguest_pages
 #define CHANGED_GDT_TLS		4 /* Actually a subset of CHANGED_GDT */
 #define CHANGED_ALL	        3
 
-/* The private info the thread maintains about the guest. */
-struct lguest
-{
-	/* At end of a page shared mapped over lguest_pages in guest.  */
-	unsigned long regs_page;
-	struct lguest_regs *regs;
-	struct lguest_data __user *lguest_data;
+struct lguest;
+
+struct lg_cpu {
+	unsigned int id;
+	struct lguest *lg;
 	struct task_struct *tsk;
 	struct mm_struct *mm; 	/* == tsk->mm, but that becomes NULL on exit */
-	u32 pfn_limit;
-	/* This provides the offset to the base of guest-physical
-	 * memory in the Launcher. */
-	void __user *mem_base;
-	unsigned long kernel_address;
+
 	u32 cr2;
-	int halted;
 	int ts;
-	u32 next_hcall;
 	u32 esp1;
 	u8 ss1;
 
+	/* Bitmap of what has changed: see CHANGED_* above. */
+	int changed;
+
+	unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */
+
+	/* At end of a page shared mapped over lguest_pages in guest.  */
+	unsigned long regs_page;
+	struct lguest_regs *regs;
+
+	struct lguest_pages *last_pages;
+
+	int cpu_pgd; /* which pgd this cpu is currently using */
+
 	/* If a hypercall was asked for, this points to the arguments. */
 	struct hcall_args *hcall;
+	u32 next_hcall;
+
+	/* Virtual clock device */
+	struct hrtimer hrt;
 
 	/* Do we need to stop what we're doing and return to userspace? */
 	int break_out;
 	wait_queue_head_t break_wq;
+	int halted;
 
-	/* Bitmap of what has changed: see CHANGED_* above. */
-	int changed;
-	struct lguest_pages *last_pages;
+	/* Pending virtual interrupts */
+	DECLARE_BITMAP(irqs_pending, LGUEST_IRQS);
+
+	struct lg_cpu_arch arch;
+};
+
+/* The private info the thread maintains about the guest. */
+struct lguest
+{
+	struct lguest_data __user *lguest_data;
+	struct lg_cpu cpus[NR_CPUS];
+	unsigned int nr_cpus;
+
+	u32 pfn_limit;
+	/* This provides the offset to the base of guest-physical
+	 * memory in the Launcher. */
+	v
author	Linus Torvalds <torvalds@linux-foundation.org>	2008-01-31 09:35:32 +1100
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-01-31 09:35:32 +1100
commit	d145c7253c8cb2ed8a75a8839621b0bb8f778820 (patch)
tree	fac21920d149a2cddfdfbde65066ff98935a9c57 /drivers
parent	44c3b59102e3ecc7a01e9811862633e670595e51 (diff)
parent	84f12e39c856a8b1ab407f8216ecebaf4204b94d (diff)