aboutsummaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-01-31 09:35:32 +1100
committerLinus Torvalds <torvalds@linux-foundation.org>2008-01-31 09:35:32 +1100
commitd145c7253c8cb2ed8a75a8839621b0bb8f778820 (patch)
treefac21920d149a2cddfdfbde65066ff98935a9c57 /drivers
parent44c3b59102e3ecc7a01e9811862633e670595e51 (diff)
parent84f12e39c856a8b1ab407f8216ecebaf4204b94d (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus
* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus: (27 commits) lguest: use __PAGE_KERNEL instead of _PAGE_KERNEL lguest: Use explicit includes rateher than indirect lguest: get rid of lg variable assignments lguest: change gpte_addr header lguest: move changed bitmap to lg_cpu lguest: move last_pages to lg_cpu lguest: change last_guest to last_cpu lguest: change spte_addr header lguest: per-vcpu lguest pgdir management lguest: make pending notifications per-vcpu lguest: makes special fields be per-vcpu lguest: per-vcpu lguest task management lguest: replace lguest_arch with lg_cpu_arch. lguest: make registers per-vcpu lguest: make emulate_insn receive a vcpu struct. lguest: map_switcher_in_guest() per-vcpu lguest: per-vcpu interrupt processing. lguest: per-vcpu lguest timers lguest: make hypercalls use the vcpu struct lguest: make write() operation smp aware ... Manual conflict resolved (maybe even correctly, who knows) in drivers/lguest/x86/core.c
Diffstat (limited to 'drivers')
-rw-r--r--drivers/Makefile2
-rw-r--r--drivers/lguest/core.c46
-rw-r--r--drivers/lguest/hypercalls.c106
-rw-r--r--drivers/lguest/interrupts_and_traps.c149
-rw-r--r--drivers/lguest/lg.h154
-rw-r--r--drivers/lguest/lguest_user.c147
-rw-r--r--drivers/lguest/page_tables.c179
-rw-r--r--drivers/lguest/segments.c48
-rw-r--r--drivers/lguest/x86/core.c127
9 files changed, 513 insertions, 445 deletions
diff --git a/drivers/Makefile b/drivers/Makefile
index 9e1f808e43c..0ee9a8a4095 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -72,7 +72,7 @@ obj-$(CONFIG_ISDN) += isdn/
obj-$(CONFIG_EDAC) += edac/
obj-$(CONFIG_MCA) += mca/
obj-$(CONFIG_EISA) += eisa/
-obj-$(CONFIG_LGUEST_GUEST) += lguest/
+obj-y += lguest/
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_CPU_IDLE) += cpuidle/
obj-$(CONFIG_MMC) += mmc/
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index cb4c67025d5..7743d73768d 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -151,43 +151,43 @@ int lguest_address_ok(const struct lguest *lg,
/* This routine copies memory from the Guest. Here we can see how useful the
* kill_lguest() routine we met in the Launcher can be: we return a random
* value (all zeroes) instead of needing to return an error. */
-void __lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
+void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes)
{
- if (!lguest_address_ok(lg, addr, bytes)
- || copy_from_user(b, lg->mem_base + addr, bytes) != 0) {
+ if (!lguest_address_ok(cpu->lg, addr, bytes)
+ || copy_from_user(b, cpu->lg->mem_base + addr, bytes) != 0) {
/* copy_from_user should do this, but as we rely on it... */
memset(b, 0, bytes);
- kill_guest(lg, "bad read address %#lx len %u", addr, bytes);
+ kill_guest(cpu, "bad read address %#lx len %u", addr, bytes);
}
}
/* This is the write (copy into guest) version. */
-void __lgwrite(struct lguest *lg, unsigned long addr, const void *b,
+void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b,
unsigned bytes)
{
- if (!lguest_address_ok(lg, addr, bytes)
- || copy_to_user(lg->mem_base + addr, b, bytes) != 0)
- kill_guest(lg, "bad write address %#lx len %u", addr, bytes);
+ if (!lguest_address_ok(cpu->lg, addr, bytes)
+ || copy_to_user(cpu->lg->mem_base + addr, b, bytes) != 0)
+ kill_guest(cpu, "bad write address %#lx len %u", addr, bytes);
}
/*:*/
/*H:030 Let's jump straight to the the main loop which runs the Guest.
* Remember, this is called by the Launcher reading /dev/lguest, and we keep
* going around and around until something interesting happens. */
-int run_guest(struct lguest *lg, unsigned long __user *user)
+int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
{
/* We stop running once the Guest is dead. */
- while (!lg->dead) {
+ while (!cpu->lg->dead) {
/* First we run any hypercalls the Guest wants done. */
- if (lg->hcall)
- do_hypercalls(lg);
+ if (cpu->hcall)
+ do_hypercalls(cpu);
/* It's possible the Guest did a NOTIFY hypercall to the
* Launcher, in which case we return from the read() now. */
- if (lg->pending_notify) {
- if (put_user(lg->pending_notify, user))
+ if (cpu->pending_notify) {
+ if (put_user(cpu->pending_notify, user))
return -EFAULT;
- return sizeof(lg->pending_notify);
+ return sizeof(cpu->pending_notify);
}
/* Check for signals */
@@ -195,13 +195,13 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
return -ERESTARTSYS;
/* If Waker set break_out, return to Launcher. */
- if (lg->break_out)
+ if (cpu->break_out)
return -EAGAIN;
/* Check if there are any interrupts which can be delivered
* now: if so, this sets up the hander to be executed when we
* next run the Guest. */
- maybe_do_interrupt(lg);
+ maybe_do_interrupt(cpu);
/* All long-lived kernel loops need to check with this horrible
* thing called the freezer. If the Host is trying to suspend,
@@ -210,12 +210,12 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
/* Just make absolutely sure the Guest is still alive. One of
* those hypercalls could have been fatal, for example. */
- if (lg->dead)
+ if (cpu->lg->dead)
break;
/* If the Guest asked to be stopped, we sleep. The Guest's
* clock timer or LHCALL_BREAK from the Waker will wake us. */
- if (lg->halted) {
+ if (cpu->halted) {
set_current_state(TASK_INTERRUPTIBLE);
schedule();
continue;
@@ -226,15 +226,17 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
local_irq_disable();
/* Actually run the Guest until something happens. */
- lguest_arch_run_guest(lg);
+ lguest_arch_run_guest(cpu);
/* Now we're ready to be interrupted or moved to other CPUs */
local_irq_enable();
/* Now we deal with whatever happened to the Guest. */
- lguest_arch_handle_trap(lg);
+ lguest_arch_handle_trap(cpu);
}
+ if (cpu->lg->dead == ERR_PTR(-ERESTART))
+ return -ERESTART;
/* The Guest is dead => "No such file or directory" */
return -ENOENT;
}
@@ -253,7 +255,7 @@ static int __init init(void)
/* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */
if (paravirt_enabled()) {
- printk("lguest is afraid of %s\n", pv_info.name);
+ printk("lguest is afraid of being a guest\n");
return -EPERM;
}
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index b478affe8f9..0f2cb4fd7c6 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -23,13 +23,14 @@
#include <linux/uaccess.h>
#include <linux/syscalls.h>
#include <linux/mm.h>
+#include <linux/ktime.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include "lg.h"
/*H:120 This is the core hypercall routine: where the Guest gets what it wants.
* Or gets killed. Or, in the case of LHCALL_CRASH, both. */
-static void do_hcall(struct lguest *lg, struct hcall_args *args)
+static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
{
switch (args->arg0) {
case LHCALL_FLUSH_ASYNC:
@@ -39,60 +40,62 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args)
case LHCALL_LGUEST_INIT:
/* You can't get here unless you're already initialized. Don't
* do that. */
- kill_guest(lg, "already have lguest_data");
+ kill_guest(cpu, "already have lguest_data");
break;
- case LHCALL_CRASH: {
- /* Crash is such a trivial hypercall that we do it in four
+ case LHCALL_SHUTDOWN: {
+ /* Shutdown is such a trivial hypercall that we do it in four
* lines right here. */
char msg[128];
/* If the lgread fails, it will call kill_guest() itself; the
* kill_guest() with the message will be ignored. */
- __lgread(lg, msg, args->arg1, sizeof(msg));
+ __lgread(cpu, msg, args->arg1, sizeof(msg));
msg[sizeof(msg)-1] = '\0';
- kill_guest(lg, "CRASH: %s", msg);
+ kill_guest(cpu, "CRASH: %s", msg);
+ if (args->arg2 == LGUEST_SHUTDOWN_RESTART)
+ cpu->lg->dead = ERR_PTR(-ERESTART);
break;
}
case LHCALL_FLUSH_TLB:
/* FLUSH_TLB comes in two flavors, depending on the
* argument: */
if (args->arg1)
- guest_pagetable_clear_all(lg);
+ guest_pagetable_clear_all(cpu);
else
- guest_pagetable_flush_user(lg);
+ guest_pagetable_flush_user(cpu);
break;
/* All these calls simply pass the arguments through to the right
* routines. */
case LHCALL_NEW_PGTABLE:
- guest_new_pagetable(lg, args->arg1);
+ guest_new_pagetable(cpu, args->arg1);
break;
case LHCALL_SET_STACK:
- guest_set_stack(lg, args->arg1, args->arg2, args->arg3);
+ guest_set_stack(cpu, args->arg1, args->arg2, args->arg3);
break;
case LHCALL_SET_PTE:
- guest_set_pte(lg, args->arg1, args->arg2, __pte(args->arg3));
+ guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3));
break;
case LHCALL_SET_PMD:
- guest_set_pmd(lg, args->arg1, args->arg2);
+ guest_set_pmd(cpu->lg, args->arg1, args->arg2);
break;
case LHCALL_SET_CLOCKEVENT:
- guest_set_clockevent(lg, args->arg1);
+ guest_set_clockevent(cpu, args->arg1);
break;
case LHCALL_TS:
/* This sets the TS flag, as we saw used in run_guest(). */
- lg->ts = args->arg1;
+ cpu->ts = args->arg1;
break;
case LHCALL_HALT:
/* Similarly, this sets the halted flag for run_guest(). */
- lg->halted = 1;
+ cpu->halted = 1;
break;
case LHCALL_NOTIFY:
- lg->pending_notify = args->arg1;
+ cpu->pending_notify = args->arg1;
break;
default:
/* It should be an architecture-specific hypercall. */
- if (lguest_arch_do_hcall(lg, args))
- kill_guest(lg, "Bad hypercall %li\n", args->arg0);
+ if (lguest_arch_do_hcall(cpu, args))
+ kill_guest(cpu, "Bad hypercall %li\n", args->arg0);
}
}
/*:*/
@@ -104,13 +107,13 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args)
* Guest put them in the ring, but we also promise the Guest that they will
* happen before any normal hypercall (which is why we check this before
* checking for a normal hcall). */
-static void do_async_hcalls(struct lguest *lg)
+static void do_async_hcalls(struct lg_cpu *cpu)
{
unsigned int i;
u8 st[LHCALL_RING_SIZE];
/* For simplicity, we copy the entire call status array in at once. */
- if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st)))
+ if (copy_from_user(&st, &cpu->lg->lguest_data->hcall_status, sizeof(st)))
return;
/* We process "struct lguest_data"s hcalls[] ring once. */
@@ -119,7 +122,7 @@ static void do_async_hcalls(struct lguest *lg)
/* We remember where we were up to from last time. This makes
* sure that the hypercalls are done in the order the Guest
* places them in the ring. */
- unsigned int n = lg->next_hcall;
+ unsigned int n = cpu->next_hcall;
/* 0xFF means there's no call here (yet). */
if (st[n] == 0xFF)
@@ -127,65 +130,65 @@ static void do_async_hcalls(struct lguest *lg)
/* OK, we have hypercall. Increment the "next_hcall" cursor,
* and wrap back to 0 if we reach the end. */
- if (++lg->next_hcall == LHCALL_RING_SIZE)
- lg->next_hcall = 0;
+ if (++cpu->next_hcall == LHCALL_RING_SIZE)
+ cpu->next_hcall = 0;
/* Copy the hypercall arguments into a local copy of
* the hcall_args struct. */
- if (copy_from_user(&args, &lg->lguest_data->hcalls[n],
+ if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n],
sizeof(struct hcall_args))) {
- kill_guest(lg, "Fetching async hypercalls");
+ kill_guest(cpu, "Fetching async hypercalls");
break;
}
/* Do the hypercall, same as a normal one. */
- do_hcall(lg, &args);
+ do_hcall(cpu, &args);
/* Mark the hypercall done. */
- if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) {
- kill_guest(lg, "Writing result for async hypercall");
+ if (put_user(0xFF, &cpu->lg->lguest_data->hcall_status[n])) {
+ kill_guest(cpu, "Writing result for async hypercall");
break;
}
/* Stop doing hypercalls if they want to notify the Launcher:
* it needs to service this first. */
- if (lg->pending_notify)
+ if (cpu->pending_notify)
break;
}
}
/* Last of all, we look at what happens first of all. The very first time the
* Guest makes a hypercall, we end up here to set things up: */
-static void initialize(struct lguest *lg)
+static void initialize(struct lg_cpu *cpu)
{
/* You can't do anything until you're initialized. The Guest knows the
* rules, so we're unforgiving here. */
- if (lg->hcall->arg0 != LHCALL_LGUEST_INIT) {
- kill_guest(lg, "hypercall %li before INIT", lg->hcall->arg0);
+ if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) {
+ kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0);
return;
}
- if (lguest_arch_init_hypercalls(lg))
- kill_guest(lg, "bad guest page %p", lg->lguest_data);
+ if (lguest_arch_init_hypercalls(cpu))
+ kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
/* The Guest tells us where we're not to deliver interrupts by putting
* the range of addresses into "struct lguest_data". */
- if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
- || get_user(lg->noirq_end, &lg->lguest_data->noirq_end))
- kill_guest(lg, "bad guest page %p", lg->lguest_data);
+ if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start)
+ || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end))
+ kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
/* We write the current time into the Guest's data page once so it can
* set its clock. */
- write_timestamp(lg);
+ write_timestamp(cpu);
/* page_tables.c will also do some setup. */
- page_table_guest_data_init(lg);
+ page_table_guest_data_init(cpu);
/* This is the one case where the above accesses might have been the
* first write to a Guest page. This may have caused a copy-on-write
* fault, but the old page might be (read-only) in the Guest
* pagetable. */
- guest_pagetable_clear_all(lg);
+ guest_pagetable_clear_all(cpu);
}
/*H:100
@@ -194,27 +197,27 @@ static void initialize(struct lguest *lg)
* Remember from the Guest, hypercalls come in two flavors: normal and
* asynchronous. This file handles both of types.
*/
-void do_hypercalls(struct lguest *lg)
+void do_hypercalls(struct lg_cpu *cpu)
{
/* Not initialized yet? This hypercall must do it. */
- if (unlikely(!lg->lguest_data)) {
+ if (unlikely(!cpu->lg->lguest_data)) {
/* Set up the "struct lguest_data" */
- initialize(lg);
+ initialize(cpu);
/* Hcall is done. */
- lg->hcall = NULL;
+ cpu->hcall = NULL;
return;
}
/* The Guest has initialized.
*
* Look in the hypercall ring for the async hypercalls: */
- do_async_hcalls(lg);
+ do_async_hcalls(cpu);
/* If we stopped reading the hypercall ring because the Guest did a
* NOTIFY to the Launcher, we want to return now. Otherwise we do
* the hypercall. */
- if (!lg->pending_notify) {
- do_hcall(lg, lg->hcall);
+ if (!cpu->pending_notify) {
+ do_hcall(cpu, cpu->hcall);
/* Tricky point: we reset the hcall pointer to mark the
* hypercall as "done". We use the hcall pointer rather than
* the trap number to indicate a hypercall is pending.
@@ -225,16 +228,17 @@ void do_hypercalls(struct lguest *lg)
* Launcher, the run_guest() loop will exit without running the
* Guest. When it comes back it would try to re-run the
* hypercall. */
- lg->hcall = NULL;
+ cpu->hcall = NULL;
}
}
/* This routine supplies the Guest with time: it's used for wallclock time at
* initial boot and as a rough time source if the TSC isn't available. */
-void write_timestamp(struct lguest *lg)
+void write_timestamp(struct lg_cpu *cpu)
{
struct timespec now;
ktime_get_real_ts(&now);
- if (copy_to_user(&lg->lguest_data->time, &now, sizeof(struct timespec)))
- kill_guest(lg, "Writing timestamp");
+ if (copy_to_user(&cpu->lg->lguest_data->time,
+ &now, sizeof(struct timespec)))
+ kill_guest(cpu, "Writing timestamp");
}
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 2b66f79c208..32e97c1858e 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -41,11 +41,11 @@ static int idt_present(u32 lo, u32 hi)
/* We need a helper to "push" a value onto the Guest's stack, since that's a
* big part of what delivering an interrupt does. */
-static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
+static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val)
{
/* Stack grows upwards: move stack then write value. */
*gstack -= 4;
- lgwrite(lg, *gstack, u32, val);
+ lgwrite(cpu, *gstack, u32, val);
}
/*H:210 The set_guest_interrupt() routine actually delivers the interrupt or
@@ -60,7 +60,7 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
* We set up the stack just like the CPU does for a real interrupt, so it's
* identical for the Guest (and the standard "iret" instruction will undo
* it). */
-static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
+static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err)
{
unsigned long gstack, origstack;
u32 eflags, ss, irq_enable;
@@ -69,59 +69,59 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
/* There are two cases for interrupts: one where the Guest is already
* in the kernel, and a more complex one where the Guest is in
* userspace. We check the privilege level to find out. */
- if ((lg->regs->ss&0x3) != GUEST_PL) {
+ if ((cpu->regs->ss&0x3) != GUEST_PL) {
/* The Guest told us their kernel stack with the SET_STACK
* hypercall: both the virtual address and the segment */
- virtstack = lg->esp1;
- ss = lg->ss1;
+ virtstack = cpu->esp1;
+ ss = cpu->ss1;
- origstack = gstack = guest_pa(lg, virtstack);
+ origstack = gstack = guest_pa(cpu, virtstack);
/* We push the old stack segment and pointer onto the new
* stack: when the Guest does an "iret" back from the interrupt
* handler the CPU will notice they're dropping privilege
* levels and expect these here. */
- push_guest_stack(lg, &gstack, lg->regs->ss);
- push_guest_stack(lg, &gstack, lg->regs->esp);
+ push_guest_stack(cpu, &gstack, cpu->regs->ss);
+ push_guest_stack(cpu, &gstack, cpu->regs->esp);
} else {
/* We're staying on the same Guest (kernel) stack. */
- virtstack = lg->regs->esp;
- ss = lg->regs->ss;
+ virtstack = cpu->regs->esp;
+ ss = cpu->regs->ss;
- origstack = gstack = guest_pa(lg, virtstack);
+ origstack = gstack = guest_pa(cpu, virtstack);
}
/* Remember that we never let the Guest actually disable interrupts, so
* the "Interrupt Flag" bit is always set. We copy that bit from the
* Guest's "irq_enabled" field into the eflags word: we saw the Guest
* copy it back in "lguest_iret". */
- eflags = lg->regs->eflags;
- if (get_user(irq_enable, &lg->lguest_data->irq_enabled) == 0
+ eflags = cpu->regs->eflags;
+ if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0
&& !(irq_enable & X86_EFLAGS_IF))
eflags &= ~X86_EFLAGS_IF;
/* An interrupt is expected to push three things on the stack: the old
* "eflags" word, the old code segment, and the old instruction
* pointer. */
- push_guest_stack(lg, &gstack, eflags);
- push_guest_stack(lg, &gstack, lg->regs->cs);
- push_guest_stack(lg, &gstack, lg->regs->eip);
+ push_guest_stack(cpu, &gstack, eflags);
+ push_guest_stack(cpu, &gstack, cpu->regs->cs);
+ push_guest_stack(cpu, &gstack, cpu->regs->eip);
/* For the six traps which supply an error code, we push that, too. */
if (has_err)
- push_guest_stack(lg, &gstack, lg->regs->errcode);
+ push_guest_stack(cpu, &gstack, cpu->regs->errcode);
/* Now we've pushed all the old state, we change the stack, the code
* segment and the address to execute. */
- lg->regs->ss = ss;
- lg->regs->esp = virtstack + (gstack - origstack);
- lg->regs->cs = (__KERNEL_CS|GUEST_PL);
- lg->regs->eip = idt_address(lo, hi);
+ cpu->regs->ss = ss;
+ cpu->regs->esp = virtstack + (gstack - origstack);
+ cpu->regs->cs = (__KERNEL_CS|GUEST_PL);
+ cpu->regs->eip = idt_address(lo, hi);
/* There are two kinds of interrupt handlers: 0xE is an "interrupt
* gate" which expects interrupts to be disabled on entry. */
if (idt_type(lo, hi) == 0xE)
- if (put_user(0, &lg->lguest_data->irq_enabled))
- kill_guest(lg, "Disabling interrupts");
+ if (put_user(0, &cpu->lg->lguest_data->irq_enabled))
+ kill_guest(cpu, "Disabling interrupts");
}
/*H:205
@@ -129,23 +129,23 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
*
* maybe_do_interrupt() gets called before every entry to the Guest, to see if
* we should divert the Guest to running an interrupt handler. */
-void maybe_do_interrupt(struct lguest *lg)
+void maybe_do_interrupt(struct lg_cpu *cpu)
{
unsigned int irq;
DECLARE_BITMAP(blk, LGUEST_IRQS);
struct desc_struct *idt;
/* If the Guest hasn't even initialized yet, we can do nothing. */
- if (!lg->lguest_data)
+ if (!cpu->lg->lguest_data)
return;
/* Take our "irqs_pending" array and remove any interrupts the Guest
* wants blocked: the result ends up in "blk". */
- if (copy_from_user(&blk, lg->lguest_data->blocked_interrupts,
+ if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts,
sizeof(blk)))
return;
- bitmap_andnot(blk, lg->irqs_pending, blk, LGUEST_IRQS);
+ bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS);
/* Find the first interrupt. */
irq = find_first_bit(blk, LGUEST_IRQS);
@@ -155,19 +155,20 @@ void maybe_do_interrupt(struct lguest *lg)
/* They may be in the middle of an iret, where they asked us never to
* deliver interrupts. */
- if (lg->regs->eip >= lg->noirq_start && lg->regs->eip < lg->noirq_end)
+ if (cpu->regs->eip >= cpu->lg->noirq_start &&
+ (cpu->regs->eip < cpu->lg->noirq_end))
return;
/* If they're halted, interrupts restart them. */
- if (lg->halted) {
+ if (cpu->halted) {
/* Re-enable interrupts. */
- if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled))
- kill_guest(lg, "Re-enabling interrupts");
- lg->halted = 0;
+ if (put_user(X86_EFLAGS_IF, &cpu->lg->lguest_data->irq_enabled))
+ kill_guest(cpu, "Re-enabling interrupts");
+ cpu->halted = 0;
} else {
/* Otherwise we check if they have interrupts disabled. */
u32 irq_enabled;
- if (get_user(irq_enabled, &lg->lguest_data->irq_enabled))
+ if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled))
irq_enabled = 0;
if (!irq_enabled)
return;
@@ -176,15 +177,15 @@ void maybe_do_interrupt(struct lguest *lg)
/* Look at the IDT entry the Guest gave us for this interrupt. The
* first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip
* over them. */
- idt = &lg->arch.idt[FIRST_EXTERNAL_VECTOR+irq];
+ idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq];
/* If they don't have a handler (yet?), we just ignore it */
if (idt_present(idt->a, idt->b)) {
/* OK, mark it no longer pending and deliver it. */
- clear_bit(irq, lg->irqs_pending);
+ clear_bit(irq, cpu->irqs_pending);
/* set_guest_interrupt() takes the interrupt descriptor and a
* flag to say whether this interrupt pushes an error code onto
* the stack as well: virtual interrupts never do. */
- set_guest_interrupt(lg, idt->a, idt->b, 0);
+ set_guest_interrupt(cpu, idt->a, idt->b, 0);
}
/* Every time we deliver an interrupt, we update the timestamp in the
@@ -192,7 +193,7 @@ void maybe_do_interrupt(struct lguest *lg)
* did this more often, but it can actually be quite slow: doing it
* here is a compromise which means at least it gets updated every
* timer interrupt. */
- write_timestamp(lg);
+ write_timestamp(cpu);
}
/*:*/
@@ -245,19 +246,19 @@ static int has_err(unsigned int trap)
}
/* deliver_trap() returns true if it could deliver the trap. */
-int deliver_trap(struct lguest *lg, unsigned int num)
+int deliver_trap(struct lg_cpu *cpu, unsigned int num)
{
/* Trap numbers are always 8 bit, but we set an impossible trap number
* for traps inside the Switcher, so check that here. */
- if (num >= ARRAY_SIZE(lg->arch.idt))
+ if (num >= ARRAY_SIZE(cpu->arch.idt))
return 0;
/* Early on the Guest hasn't set the IDT entries (or maybe it put a
* bogus one in): if we fail here, the Guest will be killed. */
- if (!idt_present(lg->arch.idt[num].a, lg->arch.idt[num].b))
+ if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b))
return 0;
- set_guest_interrupt(lg, lg->arch.idt[num].a, lg->arch.idt[num].b,
- has_err(num));
+ set_guest_interrupt(cpu, cpu->arch.idt[num].a,
+ cpu->arch.idt[num].b, has_err(num));
return 1;
}
@@ -309,18 +310,18 @@ static int direct_trap(unsigned int num)
* the Guest.
*
* Which is deeply unfair, because (literally!) it wasn't the Guests' fault. */
-void pin_stack_pages(struct lguest *lg)
+void pin_stack_pages(struct lg_cpu *cpu)
{
unsigned int i;
/* Depending on the CONFIG_4KSTACKS option, the Guest can have one or
* two pages of stack space. */
- for (i = 0; i < lg->stack_pages; i++)
+ for (i = 0; i < cpu->lg->stack_pages; i++)
/* The stack grows *upwards*, so the address we're given is the
* start of the page after the kernel stack. Subtract one to
* get back onto the first stack page, and keep subtracting to
* get to the rest of the stack pages. */
- pin_page(lg, lg->esp1 - 1 - i * PAGE_SIZE);
+ pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE);
}
/* Direct traps also mean that we need to know whenever the Guest wants to use
@@ -331,21 +332,21 @@ void pin_stack_pages(struct lguest *lg)
*
* In Linux each process has its own kernel stack, so this happens a lot: we
* change stacks on each context switch. */
-void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages)
+void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages)
{
/* You are not allowed have a stack segment with privilege level 0: bad
* Guest! */
if ((seg & 0x3) != GUEST_PL)
- kill_guest(lg, "bad stack segment %i", seg);
+ kill_guest(cpu, "bad stack segment %i", seg);
/* We only expect one or two stack pages. */
if (pages > 2)
- kill_guest(lg, "bad stack pages %u", pages);
+ kill_guest(cpu, "bad stack pages %u", pages);
/* Save where the stack is, and how many pages */
- lg->ss1 = seg;
- lg->esp1 = esp;
- lg->stack_pages = pages;
+ cpu->ss1 = seg;
+ cpu->esp1 = esp;
+ cpu->lg->stack_pages = pages;
/* Make sure the new stack pages are mapped */
- pin_stack_pages(lg);
+ pin_stack_pages(cpu);
}
/* All this reference to mapping stacks leads us neatly into the other complex
@@ -353,7 +354,7 @@ void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages)
/*H:235 This is the routine which actually checks the Guest's IDT entry and
* transfers it into the entry in "struct lguest": */
-static void set_trap(struct lguest *lg, struct desc_struct *trap,
+static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap,
unsigned int num, u32 lo, u32 hi)
{
u8 type = idt_type(lo, hi);
@@ -366,7 +367,7 @@ static void set_trap(struct lguest *lg, struct desc_struct *trap,
/* We only support interrupt and trap gates. */
if (type != 0xE && type != 0xF)
- kill_guest(lg, "bad IDT type %i", type);
+ kill_guest(cpu, "bad IDT type %i", type);
/* We only copy the handler address, present bit, privilege level and
* type. The privilege level controls where the trap can be triggered
@@ -383,7 +384,7 @@ static void set_trap(struct lguest *lg, struct desc_struct *trap,
*
* We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the
* LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. */
-void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi)
+void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi)
{
/* Guest never handles: NMI, doublefault, spurious interrupt or
* hypercall. We ignore when it tries to set them. */
@@ -392,13 +393,13 @@ void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi)
/* Mark the IDT as changed: next time the Guest runs we'll know we have
* to copy this again. */
- lg->changed |= CHANGED_IDT;
+ cpu->changed |= CHANGED_IDT;
/* Check that the Guest doesn't try to step outside the bounds. */
- if (num >= ARRAY_SIZE(lg->arch.idt))
- kill_guest(lg, "Setting idt entry %u", num);
+ if (num >= ARRAY_SIZE(cpu->arch.idt))
+ kill_guest(cpu, "Setting idt entry %u", num);
else
- set_trap(lg, &lg->arch.idt[num], num, lo, hi);
+ set_trap(cpu, &cpu->arch.idt[num], num, lo, hi);
}
/* The default entry for each interrupt points into the Switcher routines which
@@ -434,14 +435,14 @@ void setup_default_idt_entries(struct lguest_ro_state *state,
/*H:240 We don't use the IDT entries in the "struct lguest" directly, instead
* we copy them into the IDT which we've set up for Guests on this CPU, just
* before we run the Guest. This routine does that copy. */
-void copy_traps(const struct lguest *lg, struct desc_struct *idt,
+void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt,
const unsigned long *def)
{
unsigned int i;
/* We can simply copy the direct traps, otherwise we use the default
* ones in the Switcher: they will return to the Host. */
- for (i = 0; i < ARRAY_SIZE(lg->arch.idt); i++) {
+ for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) {
/* If no Guest can ever override this trap, leave it alone. */
if (!direct_trap(i))
continue;
@@ -450,8 +451,8 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt,
* Interrupt gates (type 14) disable interrupts as they are
* entered, which we never let the Guest do. Not present
* entries (type 0x0) also can't go direct, of course. */
- if (idt_type(lg->arch.idt[i].a, lg->arch.idt[i].b) == 0xF)
- idt[i] = lg->arch.idt[i];
+ if (idt_type(cpu->arch.idt[i].a, cpu->arch.idt[i].b) == 0xF)
+ idt[i] = cpu->arch.idt[i];
else
/* Reset it to the default. */
default_idt_entry(&idt[i], i, def[i]);
@@ -470,13 +471,13 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt,
* infrastructure to set a callback at that time.
*
* 0 means "turn off the clock". */
-void guest_set_clockevent(struct lguest *lg, unsigned long delta)
+void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta)
{
ktime_t expires;
if (unlikely(delta == 0)) {
/* Clock event device is shutting down. */
- hrtimer_cancel(&lg->hrt);
+ hrtimer_cancel(&cpu->hrt);
return;
}
@@ -484,25 +485,25 @@ void guest_set_clockevent(struct lguest *lg, unsigned long delta)
* all the time between now and the timer interrupt it asked for. This
* is almost always the right thing to do. */
expires = ktime_add_ns(ktime_get_real(), delta);
- hrtimer_start(&lg->hrt, expires, HRTIMER_MODE_ABS);
+ hrtimer_start(&cpu->hrt, expires, HRTIMER_MODE_ABS);
}
/* This is the function called when the Guest's timer expires. */
static enum hrtimer_restart clockdev_fn(struct hrtimer *timer)
{
- struct lguest *lg = container_of(timer, struct lguest, hrt);
+ struct lg_cpu *cpu = container_of(timer, struct lg_cpu, hrt);
/* Remember the first interrupt is the timer interrupt. */
- set_bit(0, lg->irqs_pending);
+ set_bit(0, cpu->irqs_pending);
/* If the Guest is actually stopped, we need to wake it up. */
- if (lg->halted)
- wake_up_process(lg->tsk);
+ if (cpu->halted)
+ wake_up_process(cpu->tsk);
return HRTIMER_NORESTART;
}
/* This sets up the timer for this Guest. */
-void init_clockdev(struct lguest *lg)
+void init_clockdev(struct lg_cpu *cpu)
{
- hrtimer_init(&lg->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS);
- lg->hrt.function = clockdev_fn;
+ hrtimer_init(&cpu->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+ cpu->hrt.function = clockdev_fn;
}
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 86924891b5e..2337e1a06f0 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -8,6 +8,7 @@
#include <linux/lguest.h>
#include <linux/lguest_launcher.h>
#include <linux/wait.h>
+#include <linux/hrtimer.h>
#include <linux/err.h>
#include <asm/semaphore.h>
@@ -38,58 +39,72 @@ struct lguest_pages
#define CHANGED_GDT_TLS 4 /* Actually a subset of CHANGED_GDT */
#define CHANGED_ALL 3
-/* The private info the thread maintains about the guest. */
-struct lguest
-{
- /* At end of a page shared mapped over lguest_pages in guest. */
- unsigned long regs_page;
- struct lguest_regs *regs;
- struct lguest_data __user *lguest_data;
+struct lguest;
+
+struct lg_cpu {
+ unsigned int id;
+ struct lguest *lg;
struct task_struct *tsk;
struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */
- u32 pfn_limit;
- /* This provides the offset to the base of guest-physical
- * memory in the Launcher. */
- void __user *mem_base;
- unsigned long kernel_address;
+
u32 cr2;
- int halted;
int ts;
- u32 next_hcall;
u32 esp1;
u8 ss1;
+ /* Bitmap of what has changed: see CHANGED_* above. */
+ int changed;
+
+ unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */
+
+ /* At end of a page shared mapped over lguest_pages in guest. */
+ unsigned long regs_page;
+ struct lguest_regs *regs;
+
+ struct lguest_pages *last_pages;
+
+ int cpu_pgd; /* which pgd this cpu is currently using */
+
/* If a hypercall was asked for, this points to the arguments. */
struct hcall_args *hcall;
+ u32 next_hcall;
+
+ /* Virtual clock device */
+ struct hrtimer hrt;
/* Do we need to stop what we're doing and return to userspace? */
int break_out;
wait_queue_head_t break_wq;
+ int halted;
- /* Bitmap of what has changed: see CHANGED_* above. */
- int changed;
- struct lguest_pages *last_pages;
+ /* Pending virtual interrupts */
+ DECLARE_BITMAP(irqs_pending, LGUEST_IRQS);
+
+ struct lg_cpu_arch arch;
+};
+
+/* The private info the thread maintains about the guest. */
+struct lguest
+{
+ struct lguest_data __user *lguest_data;
+ struct lg_cpu cpus[NR_CPUS];
+ unsigned int nr_cpus;
+
+ u32 pfn_limit;
+ /* This provides the offset to the base of guest-physical
+ * memory in th