diff options
-rw-r--r-- | Documentation/lguest/lguest.c | 49 | ||||
-rw-r--r-- | arch/x86/lguest/boot.c | 11 | ||||
-rw-r--r-- | drivers/Makefile | 2 | ||||
-rw-r--r-- | drivers/lguest/core.c | 46 | ||||
-rw-r--r-- | drivers/lguest/hypercalls.c | 106 | ||||
-rw-r--r-- | drivers/lguest/interrupts_and_traps.c | 149 | ||||
-rw-r--r-- | drivers/lguest/lg.h | 154 | ||||
-rw-r--r-- | drivers/lguest/lguest_user.c | 147 | ||||
-rw-r--r-- | drivers/lguest/page_tables.c | 179 | ||||
-rw-r--r-- | drivers/lguest/segments.c | 48 | ||||
-rw-r--r-- | drivers/lguest/x86/core.c | 127 | ||||
-rw-r--r-- | include/asm-x86/lguest.h | 2 | ||||
-rw-r--r-- | include/asm-x86/lguest_hcall.h | 6 |
13 files changed, 571 insertions, 455 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 9b0e322118b..6c8a2386cd5 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -79,6 +79,9 @@ static void *guest_base; /* The maximum guest physical address allowed, and maximum possible. */ static unsigned long guest_limit, guest_max; +/* a per-cpu variable indicating whose vcpu is currently running */ +static unsigned int __thread cpu_id; + /* This is our list of devices. */ struct device_list { @@ -153,6 +156,9 @@ struct virtqueue void (*handle_output)(int fd, struct virtqueue *me); }; +/* Remember the arguments to the program so we can "reboot" */ +static char **main_args; + /* Since guest is UP and we don't run at the same time, we don't need barriers. * But I include them in the code in case others copy it. */ #define wmb() @@ -554,7 +560,7 @@ static void wake_parent(int pipefd, int lguest_fd) else FD_CLR(-fd - 1, &devices.infds); } else /* Send LHREQ_BREAK command. */ - write(lguest_fd, args, sizeof(args)); + pwrite(lguest_fd, args, sizeof(args), cpu_id); } } @@ -1489,7 +1495,9 @@ static void setup_block_file(const char *filename) /* Create stack for thread and run it */ stack = malloc(32768); - if (clone(io_thread, stack + 32768, CLONE_VM, dev) == -1) + /* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from + * becoming a zombie. */ + if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) err(1, "Creating clone"); /* We don't need to keep the I/O thread's end of the pipes open. */ @@ -1499,7 +1507,21 @@ static void setup_block_file(const char *filename) verbose("device %u: virtblock %llu sectors\n", devices.device_num, cap); } -/* That's the end of device setup. */ +/* That's the end of device setup. :*/ + +/* Reboot */ +static void __attribute__((noreturn)) restart_guest(void) +{ + unsigned int i; + + /* Closing pipes causes the waker thread and io_threads to die, and + * closing /dev/lguest cleans up the Guest. 
Since we don't track all + * open fds, we simply close everything beyond stderr. */ + for (i = 3; i < FD_SETSIZE; i++) + close(i); + execv(main_args[0], main_args); + err(1, "Could not exec %s", main_args[0]); +} /*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves * its input and output, and finally, lays it to rest. */ @@ -1511,7 +1533,8 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) int readval; /* We read from the /dev/lguest device to run the Guest. */ - readval = read(lguest_fd, &notify_addr, sizeof(notify_addr)); + readval = pread(lguest_fd, &notify_addr, + sizeof(notify_addr), cpu_id); /* One unsigned long means the Guest did HCALL_NOTIFY */ if (readval == sizeof(notify_addr)) { @@ -1521,16 +1544,23 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) /* ENOENT means the Guest died. Reading tells us why. */ } else if (errno == ENOENT) { char reason[1024] = { 0 }; - read(lguest_fd, reason, sizeof(reason)-1); + pread(lguest_fd, reason, sizeof(reason)-1, cpu_id); errx(1, "%s", reason); + /* ERESTART means that we need to reboot the guest */ + } else if (errno == ERESTART) { + restart_guest(); /* EAGAIN means the Waker wanted us to look at some input. * Anything else means a bug or incompatible change. */ } else if (errno != EAGAIN) err(1, "Running guest failed"); + /* Only service input on thread for CPU 0. */ + if (cpu_id != 0) + continue; + /* Service input, then unset the BREAK to release the Waker. */ handle_input(lguest_fd); - if (write(lguest_fd, args, sizeof(args)) < 0) + if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0) err(1, "Resetting break"); } } @@ -1571,6 +1601,12 @@ int main(int argc, char *argv[]) /* If they specify an initrd file to load. */ const char *initrd_name = NULL; + /* Save the args: we "reboot" by execing ourselves again. */ + main_args = argv; + /* We don't "wait" for the children, so prevent them from becoming + * zombies. 
*/ + signal(SIGCHLD, SIG_IGN); + /* First we initialize the device list. Since console and network * device receive input from a file descriptor, we keep an fdset * (infds) and the maximum fd number (max_infd) with the head of the @@ -1582,6 +1618,7 @@ int main(int argc, char *argv[]) devices.lastdev = &devices.dev; devices.next_irq = 1; + cpu_id = 0; /* We need to know how much memory so we can set up the device * descriptor and memory pages for the devices as we parse the command * line. So we quickly look through the arguments to find the amount diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index a63373759f0..5afdde4895d 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -67,6 +67,7 @@ #include <asm/mce.h> #include <asm/io.h> #include <asm/i387.h> +#include <asm/reboot.h> /* for struct machine_ops */ /*G:010 Welcome to the Guest! * @@ -813,7 +814,7 @@ static void lguest_safe_halt(void) * rather than virtual addresses, so we use __pa() here. */ static void lguest_power_off(void) { - hcall(LHCALL_CRASH, __pa("Power down"), 0, 0); + hcall(LHCALL_SHUTDOWN, __pa("Power down"), LGUEST_SHUTDOWN_POWEROFF, 0); } /* @@ -823,7 +824,7 @@ static void lguest_power_off(void) */ static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) { - hcall(LHCALL_CRASH, __pa(p), 0, 0); + hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0); /* The hcall won't return, but to keep gcc happy, we're "done". */ return NOTIFY_DONE; } @@ -927,6 +928,11 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, return insn_len; } +static void lguest_restart(char *reason) +{ + hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0); +} + /*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops * structures in the kernel provide points for (almost) every routine we have * to override to avoid privileged instructions. */ @@ -1060,6 +1066,7 @@ __init void lguest_init(void) * the Guest routine to power off. 
*/ pm_power_off = lguest_power_off; + machine_ops.restart = lguest_restart; /* Now we're set up, call start_kernel() in init/main.c and we proceed * to boot as normal. It never returns. */ start_kernel(); diff --git a/drivers/Makefile b/drivers/Makefile index 9e1f808e43c..0ee9a8a4095 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -72,7 +72,7 @@ obj-$(CONFIG_ISDN) += isdn/ obj-$(CONFIG_EDAC) += edac/ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ -obj-$(CONFIG_LGUEST_GUEST) += lguest/ +obj-y += lguest/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_CPU_IDLE) += cpuidle/ obj-$(CONFIG_MMC) += mmc/ diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index cb4c67025d5..7743d73768d 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -151,43 +151,43 @@ int lguest_address_ok(const struct lguest *lg, /* This routine copies memory from the Guest. Here we can see how useful the * kill_lguest() routine we met in the Launcher can be: we return a random * value (all zeroes) instead of needing to return an error. */ -void __lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes) +void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes) { - if (!lguest_address_ok(lg, addr, bytes) - || copy_from_user(b, lg->mem_base + addr, bytes) != 0) { + if (!lguest_address_ok(cpu->lg, addr, bytes) + || copy_from_user(b, cpu->lg->mem_base + addr, bytes) != 0) { /* copy_from_user should do this, but as we rely on it... */ memset(b, 0, bytes); - kill_guest(lg, "bad read address %#lx len %u", addr, bytes); + kill_guest(cpu, "bad read address %#lx len %u", addr, bytes); } } /* This is the write (copy into guest) version. 
*/ -void __lgwrite(struct lguest *lg, unsigned long addr, const void *b, +void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b, unsigned bytes) { - if (!lguest_address_ok(lg, addr, bytes) - || copy_to_user(lg->mem_base + addr, b, bytes) != 0) - kill_guest(lg, "bad write address %#lx len %u", addr, bytes); + if (!lguest_address_ok(cpu->lg, addr, bytes) + || copy_to_user(cpu->lg->mem_base + addr, b, bytes) != 0) + kill_guest(cpu, "bad write address %#lx len %u", addr, bytes); } /*:*/ /*H:030 Let's jump straight to the the main loop which runs the Guest. * Remember, this is called by the Launcher reading /dev/lguest, and we keep * going around and around until something interesting happens. */ -int run_guest(struct lguest *lg, unsigned long __user *user) +int run_guest(struct lg_cpu *cpu, unsigned long __user *user) { /* We stop running once the Guest is dead. */ - while (!lg->dead) { + while (!cpu->lg->dead) { /* First we run any hypercalls the Guest wants done. */ - if (lg->hcall) - do_hypercalls(lg); + if (cpu->hcall) + do_hypercalls(cpu); /* It's possible the Guest did a NOTIFY hypercall to the * Launcher, in which case we return from the read() now. */ - if (lg->pending_notify) { - if (put_user(lg->pending_notify, user)) + if (cpu->pending_notify) { + if (put_user(cpu->pending_notify, user)) return -EFAULT; - return sizeof(lg->pending_notify); + return sizeof(cpu->pending_notify); } /* Check for signals */ @@ -195,13 +195,13 @@ int run_guest(struct lguest *lg, unsigned long __user *user) return -ERESTARTSYS; /* If Waker set break_out, return to Launcher. */ - if (lg->break_out) + if (cpu->break_out) return -EAGAIN; /* Check if there are any interrupts which can be delivered * now: if so, this sets up the hander to be executed when we * next run the Guest. */ - maybe_do_interrupt(lg); + maybe_do_interrupt(cpu); /* All long-lived kernel loops need to check with this horrible * thing called the freezer. 
If the Host is trying to suspend, @@ -210,12 +210,12 @@ int run_guest(struct lguest *lg, unsigned long __user *user) /* Just make absolutely sure the Guest is still alive. One of * those hypercalls could have been fatal, for example. */ - if (lg->dead) + if (cpu->lg->dead) break; /* If the Guest asked to be stopped, we sleep. The Guest's * clock timer or LHCALL_BREAK from the Waker will wake us. */ - if (lg->halted) { + if (cpu->halted) { set_current_state(TASK_INTERRUPTIBLE); schedule(); continue; @@ -226,15 +226,17 @@ int run_guest(struct lguest *lg, unsigned long __user *user) local_irq_disable(); /* Actually run the Guest until something happens. */ - lguest_arch_run_guest(lg); + lguest_arch_run_guest(cpu); /* Now we're ready to be interrupted or moved to other CPUs */ local_irq_enable(); /* Now we deal with whatever happened to the Guest. */ - lguest_arch_handle_trap(lg); + lguest_arch_handle_trap(cpu); } + if (cpu->lg->dead == ERR_PTR(-ERESTART)) + return -ERESTART; /* The Guest is dead => "No such file or directory" */ return -ENOENT; } @@ -253,7 +255,7 @@ static int __init init(void) /* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */ if (paravirt_enabled()) { - printk("lguest is afraid of %s\n", pv_info.name); + printk("lguest is afraid of being a guest\n"); return -EPERM; } diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index b478affe8f9..0f2cb4fd7c6 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -23,13 +23,14 @@ #include <linux/uaccess.h> #include <linux/syscalls.h> #include <linux/mm.h> +#include <linux/ktime.h> #include <asm/page.h> #include <asm/pgtable.h> #include "lg.h" /*H:120 This is the core hypercall routine: where the Guest gets what it wants. * Or gets killed. Or, in the case of LHCALL_CRASH, both. 
*/ -static void do_hcall(struct lguest *lg, struct hcall_args *args) +static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) { switch (args->arg0) { case LHCALL_FLUSH_ASYNC: @@ -39,60 +40,62 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args) case LHCALL_LGUEST_INIT: /* You can't get here unless you're already initialized. Don't * do that. */ - kill_guest(lg, "already have lguest_data"); + kill_guest(cpu, "already have lguest_data"); break; - case LHCALL_CRASH: { - /* Crash is such a trivial hypercall that we do it in four + case LHCALL_SHUTDOWN: { + /* Shutdown is such a trivial hypercall that we do it in four * lines right here. */ char msg[128]; /* If the lgread fails, it will call kill_guest() itself; the * kill_guest() with the message will be ignored. */ - __lgread(lg, msg, args->arg1, sizeof(msg)); + __lgread(cpu, msg, args->arg1, sizeof(msg)); msg[sizeof(msg)-1] = '\0'; - kill_guest(lg, "CRASH: %s", msg); + kill_guest(cpu, "CRASH: %s", msg); + if (args->arg2 == LGUEST_SHUTDOWN_RESTART) + cpu->lg->dead = ERR_PTR(-ERESTART); break; } case LHCALL_FLUSH_TLB: /* FLUSH_TLB comes in two flavors, depending on the * argument: */ if (args->arg1) - guest_pagetable_clear_all(lg); + guest_pagetable_clear_all(cpu); else - guest_pagetable_flush_user(lg); + guest_pagetable_flush_user(cpu); break; /* All these calls simply pass the arguments through to the right * routines. 
*/ case LHCALL_NEW_PGTABLE: - guest_new_pagetable(lg, args->arg1); + guest_new_pagetable(cpu, args->arg1); break; case LHCALL_SET_STACK: - guest_set_stack(lg, args->arg1, args->arg2, args->arg3); + guest_set_stack(cpu, args->arg1, args->arg2, args->arg3); break; case LHCALL_SET_PTE: - guest_set_pte(lg, args->arg1, args->arg2, __pte(args->arg3)); + guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3)); break; case LHCALL_SET_PMD: - guest_set_pmd(lg, args->arg1, args->arg2); + guest_set_pmd(cpu->lg, args->arg1, args->arg2); break; case LHCALL_SET_CLOCKEVENT: - guest_set_clockevent(lg, args->arg1); + guest_set_clockevent(cpu, args->arg1); break; case LHCALL_TS: /* This sets the TS flag, as we saw used in run_guest(). */ - lg->ts = args->arg1; + cpu->ts = args->arg1; break; case LHCALL_HALT: /* Similarly, this sets the halted flag for run_guest(). */ - lg->halted = 1; + cpu->halted = 1; break; case LHCALL_NOTIFY: - lg->pending_notify = args->arg1; + cpu->pending_notify = args->arg1; break; default: /* It should be an architecture-specific hypercall. */ - if (lguest_arch_do_hcall(lg, args)) - kill_guest(lg, "Bad hypercall %li\n", args->arg0); + if (lguest_arch_do_hcall(cpu, args)) + kill_guest(cpu, "Bad hypercall %li\n", args->arg0); } } /*:*/ @@ -104,13 +107,13 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args) * Guest put them in the ring, but we also promise the Guest that they will * happen before any normal hypercall (which is why we check this before * checking for a normal hcall). */ -static void do_async_hcalls(struct lguest *lg) +static void do_async_hcalls(struct lg_cpu *cpu) { unsigned int i; u8 st[LHCALL_RING_SIZE]; /* For simplicity, we copy the entire call status array in at once. */ - if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st))) + if (copy_from_user(&st, &cpu->lg->lguest_data->hcall_status, sizeof(st))) return; /* We process "struct lguest_data"s hcalls[] ring once. 
*/ @@ -119,7 +122,7 @@ static void do_async_hcalls(struct lguest *lg) /* We remember where we were up to from last time. This makes * sure that the hypercalls are done in the order the Guest * places them in the ring. */ - unsigned int n = lg->next_hcall; + unsigned int n = cpu->next_hcall; /* 0xFF means there's no call here (yet). */ if (st[n] == 0xFF) @@ -127,65 +130,65 @@ static void do_async_hcalls(struct lguest *lg) /* OK, we have hypercall. Increment the "next_hcall" cursor, * and wrap back to 0 if we reach the end. */ - if (++lg->next_hcall == LHCALL_RING_SIZE) - lg->next_hcall = 0; + if (++cpu->next_hcall == LHCALL_RING_SIZE) + cpu->next_hcall = 0; /* Copy the hypercall arguments into a local copy of * the hcall_args struct. */ - if (copy_from_user(&args, &lg->lguest_data->hcalls[n], + if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n], sizeof(struct hcall_args))) { - kill_guest(lg, "Fetching async hypercalls"); + kill_guest(cpu, "Fetching async hypercalls"); break; } /* Do the hypercall, same as a normal one. */ - do_hcall(lg, &args); + do_hcall(cpu, &args); /* Mark the hypercall done. */ - if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) { - kill_guest(lg, "Writing result for async hypercall"); + if (put_user(0xFF, &cpu->lg->lguest_data->hcall_status[n])) { + kill_guest(cpu, "Writing result for async hypercall"); break; } /* Stop doing hypercalls if they want to notify the Launcher: * it needs to service this first. */ - if (lg->pending_notify) + if (cpu->pending_notify) break; } } /* Last of all, we look at what happens first of all. The very first time the * Guest makes a hypercall, we end up here to set things up: */ -static void initialize(struct lguest *lg) +static void initialize(struct lg_cpu *cpu) { /* You can't do anything until you're initialized. The Guest knows the * rules, so we're unforgiving here. 
*/ - if (lg->hcall->arg0 != LHCALL_LGUEST_INIT) { - kill_guest(lg, "hypercall %li before INIT", lg->hcall->arg0); + if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) { + kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0); return; } - if (lguest_arch_init_hypercalls(lg)) - kill_guest(lg, "bad guest page %p", lg->lguest_data); + if (lguest_arch_init_hypercalls(cpu)) + kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); /* The Guest tells us where we're not to deliver interrupts by putting * the range of addresses into "struct lguest_data". */ - if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start) - || get_user(lg->noirq_end, &lg->lguest_data->noirq_end)) - kill_guest(lg, "bad guest page %p", lg->lguest_data); + if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start) + || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end)) + kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); /* We write the current time into the Guest's data page once so it can * set its clock. */ - write_timestamp(lg); + write_timestamp(cpu); /* page_tables.c will also do some setup. */ - page_table_guest_data_init(lg); + page_table_guest_data_init(cpu); /* This is the one case where the above accesses might have been the * first write to a Guest page. This may have caused a copy-on-write * fault, but the old page might be (read-only) in the Guest * pagetable. */ - guest_pagetable_clear_all(lg); + guest_pagetable_clear_all(cpu); } /*H:100 @@ -194,27 +197,27 @@ static void initialize(struct lguest *lg) * Remember from the Guest, hypercalls come in two flavors: normal and * asynchronous. This file handles both of types. */ -void do_hypercalls(struct lguest *lg) +void do_hypercalls(struct lg_cpu *cpu) { /* Not initialized yet? This hypercall must do it. */ - if (unlikely(!lg->lguest_data)) { + if (unlikely(!cpu->lg->lguest_data)) { /* Set up the "struct lguest_data" */ - initialize(lg); + initialize(cpu); /* Hcall is done. 
*/ - lg->hcall = NULL; + cpu->hcall = NULL; return; } /* The Guest has initialized. * * Look in the hypercall ring for the async hypercalls: */ - do_async_hcalls(lg); + do_async_hcalls(cpu); /* If we stopped reading the hypercall ring because the Guest did a * NOTIFY to the Launcher, we want to return now. Otherwise we do * the hypercall. */ - if (!lg->pending_notify) { - do_hcall(lg, lg->hcall); + if (!cpu->pending_notify) { + do_hcall(cpu, cpu->hcall); /* Tricky point: we reset the hcall pointer to mark the * hypercall as "done". We use the hcall pointer rather than * the trap number to indicate a hypercall is pending. @@ -225,16 +228,17 @@ void do_hypercalls(struct lguest *lg) * Launcher, the run_guest() loop will exit without running the * Guest. When it comes back it would try to re-run the * hypercall. */ - lg->hcall = NULL; + cpu->hcall = NULL; } } /* This routine supplies the Guest with time: it's used for wallclock time at * initial boot and as a rough time source if the TSC isn't available. */ -void write_timestamp(struct lguest *lg) +void write_timestamp(struct lg_cpu *cpu) { struct timespec now; ktime_get_real_ts(&now); - if (copy_to_user(&lg->lguest_data->time, &now, sizeof(struct timespec))) - kill_guest(lg, "Writing timestamp"); + if (copy_to_user(&cpu->lg->lguest_data->time, + &now, sizeof(struct timespec))) + kill_guest(cpu, "Writing timestamp"); } diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 2b66f79c208..32e97c1858e 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -41,11 +41,11 @@ static int idt_present(u32 lo, u32 hi) /* We need a helper to "push" a value onto the Guest's stack, since that's a * big part of what delivering an interrupt does. 
*/ -static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val) +static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) { /* Stack grows upwards: move stack then write value. */ *gstack -= 4; - lgwrite(lg, *gstack, u32, val); + lgwrite(cpu, *gstack, u32, val); } /*H:210 The set_guest_interrupt() routine actually delivers the interrupt or @@ -60,7 +60,7 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val) * We set up the stack just like the CPU does for a real interrupt, so it's * identical for the Guest (and the standard "iret" instruction will undo * it). */ -static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) +static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) { unsigned long gstack, origstack; u32 eflags, ss, irq_enable; @@ -69,59 +69,59 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) /* There are two cases for interrupts: one where the Guest is already * in the kernel, and a more complex one where the Guest is in * userspace. We check the privilege level to find out. */ - if ((lg->regs->ss&0x3) != GUEST_PL) { + if ((cpu->regs->ss&0x3) != GUEST_PL) { /* The Guest told us their kernel stack with the SET_STACK * hypercall: both the virtual address and the segment */ - virtstack = lg->esp1; - ss = lg->ss1; + virtstack = cpu->esp1; + ss = cpu->ss1; - origstack = gstack = guest_pa(lg, virtstack); + origstack = gstack = guest_pa(cpu, virtstack); /* We push the old stack segment and pointer onto the new * stack: when the Guest does an "iret" back from the interrupt * handler the CPU will notice they're dropping privilege * levels and expect these here. 
*/ - push_guest_stack(lg, &gstack, lg->regs->ss); - push_guest_stack(lg, &gstack, lg->regs->esp); + push_guest_stack(cpu, &gstack, cpu->regs->ss); + push_guest_stack(cpu, &gstack, cpu->regs->esp); } else { /* We're staying on the same Guest (kernel) stack. */ - virtstack = lg->regs->esp; - ss = lg->regs->ss; + virtstack = cpu->regs->esp; + ss = cpu->regs->ss; - origstack = gstack = guest_pa(lg, virtstack); + origstack = gstack = guest_pa(cpu, virtstack); } /* Remember that we never let the Guest actually disable interrupts, so * the "Interrupt Flag" bit is always set. We copy that bit from the * Guest's "irq_enabled" field into the eflags word: we saw the Guest * copy it back in "lguest_iret". */ - eflags = lg->regs->eflags; - if (get_user(irq_enable, &lg->lguest_data->irq_enabled) == 0 + eflags = cpu->regs->eflags; + if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0 && !(irq_enable & X86_EFLAGS_IF)) eflags &= ~X86_EFLAGS_IF; /* An interrupt is expected to push three things on the stack: the old * "eflags" word, the old code segment, and the old instruction * pointer. */ - push_guest_stack(lg, &gstack, eflags); - push_guest_stack(lg, &gstack, lg->regs->cs); - push_guest_stack(lg, &gstack, lg->regs->eip); + push_guest_stack(cpu, &gstack, eflags); + push_guest_stack(cpu, &gstack, cpu->regs->cs); + push_guest_stack(cpu, &gstack, cpu->regs->eip); /* For the six traps which supply an error code, we push that, too. */ if (has_err) - push_guest_stack(lg, &gstack, lg->regs->errcode); + push_guest_stack(cpu, &gstack, cpu->regs->errcode); /* Now we've pushed all the old state, we change the stack, the code * segment and the address to execute. 
*/ - lg->regs->ss = ss; - lg->regs->esp = virtstack + (gstack - origstack); - lg->regs->cs = (__KERNEL_CS|GUEST_PL); - lg->regs->eip = idt_address(lo, hi); + cpu->regs->ss = ss; + cpu->regs->esp = virtstack + (gstack - origstack); + cpu->regs->cs = (__KERNEL_CS|GUEST_PL); + cpu->regs->eip = idt_address(lo, hi); /* There are two kinds of interrupt handlers: 0xE is an "interrupt * gate" which expects interrupts to be disabled on entry. */ if (idt_type(lo, hi) == 0xE) - if (put_user(0, &lg->lguest_data->irq_enabled)) - kill_guest(lg, "Disabling interrupts"); + if (put_user(0, &cpu->lg->lguest_data->irq_enabled)) + kill_guest(cpu, "Disabling interrupts"); } /*H:205 @@ -129,23 +129,23 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) * * maybe_do_interrupt() gets called before every entry to the Guest, to see if * we should divert the Guest to running an interrupt handler. */ -void maybe_do_interrupt(struct lguest *lg) +void maybe_do_interrupt(struct lg_cpu *cpu) { unsigned int irq; DECLARE_BITMAP(blk, LGUEST_IRQS); struct desc_struct *idt; /* If the Guest hasn't even initialized yet, we can do nothing. */ - if (!lg->lguest_data) + if (!cpu->lg->lguest_data) return; /* Take our "irqs_pending" array and remove any interrupts the Guest * wants blocked: the result ends up in "blk". */ - if (copy_from_user(&blk, lg->lguest_data->blocked_interrupts, + if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, sizeof(blk))) return; - bitmap_andnot(blk, lg->irqs_pending, blk, LGUEST_IRQS); + bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS); /* Find the first interrupt. */ irq = find_first_bit(blk, LGUEST_IRQS); @@ -155,19 +155,20 @@ void maybe_do_interrupt(struct lguest *lg) /* They may be in the middle of an iret, where they asked us never to * deliver interrupts. 
*/ - if (lg->regs->eip >= lg->noirq_start && lg->regs->eip < lg->noirq_end) + if (cpu->regs->eip >= cpu->lg->noirq_start && + (cpu->regs->eip < cpu->lg->noirq_end)) return; /* If they're halted, interrupts restart them. */ - if (lg->halted) { + if (cpu->halted) { /* Re-enable interrupts. */ - if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled)) - kill_guest(lg, "Re-enabling interrupts"); - lg->halted = 0; + if (put_user(X86_EFLAGS_IF, &cpu->lg->lguest_data->irq_enabled)) + kill_guest(cpu, "Re-enabling interrupts"); + cpu->halted = 0; } else { /* Otherwise we check if they have interrupts disabled. */ u32 irq_enabled; - if (get_user(irq_enabled, &lg->lguest_data->irq_enabled)) + if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled)) irq_enabled = 0; if (!irq_enabled) return; @@ -176,15 +177,15 @@ void maybe_do_interrupt(struct lguest *lg) /* Look at the IDT entry the Guest gave us for this interrupt. The * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip * over them. */ - idt = &lg->arch.idt[FIRST_EXTERNAL_VECTOR+irq]; + idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq]; /* If they don't have a handler (yet?), we just ignore it */ if (idt_present(idt->a, idt->b)) { /* OK, mark it no longer pending and deliver it. */ - clear_bit(irq, lg->irqs_pending); + clear_bit(irq, cpu->irqs_pending); /* set_guest_interrupt() takes the interrupt descriptor and a * flag to say whether this interrupt pushes an error code onto * the stack as well: virtual interrupts never do. */ - set_guest_interrupt(lg, idt->a, idt->b, 0); + set_guest_interrupt(cpu, idt->a, idt->b, 0); } /* Every time we deliver an interrupt, we update the timestamp in the @@ -192,7 +193,7 @@ void maybe_do_interrupt(struct lguest *lg) * did this more often, but it can actually be quite slow: doing it * here is a compromise which means at least it gets updated every * timer interrupt. 
*/ - write_timestamp(lg); + write_timestamp(cpu); } /*:*/ @@ -245,19 +246,19 @@ static int has_err(unsigned int trap) } /* deliver_trap() returns true if it could deliver the trap. */ -int deliver_trap(struct lguest *lg, unsigned int num) +int deliver_trap(struct lg_cpu *cpu, unsigned int num) { /* Trap numbers are always 8 bit, but we set an impossible trap number * for traps inside the Switcher, so check that here. */ - if (num >= ARRAY_SIZE(lg->arch.idt)) + if (num >= ARRAY_SIZE(cpu->arch.idt)) return 0; /* Early on the Guest hasn't set the IDT entries (or maybe it put a * bogus one in): if we fail here, the Guest will be killed. */ - if (!idt_present(lg->arch.idt[num].a, lg->arch.idt[num].b)) + if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b)) return 0; - set_guest_interrupt(lg, lg->arch.idt[num].a, lg->arch.idt[num].b, - has_err(num)); + set_guest_interrupt(cpu, cpu->arch.idt[num].a, + cpu->arch.idt[num].b, has_err(num)); return 1; } @@ -309,18 +310,18 @@ static int direct_trap(unsigned int num) * the Guest. * * Which is deeply unfair, because (literally!) it wasn't the Guests' fault. */ -void pin_stack_pages(struct lguest *lg) +void pin_stack_pages(struct lg_cpu *cpu) { unsigned int i; /* Depending on the CONFIG_4KSTACKS option, the Guest can have one or * two pages of stack space. */ - for (i = 0; i < lg->stack_pages; i++) + for (i = 0; i < cpu->lg->stack_pages; i++) /* The stack grows *upwards*, so the address we're given is the * start of the page after the kernel stack. Subtract one to * get back onto the first stack page, and keep subtracting to * get to the rest of the stack pages. */ - pin_page(lg, lg->esp1 - 1 - i * PAGE_SIZE); + pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE); } /* Direct traps also mean that we need to know whenever the Guest wants to use @@ -331,21 +332,21 @@ void pin_stack_pages(struct lguest *lg) * * In Linux each process has its own kernel stack, so this happens a lot: we * change stacks on each context switch. 
*/ -void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages) +void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) { /* You are not allowed have a stack segment with privilege level 0: bad * Guest! */ if ((seg & 0x3) != GUEST_PL) - kill_guest(lg, "bad stack segment %i", seg); + kill_guest(cpu, "bad stack segment %i", seg); /* We only expect one or two stack pages. */ if (pages > 2) - kill_guest(lg, "bad stack pages %u", pages); + kill_guest(cpu, "bad stack pages %u", pages); /* Save where the stack is, and how many pages */ - lg->ss1 = seg; - lg->esp1 = esp; - lg->stack_pages = pages; + cpu->ss1 = seg; + cpu->esp1 = esp; + cpu->lg->stack_pages = pages; /* Make sure the new stack pages are mapped */ - pin_stack_pages(lg); + pin_stack_pages(cpu); } /* All this reference to mapping stacks leads us neatly into the other complex @@ -353,7 +354,7 @@ void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages) /*H:235 This is the routine which actually checks the Guest's IDT entry and * transfers it into the entry in "struct lguest": */ -static void set_trap(struct lguest *lg, struct desc_struct *trap, +static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap, unsigned int num, u32 lo, u32 hi) { u8 type = idt_type(lo, hi); @@ -366,7 +367,7 @@ static void set_trap(struct lguest *lg, struct desc_struct *trap, /* We only support interrupt and trap gates. */ if (type != 0xE && type != 0xF) - kill_guest(lg, "bad IDT type %i", type); + kill_guest(cpu, "bad IDT type %i", type); /* We only copy the handler address, present bit, privilege level and * type. The privilege level controls where the trap can be triggered @@ -383,7 +384,7 @@ static void set_trap(struct lguest *lg, struct desc_struct *trap, * * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. 
*/ -void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi) +void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) { /* Guest never handles: NMI, doublefault, spurious interrupt or * hypercall. We ignore when it tries to set them. */ @@ -392,13 +393,13 @@ void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi) /* Mark the IDT as changed: next time the Guest runs we'll know we have * to copy this again. */ - lg->changed |= CHANGED_IDT; + cpu->changed |= CHANGED_IDT; /* Check that the Guest doesn't try to step outside the bounds. */ - if (num >= ARRAY_SIZE(lg->arch.idt)) - kill_guest(lg, "Setting idt entry %u", num); + if (num >= ARRAY_SIZE(cpu->arch.idt)) + kill_guest(cpu, "Setting idt entry %u", num); else - set_trap(lg, &lg->arch.idt[num], num, lo, hi); + set_trap(cpu, &cpu->arch.idt[num], num, lo, hi); } /* The default entry for each interrupt points into the Switcher routines which @@ -434,14 +435,14 @@ void setup_default_idt_entries(struct lguest_ro_state *state, |