Diffstat (limited to 'drivers/lguest/x86')
-rw-r--r--  drivers/lguest/x86/core.c | 136
1 file changed, 32 insertions(+), 104 deletions(-)
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index b4eb675a807..922a1acbf65 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -59,14 +59,13 @@ static struct {
 /* Offset from where switcher.S was compiled to where we've copied it */
 static unsigned long switcher_offset(void)
 {
-	return SWITCHER_ADDR - (unsigned long)start_switcher_text;
+	return switcher_addr - (unsigned long)start_switcher_text;
 }
 
-/* This cpu's struct lguest_pages. */
+/* This cpu's struct lguest_pages (after the Switcher text page) */
 static struct lguest_pages *lguest_pages(unsigned int cpu)
 {
-	return &(((struct lguest_pages *)
-		(SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]);
+	return &(((struct lguest_pages *)(switcher_addr + PAGE_SIZE))[cpu]);
 }
 
 static DEFINE_PER_CPU(struct lg_cpu *, lg_last_cpu);
@@ -90,8 +89,8 @@ static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages)
 	 * meanwhile). If that's not the case, we pretend everything in the
 	 * Guest has changed.
 	 */
-	if (__get_cpu_var(lg_last_cpu) != cpu || cpu->last_pages != pages) {
-		__get_cpu_var(lg_last_cpu) = cpu;
+	if (__this_cpu_read(lg_last_cpu) != cpu || cpu->last_pages != pages) {
+		__this_cpu_write(lg_last_cpu, cpu);
 		cpu->last_pages = pages;
 		cpu->changed = CHANGED_ALL;
 	}
@@ -158,7 +157,7 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages)
 	 * stack, then the address of this call. This stack layout happens to
 	 * exactly match the stack layout created by an interrupt...
 	 */
-	asm volatile("pushf; lcall *lguest_entry"
+	asm volatile("pushf; lcall *%4"
 		     /*
 		      * This is how we tell GCC that %eax ("a") and %ebx ("b")
 		      * are changed by this routine. The "=" means output.
@@ -170,7 +169,9 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages)
 		      * physical address of the Guest's top-level page
 		      * directory.
 		      */
-		     : "0"(pages), "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir))
+		     : "0"(pages),
+		       "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir)),
+		       "m"(lguest_entry)
 		     /*
 		      * We tell gcc that all these registers could change,
 		      * which means we don't have to save and restore them in
@@ -203,8 +204,8 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	 * we set it now, so we can trap and pass that trap to the Guest if it
 	 * uses the FPU.
 	 */
-	if (cpu->ts)
-		unlazy_fpu(current);
+	if (cpu->ts && user_has_fpu())
+		stts();
 
 	/*
 	 * SYSENTER is an optimized way of doing system calls. We can't allow
@@ -234,6 +235,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	if (boot_cpu_has(X86_FEATURE_SEP))
 		wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
 
+	/* Clear the host TS bit if it was set above. */
+	if (cpu->ts && user_has_fpu())
+		clts();
+
 	/*
 	 * If the Guest page faulted, then the cr2 register will tell us the
 	 * bad virtual address. We have to grab this now, because once we
@@ -249,7 +254,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	 * a different CPU. So all the critical stuff should be done
 	 * before this.
 	 */
-	else if (cpu->regs->trapnum == 7)
+	else if (cpu->regs->trapnum == 7 && !user_has_fpu())
		math_state_restore();
 }
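With the fixed SWITCHER_ADDR constant gone, the Switcher text sits at a run-time switcher_addr and the per-CPU struct lguest_pages array begins exactly one page later, which is what the new one-line lguest_pages() above computes. A standalone sketch of that layout arithmetic, using a hypothetical two-page stand-in for struct lguest_pages and a made-up base address rather than the driver's real switcher_addr:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Stand-in for struct lguest_pages: assumed here to be two pages per CPU
 * (a regs/stack page plus a read-only state page). */
struct lguest_pages_stub {
	char regs_page[PAGE_SIZE];
	char state_page[PAGE_SIZE];
};

/* One Switcher text page at switcher_addr, then the per-CPU array. */
static struct lguest_pages_stub *pages_for_cpu(unsigned long switcher_addr,
					       unsigned int cpu)
{
	return &((struct lguest_pages_stub *)(switcher_addr + PAGE_SIZE))[cpu];
}

int main(void)
{
	unsigned long base = 0xffc00000UL;	/* made-up switcher_addr */

	printf("cpu0 pages at %#lx\n", (unsigned long)pages_for_cpu(base, 0));
	printf("cpu1 pages at %#lx\n", (unsigned long)pages_for_cpu(base, 1));
	return 0;
}

With the two-page stand-in, CPU N's slot lands at base + PAGE_SIZE + N * 2 * PAGE_SIZE, which is the same arithmetic the new lguest_pages() expresses as array indexing.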
@@ -269,10 +274,10 @@
 static int emulate_insn(struct lg_cpu *cpu)
 {
 	u8 insn;
-	unsigned int insnlen = 0, in = 0, shift = 0;
+	unsigned int insnlen = 0, in = 0, small_operand = 0;
 	/*
 	 * The eip contains the *virtual* address of the Guest's instruction:
-	 * guest_pa just subtracts the Guest's page_offset.
+	 * walk the Guest's page tables to find the "physical" address.
 	 */
 	unsigned long physaddr = guest_pa(cpu, cpu->regs->eip);
@@ -300,11 +305,10 @@ static int emulate_insn(struct lg_cpu *cpu)
 	}
 
 	/*
-	 * 0x66 is an "operand prefix". It means it's using the upper 16 bits
-	 * of the eax register.
+	 * 0x66 is an "operand prefix". It means a 16, not 32 bit in/out.
 	 */
 	if (insn == 0x66) {
-		shift = 16;
+		small_operand = 1;
 		/* The instruction is 1 byte so far, read the next byte. */
 		insnlen = 1;
 		insn = lgread(cpu, physaddr + insnlen, u8);
@@ -340,11 +344,14 @@
 	 * traditionally means "there's nothing there".
 	 */
 	if (in) {
-		/* Lower bit tells is whether it's a 16 or 32 bit access */
-		if (insn & 0x1)
-			cpu->regs->eax = 0xFFFFFFFF;
-		else
-			cpu->regs->eax |= (0xFFFF << shift);
+		/* Lower bit tells means it's a 32/16 bit access */
+		if (insn & 0x1) {
+			if (small_operand)
+				cpu->regs->eax |= 0xFFFF;
+			else
+				cpu->regs->eax = 0xFFFFFFFF;
+		} else
+			cpu->regs->eax |= 0xFF;
 	}
 	/* Finally, we've "done" the instruction, so move past it. */
 	cpu->regs->eip += insnlen;
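The reworked if (in) branch fills %eax the way a real ISA bus would for an absent device: the read returns all ones, but only in the bytes the instruction actually writes, namely 8 bits for the 0xE4/0xEC forms, 16 bits when a 0x66 prefix was seen, and 32 bits otherwise. The same decision pulled out into a runnable userspace sketch, where decode_in_result() is a hypothetical helper and not part of the driver:

#include <stdint.h>
#include <stdio.h>

/*
 * What the emulation leaves in eax for an IN from a port nobody owns:
 * a clear low opcode bit (0xE4/0xEC) is the 8-bit form; a set low bit
 * (0xE5/0xED) is 16- or 32-bit depending on the 0x66 operand prefix.
 */
static uint32_t decode_in_result(uint8_t opcode, int small_operand, uint32_t eax)
{
	if (opcode & 0x1) {
		if (small_operand)
			eax |= 0xFFFF;		/* 16-bit IN writes only %ax */
		else
			eax = 0xFFFFFFFF;	/* 32-bit IN writes all of %eax */
	} else {
		eax |= 0xFF;			/* 8-bit IN writes only %al */
	}
	return eax;
}

int main(void)
{
	printf("%08x\n", decode_in_result(0xED, 0, 0x12345678)); /* ffffffff */
	printf("%08x\n", decode_in_result(0xED, 1, 0x12345678)); /* 1234ffff */
	printf("%08x\n", decode_in_result(0xE4, 0, 0x12345678)); /* 123456ff */
	return 0;
}

The previous code lumped the prefixed case in with the 32-bit one and OR'd 16 bits in for byte-sized reads; the rewrite sizes the all-ones value to the access width the opcode and prefix actually select.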
@@ -352,69 +359,6 @@
 	return 1;
 }
 
-/*
- * Our hypercalls mechanism used to be based on direct software interrupts.
- * After Anthony's "Refactor hypercall infrastructure" kvm patch, we decided to
- * change over to using kvm hypercalls.
- *
- * KVM_HYPERCALL is actually a "vmcall" instruction, which generates an invalid
- * opcode fault (fault 6) on non-VT cpus, so the easiest solution seemed to be
- * an *emulation approach*: if the fault was really produced by an hypercall
- * (is_hypercall() does exactly this check), we can just call the corresponding
- * hypercall host implementation function.
- *
- * But these invalid opcode faults are notably slower than software interrupts.
- * So we implemented the *patching (or rewriting) approach*: every time we hit
- * the KVM_HYPERCALL opcode in Guest code, we patch it to the old "int 0x1f"
- * opcode, so next time the Guest calls this hypercall it will use the
- * faster trap mechanism.
- *
- * Matias even benchmarked it to convince you: this shows the average cycle
- * cost of a hypercall. For each alternative solution mentioned above we've
- * made 5 runs of the benchmark:
- *
- * 1) direct software interrupt: 2915, 2789, 2764, 2721, 2898
- * 2) emulation technique: 3410, 3681, 3466, 3392, 3780
- * 3) patching (rewrite) technique: 2977, 2975, 2891, 2637, 2884
- *
- * One two-line function is worth a 20% hypercall speed boost!
- */
-static void rewrite_hypercall(struct lg_cpu *cpu)
-{
-	/*
-	 * This are the opcodes we use to patch the Guest. The opcode for "int
-	 * $0x1f" is "0xcd 0x1f" but vmcall instruction is 3 bytes long, so we
-	 * complete the sequence with a NOP (0x90).
-	 */
-	u8 insn[3] = {0xcd, 0x1f, 0x90};
-
-	__lgwrite(cpu, guest_pa(cpu, cpu->regs->eip), insn, sizeof(insn));
-	/*
-	 * The above write might have caused a copy of that page to be made
-	 * (if it was read-only). We need to make sure the Guest has
-	 * up-to-date pagetables. As this doesn't happen often, we can just
-	 * drop them all.
-	 */
-	guest_pagetable_clear_all(cpu);
-}
-
-static bool is_hypercall(struct lg_cpu *cpu)
-{
-	u8 insn[3];
-
-	/*
-	 * This must be the Guest kernel trying to do something.
-	 * The bottom two bits of the CS segment register are the privilege
-	 * level.
-	 */
-	if ((cpu->regs->cs & 3) != GUEST_PL)
-		return false;
-
-	/* Is it a vmcall? */
-	__lgread(cpu, insn, guest_pa(cpu, cpu->regs->eip), sizeof(insn));
-	return insn[0] == 0x0f && insn[1] == 0x01 && insn[2] == 0xc1;
-}
-
 /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
 void lguest_arch_handle_trap(struct lg_cpu *cpu)
 {
@@ -429,20 +373,6 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
 			if (emulate_insn(cpu))
 				return;
 		}
-		/*
-		 * If KVM is active, the vmcall instruction triggers a General
-		 * Protection Fault. Normally it triggers an invalid opcode
-		 * fault (6):
-		 */
-	case 6:
-		/*
-		 * We need to check if ring == GUEST_PL and faulting
-		 * instruction == vmcall.
-		 */
-		if (is_hypercall(cpu)) {
-			rewrite_hypercall(cpu);
-			return;
-		}
 		break;
 	case 14: /* We've intercepted a Page Fault. */
 		/*
@@ -486,7 +416,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
 		 * These values mean a real interrupt occurred, in which case
 		 * the Host handler has already been run. We just do a
 		 * friendly check if another process should now be run, then
-		 * return to run the Guest again
+		 * return to run the Guest again.
 		 */
 		cond_resched();
 		return;
@@ -536,7 +466,7 @@ void __init lguest_arch_host_init(void)
 	int i;
 
 	/*
-	 * Most of the i386/switcher.S doesn't care that it's been moved; on
+	 * Most of the x86/switcher_32.S doesn't care that it's been moved; on
 	 * Intel, jumps are relative, and it doesn't access any references to
 	 * external code or data.
 	 *
@@ -664,7 +594,7 @@ void __init lguest_arch_host_init(void)
 			clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE);
 	}
 	put_online_cpus();
-};
+}
 /*:*/
 
 void __exit lguest_arch_host_fini(void)
@@ -747,8 +677,6 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu)
 /*:*/
 
 /*L:030
- * lguest_arch_setup_regs()
- *
  * Most of the Guest's registers are left alone: we used get_zeroed_page() to
  * allocate the structure, so they will be 0.
  */
@@ -774,7 +702,7 @@ void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start)
 	 * interrupts are enabled. We always leave interrupts enabled while
 	 * running the Guest.
 	 */
-	regs->eflags = X86_EFLAGS_IF | 0x2;
+	regs->eflags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
 
 	/*
 	 * The "Extended Instruction Pointer" register says where the Guest is
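The last hunk swaps the bare 0x2 in the initial EFLAGS value for X86_EFLAGS_FIXED, the kernel's name for bit 1, which the architecture defines to always read as 1 (X86_EFLAGS_IF is bit 9, the interrupt-enable flag). A minimal userspace illustration of the same composition, with the constant values copied inline to keep it self-contained:

#include <stdint.h>
#include <stdio.h>

/* Values mirror the kernel's definitions in asm/processor-flags.h. */
#define X86_EFLAGS_FIXED	0x00000002u	/* bit 1: reserved, always 1 */
#define X86_EFLAGS_IF		0x00000200u	/* bit 9: interrupts enabled */

int main(void)
{
	uint32_t eflags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;

	/* Same 0x202 the old "X86_EFLAGS_IF | 0x2" produced, but named. */
	printf("initial guest eflags = %#x\n", eflags);
	return 0;
}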
