1 files changed, 32 insertions, 104 deletions
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index b4eb675a807..922a1acbf65 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -59,14 +59,13 @@ static struct {
 /* Offset from where switcher.S was compiled to where we've copied it */
 static unsigned long switcher_offset(void)
 {
-	return SWITCHER_ADDR - (unsigned long)start_switcher_text;
+	return switcher_addr - (unsigned long)start_switcher_text;
 }
 
-/* This cpu's struct lguest_pages. */
+/* This cpu's struct lguest_pages (after the Switcher text page) */
 static struct lguest_pages *lguest_pages(unsigned int cpu)
 {
-	return &(((struct lguest_pages *)
-		  (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]);
+	return &(((struct lguest_pages *)(switcher_addr + PAGE_SIZE))[cpu]);
 }
 
 static DEFINE_PER_CPU(struct lg_cpu *, lg_last_cpu);
@@ -90,8 +89,8 @@ static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages)
 	 * meanwhile).  If that's not the case, we pretend everything in the
 	 * Guest has changed.
 	 */
-	if (__get_cpu_var(lg_last_cpu) != cpu || cpu->last_pages != pages) {
-		__get_cpu_var(lg_last_cpu) = cpu;
+	if (__this_cpu_read(lg_last_cpu) != cpu || cpu->last_pages != pages) {
+		__this_cpu_write(lg_last_cpu, cpu);
 		cpu->last_pages = pages;
 		cpu->changed = CHANGED_ALL;
 	}
@@ -158,7 +157,7 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages)
 	 * stack, then the address of this call.  This stack layout happens to
 	 * exactly match the stack layout created by an interrupt...
 	 */
-	asm volatile("pushf; lcall *lguest_entry"
+	asm volatile("pushf; lcall *%4"
 		     /*
 		      * This is how we tell GCC that %eax ("a") and %ebx ("b")
 		      * are changed by this routine.  The "=" means output.
@@ -170,7 +169,9 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages)
 		      * physical address of the Guest's top-level page
 		      * directory.
 		      */
-		     : "0"(pages), "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir))
+		     : "0"(pages), 
+		       "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir)),
+		       "m"(lguest_entry)
 		     /*
 		      * We tell gcc that all these registers could change,
 		      * which means we don't have to save and restore them in
@@ -203,8 +204,8 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	 * we set it now, so we can trap and pass that trap to the Guest if it
 	 * uses the FPU.
 	 */
-	if (cpu->ts)
-		unlazy_fpu(current);
+	if (cpu->ts && user_has_fpu())
+		stts();
 
 	/*
 	 * SYSENTER is an optimized way of doing system calls.  We can't allow
@@ -234,6 +235,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	 if (boot_cpu_has(X86_FEATURE_SEP))
 		wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
 
+	/* Clear the host TS bit if it was set above. */
+	if (cpu->ts && user_has_fpu())
+		clts();
+
 	/*
 	 * If the Guest page faulted, then the cr2 register will tell us the
 	 * bad virtual address.  We have to grab this now, because once we
@@ -249,7 +254,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	 * a different CPU. So all the critical stuff should be done
 	 * before this.
 	 */
-	else if (cpu->regs->trapnum == 7)
+	else if (cpu->regs->trapnum == 7 && !user_has_fpu())
 		math_state_restore();
 }
 
@@ -269,10 +274,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 static int emulate_insn(struct lg_cpu *cpu)
 {
 	u8 insn;
-	unsigned int insnlen = 0, in = 0, shift = 0;
+	unsigned int insnlen = 0, in = 0, small_operand = 0;
 	/*
 	 * The eip contains the *virtual* address of the Guest's instruction:
-	 * guest_pa just subtracts the Guest's page_offset.
+	 * walk the Guest's page tables to find the "physical" address.
 	 */
 	unsigned long physaddr = guest_pa(cpu, cpu->regs->eip);
 
@@ -300,11 +305,10 @@ static int emulate_insn(struct lg_cpu *cpu)
 	}
 
 	/*
-	 * 0x66 is an "operand prefix".  It means it's using the upper 16 bits
-	 * of the eax register.
+	 * 0x66 is an "operand prefix".  It means a 16, not 32 bit in/out.
 	 */
 	if (insn == 0x66) {
-		shift = 16;
+		small_operand = 1;
 		/* The instruction is 1 byte so far, read the next byte. */
 		insnlen = 1;
 		insn = lgread(cpu, physaddr + insnlen, u8);
@@ -340,11 +344,14 @@ static int emulate_insn(struct lg_cpu *cpu)
 	 * traditionally means "there's nothing there".
 	 */
 	if (in) {
-		/* Lower bit tells is whether it's a 16 or 32 bit access */
-		if (insn & 0x1)
-			cpu->regs->eax = 0xFFFFFFFF;
-		else
-			cpu->regs->eax |= (0xFFFF << shift);
+		/* Lower bit set means a 32/16 bit access; clear means 8 bit */
+		if (insn & 0x1) {
+			if (small_operand)
+				cpu->regs->eax |= 0xFFFF;
+			else
+				cpu->regs->eax = 0xFFFFFFFF;
+		} else
+			cpu->regs->eax |= 0xFF;
 	}
 	/* Finally, we've "done" the instruction, so move past it. */
 	cpu->regs->eip += insnlen;
@@ -352,69 +359,6 @@ static int emulate_insn(struct lg_cpu *cpu)
 	return 1;
 }
 
-/*
- * Our hypercalls mechanism used to be based on direct software interrupts.
- * After Anthony's "Refactor hypercall infrastructure" kvm patch, we decided to
- * change over to using kvm hypercalls.
- *
- * KVM_HYPERCALL is actually a "vmcall" instruction, which generates an invalid
- * opcode fault (fault 6) on non-VT cpus, so the easiest solution seemed to be
- * an *emulation approach*: if the fault was really produced by an hypercall
- * (is_hypercall() does exactly this check), we can just call the corresponding
- * hypercall host implementation function.
- *
- * But these invalid opcode faults are notably slower than software interrupts.
- * So we implemented the *patching (or rewriting) approach*: every time we hit
- * the KVM_HYPERCALL opcode in Guest code, we patch it to the old "int 0x1f"
- * opcode, so next time the Guest calls this hypercall it will use the
- * faster trap mechanism.
- *
- * Matias even benchmarked it to convince you: this shows the average cycle
- * cost of a hypercall.  For each alternative solution mentioned above we've
- * made 5 runs of the benchmark:
- *
- * 1) direct software interrupt: 2915, 2789, 2764, 2721, 2898
- * 2) emulation technique: 3410, 3681, 3466, 3392, 3780
- * 3) patching (rewrite) technique: 2977, 2975, 2891, 2637, 2884
- *
- * One two-line function is worth a 20% hypercall speed boost!
- */
-static void rewrite_hypercall(struct lg_cpu *cpu)
-{
-	/*
-	 * This are the opcodes we use to patch the Guest.  The opcode for "int
-	 * $0x1f" is "0xcd 0x1f" but vmcall instruction is 3 bytes long, so we
-	 * complete the sequence with a NOP (0x90).
-	 */
-	u8 insn[3] = {0xcd, 0x1f, 0x90};
-
-	__lgwrite(cpu, guest_pa(cpu, cpu->regs->eip), insn, sizeof(insn));
-	/*
-	 * The above write might have caused a copy of that page to be made
-	 * (if it was read-only).  We need to make sure the Guest has
-	 * up-to-date pagetables.  As this doesn't happen often, we can just
-	 * drop them all.
-	 */
-	guest_pagetable_clear_all(cpu);
-}
-
-static bool is_hypercall(struct lg_cpu *cpu)
-{
-	u8 insn[3];
-
-	/*
-	 * This must be the Guest kernel trying to do something.
-	 * The bottom two bits of the CS segment register are the privilege
-	 * level.
-	 */
-	if ((cpu->regs->cs & 3) != GUEST_PL)
-		return false;
-
-	/* Is it a vmcall? */
-	__lgread(cpu, insn, guest_pa(cpu, cpu->regs->eip), sizeof(insn));
-	return insn[0] == 0x0f && insn[1] == 0x01 && insn[2] == 0xc1;
-}
-
 /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
 void lguest_arch_handle_trap(struct lg_cpu *cpu)
 {
@@ -429,20 +373,6 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
 			if (emulate_insn(cpu))
 				return;
 		}
-		/*
-		 * If KVM is active, the vmcall instruction triggers a General
-		 * Protection Fault.  Normally it triggers an invalid opcode
-		 * fault (6):
-		 */
-	case 6:
-		/*
-		 * We need to check if ring == GUEST_PL and faulting
-		 * instruction == vmcall.
-		 */
-		if (is_hypercall(cpu)) {
-			rewrite_hypercall(cpu);
-			return;
-		}
 		break;
 	case 14: /* We've intercepted a Page Fault. */
 		/*
@@ -486,7 +416,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
 		 * These values mean a real interrupt occurred, in which case
 		 * the Host handler has already been run. We just do a
 		 * friendly check if another process should now be run, then
-		 * return to run the Guest again
+		 * return to run the Guest again.
 		 */
 		cond_resched();
 		return;
@@ -536,7 +466,7 @@ void __init lguest_arch_host_init(void)
 	int i;
 
 	/*
-	 * Most of the i386/switcher.S doesn't care that it's been moved; on
+	 * Most of the x86/switcher_32.S doesn't care that it's been moved; on
 	 * Intel, jumps are relative, and it doesn't access any references to
 	 * external code or data.
 	 *
@@ -664,7 +594,7 @@ void __init lguest_arch_host_init(void)
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE);
 	}
 	put_online_cpus();
-};
+}
 /*:*/
 
 void __exit lguest_arch_host_fini(void)
@@ -747,8 +677,6 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu)
 /*:*/
 
 /*L:030
- * lguest_arch_setup_regs()
- *
  * Most of the Guest's registers are left alone: we used get_zeroed_page() to
  * allocate the structure, so they will be 0.
  */
@@ -774,7 +702,7 @@ void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start)
 	 * interrupts are enabled.  We always leave interrupts enabled while
 	 * running the Guest.
 	 */
-	regs->eflags = X86_EFLAGS_IF | 0x2;
+	regs->eflags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
 
 	/*
 	 * The "Extended Instruction Pointer" register says where the Guest is