From 7f662273e476e2d7ff44f411fa9f17c946480100 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 10 Dec 2012 11:42:30 +0200 Subject: KVM: emulator: implement AAD instruction Windows2000 uses it during boot. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=50921 Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a27e7637110..47d62e16f9e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2852,6 +2852,27 @@ static int em_das(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_aad(struct x86_emulate_ctxt *ctxt) +{ + u8 al = ctxt->dst.val & 0xff; + u8 ah = (ctxt->dst.val >> 8) & 0xff; + + al = (al + (ah * ctxt->src.val)) & 0xff; + + ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al; + + ctxt->eflags &= ~(X86_EFLAGS_PF | X86_EFLAGS_SF | X86_EFLAGS_ZF); + + if (!al) + ctxt->eflags |= X86_EFLAGS_ZF; + if (!(al & 1)) + ctxt->eflags |= X86_EFLAGS_PF; + if (al & 0x80) + ctxt->eflags |= X86_EFLAGS_SF; + + return X86EMUL_CONTINUE; +} + static int em_call(struct x86_emulate_ctxt *ctxt) { long rel = ctxt->src.val; @@ -3801,7 +3822,7 @@ static const struct opcode opcode_table[256] = { D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), /* 0xD0 - 0xD7 */ D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), - N, N, N, N, + N, I(DstAcc | SrcImmByte | No64, em_aad), N, N, /* 0xD8 - 0xDF */ N, N, N, N, N, N, N, N, /* 0xE0 - 0xE7 */ -- cgit v1.2.3-18-g5258 From 5e2c688351f4aee9981918661b6c1679f4155f06 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 6 Dec 2012 21:55:10 -0200 Subject: KVM: x86: fix mov immediate emulation for 64-bit operands MOV immediate instruction (opcodes 0xB8-0xBF) may take 64-bit operand. The previous emulation implementation assumes the operand is no longer than 32. Adding OpImm64 for this matter. Fixes https://bugzilla.redhat.com/show_bug.cgi?id=881579 Signed-off-by: Nadav Amit Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 47d62e16f9e..c7547b3fd52 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -43,7 +43,7 @@ #define OpCL 9ull /* CL register (for shifts) */ #define OpImmByte 10ull /* 8-bit sign extended immediate */ #define OpOne 11ull /* Implied 1 */ -#define OpImm 12ull /* Sign extended immediate */ +#define OpImm 12ull /* Sign extended up to 32-bit immediate */ #define OpMem16 13ull /* Memory operand (16-bit). */ #define OpMem32 14ull /* Memory operand (32-bit). 
*/ #define OpImmU 15ull /* Immediate operand, zero extended */ @@ -58,6 +58,7 @@ #define OpFS 24ull /* FS */ #define OpGS 25ull /* GS */ #define OpMem8 26ull /* 8-bit zero extended memory operand */ +#define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */ #define OpBits 5 /* Width of operand field */ #define OpMask ((1ull << OpBits) - 1) @@ -101,6 +102,7 @@ #define SrcMemFAddr (OpMemFAddr << SrcShift) #define SrcAcc (OpAcc << SrcShift) #define SrcImmU16 (OpImmU16 << SrcShift) +#define SrcImm64 (OpImm64 << SrcShift) #define SrcDX (OpDX << SrcShift) #define SrcMem8 (OpMem8 << SrcShift) #define SrcMask (OpMask << SrcShift) @@ -3807,7 +3809,7 @@ static const struct opcode opcode_table[256] = { /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ - X8(I(DstReg | SrcImm | Mov, em_mov)), + X8(I(DstReg | SrcImm64 | Mov, em_mov)), /* 0xC0 - 0xC7 */ D2bv(DstMem | SrcImmByte | ModRM), I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), @@ -3971,6 +3973,9 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op, case 4: op->val = insn_fetch(s32, ctxt); break; + case 8: + op->val = insn_fetch(s64, ctxt); + break; } if (!sign_extension) { switch (op->bytes) { @@ -4049,6 +4054,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, case OpImm: rc = decode_imm(ctxt, op, imm_size(ctxt), true); break; + case OpImm64: + rc = decode_imm(ctxt, op, ctxt->op_bytes, true); + break; case OpMem8: ctxt->memop.bytes = 1; goto mem_common; -- cgit v1.2.3-18-g5258 From f3200d00ea42e485772ff92d6d649aa8eeb640c0 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 10 Dec 2012 14:05:55 +0200 Subject: KVM: inject ExtINT interrupt before APIC interrupts According to Intel SDM Volume 3 Section 10.8.1 "Interrupt Handling with the Pentium 4 and Intel Xeon Processors" and Section 10.8.2 "Interrupt Handling with the P6 Family and Pentium Processors" ExtINT interrupts are sent directly to the processor core for handling. Currently KVM checks APIC before it considers ExtINT interrupts for injection which is backwards from the spec. Make code behave according to the SDM. 
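As a reading aid, the ordering this patch establishes can be reduced to a few lines. The sketch below is a self-contained toy model (struct toy_vcpu and its fields are invented for illustration, they are not the real KVM interfaces): the PIC's ExtINT output is polled first, and the local APIC is consulted only when no ExtINT is pending, mirroring the SDM rule quoted above.

/* Toy model of the new injection order: ExtINT (8259 PIC) before LAPIC. */
struct toy_vcpu {
	int pic_output;       /* 1 if the PIC has an ExtINT pending          */
	int apic_vector;      /* pending LAPIC vector, or -1 if none         */
	int accepts_pic_intr; /* is this vcpu wired to accept ExtINT at all? */
};

static int toy_cpu_has_interrupt(const struct toy_vcpu *v)
{
	if (v->accepts_pic_intr && v->pic_output)
		return 1;                    /* ExtINT wins, checked first */
	return v->apic_vector != -1;         /* otherwise ask the APIC     */
}

Before the patch the two checks ran in the opposite order, so a pending APIC interrupt could shadow an ExtINT that the SDM says is delivered directly to the core.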
Signed-off-by: Gleb Natapov Acked-by: "Zhang, Yang Z" Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/i8259.c | 2 ++ arch/x86/kvm/irq.c | 26 ++++++++------------------ 2 files changed, 10 insertions(+), 18 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 848206df096..cc31f7c06d3 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -241,6 +241,8 @@ int kvm_pic_read_irq(struct kvm *kvm) int irq, irq2, intno; struct kvm_pic *s = pic_irqchip(kvm); + s->output = 0; + pic_lock(s); irq = pic_get_irq(&s->pics[0]); if (irq >= 0) { diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 7e06ba1618b..ebd98d0c4f6 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -48,14 +48,10 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) if (!irqchip_in_kernel(v->kvm)) return v->arch.interrupt.pending; - if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ - if (kvm_apic_accept_pic_intr(v)) { - s = pic_irqchip(v->kvm); /* PIC */ - return s->output; - } else - return 0; - } - return 1; + if (kvm_apic_accept_pic_intr(v) && pic_irqchip(v->kvm)->output) + return pic_irqchip(v->kvm)->output; /* PIC */ + + return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ } EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); @@ -65,20 +61,14 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); int kvm_cpu_get_interrupt(struct kvm_vcpu *v) { struct kvm_pic *s; - int vector; if (!irqchip_in_kernel(v->kvm)) return v->arch.interrupt.nr; - vector = kvm_get_apic_interrupt(v); /* APIC */ - if (vector == -1) { - if (kvm_apic_accept_pic_intr(v)) { - s = pic_irqchip(v->kvm); - s->output = 0; /* PIC */ - vector = kvm_pic_read_irq(v->kvm); - } - } - return vector; + if (kvm_apic_accept_pic_intr(v) && pic_irqchip(v->kvm)->output) + return kvm_pic_read_irq(v->kvm); /* PIC */ + + return kvm_get_apic_interrupt(v); /* APIC */ } EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); -- cgit v1.2.3-18-g5258 From bbacc0c111c3c5d1f3192b8cc1642b9c3954f80d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 10 Dec 2012 10:33:09 -0700 Subject: KVM: Rename KVM_MEMORY_SLOTS -> KVM_USER_MEM_SLOTS It's easy to confuse KVM_MEMORY_SLOTS and KVM_MEM_SLOTS_NUM. One is the user accessible slots and the other is user + private. Make this more obvious. Reviewed-by: Gleb Natapov Signed-off-by: Alex Williamson Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 76f54461f7c..816074757c9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2518,7 +2518,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_MAX_VCPUS; break; case KVM_CAP_NR_MEMSLOTS: - r = KVM_MEMORY_SLOTS; + r = KVM_USER_MEM_SLOTS; break; case KVM_CAP_PV_MMU: /* obsolete */ r = 0; @@ -3435,7 +3435,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) mutex_lock(&kvm->slots_lock); r = -EINVAL; - if (log->slot >= KVM_MEMORY_SLOTS) + if (log->slot >= KVM_USER_MEM_SLOTS) goto out; memslot = id_to_memslot(kvm->memslots, log->slot); @@ -6845,7 +6845,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; /* Prevent internal slot pages from being moved by fork()/COW. 
*/ - if (memslot->id >= KVM_MEMORY_SLOTS) + if (memslot->id >= KVM_USER_MEM_SLOTS) map_flags = MAP_SHARED | MAP_ANONYMOUS; /*To keep backward compatibility with older userspace, -- cgit v1.2.3-18-g5258 From f82a8cfe9354f5cdea55ebeceba3fd19051d3ee8 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 10 Dec 2012 10:33:21 -0700 Subject: KVM: struct kvm_memory_slot.user_alloc -> bool There's no need for this to be an int, it holds a boolean. Move to the end of the struct for alignment. Reviewed-by: Gleb Natapov Signed-off-by: Alex Williamson Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 6 +++--- arch/x86/kvm/x86.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9120ae1901e..b3101e36807 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3667,7 +3667,7 @@ static int alloc_apic_access_page(struct kvm *kvm) kvm_userspace_mem.flags = 0; kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); if (r) goto out; @@ -3697,7 +3697,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) kvm_userspace_mem.guest_phys_addr = kvm->arch.ept_identity_map_addr; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); if (r) goto out; @@ -4251,7 +4251,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) .flags = 0, }; - ret = kvm_set_memory_region(kvm, &tss_mem, 0); + ret = kvm_set_memory_region(kvm, &tss_mem, false); if (ret) return ret; kvm->arch.tss_addr = addr; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 816074757c9..1c9c834b72f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6839,7 +6839,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - int user_alloc) + bool user_alloc) { int npages = memslot->npages; int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; @@ -6875,7 +6875,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, - int user_alloc) + bool user_alloc) { int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; -- cgit v1.2.3-18-g5258 From e11ae1a102b46f76441e328a2743ae5d6e201423 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Fri, 14 Dec 2012 15:23:16 +0200 Subject: KVM: remove unused variable. 
Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/irq.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index ebd98d0c4f6..b111aee815f 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -43,8 +43,6 @@ EXPORT_SYMBOL(kvm_cpu_has_pending_timer); */ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { - struct kvm_pic *s; - if (!irqchip_in_kernel(v->kvm)) return v->arch.interrupt.pending; @@ -60,8 +58,6 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); */ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) { - struct kvm_pic *s; - if (!irqchip_in_kernel(v->kvm)) return v->arch.interrupt.nr; -- cgit v1.2.3-18-g5258 From d4b06c2d4cce466e2d62163c0a954e1b2ce96f8b Mon Sep 17 00:00:00 2001 From: Nickolai Zeldovich Date: Sat, 15 Dec 2012 06:34:37 -0500 Subject: kvm: fix i8254 counter 0 wraparound The kvm i8254 emulation for counter 0 (but not for counters 1 and 2) has at least two bugs in mode 0: 1. The OUT bit, computed by pit_get_out(), is never set high. 2. The counter value, computed by pit_get_count(), wraps back around to the initial counter value, rather than wrapping back to 0xFFFF (which is the behavior described in the comment in __kpit_elapsed, the behavior implemented by qemu, and the behavior observed on AMD hardware). The bug stems from __kpit_elapsed computing the elapsed time mod the initial counter value (stored as nanoseconds in ps->period). This is both unnecessary (none of the callers of kpit_elapsed expect the value to be at most the initial counter value) and incorrect (it causes pit_get_count to appear to wrap around to the initial counter value rather than 0xFFFF). Removing this mod from __kpit_elapsed fixes both of the above bugs. Signed-off-by: Nickolai Zeldovich Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/i8254.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 11300d2fa71..c1d30b2fc9b 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -122,7 +122,6 @@ static s64 __kpit_elapsed(struct kvm *kvm) */ remaining = hrtimer_get_remaining(&ps->timer); elapsed = ps->period - ktime_to_ns(remaining); - elapsed = mod_64(elapsed, ps->period); return elapsed; } -- cgit v1.2.3-18-g5258 From 07f42f5f25dc214a33214159fc8b62b984b713eb Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 12 Dec 2012 19:10:49 +0200 Subject: KVM: VMX: cleanup rmode_segment_valid() Set segment fields explicitly instead of using binary operations. No behaviour changes. 
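The constant 0xf3 that both the old mask-based check and the new field-by-field version boil down to is easier to read unpacked. The macros below are illustrative only (they are not the kernel's AR_* helpers) and assume the standard VMX access-rights bit layout:

/*
 * 0xf3 == 1111 0011b under the VMX access-rights layout:
 *   bits 0-3  type = 0x3  (read/write data segment, accessed)
 *   bit  4    S    = 1    (code/data, not a system descriptor)
 *   bits 5-6  DPL  = 3
 *   bit  7    P    = 1    (present)
 * which is exactly the descriptor shape vm86 mode expects for every segment.
 */
#define TOY_AR_TYPE(ar) ((ar) & 0x0f)
#define TOY_AR_S(ar)    (((ar) >> 4) & 1)
#define TOY_AR_DPL(ar)  (((ar) >> 5) & 3)
#define TOY_AR_P(ar)    (((ar) >> 7) & 1)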
Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b3101e36807..265fdd37b24 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3380,13 +3380,16 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) u32 ar; vmx_get_segment(vcpu, &var, seg); + var.dpl = 0x3; + var.g = 0; + var.db = 0; ar = vmx_segment_access_rights(&var); if (var.base != (var.selector << 4)) return false; if (var.limit < 0xffff) return false; - if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3) + if (ar != 0xf3) return false; return true; -- cgit v1.2.3-18-g5258 From 0647f4aa8c58a7e5adb873b485c83e0c93d9c6d1 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 12 Dec 2012 19:10:50 +0200 Subject: KVM: VMX: relax check for CS register in rmode_segment_valid() rmode_segment_valid() checks if segment descriptor can be used to enter vm86 mode. VMX spec mandates that in vm86 mode CS register will be of type data, not code. Lets allow guest entry with vm86 mode if the only problem with CS register is incorrect type. Otherwise entire real mode will be emulated. Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 265fdd37b24..7c2c05406b6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3383,6 +3383,8 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) var.dpl = 0x3; var.g = 0; var.db = 0; + if (seg == VCPU_SREG_CS) + var.type = 0x3; ar = vmx_segment_access_rights(&var); if (var.base != (var.selector << 4)) -- cgit v1.2.3-18-g5258 From c6ad115348035f474d6fb81005bb4764bdd128ef Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 12 Dec 2012 19:10:51 +0200 Subject: KVM: VMX: return correct segment limit and flags for CS/SS registers in real mode VMX without unrestricted mode cannot virtualize real mode, so if emulate_invalid_guest_state=0 kvm uses vm86 mode to approximate it. Sometimes, when guest moves from protected mode to real mode, it leaves segment descriptors in a state not suitable for use by vm86 mode virtualization, so we keep shadow copy of segment descriptors for internal use and load fake register to VMCS for guest entry to succeed. Till now we kept shadow for all segments except SS and CS (for SS and CS we returned parameters directly from VMCS), but since commit a5625189f6810 emulator enforces segment limits in real mode. This causes #GP during move from protected mode to real mode when emulator fetches first instruction after moving to real mode since it uses incorrect CS base and limit to linearize the %rip. Fix by keeping shadow for SS and CS too. 
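To see why a stale CS shadow breaks the very first fetch after the mode switch, here is a rough, self-contained sketch of the real-mode linearization the message refers to (the function and parameter names are invented):

/* Real-mode fetch: the emulator forms the linear address from the cached
 * CS base and checks the offset against the cached CS limit. If the cache
 * still holds protected-mode values, the limit check fires and produces
 * the spurious #GP described above. */
static int toy_linearize_fetch(unsigned cs_base, unsigned cs_limit,
			       unsigned short ip, unsigned *linear)
{
	if (ip > cs_limit)
		return -1;              /* #GP: stale limit from protected mode */
	*linear = cs_base + ip;         /* real mode expects base == sel << 4   */
	return 0;
}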
Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7c2c05406b6..792d9cce884 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2856,6 +2856,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); vmx->emulation_required = 1; vmx->rmode.vm86_active = 1; @@ -3171,10 +3173,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx = to_vmx(vcpu); u32 ar; - if (vmx->rmode.vm86_active - && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES - || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS - || seg == VCPU_SREG_GS)) { + if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { *var = vmx->rmode.segs[seg]; if (seg == VCPU_SREG_TR || var->selector == vmx_read_guest_seg_selector(vmx, seg)) -- cgit v1.2.3-18-g5258 From beb853ffeccbfa626f30038e816d61187103c455 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 12 Dec 2012 19:10:52 +0200 Subject: KVM: VMX: use fix_rmode_seg() to fix all code/data segments The code for SS and CS does the same thing fix_rmode_seg() is doing. Use it instead of hand crafted code. Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 792d9cce884..9e784c2c978 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3315,30 +3315,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, * unrestricted guest like Westmere to older host that don't have * unrestricted guest like Nehelem. */ - if (vmx->rmode.vm86_active) { - switch (seg) { - case VCPU_SREG_CS: - vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); - vmcs_write32(GUEST_CS_LIMIT, 0xffff); - if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000) - vmcs_writel(GUEST_CS_BASE, 0xf0000); - vmcs_write16(GUEST_CS_SELECTOR, - vmcs_readl(GUEST_CS_BASE) >> 4); - break; - case VCPU_SREG_ES: - case VCPU_SREG_DS: - case VCPU_SREG_GS: - case VCPU_SREG_FS: - fix_rmode_seg(seg, &vmx->rmode.segs[seg]); - break; - case VCPU_SREG_SS: - vmcs_write16(GUEST_SS_SELECTOR, - vmcs_readl(GUEST_SS_BASE) >> 4); - vmcs_write32(GUEST_SS_LIMIT, 0xffff); - vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); - break; - } - } + if (vmx->rmode.vm86_active && var->s) + fix_rmode_seg(seg, &vmx->rmode.segs[seg]); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) -- cgit v1.2.3-18-g5258 From 39dcfb95def4646ecdb76ea7a7491e8420dce7a7 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 12 Dec 2012 19:10:53 +0200 Subject: KVM: VMX: remove redundant code from vmx_set_segment() Segment descriptor's base is fixed by call to fix_rmode_seg(). Not need to do it twice. 
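For reference, the canonical vm86-compatible shape that fix_rmode_seg() forces a segment into can be sketched as follows (the struct and function names are invented; the constants are the ones visible in the patches above):

/* Illustrative only: selector and base tied together by base = selector << 4,
 * a 64K limit, and the 0xf3 access rights discussed earlier. */
struct toy_seg {
	unsigned short selector;
	unsigned base, limit, ar;
};

static void toy_fix_rmode_seg(struct toy_seg *s)
{
	s->selector = s->base >> 4;
	s->base    &= 0xffff0;
	s->limit    = 0xffff;
	s->ar       = 0xf3;
}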
Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9e784c2c978..9b8edd2298e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3268,7 +3268,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - u32 ar; + u32 ar = 0; vmx_segment_cache_clear(vmx); @@ -3280,15 +3280,9 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, vmcs_writel(sf->base, var->base); vmcs_write32(sf->limit, var->limit); vmcs_write16(sf->selector, var->selector); - if (vmx->rmode.vm86_active && var->s) { + if (vmx->rmode.vm86_active && var->s) vmx->rmode.segs[seg] = *var; - /* - * Hack real-mode segments into vm86 compatibility. - */ - if (var->base == 0xffff0000 && var->selector == 0xf000) - vmcs_writel(sf->base, 0xf0000); - ar = 0xf3; - } else + else ar = vmx_segment_access_rights(var); /* -- cgit v1.2.3-18-g5258 From 1ecd50a9474c4bfa3129d90cfcf1c1eb4fbf19e7 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 12 Dec 2012 19:10:54 +0200 Subject: KVM: VMX: clean-up vmx_set_segment() Move all vm86_active logic into one place. Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9b8edd2298e..4bd1c22db39 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3271,19 +3271,21 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, u32 ar = 0; vmx_segment_cache_clear(vmx); + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); - if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { - vmcs_write16(sf->selector, var->selector); - vmx->rmode.segs[VCPU_SREG_TR] = *var; + if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { + vmx->rmode.segs[seg] = *var; + if (seg == VCPU_SREG_TR) + vmcs_write16(sf->selector, var->selector); + else if (var->s) + fix_rmode_seg(seg, &vmx->rmode.segs[seg]); return; } + vmcs_writel(sf->base, var->base); vmcs_write32(sf->limit, var->limit); vmcs_write16(sf->selector, var->selector); - if (vmx->rmode.vm86_active && var->s) - vmx->rmode.segs[seg] = *var; - else - ar = vmx_segment_access_rights(var); + ar = vmx_segment_access_rights(var); /* * Fix the "Accessed" bit in AR field of segment registers for older @@ -3300,17 +3302,6 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, ar |= 0x1; /* Accessed */ vmcs_write32(sf->ar_bytes, ar); - __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); - - /* - * Fix segments for real mode guest in hosts that don't have - * "unrestricted_mode" or it was disabled. - * This is done to allow migration of the guests from hosts with - * unrestricted guest like Westmere to older host that don't have - * unrestricted guest like Nehelem. 
- */ - if (vmx->rmode.vm86_active && var->s) - fix_rmode_seg(seg, &vmx->rmode.segs[seg]); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) -- cgit v1.2.3-18-g5258 From f924d66d278d5da890f3098805b0450a4ef66c32 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 12 Dec 2012 19:10:55 +0200 Subject: KVM: VMX: remove unneeded temporary variable from vmx_set_segment() Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4bd1c22db39..23d5aec7807 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3268,7 +3268,6 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - u32 ar = 0; vmx_segment_cache_clear(vmx); __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); @@ -3285,7 +3284,6 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, vmcs_writel(sf->base, var->base); vmcs_write32(sf->limit, var->limit); vmcs_write16(sf->selector, var->selector); - ar = vmx_segment_access_rights(var); /* * Fix the "Accessed" bit in AR field of segment registers for older @@ -3299,9 +3297,9 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, * kvm hack. */ if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) - ar |= 0x1; /* Accessed */ + var->type |= 0x1; /* Accessed */ - vmcs_write32(sf->ar_bytes, ar); + vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) -- cgit v1.2.3-18-g5258 From 3a78a4f46302bfc83602a53dfa4dcbe76a7a1f5f Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 20 Dec 2012 16:57:42 +0200 Subject: KVM: emulator: drop RPL check from linearize() function According to Intel SDM Vol3 Section 5.5 "Privilege Levels" and 5.6 "Privilege Level Checking When Accessing Data Segments" RPL checking is done during loading of a segment selector, not during data access. We already do checking during segment selector loading, so drop the check during data access. Checking RPL during data access triggers #GP if after transition from real mode to protected mode RPL bits in a segment selector are set. 
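The SDM rule the message relies on is compact enough to state as code. A minimal sketch (invented name, numeric comparison as in the SDM) of the check performed when a data-segment selector is loaded:

/* Data-segment load check, SDM Vol 3, Sections 5.5/5.6: both the CPL and the
 * selector's RPL must be numerically <= the descriptor's DPL, i.e.
 * max(CPL, RPL) <= DPL. Later data accesses through the loaded segment do not
 * re-check RPL, which is why linearize() can drop its RPL test. */
static int toy_data_seg_load_ok(unsigned cpl, unsigned rpl, unsigned dpl)
{
	return cpl <= dpl && rpl <= dpl;
}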
Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c7547b3fd52..a3d31e3d987 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -665,7 +665,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, ulong la; u32 lim; u16 sel; - unsigned cpl, rpl; + unsigned cpl; la = seg_base(ctxt, addr.seg) + addr.ea; switch (ctxt->mode) { @@ -699,11 +699,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, goto bad; } cpl = ctxt->ops->cpl(ctxt); - if (ctxt->mode == X86EMUL_MODE_REAL) - rpl = 0; - else - rpl = sel & 3; - cpl = max(cpl, rpl); if (!(desc.type & 8)) { /* data segment */ if (cpl > desc.dpl) -- cgit v1.2.3-18-g5258 From 045a282ca41505184e8fc805335d1f5aae0c8a03 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 20 Dec 2012 16:57:43 +0200 Subject: KVM: emulator: implement fninit, fnstsw, fnstcw Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 125 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a3d31e3d987..53c5ad6851d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -115,6 +115,7 @@ #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */ #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ +#define Escape (5<<15) /* Escape to coprocessor instruction */ #define Sse (1<<18) /* SSE Vector instruction */ /* Generic ModRM decode. */ #define ModRM (1<<19) @@ -166,6 +167,7 @@ struct opcode { const struct opcode *group; const struct group_dual *gdual; const struct gprefix *gprefix; + const struct escape *esc; } u; int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; @@ -182,6 +184,11 @@ struct gprefix { struct opcode pfx_f3; }; +struct escape { + struct opcode op[8]; + struct opcode high[64]; +}; + /* EFLAGS bit definitions. 
*/ #define EFLG_ID (1<<21) #define EFLG_VIP (1<<20) @@ -991,6 +998,53 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) ctxt->ops->put_fpu(ctxt); } +static int em_fninit(struct x86_emulate_ctxt *ctxt) +{ + if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) + return emulate_nm(ctxt); + + ctxt->ops->get_fpu(ctxt); + asm volatile("fninit"); + ctxt->ops->put_fpu(ctxt); + return X86EMUL_CONTINUE; +} + +static int em_fnstcw(struct x86_emulate_ctxt *ctxt) +{ + u16 fcw; + + if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) + return emulate_nm(ctxt); + + ctxt->ops->get_fpu(ctxt); + asm volatile("fnstcw %0": "+m"(fcw)); + ctxt->ops->put_fpu(ctxt); + + /* force 2 byte destination */ + ctxt->dst.bytes = 2; + ctxt->dst.val = fcw; + + return X86EMUL_CONTINUE; +} + +static int em_fnstsw(struct x86_emulate_ctxt *ctxt) +{ + u16 fsw; + + if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) + return emulate_nm(ctxt); + + ctxt->ops->get_fpu(ctxt); + asm volatile("fnstsw %0": "+m"(fsw)); + ctxt->ops->put_fpu(ctxt); + + /* force 2 byte destination */ + ctxt->dst.bytes = 2; + ctxt->dst.val = fsw; + + return X86EMUL_CONTINUE; +} + static void decode_register_operand(struct x86_emulate_ctxt *ctxt, struct operand *op) { @@ -3590,6 +3644,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } +#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } #define II(_f, _e, _i) \ { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } @@ -3725,6 +3780,69 @@ static const struct gprefix pfx_vmovntpx = { I(0, em_mov), N, N, N, }; +static const struct escape escape_d9 = { { + N, N, N, N, N, N, N, I(DstMem, em_fnstcw), +}, { + /* 0xC0 - 0xC7 */ + N, N, N, N, N, N, N, N, + /* 0xC8 - 0xCF */ + N, N, N, N, N, N, N, N, + /* 0xD0 - 0xC7 */ + N, N, N, N, N, N, N, N, + /* 0xD8 - 0xDF */ + N, N, N, N, N, N, N, N, + /* 0xE0 - 0xE7 */ + N, N, N, N, N, N, N, N, + /* 0xE8 - 0xEF */ + N, N, N, N, N, N, N, N, + /* 0xF0 - 0xF7 */ + N, N, N, N, N, N, N, N, + /* 0xF8 - 0xFF */ + N, N, N, N, N, N, N, N, +} }; + +static const struct escape escape_db = { { + N, N, N, N, N, N, N, N, +}, { + /* 0xC0 - 0xC7 */ + N, N, N, N, N, N, N, N, + /* 0xC8 - 0xCF */ + N, N, N, N, N, N, N, N, + /* 0xD0 - 0xC7 */ + N, N, N, N, N, N, N, N, + /* 0xD8 - 0xDF */ + N, N, N, N, N, N, N, N, + /* 0xE0 - 0xE7 */ + N, N, N, I(ImplicitOps, em_fninit), N, N, N, N, + /* 0xE8 - 0xEF */ + N, N, N, N, N, N, N, N, + /* 0xF0 - 0xF7 */ + N, N, N, N, N, N, N, N, + /* 0xF8 - 0xFF */ + N, N, N, N, N, N, N, N, +} }; + +static const struct escape escape_dd = { { + N, N, N, N, N, N, N, I(DstMem, em_fnstsw), +}, { + /* 0xC0 - 0xC7 */ + N, N, N, N, N, N, N, N, + /* 0xC8 - 0xCF */ + N, N, N, N, N, N, N, N, + /* 0xD0 - 0xC7 */ + N, N, N, N, N, N, N, N, + /* 0xD8 - 0xDF */ + N, N, N, N, N, N, N, N, + /* 0xE0 - 0xE7 */ + N, N, N, N, N, N, N, N, + /* 0xE8 - 0xEF */ + N, N, N, N, N, N, N, N, + /* 0xF0 - 0xF7 */ + N, N, N, N, N, N, N, N, + /* 0xF8 - 0xFF */ + N, N, N, N, N, N, N, N, +} }; + static const struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ I6ALU(Lock, em_add), @@ -3821,7 +3939,7 @@ static const struct opcode opcode_table[256] = { D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), N, I(DstAcc | SrcImmByte | No64, em_aad), N, N, 
/* 0xD8 - 0xDF */ - N, N, N, N, N, N, N, N, + N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N, /* 0xE0 - 0xE7 */ X3(I(SrcImmByte, em_loop)), I(SrcImmByte, em_jcxz), @@ -4246,6 +4364,12 @@ done_prefixes: case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; } break; + case Escape: + if (ctxt->modrm > 0xbf) + opcode = opcode.u.esc->high[ctxt->modrm - 0xc0]; + else + opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; + break; default: return EMULATION_FAILED; } -- cgit v1.2.3-18-g5258 From 89efbed02cfd7e9ce3324de0b44a70ee1c716fac Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 20 Dec 2012 16:57:44 +0200 Subject: KVM: VMX: make rmode_segment_valid() more strict. Currently it allows entering vm86 mode if segment limit is greater than 0xffff and db bit is set. Both of those can cause incorrect execution of instruction by cpu since in vm86 mode limit will be set to 0xffff and db will be forced to 0. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 23d5aec7807..7ebcac25725 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3341,15 +3341,13 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) vmx_get_segment(vcpu, &var, seg); var.dpl = 0x3; - var.g = 0; - var.db = 0; if (seg == VCPU_SREG_CS) var.type = 0x3; ar = vmx_segment_access_rights(&var); if (var.base != (var.selector << 4)) return false; - if (var.limit < 0xffff) + if (var.limit != 0xffff) return false; if (ar != 0xf3) return false; -- cgit v1.2.3-18-g5258 From d99e415275dd3f757b75981adad8645cdc26da45 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 20 Dec 2012 16:57:45 +0200 Subject: KVM: VMX: fix emulation of invalid guest state. Currently when emulation of invalid guest state is enable (emulate_invalid_guest_state=1) segment registers are still fixed for entry to vm86 mode some times. Segment register fixing is avoided in enter_rmode(), but vmx_set_segment() still does it unconditionally. The patch fixes it. 
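The policy after this patch can be summarized in a short, self-contained sketch (every identifier below is invented; it models only the decision, not the VMCS plumbing): the guest's segment value is always recorded, it is squashed into vm86-compatible form only when invalid-state emulation is disabled, and either way the need for emulation is re-evaluated afterwards.

struct toy_seg_state {
	unsigned base, limit, ar;
};

static int toy_emulate_invalid_guest_state; /* stand-in for the module param */

static void toy_store_rmode_segment(struct toy_seg_state *shadow,
				    const struct toy_seg_state *guest_value)
{
	*shadow = *guest_value;                   /* keep what the guest wrote   */
	if (!toy_emulate_invalid_guest_state) {   /* legacy path only: force the */
		shadow->limit = 0xffff;           /* vm86-compatible shape       */
		shadow->ar    = 0xf3;
	}
	/* caller then recomputes emulation_required from the full guest state */
}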
Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 122 +++++++++++++++++++++++++++++------------------------ 1 file changed, 68 insertions(+), 54 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7ebcac25725..9dff310a2e9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -624,6 +624,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +static bool guest_state_valid(struct kvm_vcpu *vcpu); +static u32 vmx_segment_access_rights(struct kvm_segment *var); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -2758,18 +2760,23 @@ static __exit void hardware_unsetup(void) free_kvm_area(); } -static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) +static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, + struct kvm_segment *save) { - const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - struct kvm_segment tmp = *save; - - if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) { - tmp.base = vmcs_readl(sf->base); - tmp.selector = vmcs_read16(sf->selector); - tmp.dpl = tmp.selector & SELECTOR_RPL_MASK; - tmp.s = 1; + if (!emulate_invalid_guest_state) { + /* + * CS and SS RPL should be equal during guest entry according + * to VMX spec, but in reality it is not always so. Since vcpu + * is in the middle of the transition from real mode to + * protected mode it is safe to assume that RPL 0 is a good + * default value. + */ + if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) + save->selector &= ~SELECTOR_RPL_MASK; + save->dpl = save->selector & SELECTOR_RPL_MASK; + save->s = 1; } - vmx_set_segment(vcpu, &tmp, seg); + vmx_set_segment(vcpu, save, seg); } static void enter_pmode(struct kvm_vcpu *vcpu) @@ -2777,6 +2784,17 @@ static void enter_pmode(struct kvm_vcpu *vcpu) unsigned long flags; struct vcpu_vmx *vmx = to_vmx(vcpu); + /* + * Update real mode segment cache. It may be not up-to-date if sement + * register was written while vcpu was in a guest mode. 
+ */ + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); + vmx->emulation_required = 1; vmx->rmode.vm86_active = 0; @@ -2794,22 +2812,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu) update_exception_bitmap(vcpu); - if (emulate_invalid_guest_state) - return; - + fix_pmode_dataseg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); + fix_pmode_dataseg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); - - vmx_segment_cache_clear(vmx); - - vmcs_write16(GUEST_SS_SELECTOR, 0); - vmcs_write32(GUEST_SS_AR_BYTES, 0x93); - - vmcs_write16(GUEST_CS_SELECTOR, - vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK); - vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); } static gva_t rmode_tss_base(struct kvm *kvm) @@ -2831,22 +2839,40 @@ static gva_t rmode_tss_base(struct kvm *kvm) static void fix_rmode_seg(int seg, struct kvm_segment *save) { const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + struct kvm_segment var = *save; - vmcs_write16(sf->selector, save->base >> 4); - vmcs_write32(sf->base, save->base & 0xffff0); - vmcs_write32(sf->limit, 0xffff); - vmcs_write32(sf->ar_bytes, 0xf3); - if (save->base & 0xf) - printk_once(KERN_WARNING "kvm: segment base is not paragraph" - " aligned when entering protected mode (seg=%d)", - seg); + var.dpl = 0x3; + if (seg == VCPU_SREG_CS) + var.type = 0x3; + + if (!emulate_invalid_guest_state) { + var.selector = var.base >> 4; + var.base = var.base & 0xffff0; + var.limit = 0xffff; + var.g = 0; + var.db = 0; + var.present = 1; + var.s = 1; + var.l = 0; + var.unusable = 0; + var.type = 0x3; + var.avl = 0; + if (save->base & 0xf) + printk_once(KERN_WARNING "kvm: segment base is not " + "paragraph aligned when entering " + "protected mode (seg=%d)", seg); + } + + vmcs_write16(sf->selector, var.selector); + vmcs_write32(sf->base, var.base); + vmcs_write32(sf->limit, var.limit); + vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); } static void enter_rmode(struct kvm_vcpu *vcpu) { unsigned long flags; struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_segment var; if (enable_unrestricted_guest) return; @@ -2862,7 +2888,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmx->emulation_required = 1; vmx->rmode.vm86_active = 1; - /* * Very old userspace does not call KVM_SET_TSS_ADDR before entering * vcpu. Call it here with phys address pointing 16M below 4G. 
@@ -2890,28 +2915,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); update_exception_bitmap(vcpu); - if (emulate_invalid_guest_state) - goto continue_rmode; - - vmx_get_segment(vcpu, &var, VCPU_SREG_SS); - vmx_set_segment(vcpu, &var, VCPU_SREG_SS); + fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); + fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); + fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); + fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); + fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); + fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); - vmx_get_segment(vcpu, &var, VCPU_SREG_CS); - vmx_set_segment(vcpu, &var, VCPU_SREG_CS); - - vmx_get_segment(vcpu, &var, VCPU_SREG_ES); - vmx_set_segment(vcpu, &var, VCPU_SREG_ES); - - vmx_get_segment(vcpu, &var, VCPU_SREG_DS); - vmx_set_segment(vcpu, &var, VCPU_SREG_DS); - - vmx_get_segment(vcpu, &var, VCPU_SREG_GS); - vmx_set_segment(vcpu, &var, VCPU_SREG_GS); - - vmx_get_segment(vcpu, &var, VCPU_SREG_FS); - vmx_set_segment(vcpu, &var, VCPU_SREG_FS); - -continue_rmode: kvm_mmu_reset_context(vcpu); } @@ -3278,7 +3288,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, vmcs_write16(sf->selector, var->selector); else if (var->s) fix_rmode_seg(seg, &vmx->rmode.segs[seg]); - return; + goto out; } vmcs_writel(sf->base, var->base); @@ -3300,6 +3310,10 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, var->type |= 0x1; /* Accessed */ vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); + +out: + if (!vmx->emulation_required) + vmx->emulation_required = !guest_state_valid(vcpu); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) -- cgit v1.2.3-18-g5258 From d54d07b2ca19a2908aa89e0c67715ca2e8e62a4c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 20 Dec 2012 16:57:46 +0200 Subject: KVM: VMX: Do not fix segment register during vcpu initialization. Segment registers will be fixed according to current emulation policy during switching to real mode for the first time. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9dff310a2e9..a101dd488f2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3621,12 +3621,9 @@ static void seg_setup(int seg) vmcs_write16(sf->selector, 0); vmcs_writel(sf->base, 0); vmcs_write32(sf->limit, 0xffff); - if (enable_unrestricted_guest) { - ar = 0x93; - if (seg == VCPU_SREG_CS) - ar |= 0x08; /* code segment */ - } else - ar = 0xf3; + ar = 0x93; + if (seg == VCPU_SREG_CS) + ar |= 0x08; /* code segment */ vmcs_write32(sf->ar_bytes, ar); } @@ -3967,14 +3964,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx_segment_cache_clear(vmx); seg_setup(VCPU_SREG_CS); - /* - * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode - * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. 
- */ - if (kvm_vcpu_is_bsp(&vmx->vcpu)) { + if (kvm_vcpu_is_bsp(&vmx->vcpu)) vmcs_write16(GUEST_CS_SELECTOR, 0xf000); - vmcs_writel(GUEST_CS_BASE, 0x000f0000); - } else { + else { vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); } -- cgit v1.2.3-18-g5258 From 0ca1b4f4ba3a9f75bb099ccaf6c4bd8bb6db7a74 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 20 Dec 2012 16:57:47 +0200 Subject: KVM: VMX: handle IO when emulation is due to #GP in real mode. With emulate_invalid_guest_state=0 if a vcpu is in real mode VMX can enter the vcpu with smaller segment limit than guest configured. If the guest tries to access pass this limit it will get #GP at which point instruction will be emulated with correct segment limit applied. If during the emulation IO is detected it is not handled correctly. Vcpu thread should exit to userspace to serve the IO, but it returns to the guest instead. Since emulation is not completed till userspace completes the IO the faulty instruction is re-executed ad infinitum. The patch fixes that by exiting to userspace if IO happens during instruction emulation. Reported-by: Alex Williamson Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 75 +++++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 34 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a101dd488f2..55dfc375f1a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4230,28 +4230,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) return 0; } -static int handle_rmode_exception(struct kvm_vcpu *vcpu, - int vec, u32 err_code) +static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) { - /* - * Instruction with address size override prefix opcode 0x67 - * Cause the #SS fault with 0 error code in VM86 mode. - */ - if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) - if (emulate_instruction(vcpu, 0) == EMULATE_DONE) - return 1; - /* - * Forward all other exceptions that are valid in real mode. - * FIXME: Breaks guest debugging in real mode, needs to be fixed with - * the required debugging infrastructure rework. - */ switch (vec) { - case DB_VECTOR: - if (vcpu->guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - return 0; - kvm_queue_exception(vcpu, vec); - return 1; case BP_VECTOR: /* * Update instruction length as we may reinject the exception @@ -4260,7 +4241,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) - return 0; + return false; + /* fall through */ + case DB_VECTOR: + if (vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + return false; /* fall through */ case DE_VECTOR: case OF_VECTOR: @@ -4270,10 +4256,37 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, case SS_VECTOR: case GP_VECTOR: case MF_VECTOR: - kvm_queue_exception(vcpu, vec); - return 1; + return true; + break; } - return 0; + return false; +} + +static int handle_rmode_exception(struct kvm_vcpu *vcpu, + int vec, u32 err_code) +{ + /* + * Instruction with address size override prefix opcode 0x67 + * Cause the #SS fault with 0 error code in VM86 mode. 
+ */ + if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { + if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { + if (vcpu->arch.halt_request) { + vcpu->arch.halt_request = 0; + return kvm_emulate_halt(vcpu); + } + return 1; + } + return 0; + } + + /* + * Forward all other exceptions that are valid in real mode. + * FIXME: Breaks guest debugging in real mode, needs to be fixed with + * the required debugging infrastructure rework. + */ + kvm_queue_exception(vcpu, vec); + return 1; } /* @@ -4361,17 +4374,11 @@ static int handle_exception(struct kvm_vcpu *vcpu) return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); } - if (vmx->rmode.vm86_active && - handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, - error_code)) { - if (vcpu->arch.halt_request) { - vcpu->arch.halt_request = 0; - return kvm_emulate_halt(vcpu); - } - return 1; - } - ex_no = intr_info & INTR_INFO_VECTOR_MASK; + + if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no)) + return handle_rmode_exception(vcpu, ex_no, error_code); + switch (ex_no) { case DB_VECTOR: dr6 = vmcs_readl(EXIT_QUALIFICATION); -- cgit v1.2.3-18-g5258 From ee04e0cea8b170ae9c8542162091c716de36666b Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 25 Dec 2012 14:34:06 +0200 Subject: KVM: mmu: remove unused trace event trace_kvm_mmu_delay_free_pages() is no longer used. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmutrace.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index cd6e98333ba..b8f6172f417 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -195,12 +195,6 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page, TP_ARGS(sp) ); -DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_delay_free_pages, - TP_PROTO(struct kvm_mmu_page *sp), - - TP_ARGS(sp) -); - TRACE_EVENT( mark_mmio_spte, TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access), -- cgit v1.2.3-18-g5258 From 908e7d7999bcce70ac52e7f390a8f5cbc55948de Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 27 Dec 2012 14:44:58 +0200 Subject: KVM: MMU: simplify folding of dirty bit into accessed_dirty MMU code tries to avoid if()s HW is not able to predict reliably by using bitwise operation to streamline code execution, but in case of a dirty bit folding this gives us nothing since write_fault is checked right before the folding code. Lets just piggyback onto the if() to make code more clear. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/paging_tmpl.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 891eb6d93b8..a7b24cf59a3 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -249,16 +249,12 @@ retry_walk: if (!write_fault) protect_clean_gpte(&pte_access, pte); - - /* - * On a write fault, fold the dirty bit into accessed_dirty by shifting it one - * place right. - * - * On a read fault, do nothing. - */ - shift = write_fault >> ilog2(PFERR_WRITE_MASK); - shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT; - accessed_dirty &= pte >> shift; + else + /* + * On a write fault, fold the dirty bit into accessed_dirty by + * shifting it one place right. 
+ */ + accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT); if (unlikely(!accessed_dirty)) { ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); -- cgit v1.2.3-18-g5258 From b0cfeb5dedf9109b8a8bd594abe60791b135a643 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 8 Jan 2013 10:49:00 +0200 Subject: KVM: x86: remove unused variable from walk_addr_generic() Fix compilation warning. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/paging_tmpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a7b24cf59a3..2ad76b98111 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -151,7 +151,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, pt_element_t pte; pt_element_t __user *uninitialized_var(ptep_user); gfn_t table_gfn; - unsigned index, pt_access, pte_access, accessed_dirty, shift; + unsigned index, pt_access, pte_access, accessed_dirty; gpa_t pte_gpa; int offset; const int write_fault = access & PFERR_WRITE_MASK; -- cgit v1.2.3-18-g5258 From b09408d00fd82be80289a329dd94d1a0d6b77dc2 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 7 Jan 2013 19:27:06 -0200 Subject: KVM: VMX: fix incorrect cached cpl value with real/v8086 modes CPL is always 0 when in real mode, and always 3 when virtual 8086 mode. Using values other than those can cause failures on operations that check CPL. Reviewed-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 55dfc375f1a..dd2a85c1c6f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1696,7 +1696,6 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); - __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); to_vmx(vcpu)->rflags = rflags; if (to_vmx(vcpu)->rmode.vm86_active) { to_vmx(vcpu)->rmode.save_rflags = rflags; @@ -3110,7 +3109,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; - __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); } static u64 construct_eptp(unsigned long root_hpa) @@ -3220,8 +3218,10 @@ static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) return vmx_read_guest_seg_base(to_vmx(vcpu), seg); } -static int __vmx_get_cpl(struct kvm_vcpu *vcpu) +static int vmx_get_cpl(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + if (!is_protmode(vcpu)) return 0; @@ -3229,13 +3229,6 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu) && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ return 3; - return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3; -} - -static int vmx_get_cpl(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - /* * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations * fail; use the cache instead. 
@@ -3246,7 +3239,7 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu) if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); - vmx->cpl = __vmx_get_cpl(vcpu); + vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3; } return vmx->cpl; -- cgit v1.2.3-18-g5258 From e28bbd44dad134046ef9463cbb8c1cf81f53de5e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 4 Jan 2013 16:18:48 +0200 Subject: KVM: x86 emulator: framework for streamlining arithmetic opcodes We emulate arithmetic opcodes by executing a "similar" (same operation, different operands) on the cpu. This ensures accurate emulation, esp. wrt. eflags. However, the prologue and epilogue around the opcode is fairly long, consisting of a switch (for the operand size) and code to load and save the operands. This is repeated for every opcode. This patch introduces an alternative way to emulate arithmetic opcodes. Instead of the above, we have four (three on i386) functions consisting of just the opcode and a ret; one for each operand size. For example: .align 8 em_notb: not %al ret .align 8 em_notw: not %ax ret .align 8 em_notl: not %eax ret .align 8 em_notq: not %rax ret The prologue and epilogue are shared across all opcodes. Note the functions use a special calling convention; notably eflags is an input/output parameter and is not clobbered. Rather than dispatching the four functions through a jump table, the functions are declared as a constant size (8) so their address can be calculated. Acked-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 53c5ad6851d..dd71567d7c7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -149,6 +149,7 @@ #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ #define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ +#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ #define X2(x...) x, x #define X3(x...) X2(x), x @@ -159,6 +160,27 @@ #define X8(x...) X4(x), X4(x) #define X16(x...) X8(x), X8(x) +#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) +#define FASTOP_SIZE 8 + +/* + * fastop functions have a special calling convention: + * + * dst: [rdx]:rax (in/out) + * src: rbx (in/out) + * src2: rcx (in) + * flags: rflags (in/out) + * + * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for + * different operand sizes can be reached by calculation, rather than a jump + * table (which would be bigger than the code). + * + * fastop functions are declared as taking a never-defined fastop parameter, + * so they can't be called from C directly. 
+ */ + +struct fastop; + struct opcode { u64 flags : 56; u64 intercept : 8; @@ -168,6 +190,7 @@ struct opcode { const struct group_dual *gdual; const struct gprefix *gprefix; const struct escape *esc; + void (*fastop)(struct fastop *fake); } u; int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; @@ -3646,6 +3669,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } +#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } #define II(_f, _e, _i) \ { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } #define IIP(_f, _e, _i, _p) \ @@ -4502,6 +4526,16 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); } +static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) +{ + ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; + fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; + asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" + : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags) + : "c"(ctxt->src2.val), [fastop]"S"(fop)); + ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); + return X86EMUL_CONTINUE; +} int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) { @@ -4631,6 +4665,13 @@ special_insn: } if (ctxt->execute) { + if (ctxt->d & Fastop) { + void (*fop)(struct fastop *) = (void *)ctxt->execute; + rc = fastop(ctxt, fop); + if (rc != X86EMUL_CONTINUE) + goto done; + goto writeback; + } rc = ctxt->execute(ctxt); if (rc != X86EMUL_CONTINUE) goto done; -- cgit v1.2.3-18-g5258 From b7d491e7f065b5b957a27a65c9e7cd3ef96b2736 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 4 Jan 2013 16:18:49 +0200 Subject: KVM: x86 emulator: Support for declaring single operand fastops Acked-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index dd71567d7c7..42c53c8071b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -24,6 +24,7 @@ #include "kvm_cache_regs.h" #include #include +#include #include "x86.h" #include "tss.h" @@ -439,6 +440,30 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) } \ } while (0) +#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t" +#define FOP_RET "ret \n\t" + +#define FOP_START(op) \ + extern void em_##op(struct fastop *fake); \ + asm(".pushsection .text, \"ax\" \n\t" \ + ".global em_" #op " \n\t" \ + FOP_ALIGN \ + "em_" #op ": \n\t" + +#define FOP_END \ + ".popsection") + +#define FOP1E(op, dst) \ + FOP_ALIGN #op " %" #dst " \n\t" FOP_RET + +#define FASTOP1(op) \ + FOP_START(op) \ + FOP1E(op##b, al) \ + FOP1E(op##w, ax) \ + FOP1E(op##l, eax) \ + ON64(FOP1E(op##q, rax)) \ + FOP_END + #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ do { \ unsigned long _tmp; \ -- cgit v1.2.3-18-g5258 From b6744dc3fb55fda7cfcb53beecfa056f02415e6d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 4 Jan 2013 16:18:50 +0200 Subject: KVM: x86 emulator: introduce NoWrite flag Instead of disabling writeback via OP_NONE, just specify NoWrite. 
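A toy version of the mechanism (all names invented) shows why a decode-time flag is tidier than having each handler clear its destination operand:

#define TOY_NOWRITE (1u << 0)   /* set for CMP, TEST, SCAS, CMPS, ... */

struct toy_insn {
	unsigned flags;
	unsigned long dst_val;
	unsigned long *dst_ptr;
};

static void toy_writeback(struct toy_insn *in)
{
	if (in->flags & TOY_NOWRITE)   /* flags-only instruction: no result */
		return;
	*in->dst_ptr = in->dst_val;
}

The handler then only computes the result and EFLAGS; suppressing the store becomes a property of the opcode-table entry rather than a side effect hidden inside the handler.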
Acked-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 42c53c8071b..fe113fbb373 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -151,6 +151,7 @@ #define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ +#define NoWrite ((u64)1 << 45) /* No writeback */ #define X2(x...) x, x #define X3(x...) X2(x), x @@ -1633,6 +1634,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt) { int rc; + if (ctxt->d & NoWrite) + return X86EMUL_CONTINUE; + switch (ctxt->dst.type) { case OP_REG: write_register_operand(&ctxt->dst); -- cgit v1.2.3-18-g5258 From 75f728456f368140e6b34b6a79f3408774076325 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 4 Jan 2013 16:18:51 +0200 Subject: KVM: x86 emulator: mark CMP, CMPS, SCAS, TEST as NoWrite Acked-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index fe113fbb373..2af0c447560 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3069,16 +3069,12 @@ static int em_xor(struct x86_emulate_ctxt *ctxt) static int em_cmp(struct x86_emulate_ctxt *ctxt) { emulate_2op_SrcV(ctxt, "cmp"); - /* Disable writeback. */ - ctxt->dst.type = OP_NONE; return X86EMUL_CONTINUE; } static int em_test(struct x86_emulate_ctxt *ctxt) { emulate_2op_SrcV(ctxt, "test"); - /* Disable writeback. 
*/ - ctxt->dst.type = OP_NONE; return X86EMUL_CONTINUE; } @@ -3747,7 +3743,7 @@ static const struct opcode group1[] = { I(Lock | PageTable, em_and), I(Lock, em_sub), I(Lock, em_xor), - I(0, em_cmp), + I(NoWrite, em_cmp), }; static const struct opcode group1A[] = { @@ -3755,8 +3751,8 @@ static const struct opcode group1A[] = { }; static const struct opcode group3[] = { - I(DstMem | SrcImm, em_test), - I(DstMem | SrcImm, em_test), + I(DstMem | SrcImm | NoWrite, em_test), + I(DstMem | SrcImm | NoWrite, em_test), I(DstMem | SrcNone | Lock, em_not), I(DstMem | SrcNone | Lock, em_neg), I(SrcMem, em_mul_ex), @@ -3920,7 +3916,7 @@ static const struct opcode opcode_table[256] = { /* 0x30 - 0x37 */ I6ALU(Lock, em_xor), N, N, /* 0x38 - 0x3F */ - I6ALU(0, em_cmp), N, N, + I6ALU(NoWrite, em_cmp), N, N, /* 0x40 - 0x4F */ X16(D(DstReg)), /* 0x50 - 0x57 */ @@ -3946,7 +3942,7 @@ static const struct opcode opcode_table[256] = { G(DstMem | SrcImm, group1), G(ByteOp | DstMem | SrcImm | No64, group1), G(DstMem | SrcImmByte, group1), - I2bv(DstMem | SrcReg | ModRM, em_test), + I2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), /* 0x88 - 0x8F */ I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), @@ -3966,12 +3962,12 @@ static const struct opcode opcode_table[256] = { I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), I2bv(SrcSI | DstDI | Mov | String, em_mov), - I2bv(SrcSI | DstDI | String, em_cmp), + I2bv(SrcSI | DstDI | String | NoWrite, em_cmp), /* 0xA8 - 0xAF */ - I2bv(DstAcc | SrcImm, em_test), + I2bv(DstAcc | SrcImm | NoWrite, em_test), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - I2bv(SrcAcc | DstDI | String, em_cmp), + I2bv(SrcAcc | DstDI | String | NoWrite, em_cmp), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ -- cgit v1.2.3-18-g5258 From 45a1467d7edff741d97a8be28342440ee65aa03c Mon Sep 17 00:00:00 2001 From: Avi Kivity