aboutsummaryrefslogtreecommitdiff
path: root/drivers/kvm/vmx.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/kvm/vmx.c')
-rw-r--r--drivers/kvm/vmx.c2002
1 files changed, 2002 insertions, 0 deletions
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
new file mode 100644
index 00000000000..bda7a7ae216
--- /dev/null
+++ b/drivers/kvm/vmx.c
@@ -0,0 +1,2002 @@
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * This module enables machines with Intel VT-x extensions to run virtual
+ * machines without emulation or binary translation.
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ *
+ * Authors:
+ * Avi Kivity <avi@qumranet.com>
+ * Yaniv Kamay <yaniv@qumranet.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "kvm.h"
+#include "vmx.h"
+#include "kvm_vmx.h"
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <asm/io.h>
+
+#include "segment_descriptor.h"
+
+#define MSR_IA32_FEATURE_CONTROL 0x03a
+
+MODULE_AUTHOR("Qumranet");
+MODULE_LICENSE("GPL");
+
+static DEFINE_PER_CPU(struct vmcs *, vmxarea);
+static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
+
+#ifdef __x86_64__
+#define HOST_IS_64 1
+#else
+#define HOST_IS_64 0
+#endif
+
+static struct vmcs_descriptor {
+ int size;
+ int order;
+ u32 revision_id;
+} vmcs_descriptor;
+
+#define VMX_SEGMENT_FIELD(seg) \
+ [VCPU_SREG_##seg] = { \
+ .selector = GUEST_##seg##_SELECTOR, \
+ .base = GUEST_##seg##_BASE, \
+ .limit = GUEST_##seg##_LIMIT, \
+ .ar_bytes = GUEST_##seg##_AR_BYTES, \
+ }
+
+static struct kvm_vmx_segment_field {
+ unsigned selector;
+ unsigned base;
+ unsigned limit;
+ unsigned ar_bytes;
+} kvm_vmx_segment_fields[] = {
+ VMX_SEGMENT_FIELD(CS),
+ VMX_SEGMENT_FIELD(DS),
+ VMX_SEGMENT_FIELD(ES),
+ VMX_SEGMENT_FIELD(FS),
+ VMX_SEGMENT_FIELD(GS),
+ VMX_SEGMENT_FIELD(SS),
+ VMX_SEGMENT_FIELD(TR),
+ VMX_SEGMENT_FIELD(LDTR),
+};
+
+static const u32 vmx_msr_index[] = {
+#ifdef __x86_64__
+ MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE,
+#endif
+ MSR_EFER, MSR_K6_STAR,
+};
+#define NR_VMX_MSR (sizeof(vmx_msr_index) / sizeof(*vmx_msr_index))
+
+struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr);
+
+static inline int is_page_fault(u32 intr_info)
+{
+ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
+ INTR_INFO_VALID_MASK)) ==
+ (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
+}
+
+static inline int is_external_interrupt(u32 intr_info)
+{
+ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
+ == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
+}
+
+static void vmcs_clear(struct vmcs *vmcs)
+{
+ u64 phys_addr = __pa(vmcs);
+ u8 error;
+
+ asm volatile (ASM_VMX_VMCLEAR_RAX "; setna %0"
+ : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
+ : "cc", "memory");
+ if (error)
+ printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
+ vmcs, phys_addr);
+}
+
+static void __vcpu_clear(void *arg)
+{
+ struct kvm_vcpu *vcpu = arg;
+ int cpu = smp_processor_id();
+
+ if (vcpu->cpu == cpu)
+ vmcs_clear(vcpu->vmcs);
+ if (per_cpu(current_vmcs, cpu) == vcpu->vmcs)
+ per_cpu(current_vmcs, cpu) = NULL;
+}
+
+static unsigned long vmcs_readl(unsigned long field)
+{
+ unsigned long value;
+
+ asm volatile (ASM_VMX_VMREAD_RDX_RAX
+ : "=a"(value) : "d"(field) : "cc");
+ return value;
+}
+
+static u16 vmcs_read16(unsigned long field)
+{
+ return vmcs_readl(field);
+}
+
+static u32 vmcs_read32(unsigned long field)
+{
+ return vmcs_readl(field);
+}
+
+static u64 vmcs_read64(unsigned long field)
+{
+#ifdef __x86_64__
+ return vmcs_readl(field);
+#else
+ return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32);
+#endif
+}
+
+static void vmcs_writel(unsigned long field, unsigned long value)
+{
+ u8 error;
+
+ asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0"
+ : "=q"(error) : "a"(value), "d"(field) : "cc" );
+ if (error)
+ printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
+ field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
+}
+
+static void vmcs_write16(unsigned long field, u16 value)
+{
+ vmcs_writel(field, value);
+}
+
+static void vmcs_write32(unsigned long field, u32 value)
+{
+ vmcs_writel(field, value);
+}
+
+static void vmcs_write64(unsigned long field, u64 value)
+{
+#ifdef __x86_64__
+ vmcs_writel(field, value);
+#else
+ vmcs_writel(field, value);
+ asm volatile ("");
+ vmcs_writel(field+1, value >> 32);
+#endif
+}
+
+/*
+ * Switches to specified vcpu, until a matching vcpu_put(), but assumes
+ * vcpu mutex is already taken.
+ */
+static struct kvm_vcpu *vmx_vcpu_load(struct kvm_vcpu *vcpu)
+{
+ u64 phys_addr = __pa(vcpu->vmcs);
+ int cpu;
+
+ cpu = get_cpu();
+
+ if (vcpu->cpu != cpu) {
+ smp_call_function(__vcpu_clear, vcpu, 0, 1);
+ vcpu->launched = 0;
+ }
+
+ if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) {
+ u8 error;
+
+ per_cpu(current_vmcs, cpu) = vcpu->vmcs;
+ asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
+ : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
+ : "cc");
+ if (error)
+ printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
+ vcpu->vmcs, phys_addr);
+ }
+
+ if (vcpu->cpu != cpu) {
+ struct descriptor_table dt;
+ unsigned long sysenter_esp;
+
+ vcpu->cpu = cpu;
+ /*
+ * Linux uses per-cpu TSS and GDT, so set these when switching
+ * processors.
+ */
+ vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */
+ get_gdt(&dt);
+ vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */
+
+ rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
+ vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
+ }
+ return vcpu;
+}
+
+static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ put_cpu();
+}
+
+static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
+{
+ return vmcs_readl(GUEST_RFLAGS);
+}
+
+static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+{
+ vmcs_writel(GUEST_RFLAGS, rflags);
+}
+
+static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+ unsigned long rip;
+ u32 interruptibility;
+
+ rip = vmcs_readl(GUEST_RIP);
+ rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+ vmcs_writel(GUEST_RIP, rip);
+
+ /*
+ * We emulated an instruction, so temporary interrupt blocking
+ * should be removed, if set.
+ */
+ interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+ if (interruptibility & 3)
+ vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+ interruptibility & ~3);
+}
+
+static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
+{
+ printk(KERN_DEBUG "inject_general_protection: rip 0x%lx\n",
+ vmcs_readl(GUEST_RIP));
+ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ GP_VECTOR |
+ INTR_TYPE_EXCEPTION |
+ INTR_INFO_DELIEVER_CODE_MASK |
+ INTR_INFO_VALID_MASK);
+}
+
+/*
+ * reads and returns guest's timestamp counter "register"
+ * guest_tsc = host_tsc + tsc_offset -- 21.3
+ */
+static u64 guest_read_tsc(void)
+{
+ u64 host_tsc, tsc_offset;
+
+ rdtscll(host_tsc);
+ tsc_offset = vmcs_read64(TSC_OFFSET);
+ return host_tsc + tsc_offset;
+}
+
+/*
+ * writes 'guest_tsc' into guest's timestamp counter "register"
+ * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
+ */
+static void guest_write_tsc(u64 guest_tsc)
+{
+ u64 host_tsc;
+
+ rdtscll(host_tsc);
+ vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
+}
+
+static void reload_tss(void)
+{
+#ifndef __x86_64__
+
+ /*
+ * VT restores TR but not its size. Useless.
+ */
+ struct descriptor_table gdt;
+ struct segment_descriptor *descs;
+
+ get_gdt(&gdt);
+ descs = (void *)gdt.base;
+ descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
+ load_TR_desc();
+#endif
+}
+
+/*
+ * Reads an msr value (of 'msr_index') into 'pdata'.
+ * Returns 0 on success, non-0 otherwise.
+ * Assumes vcpu_load() was already called.
+ */
+static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+{
+ u64 data;
+ struct vmx_msr_entry *msr;
+
+ if (!pdata) {
+ printk(KERN_ERR "BUG: get_msr called with NULL pdata\n");
+ return -EINVAL;
+ }
+
+ switch (msr_index) {
+#ifdef __x86_64__
+ case MSR_FS_BASE:
+ data = vmcs_readl(GUEST_FS_BASE);
+ break;
+ case MSR_GS_BASE:
+ data = vmcs_readl(GUEST_GS_BASE);
+ break;
+ case MSR_EFER:
+ data = vcpu->shadow_efer;
+ break;
+#endif
+ case MSR_IA32_TIME_STAMP_COUNTER:
+ data = guest_read_tsc();
+ break;
+ case MSR_IA32_SYSENTER_CS:
+ data = vmcs_read32(GUEST_SYSENTER_CS);
+ break;
+ case MSR_IA32_SYSENTER_EIP:
+ data = vmcs_read32(GUEST_SYSENTER_EIP);
+ break;
+ case MSR_IA32_SYSENTER_ESP:
+ data = vmcs_read32(GUEST_SYSENTER_ESP);
+ break;
+ case MSR_IA32_MC0_CTL:
+ case MSR_IA32_MCG_STATUS:
+ case MSR_IA32_MCG_CAP:
+ case MSR_IA32_MC0_MISC:
+ case MSR_IA32_MC0_MISC+4:
+ case MSR_IA32_MC0_MISC+8:
+ case MSR_IA32_MC0_MISC+12:
+ case MSR_IA32_MC0_MISC+16:
+ case MSR_IA32_UCODE_REV:
+ /* MTRR registers */
+ case 0xfe:
+ case 0x200 ... 0x2ff:
+ data = 0;
+ break;
+ case MSR_IA32_APICBASE:
+ data = vcpu->apic_base;
+ break;
+ default:
+ msr = find_msr_entry(vcpu, msr_index);
+ if (!msr) {
+ printk(KERN_ERR "kvm: unhandled rdmsr: %x\n", msr_index);
+ return 1;
+ }
+ data = msr->data;
+ break;
+ }
+
+ *pdata = data;
+ return 0;
+}
+
+/*
+ * Writes msr value into into the appropriate "register".
+ * Returns 0 on success, non-0 otherwise.
+ * Assumes vcpu_load() was already called.
+ */
+static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+{
+ struct vmx_msr_entry *msr;
+ switch (msr_index) {
+#ifdef __x86_64__
+ case MSR_FS_BASE:
+ vmcs_writel(GUEST_FS_BASE, data);
+ break;
+ case MSR_GS_BASE:
+ vmcs_writel(GUEST_GS_BASE, data);
+ break;
+#endif
+ case MSR_IA32_SYSENTER_CS:
+ vmcs_write32(GUEST_SYSENTER_CS, data);
+ break;
+ case MSR_IA32_SYSENTER_EIP:
+ vmcs_write32(GUEST_SYSENTER_EIP, data);
+ break;
+ case MSR_IA32_SYSENTER_ESP:
+ vmcs_write32(GUEST_SYSENTER_ESP, data);
+ break;
+#ifdef __x86_64
+ case MSR_EFER:
+ set_efer(vcpu, data);
+ break;
+ case MSR_IA32_MC0_STATUS:
+ printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n"
+ , __FUNCTION__, data);
+ break;
+#endif
+ case MSR_IA32_TIME_STAMP_COUNTER: {
+ guest_write_tsc(data);
+ break;
+ }
+ case MSR_IA32_UCODE_REV:
+ case MSR_IA32_UCODE_WRITE:
+ case 0x200 ... 0x2ff: /* MTRRs */
+ break;
+ case MSR_IA32_APICBASE:
+ vcpu->apic_base = data;
+ break;
+ default:
+ msr = find_msr_entry(vcpu, msr_index);
+ if (!msr) {
+ printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr_index);
+ return 1;
+ }
+ msr->data = data;
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Sync the rsp and rip registers into the vcpu structure. This allows
+ * registers to be accessed by indexing vcpu->regs.
+ */
+static void vcpu_load_rsp_rip(struct kvm_vcpu *vcpu)
+{
+ vcpu->regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
+ vcpu->rip = vmcs_readl(GUEST_RIP);
+}
+
+/*
+ * Syncs rsp and rip back into the vmcs. Should be called after possible
+ * modification.
+ */
+static void vcpu_put_rsp_rip(struct kvm_vcpu *vcpu)
+{
+ vmcs_writel(GUEST_RSP, vcpu->regs[VCPU_REGS_RSP]);
+ vmcs_writel(GUEST_RIP, vcpu->rip);
+}
+
+static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
+{
+ unsigned long dr7 = 0x400;
+ u32 exception_bitmap;
+ int old_singlestep;
+
+ exception_bitmap = vmcs_read32(EXCEPTION_BITMAP);
+ old_singlestep = vcpu->guest_debug.singlestep;
+
+ vcpu->guest_debug.enabled = dbg->enabled;
+ if (vcpu->guest_debug.enabled) {
+ int i;
+
+ dr7 |= 0x200; /* exact */
+ for (i = 0; i < 4; ++i) {
+ if (!dbg->breakpoints[i].enabled)
+ continue;
+ vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
+ dr7 |= 2 << (i*2); /* global enable */
+ dr7 |= 0 << (i*4+16); /* execution breakpoint */
+ }
+
+ exception_bitmap |= (1u << 1); /* Trap debug exceptions */
+
+ vcpu->guest_debug.singlestep = dbg->singlestep;
+ } else {
+ exception_bitmap &= ~(1u << 1); /* Ignore debug exceptions */
+ vcpu->guest_debug.singlestep = 0;
+ }
+
+ if (old_singlestep && !vcpu->guest_debug.singlestep) {
+ unsigned long flags;
+
+ flags = vmcs_readl(GUEST_RFLAGS);
+ flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+ vmcs_writel(GUEST_RFLAGS, flags);
+ }
+
+ vmcs_write32(EXCEPTION_BITMAP, exception_bitmap);
+ vmcs_writel(GUEST_DR7, dr7);
+
+ return 0;
+}
+
+static __init int cpu_has_kvm_support(void)
+{
+ unsigned long ecx = cpuid_ecx(1);
+ return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
+}
+
+static __init int vmx_disabled_by_bios(void)
+{
+ u64 msr;
+
+ rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
+ return (msr & 5) == 1; /* locked but not enabled */
+}
+
+static __init void hardware_enable(void *garbage)
+{
+ int cpu = raw_smp_processor_id();
+ u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
+ u64 old;
+
+ rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
+ if ((old & 5) == 0)
+ /* enable and lock */
+ wrmsrl(MSR_IA32_FEATURE_CONTROL, old | 5);
+ write_cr4(read_cr4() | CR4_VMXE); /* FIXME: not cpu hotplug safe */
+ asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr)
+ : "memory", "cc");
+}
+
+static void hardware_disable(void *garbage)
+{
+ asm volatile (ASM_VMX_VMXOFF : : : "cc");
+}
+
+static __init void setup_vmcs_descriptor(void)
+{
+ u32 vmx_msr_low, vmx_msr_high;
+
+ rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high);
+ vmcs_descriptor.size = vmx_msr_high & 0x1fff;
+ vmcs_descriptor.order = get_order(vmcs_descriptor.size);
+ vmcs_descriptor.revision_id = vmx_msr_low;
+};
+
+static struct vmcs *alloc_vmcs_cpu(int cpu)
+{
+ int node = cpu_to_node(cpu);
+ struct page *pages;
+ struct vmcs *vmcs;
+
+ pages = alloc_pages_node(node, GFP_KERNEL, vmcs_descriptor.order);
+ if (!pages)
+ return NULL;
+ vmcs = page_address(pages);
+ memset(vmcs, 0, vmcs_descriptor.size);
+ vmcs->revision_id = vmcs_descriptor.revision_id; /* vmcs revision id */
+ return vmcs;
+}
+
+static struct vmcs *alloc_vmcs(void)
+{
+ return alloc_vmcs_cpu(smp_processor_id());
+}
+
+static void free_vmcs(struct vmcs *vmcs)
+{
+ free_pages((unsigned long)vmcs, vmcs_descriptor.order);
+}
+
+static __exit void free_kvm_area(void)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ free_vmcs(per_cpu(vmxarea, cpu));
+}
+
+extern struct vmcs *alloc_vmcs_cpu(int cpu);
+
+static __init int alloc_kvm_area(void)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ struct vmcs *vmcs;
+
+ vmcs = alloc_vmcs_cpu(cpu);
+ if (!vmcs) {
+ free_kvm_area();
+ return -ENOMEM;
+ }
+
+ per_cpu(vmxarea, cpu) = vmcs;
+ }
+ return 0;
+}
+
+static __init int hardware_setup(void)
+{
+ setup_vmcs_descriptor();
+ return alloc_kvm_area();
+}
+
+static __exit void hardware_unsetup(void)
+{
+ free_kvm_area();
+}
+
+static void update_exception_bitmap(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->rmode.active)
+ vmcs_write32(EXCEPTION_BITMAP, ~0);
+ else
+ vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR);
+}
+
+static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
+{
+ struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+
+ if (vmcs_readl(sf->base) == save->base) {
+ vmcs_write16(sf->selector, save->selector);
+ vmcs_writel(sf->base, save->base);
+ vmcs_write32(sf->limit, save->limit);
+ vmcs_write32(sf->ar_bytes, save->ar);
+ } else {
+ u32 dpl = (vmcs_read16(sf->selector) & SELECTOR_RPL_MASK)
+ << AR_DPL_SHIFT;
+ vmcs_write32(sf->ar_bytes, 0x93 | dpl);
+ }
+}
+
+static void enter_pmode(struct kvm_vcpu *vcpu)
+{
+ unsigned long flags;
+
+ vcpu->rmode.active = 0;
+
+ vmcs_writel(GUEST_TR_BASE, vcpu->rmode.tr.base);
+ vmcs_write32(GUEST_TR_LIMIT, vcpu->rmode.tr.limit);
+ vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar);
+
+ flags = vmcs_readl(GUEST_RFLAGS);
+ flags &= ~(IOPL_MASK | X86_EFLAGS_VM);
+ flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT);
+ vmcs_writel(GUEST_RFLAGS, flags);
+
+ vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~CR4_VME_MASK) |
+ (vmcs_readl(CR4_READ_SHADOW) & CR4_VME_MASK));
+
+ update_exception_bitmap(vcpu);
+
+ fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->rmode.es);
+ fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->rmode.ds);
+ fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->rmode.gs);
+ fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->rmode.fs);
+
+ vmcs_write16(GUEST_SS_SELECTOR, 0);
+ vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
+
+ vmcs_write16(GUEST_CS_SELECTOR,
+ vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
+ vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+}
+
+static int rmode_tss_base(struct kvm* kvm)
+{
+ gfn_t base_gfn = kvm->memslots[0].base_gfn + kvm->memslots[0].npages - 3;
+ return base_gfn << PAGE_SHIFT;
+}
+
+static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
+{
+ struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+
+ save->selector = vmcs_read16(sf->selector);
+ save->base = vmcs_readl(sf->base);
+ save->limit = vmcs_read32(sf->limit);
+ save->ar = vmcs_read32(sf->ar_bytes);
+ vmcs_write16(sf->selector, vmcs_readl(sf->base) >> 4);
+ vmcs_write32(sf->limit, 0xffff);
+ vmcs_write32(sf->ar_bytes, 0xf3);
+}
+
+static void enter_rmode(struct kvm_vcpu *vcpu)
+{
+ unsigned long flags;
+
+ vcpu->rmode.active = 1;
+
+ vcpu->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
+ vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
+
+ vcpu->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
+ vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
+
+ vcpu->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
+ vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
+
+ flags = vmcs_readl(GUEST_RFLAGS);
+ vcpu->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
+
+ flags |= IOPL_MASK | X86_EFLAGS_VM;
+
+ vmcs_writel(GUEST_RFLAGS, flags);
+ vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | CR4_VME_MASK);
+ update_exception_bitmap(vcpu);
+
+ vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
+ vmcs_write32(GUEST_SS_LIMIT, 0xffff);
+ vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
+
+ vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
+ vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
+
+ fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es);
+ fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds);
+ fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs);
+ fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs);
+}
+
+#ifdef __x86_64__
+
+static void enter_lmode(struct kvm_vcpu *vcpu)
+{
+ u32 guest_tr_ar;
+
+ guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
+ if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
+ printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
+ __FUNCTION__);
+ vmcs_write32(GUEST_TR_AR_BYTES,
+ (guest_tr_ar & ~AR_TYPE_MASK)
+ | AR_TYPE_BUSY_64_TSS);
+ }
+
+ vcpu->shadow_efer |= EFER_LMA;
+
+ find_msr_entry(vcpu, MSR_EFER)->data |= EFER_LMA | EFER_LME;
+ vmcs_write32(VM_ENTRY_CONTROLS,
+ vmcs_read32(VM_ENTRY_CONTROLS)
+ | VM_ENTRY_CONTROLS_IA32E_MASK);
+}
+
+static void exit_lmode(struct kvm_vcpu *vcpu)
+{
+ vcpu->shadow_efer &= ~EFER_LMA;
+
+ vmcs_write32(VM_ENTRY_CONTROLS,
+ vmcs_read32(VM_ENTRY_CONTROLS)
+ & ~VM_ENTRY_CONTROLS_IA32E_MASK);
+}
+
+#endif
+
+static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+ if (vcpu->rmode.active && (cr0 & CR0_PE_MASK))
+ enter_pmode(vcpu);
+
+ if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK))
+ enter_rmode(vcpu);
+
+#ifdef __x86_64__
+ if (vcpu->shadow_efer & EFER_LME) {
+ if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK))
+ enter_lmode(vcpu);
+ if (is_paging(vcpu) && !(cr0 & CR0_PG_MASK))
+ exit_lmode(vcpu);
+ }
+#endif
+
+ vmcs_writel(CR0_READ_SHADOW, cr0);
+ vmcs_writel(GUEST_CR0,
+ (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
+ vcpu->cr0 = cr0;
+}
+
+/*
+ * Used when restoring the VM to avoid corrupting segment registers
+ */
+static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+ vcpu->rmode.active = ((cr0 & CR0_PE_MASK) == 0);
+ update_exception_bitmap(vcpu);
+ vmcs_writel(CR0_READ_SHADOW, cr0);
+ vmcs_writel(GUEST_CR0,
+ (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
+ vcpu->cr0 = cr0;
+}
+
+static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+{
+ vmcs_writel(GUEST_CR3, cr3);
+}
+
+static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+ vmcs_writel(CR4_READ_SHADOW, cr4);
+ vmcs_writel(GUEST_CR4, cr4 | (vcpu->rmode.active ?
+ KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
+ vcpu->cr4 = cr4;
+}
+
+#ifdef __x86_64__
+
+static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+{
+ struct vmx_msr_entry *msr = find_msr_entry(vcpu, MSR_EFER);
+
+ vcpu->shadow_efer = efer;
+ if (efer & EFER_LMA) {
+ vmcs_write32(VM_ENTRY_CONTROLS,
+ vmcs_read32(VM_ENTRY_CONTROLS) |
+ VM_ENTRY_CONTROLS_IA32E_MASK);
+ msr->data = efer;
+
+ } else {
+ vmcs_write32(VM_ENTRY_CONTROLS,
+ vmcs_read32(VM_ENTRY_CONTROLS) &
+ ~VM_ENTRY_CONTROLS_IA32E_MASK);
+
+ msr->data = efer & ~EFER_LME;
+ }
+}
+
+#endif
+
+static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
+{
+ struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+
+ return vmcs_readl(sf->base);
+}
+
+static void vmx_get_segment(struct kvm_vcpu *vcpu,
+ struct kvm_segment *var, int seg)
+{
+ struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+ u32 ar;
+
+ var->base = vmcs_readl(sf->base);
+ var->limit = vmcs_read32(sf->limit);
+ var->selector = vmcs_read16(sf->selector);
+ ar = vmcs_read32(sf->ar_bytes);
+ if (ar & AR_UNUSABLE_MASK)
+ ar = 0;
+ var->type = ar & 15;
+ var->s = (ar >> 4) & 1;
+ var->dpl = (ar >> 5) & 3;
+ var->present = (ar >> 7) & 1;
+ var->avl = (ar >> 12) & 1;
+ var->l = (ar >> 13) & 1;
+ var->db = (ar >> 14) & 1;
+ var->g = (ar >> 15) & 1;
+ var->unusable = (ar >> 16) & 1;
+}
+
+static void vmx_set_segment(struct kvm_vcpu *vcpu,
+ struct kvm_segment *var, int seg)
+{
+ struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+ u32 ar;
+
+ vmcs_writel(sf->base, var->base);
+ vmcs_write32(sf->limit, var->limit);
+ vmcs_write16(sf->selector, var->selector);
+ if (var->unusable)
+ ar = 1 << 16;
+ else {
+ ar = var->type & 15;
+ ar |= (var->s & 1) << 4;
+ ar |= (var->dpl & 3) << 5;
+ ar |= (var->present & 1) << 7;
+ ar |= (var->avl & 1) << 12;
+ ar |= (var->l & 1) << 13;
+ ar |= (var->db & 1) << 14;
+ ar |= (var->g & 1) << 15;
+ }
+ vmcs_write32(sf->ar_bytes, ar);
+}
+
+static int vmx_is_long_mode(struct kvm_vcpu *vcpu)
+{
+ return vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_CONTROLS_IA32E_MASK;
+}
+
+static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
+{
+ u32 ar = vmcs_read32(GUEST_CS_AR_BYTES);
+
+ *db = (ar >> 14) & 1;
+ *l = (ar >> 13) & 1;
+}
+
+static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
+{
+ dt->limit = vmcs_read32(GUEST_IDTR_LIMIT);
+ dt->base = vmcs_readl(GUEST_IDTR_BASE);
+}
+
+static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
+{
+ vmcs_write32(GUEST_IDTR_LIMIT, dt->limit);
+ vmcs_writel(GUEST_IDTR_BASE, dt->base);
+}
+
+static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
+{
+ dt->limit = vmcs_read32(GUEST_GDTR_LIMIT);
+ dt->base = vmcs_readl(GUEST_GDTR_BASE);
+}
+
+static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
+{
+ vmcs_write32(GUEST_GDTR_LIMIT, dt->limit);
+ vmcs_writel(GUEST_GDTR_BASE, dt->base);
+}
+
+static int init_rmode_tss(struct kvm* kvm)
+{
+ struct page *p1, *p2, *p3;
+ gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
+ char *page;
+
+ p1 = _gfn_to_page(kvm, fn++);
+ p2 = _gfn_to_page(kvm, fn++);
+ p3 = _gfn_to_page(kvm, fn);
+
+ if (!p1 || !p2 || !p3) {
+ kvm_printf(kvm,"%s: gfn_to_page failed\n", __FUNCTION__);
+ return 0;
+ }
+
+ page = kmap_atomic(p1, KM_USER0);
+ memset(page, 0, PAGE_SIZE);
+ *(u16*)(page + 0x66) = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
+ kunmap_atomic(page, KM_USER0);
+
+ page = kmap_atomic(p2, KM_USER0);
+ memset(page, 0, PAGE_SIZE);
+ kunmap_atomic(page, KM_USER0);
+
+ page = kmap_atomic(p3, KM_USER0);
+ memset(page, 0, PAGE_SIZE);
+ *(page + RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1) = ~0;
+ kunmap_atomic(page, KM_USER0);
+
+ return 1;
+}
+
+static void vmcs_write32_fixedbits(u32 msr, u32 vmcs_field, u32 val)
+{
+ u32 msr_high, msr_low;
+
+ rdmsr(msr, msr_low, msr_high);
+
+ val &= msr_high;
+ val |= msr_low;
+ vmcs_write32(vmcs_field, val);
+}
+
+static void seg_setup(int seg)
+{
+ struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+
+ vmcs_write16(sf->selector, 0);
+ vmcs_writel(sf->base, 0);
+ vmcs_write32(sf->limit, 0xffff);
+ vmcs_write32(sf->ar_bytes, 0x93);
+}
+
+/*
+ * Sets up the vmcs for emulated real mode.
+ */
+static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ u32 host_sysenter_cs;
+ u32 junk;
+ unsigned long a;
+ struct descriptor_table dt;
+ int i;
+ int ret = 0;
+ int nr_good_msrs;
+ extern asmlinkage void kvm_vmx_return(void);
+
+ if (!init_rmode_tss(vcpu->kvm)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ memset(vcpu->regs, 0, sizeof(vcpu->regs));
+ vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val();
+ vcpu->cr8 = 0;
+ vcpu->apic_base = 0xfee00000 |
+ /*for vcpu 0*/ MSR_IA32_APICBASE_BSP |
+ MSR_IA32_APICBASE_ENABLE;
+
+ fx_init(vcpu);
+
+ /*
+ * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
+ * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
+ */
+ vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
+ vmcs_writel(GUEST_CS_BASE, 0x000f0000);
+ vmcs_write32(GUEST_CS_LIMIT, 0xffff);
+ vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+
+ seg_setup(VCPU_SREG_DS);
+ seg_setup(VCPU_SREG_ES);
+ seg_setup(VCPU_SREG_FS);
+ seg_setup(VCPU_SREG_GS);
+ seg_setup(VCPU_SREG_SS);
+
+ vmcs_write16(GUEST_TR_SELECTOR, 0);
+ vmcs_writel(GUEST_TR_BASE, 0);
+ vmcs_write32(GUEST_TR_LIMIT, 0xffff);
+ vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
+
+ vmcs_write16(GUEST_LDTR_SELECTOR, 0);
+ vmcs_writel(GUEST_LDTR_BASE, 0);
+ vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
+ vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
+
+ vmcs_write32(GUEST_SYSENTER_CS, 0);
+ vmcs_writel(GUEST_SYSENTER_ESP, 0);
+ vmcs_writel(GUEST_SYSENTER_EIP, 0);
+
+ vmcs_writel(GUEST_RFLAGS, 0x02);
+ vmcs_writel(GUEST_RIP, 0xfff0);
+ vmcs_writel(GUEST_RSP, 0);
+
+ vmcs_writel(GUEST_CR3, 0);
+
+ //todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0
+ vmcs_writel(GUEST_DR7, 0x400);
+
+ vmcs_writel(GUEST_GDTR_BASE, 0);
+ vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
+
+ vmcs_writel(GUEST_IDTR_BASE, 0);
+ vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
+
+ vmcs_write32(GUEST_ACTIVITY_STATE, 0);
+ vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
+ vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+
+ /* I/O */
+ vmcs_write64(IO_BITMAP_A, 0);
+ vmcs_write64(IO_BITMAP_B, 0);
+
+ guest_write_tsc(0);
+
+ vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
+
+ /* Special registers */
+ vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+
+ /* Control */
+ vmcs_write32_fixedbits(MSR_IA32_VMX_PINBASED_CTLS_MSR,
+ PIN_BASED_VM_EXEC_CONTROL,
+ PIN_BASED_EXT_INTR_MASK /* 20.6.1 */
+ | PIN_BASED_NMI_EXITING /* 20.6.1 */
+ );
+ vmcs_write32_fixedbits(MSR_IA32_VMX_PROCBASED_CTLS_MSR,
+ CPU_BASED_VM_EXEC_CONTROL,
+ CPU_BASED_HLT_EXITING /* 20.6.2 */
+ | CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */
+ | CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */
+ | CPU_BASED_UNCOND_IO_EXITING /* 20.6.2 */
+ | CPU_BASED_INVDPG_EXITING
+ | CPU_BASED_MOV_DR_EXITING
+ | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */
+ );
+
+ vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR);
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
+ vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
+
+ vmcs_writel(HOST_CR0, read_cr0()); /* 22.2.3 */
+ vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */
+ vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
+
+ vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
+ vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
+ vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
+ vmcs_write16(HOST_FS_SELECTOR, read_fs()); /* 22.2.4 */
+ vmcs_write16(HOST_GS_SELECTOR, read_gs()); /* 22.2.4 */
+ vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
+#ifdef __x86_64__
+ rdmsrl(MSR_FS_BASE, a);
+ vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
+ rdmsrl(MSR_GS_BASE, a);
+ vmcs_writel(HOST_GS_BASE, a); /* 22.2.4 */
+#else
+ vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
+ vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
+#endif
+
+ vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
+
+ get_idt(&dt);
+ vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */
+
+
+ vmcs_writel(HOST_RIP, (unsigned long)kvm_vmx_return); /* 22.2.5 */
+
+ rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk);
+ vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs);
+ rdmsrl(MSR_IA32_SYSENTER_ESP, a);
+ vmcs_writel(HOST_IA32_SYSENTER_ESP, a); /* 22.2.3 */
+ rdmsrl(MSR_IA32_SYSENTER_EIP, a);
+ vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */
+
+ ret = -ENOMEM;
+ vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!vcpu->guest_msrs)
+ goto out;
+ vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!vcpu->host_msrs)
+ goto out_free_guest_msrs;
+
+ for (i = 0; i < NR_VMX_MSR; ++i) {
+ u32 index = vmx_msr_index[i];
+ u32 data_low, data_high;
+ u64 data;
+ int j = vcpu->nmsrs;
+
+ if (rdmsr_safe(index, &data_low, &data_high) < 0)
+ continue;
+ data = data_low | ((u64)data_high << 32);
+ vcpu->host_msrs[j].index = index;
+ vcpu->host_msrs[j].reserved = 0;
+ vcpu->host_msrs[j].data = data;
+ vcpu->guest_msrs[j] = vcpu->host_msrs[j];
+ ++vcpu->nmsrs;
+ }
+ printk(KERN_DEBUG "kvm: msrs: %d\n", vcpu->nmsrs);
+
+ nr_good_msrs = vcpu->nmsrs - NR_BAD_MSRS;
+ vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR,
+ virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS));
+ vmcs_writel(VM_EXIT_MSR_STORE_ADDR,
+ virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS));
+ vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
+ virt_to_phys(vcpu->host_msrs + NR_BAD_MSRS));
+ vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS_MSR, VM_EXIT_CONTROLS,
+ (HOST_IS_64 << 9)); /* 22.2,1, 20.7.1 */
+ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
+
+
+ /* 22.2.1, 20.8.1 */
+ vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS_MSR,
+ VM_ENTRY_CONTROLS, 0);
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
+
+ vmcs_writel(VIRTUAL_APIC_PAGE_ADDR, 0);
+ vmcs_writel(TPR_THRESHOLD, 0);
+
+ vmcs_writel(CR0_GUEST_HOST_MASK, KVM_GUEST_CR0_MASK);
+ vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
+
+ vcpu->cr0 = 0x60000010;
+ vmx_set_cr0(vcpu, vcpu->cr0); // enter rmode
+ vmx_set_cr4(vcpu, 0);
+#ifdef __x86_64__
+ vmx_set_efer(vcpu, 0);
+#endif
+
+ return 0;
+
+out_free_guest_msrs:
+ kfree(vcpu->guest_msrs);
+out:
+ return ret;
+}
+
+static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
+{
+ u16 ent[2];
+ u16 cs;
+ u16 ip;
+ unsigned long flags;
+ unsigned long ss_base = vmcs_readl(GUEST_SS_BASE);
+ u16 sp = vmcs_readl(GUEST_RSP);
+ u32 ss_limit = vmcs_read32(GUEST_SS_LIMIT);
+
+ if (sp > ss_limit || sp - 6 > sp) {
+ vcpu_printf(vcpu, "%s: #SS, rsp 0x%lx ss 0x%lx limit 0x%x\n",
+ __FUNCTION__,
+ vmcs_readl(GUEST_RSP),
+ vmcs_readl(GUEST_SS_BASE),
+ vmcs_read32(GUEST_SS_LIMIT));
+ return;
+ }
+
+ if (kvm_read_guest(vcpu, irq * sizeof(ent), sizeof(ent), &ent) !=
+ sizeof(ent)) {
+ vcpu_printf(vcpu, "%s: read guest err\n", __FUNCTION__);
+ return;
+ }
+
+ flags = vmcs_readl(GUEST_RFLAGS);
+ cs = vmcs_readl(GUEST_CS_BASE) >> 4;
+ ip = vmcs_readl(GUEST_RIP);
+
+
+ if (kvm_write_guest(vcpu, ss_base + sp - 2, 2, &flags) != 2 ||
+ kvm_write_guest(vcpu, ss_base + sp - 4, 2, &cs) != 2 ||
+ kvm_write_guest(vcpu, ss_base + sp - 6, 2, &ip) != 2) {
+ vcpu_printf(vcpu, "%s: write guest err\n", __FUNCTION__);
+ return;
+ }
+
+ vmcs_writel(GUEST_RFLAGS, flags &
+ ~( X86_EFLAGS_IF | X86_EFLAGS_AC | X86_EFLAGS_TF));
+ vmcs_write16(GUEST_CS_SELECTOR, ent[1]) ;
+ vmcs_writel(GUEST_CS_BASE, ent[1] << 4);
+ vmcs_writel(GUEST_RIP, ent[0]);
+