From 67253af52e9133fb4cfbf7a2448a2d3524d1fa6c Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 25 Apr 2008 10:20:22 +0800 Subject: KVM: Add kvm_x86_ops get_tdp_level() The function get_tdp_level() provided the number of tdp level for EPT and NPT rather than the NPT specific macro. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/asm-x86/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 9d963cd6533..897a1be24cf 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -422,6 +422,7 @@ struct kvm_x86_ops { struct kvm_run *run); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); + int (*get_tdp_level)(void); }; extern struct kvm_x86_ops *kvm_x86_ops; -- cgit v1.2.3-70-g09d2 From 7b52345e2c4c7333bf7eba8034ffc4683fa63c91 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 25 Apr 2008 21:13:50 +0800 Subject: KVM: MMU: Add EPT support Enable kvm_set_spte() to generate EPT entries. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 43 +++++++++++++++++++++++++++++++++---------- arch/x86/kvm/x86.c | 3 +++ include/asm-x86/kvm_host.h | 3 +++ 3 files changed, 39 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 20fb3c852db..c28a36b4cbb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -152,6 +152,12 @@ static struct kmem_cache *mmu_page_header_cache; static u64 __read_mostly shadow_trap_nonpresent_pte; static u64 __read_mostly shadow_notrap_nonpresent_pte; +static u64 __read_mostly shadow_base_present_pte; +static u64 __read_mostly shadow_nx_mask; +static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ +static u64 __read_mostly shadow_user_mask; +static u64 __read_mostly shadow_accessed_mask; +static u64 __read_mostly shadow_dirty_mask; void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) { @@ -160,6 +166,23 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) } EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes); +void kvm_mmu_set_base_ptes(u64 base_pte) +{ + shadow_base_present_pte = base_pte; +} +EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); + +void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, + u64 dirty_mask, u64 nx_mask, u64 x_mask) +{ + shadow_user_mask = user_mask; + shadow_accessed_mask = accessed_mask; + shadow_dirty_mask = dirty_mask; + shadow_nx_mask = nx_mask; + shadow_x_mask = x_mask; +} +EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); + static int is_write_protection(struct kvm_vcpu *vcpu) { return vcpu->arch.cr0 & X86_CR0_WP; @@ -198,7 +221,7 @@ static int is_writeble_pte(unsigned long pte) static int is_dirty_pte(unsigned long pte) { - return pte & PT_DIRTY_MASK; + return pte & shadow_dirty_mask; } static int is_rmap_pte(u64 pte) @@ -513,7 +536,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) return; sp = page_header(__pa(spte)); pfn = spte_to_pfn(*spte); - if (*spte & PT_ACCESSED_MASK) + if (*spte & shadow_accessed_mask) kvm_set_pfn_accessed(pfn); if (is_writeble_pte(*spte)) kvm_release_pfn_dirty(pfn); @@ -1039,17 +1062,17 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, * whether the guest actually used the pte (in order to detect * demand paging). */ - spte = PT_PRESENT_MASK | PT_DIRTY_MASK; + spte = shadow_base_present_pte | shadow_dirty_mask; if (!speculative) pte_access |= PT_ACCESSED_MASK; if (!dirty) pte_access &= ~ACC_WRITE_MASK; - if (!(pte_access & ACC_EXEC_MASK)) - spte |= PT64_NX_MASK; - - spte |= PT_PRESENT_MASK; + if (pte_access & ACC_EXEC_MASK) + spte |= shadow_x_mask; + else + spte |= shadow_nx_mask; if (pte_access & ACC_USER_MASK) - spte |= PT_USER_MASK; + spte |= shadow_user_mask; if (largepage) spte |= PT_PAGE_SIZE_MASK; @@ -1155,7 +1178,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, } table[index] = __pa(new_table->spt) | PT_PRESENT_MASK - | PT_WRITABLE_MASK | PT_USER_MASK; + | PT_WRITABLE_MASK | shadow_user_mask; } table_addr = table[index] & PT64_BASE_ADDR_MASK; } @@ -1599,7 +1622,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) { u64 *spte = vcpu->arch.last_pte_updated; - return !!(spte && (*spte & PT_ACCESSED_MASK)); + return !!(spte && (*spte & shadow_accessed_mask)); } static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ce556372a4..0735efbfa71 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2417,6 +2417,9 @@ int kvm_arch_init(void *opaque) kvm_x86_ops = ops; kvm_mmu_set_nonpresent_ptes(0ull, 0ull); + kvm_mmu_set_base_ptes(PT_PRESENT_MASK); + kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, + PT_DIRTY_MASK, PT64_NX_MASK, 0); return 0; out: diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 897a1be24cf..d1dedda958f 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -434,6 +434,9 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); +void kvm_mmu_set_base_ptes(u64 base_pte); +void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, + u64 dirty_mask, u64 nx_mask, u64 x_mask); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); -- cgit v1.2.3-70-g09d2 From b7ebfb0509692cd923e31650f81ed4d79c9a3e59 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 25 Apr 2008 21:44:52 +0800 Subject: KVM: VMX: Prepare an identity page table for EPT in real mode [aliguory: plug leak] Signed-off-by: Sheng Yang Signed-off-by: Anthony Liguori Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 79 ++++++++++++++++++++++++++++++++++++++++++++-- arch/x86/kvm/vmx.h | 3 ++ arch/x86/kvm/x86.c | 2 ++ include/asm-x86/kvm_host.h | 3 ++ 4 files changed, 84 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 98e4f2b036d..de5f6150f2f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -87,7 +87,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) return container_of(vcpu, struct vcpu_vmx, vcpu); } -static int init_rmode_tss(struct kvm *kvm); +static int init_rmode(struct kvm *kvm); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -1304,7 +1304,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); kvm_mmu_reset_context(vcpu); - init_rmode_tss(vcpu->kvm); + init_rmode(vcpu->kvm); } #ifdef CONFIG_X86_64 @@ -1578,6 +1578,41 @@ out: return ret; } +static int init_rmode_identity_map(struct kvm *kvm) +{ + int i, r, ret; + pfn_t identity_map_pfn; + u32 tmp; + + if (!vm_need_ept()) + return 1; + if (unlikely(!kvm->arch.ept_identity_pagetable)) { + printk(KERN_ERR "EPT: identity-mapping pagetable " + "haven't been allocated!\n"); + return 0; + } + if (likely(kvm->arch.ept_identity_pagetable_done)) + return 1; + ret = 0; + identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT; + r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); + if (r < 0) + goto out; + /* Set up identity-mapping pagetable for EPT in real mode */ + for (i = 0; i < PT32_ENT_PER_PAGE; i++) { + tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | + _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); + r = kvm_write_guest_page(kvm, identity_map_pfn, + &tmp, i * sizeof(tmp), sizeof(tmp)); + if (r < 0) + goto out; + } + kvm->arch.ept_identity_pagetable_done = true; + ret = 1; +out: + return ret; +} + static void seg_setup(int seg) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -1612,6 +1647,31 @@ out: return r; } +static int alloc_identity_pagetable(struct kvm *kvm) +{ + struct kvm_userspace_memory_region kvm_userspace_mem; + int r = 0; + + down_write(&kvm->slots_lock); + if (kvm->arch.ept_identity_pagetable) + goto out; + kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; + kvm_userspace_mem.flags = 0; + kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; + kvm_userspace_mem.memory_size = PAGE_SIZE; + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); + if (r) + goto out; + + down_read(¤t->mm->mmap_sem); + kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, + VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT); + up_read(¤t->mm->mmap_sem); +out: + up_write(&kvm->slots_lock); + return r; +} + static void allocate_vpid(struct vcpu_vmx *vmx) { int vpid; @@ -1775,6 +1835,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) return 0; } +static int init_rmode(struct kvm *kvm) +{ + if (!init_rmode_tss(kvm)) + return 0; + if (!init_rmode_identity_map(kvm)) + return 0; + return 1; +} + static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -1782,7 +1851,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) int ret; down_read(&vcpu->kvm->slots_lock); - if (!init_rmode_tss(vmx->vcpu.kvm)) { + if (!init_rmode(vmx->vcpu.kvm)) { ret = -ENOMEM; goto out; } @@ -2759,6 +2828,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (alloc_apic_access_page(kvm) != 0) goto free_vmcs; + if (vm_need_ept()) + if (alloc_identity_pagetable(kvm) != 0) + goto free_vmcs; + return &vmx->vcpu; free_vmcs: diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 093b085daf6..f97eccc754e 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -340,6 +340,7 @@ enum vmcs_field { #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 #define VMX_NR_VPIDS (1 << 16) #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 @@ -353,4 +354,6 @@ enum vmcs_field { #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) #define VMX_EPT_DEFAULT_GAW 3 +#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul + #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0735efbfa71..1842a86f7c3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3909,6 +3909,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_free_physmem(kvm); if (kvm->arch.apic_access_page) put_page(kvm->arch.apic_access_page); + if (kvm->arch.ept_identity_pagetable) + put_page(kvm->arch.ept_identity_pagetable); kfree(kvm); } diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index d1dedda958f..e24afaa64a4 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -314,6 +314,9 @@ struct kvm_arch{ struct page *apic_access_page; gpa_t wall_clock; + + struct page *ept_identity_pagetable; + bool ept_identity_pagetable_done; }; struct kvm_vm_stat { -- cgit v1.2.3-70-g09d2 From 1439442c7b257b47a83aea4daed8fbf4a32cdff9 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 28 Apr 2008 12:24:45 +0800 Subject: KVM: VMX: Enable EPT feature for KVM Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 5 +- arch/x86/kvm/vmx.c | 229 +++++++++++++++++++++++++++++++++++++++++++-- arch/x86/kvm/vmx.h | 9 ++ include/asm-x86/kvm_host.h | 1 + 4 files changed, 234 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3dbedf16973..d9344be3644 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1177,8 +1177,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, return -ENOMEM; } - table[index] = __pa(new_table->spt) | PT_PRESENT_MASK - | PT_WRITABLE_MASK | shadow_user_mask; + table[index] = __pa(new_table->spt) + | PT_PRESENT_MASK | PT_WRITABLE_MASK + | shadow_user_mask | shadow_x_mask; } table_addr = table[index] & PT64_BASE_ADDR_MASK; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index de5f6150f2f..bfe4db11989 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -42,7 +42,7 @@ module_param(enable_vpid, bool, 0); static int flexpriority_enabled = 1; module_param(flexpriority_enabled, bool, 0); -static int enable_ept; +static int enable_ept = 1; module_param(enable_ept, bool, 0); struct vmcs { @@ -284,6 +284,18 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva) : : "a"(&operand), "c"(ext) : "cc", "memory"); } +static inline void __invept(int ext, u64 eptp, gpa_t gpa) +{ + struct { + u64 eptp, gpa; + } operand = {eptp, gpa}; + + asm volatile (ASM_VMX_INVEPT + /* CF==1 or ZF==1 --> rc = -1 */ + "; ja 1f ; ud2 ; 1:\n" + : : "a" (&operand), "c" (ext) : "cc", "memory"); +} + static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) { int i; @@ -335,6 +347,33 @@ static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); } +static inline void ept_sync_global(void) +{ + if (cpu_has_vmx_invept_global()) + __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0); +} + +static inline void ept_sync_context(u64 eptp) +{ + if (vm_need_ept()) { + if (cpu_has_vmx_invept_context()) + __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); + else + ept_sync_global(); + } +} + +static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) +{ + if (vm_need_ept()) { + if (cpu_has_vmx_invept_individual_addr()) + __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR, + eptp, gpa); + else + ept_sync_context(eptp); + } +} + static unsigned long vmcs_readl(unsigned long field) { unsigned long value; @@ -422,6 +461,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) eb |= 1u << 1; if (vcpu->arch.rmode.active) eb = ~0; + if (vm_need_ept()) + eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -1352,8 +1393,64 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; } +static void ept_load_pdptrs(struct kvm_vcpu *vcpu) +{ + if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { + if (!load_pdptrs(vcpu, vcpu->arch.cr3)) { + printk(KERN_ERR "EPT: Fail to load pdptrs!\n"); + return; + } + vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]); + vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]); + vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]); + vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]); + } +} + +static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); + +static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, + unsigned long cr0, + struct kvm_vcpu *vcpu) +{ + if (!(cr0 & X86_CR0_PG)) { + /* From paging/starting to nonpaging */ + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, + vmcs_config.cpu_based_exec_ctrl | + (CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING)); + vcpu->arch.cr0 = cr0; + vmx_set_cr4(vcpu, vcpu->arch.cr4); + *hw_cr0 |= X86_CR0_PE | X86_CR0_PG; + *hw_cr0 &= ~X86_CR0_WP; + } else if (!is_paging(vcpu)) { + /* From nonpaging to paging */ + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, + vmcs_config.cpu_based_exec_ctrl & + ~(CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING)); + vcpu->arch.cr0 = cr0; + vmx_set_cr4(vcpu, vcpu->arch.cr4); + if (!(vcpu->arch.cr0 & X86_CR0_WP)) + *hw_cr0 &= ~X86_CR0_WP; + } +} + +static void ept_update_paging_mode_cr4(unsigned long *hw_cr4, + struct kvm_vcpu *vcpu) +{ + if (!is_paging(vcpu)) { + *hw_cr4 &= ~X86_CR4_PAE; + *hw_cr4 |= X86_CR4_PSE; + } else if (!(vcpu->arch.cr4 & X86_CR4_PAE)) + *hw_cr4 &= ~X86_CR4_PAE; +} + static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { + unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | + KVM_VM_CR0_ALWAYS_ON; + vmx_fpu_deactivate(vcpu); if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE)) @@ -1371,29 +1468,61 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } #endif + if (vm_need_ept()) + ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); + vmcs_writel(CR0_READ_SHADOW, cr0); - vmcs_writel(GUEST_CR0, - (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); + vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) vmx_fpu_activate(vcpu); } +static u64 construct_eptp(unsigned long root_hpa) +{ + u64 eptp; + + /* TODO write the value reading from MSR */ + eptp = VMX_EPT_DEFAULT_MT | + VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; + eptp |= (root_hpa & PAGE_MASK); + + return eptp; +} + static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { + unsigned long guest_cr3; + u64 eptp; + + guest_cr3 = cr3; + if (vm_need_ept()) { + eptp = construct_eptp(cr3); + vmcs_write64(EPT_POINTER, eptp); + ept_sync_context(eptp); + ept_load_pdptrs(vcpu); + guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : + VMX_EPT_IDENTITY_PAGETABLE_ADDR; + } + vmx_flush_tlb(vcpu); - vmcs_writel(GUEST_CR3, cr3); + vmcs_writel(GUEST_CR3, guest_cr3); if (vcpu->arch.cr0 & X86_CR0_PE) vmx_fpu_deactivate(vcpu); } static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - vmcs_writel(CR4_READ_SHADOW, cr4); - vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ? - KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON)); + unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ? + KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); + vcpu->arch.cr4 = cr4; + if (vm_need_ept()) + ept_update_paging_mode_cr4(&hw_cr4, vcpu); + + vmcs_writel(CR4_READ_SHADOW, cr4); + vmcs_writel(GUEST_CR4, hw_cr4); } static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) @@ -2116,6 +2245,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (intr_info & INTR_INFO_DELIVER_CODE_MASK) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); if (is_page_fault(intr_info)) { + /* EPT won't cause page fault directly */ + if (vm_need_ept()) + BUG(); cr2 = vmcs_readl(EXIT_QUALIFICATION); KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, (u32)((u64)cr2 >> 32), handler); @@ -2445,6 +2577,64 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return kvm_task_switch(vcpu, tss_selector, reason); } +static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u64 exit_qualification; + enum emulation_result er; + gpa_t gpa; + unsigned long hva; + int gla_validity; + int r; + + exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + + if (exit_qualification & (1 << 6)) { + printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); + return -ENOTSUPP; + } + + gla_validity = (exit_qualification >> 7) & 0x3; + if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) { + printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); + printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", + (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), + (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); + printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", + (long unsigned int)exit_qualification); + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 0; + return -ENOTSUPP; + } + + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); + hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT); + if (!kvm_is_error_hva(hva)) { + r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); + if (r < 0) { + printk(KERN_ERR "EPT: Not enough memory!\n"); + return -ENOMEM; + } + return 1; + } else { + /* must be MMIO */ + er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); + + if (er == EMULATE_FAIL) { + printk(KERN_ERR + "EPT: Fail to handle EPT violation vmexit!er is %d\n", + er); + printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", + (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), + (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); + printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", + (long unsigned int)exit_qualification); + return -ENOTSUPP; + } else if (er == EMULATE_DO_MMIO) + return 0; + } + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -2468,6 +2658,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, [EXIT_REASON_WBINVD] = handle_wbinvd, [EXIT_REASON_TASK_SWITCH] = handle_task_switch, + [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, }; static const int kvm_vmx_max_exit_handlers = @@ -2486,6 +2677,13 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); + /* Access CR3 don't cause VMExit in paging mode, so we need + * to sync with guest real CR3. */ + if (vm_need_ept() && is_paging(vcpu)) { + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + ept_load_pdptrs(vcpu); + } + if (unlikely(vmx->fail)) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason @@ -2494,7 +2692,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } if ((vectoring_info & VECTORING_INFO_VALID_MASK) && - exit_reason != EXIT_REASON_EXCEPTION_NMI) + (exit_reason != EXIT_REASON_EXCEPTION_NMI && + exit_reason != EXIT_REASON_EPT_VIOLATION)) printk(KERN_WARNING "%s: unexpected, valid vectoring info and " "exit reason is 0x%x\n", __func__, exit_reason); if (exit_reason < kvm_vmx_max_exit_handlers @@ -2796,6 +2995,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) return ERR_PTR(-ENOMEM); allocate_vpid(vmx); + if (id == 0 && vm_need_ept()) { + kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | + VMX_EPT_WRITABLE_MASK | + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); + kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, + VMX_EPT_FAKE_DIRTY_MASK, 0ull, + VMX_EPT_EXECUTABLE_MASK); + kvm_enable_tdp(); + } err = kvm_vcpu_init(&vmx->vcpu, kvm, id); if (err) @@ -2975,9 +3183,14 @@ static int __init vmx_init(void) vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); + if (cpu_has_vmx_ept()) + bypass_guest_pf = 0; + if (bypass_guest_pf) kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); + ept_sync_global(); + return 0; out2: diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index f97eccc754e..79d94c610df 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -353,6 +353,15 @@ enum vmcs_field { #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) #define VMX_EPT_DEFAULT_GAW 3 +#define VMX_EPT_MAX_GAW 0x4 +#define VMX_EPT_MT_EPTE_SHIFT 3 +#define VMX_EPT_GAW_EPTP_SHIFT 3 +#define VMX_EPT_DEFAULT_MT 0x6ull +#define VMX_EPT_READABLE_MASK 0x1ull +#define VMX_EPT_WRITABLE_MASK 0x2ull +#define VMX_EPT_EXECUTABLE_MASK 0x4ull +#define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62) +#define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63) #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index e24afaa64a4..4baa9c9e1f4 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -651,6 +651,7 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) #define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" #define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" #define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" +#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" #define MSR_IA32_TIME_STAMP_COUNTER 0x010 -- cgit v1.2.3-70-g09d2 From 45c5eb67da5a668abe79c23a7e64dbc87a600f90 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Fri, 25 Apr 2008 17:55:49 -0500 Subject: KVM: ppc: Handle guest idle by emulating MSR[WE] writes This reduces host CPU usage when the guest is idle. However, the guest must set MSR[WE] in its idle loop, which Linux did not do until 2.6.26. Signed-off-by: Hollis Blanchard Signed-off-by: Jerone Young Signed-off-by: Avi Kivity --- arch/powerpc/kvm/booke_guest.c | 1 + arch/powerpc/kvm/powerpc.c | 20 +++++++++++++++++--- include/asm-powerpc/kvm_host.h | 1 + include/asm-powerpc/kvm_ppc.h | 5 +++++ 4 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c index 6d9884a6884..b3db6f4576a 100644 --- a/arch/powerpc/kvm/booke_guest.c +++ b/arch/powerpc/kvm/booke_guest.c @@ -49,6 +49,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "inst_emu", VCPU_STAT(emulated_inst_exits) }, { "dec", VCPU_STAT(dec_exits) }, { "ext_intr", VCPU_STAT(ext_intr_exits) }, + { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { NULL } }; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index bad40bd2d3a..777e0f34e0e 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -36,13 +36,12 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { - /* XXX implement me */ - return 0; + return !!(v->arch.pending_exceptions); } int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return 1; + return !(v->arch.msr & MSR_WE); } @@ -214,6 +213,11 @@ static void kvmppc_decrementer_func(unsigned long data) struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); + + if (waitqueue_active(&vcpu->wq)) { + wake_up_interruptible(&vcpu->wq); + vcpu->stat.halt_wakeup++; + } } int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) @@ -339,6 +343,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int r; sigset_t sigsaved; + vcpu_load(vcpu); + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); @@ -363,12 +369,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); + vcpu_put(vcpu); + return r; } int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); + + if (waitqueue_active(&vcpu->wq)) { + wake_up_interruptible(&vcpu->wq); + vcpu->stat.halt_wakeup++; + } + return 0; } diff --git a/include/asm-powerpc/kvm_host.h b/include/asm-powerpc/kvm_host.h index 04ffbb8e0a3..81a69d71101 100644 --- a/include/asm-powerpc/kvm_host.h +++ b/include/asm-powerpc/kvm_host.h @@ -59,6 +59,7 @@ struct kvm_vcpu_stat { u32 emulated_inst_exits; u32 dec_exits; u32 ext_intr_exits; + u32 halt_wakeup; }; struct tlbe { diff --git a/include/asm-powerpc/kvm_ppc.h b/include/asm-powerpc/kvm_ppc.h index 7ac820308a7..b35a7e3ef97 100644 --- a/include/asm-powerpc/kvm_ppc.h +++ b/include/asm-powerpc/kvm_ppc.h @@ -77,12 +77,17 @@ static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception) clear_bit(priority, &vcpu->arch.pending_exceptions); } +/* Helper function for "full" MSR writes. No need to call this if only EE is + * changing. */ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) { if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); vcpu->arch.msr = new_msr; + + if (vcpu->arch.msr & MSR_WE) + kvm_vcpu_block(vcpu); } #endif /* __POWERPC_KVM_PPC_H__ */ -- cgit v1.2.3-70-g09d2 From bc1a34f1bf354fabc03e3f465620c80e510d0e8f Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 1 May 2008 18:43:33 +0200 Subject: KVM: avoid fx_init() schedule in atomic This make sure not to schedule in atomic during fx_init. I also changed the name of fpu_init to fx_finit to avoid duplicating the name with fpu_init that is already used in the kernel, this makes grep simpler if nothing else. Signed-off-by: Andrea Arcangeli Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 11 ++++++++++- include/asm-x86/kvm_host.h | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bc224bba1e8..21338bdb28f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3703,10 +3703,19 @@ void fx_init(struct kvm_vcpu *vcpu) { unsigned after_mxcsr_mask; + /* + * Touch the fpu the first time in non atomic context as if + * this is the first fpu instruction the exception handler + * will fire before the instruction returns and it'll have to + * allocate ram with GFP_KERNEL. + */ + if (!used_math()) + fx_save(&vcpu->arch.host_fx_image); + /* Initialize guest FPU by resetting ours and saving into guest's */ preempt_disable(); fx_save(&vcpu->arch.host_fx_image); - fpu_init(); + fx_finit(); fx_save(&vcpu->arch.guest_fx_image); fx_restore(&vcpu->arch.host_fx_image); preempt_enable(); diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 4baa9c9e1f4..1d8cd01fa51 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -627,7 +627,7 @@ static inline void fx_restore(struct i387_fxsave_struct *image) asm("fxrstor (%0)":: "r" (image)); } -static inline void fpu_init(void) +static inline void fx_finit(void) { asm("finit"); } -- cgit v1.2.3-70-g09d2