diff options
Diffstat (limited to 'arch/x86/kvm/svm.c')
| -rw-r--r-- | arch/x86/kvm/svm.c | 1170 |
1 files changed, 817 insertions, 353 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 63fec1531e8..b5e994ad013 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -20,8 +20,10 @@ #include "mmu.h" #include "kvm_cache_regs.h" #include "x86.h" +#include "cpuid.h" #include <linux/module.h> +#include <linux/mod_devicetable.h> #include <linux/kernel.h> #include <linux/vmalloc.h> #include <linux/highmem.h> @@ -29,8 +31,10 @@ #include <linux/ftrace_event.h> #include <linux/slab.h> +#include <asm/perf_event.h> #include <asm/tlbflush.h> #include <asm/desc.h> +#include <asm/debugreg.h> #include <asm/kvm_para.h> #include <asm/virtext.h> @@ -41,6 +45,12 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +static const struct x86_cpu_id svm_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_SVM), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); + #define IOPM_ALLOC_ORDER 2 #define MSRPM_ALLOC_ORDER 1 @@ -63,6 +73,10 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) +#define TSC_RATIO_RSVD 0xffffff0000000000ULL +#define TSC_RATIO_MIN 0x0000000000000001ULL +#define TSC_RATIO_MAX 0x000000ffffffffffULL + static bool erratum_383_found __read_mostly; static const u32 host_save_user_msrs[] = { @@ -93,14 +107,6 @@ struct nested_state { /* A VMEXIT is required but not yet emulated */ bool exit_required; - /* - * If we vmexit during an instruction emulation we need this to restore - * the l1 guest rip after the emulation - */ - unsigned long vmexit_rip; - unsigned long vmexit_rsp; - unsigned long vmexit_rax; - /* cache for intercepts of the guest */ u32 intercept_cr; u32 intercept_dr; @@ -114,6 +120,12 @@ struct nested_state { #define MSRPM_OFFSETS 16 static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; +/* + * Set osvw_len to higher value when updated Revision Guides + * are published and we know what the new status bits are + */ +static uint64_t osvw_len = 4, osvw_status; + struct vcpu_svm { struct kvm_vcpu vcpu; struct vmcb *vmcb; @@ -135,6 +147,8 @@ struct vcpu_svm { u32 *msrpm; + ulong nmi_iret_rip; + struct nested_state nested; bool nmi_singlestep; @@ -142,11 +156,16 @@ struct vcpu_svm { unsigned int3_injected; unsigned long int3_rip; u32 apf_reason; + + u64 tsc_ratio; }; +static DEFINE_PER_CPU(u64, current_tsc_ratio); +#define TSC_RATIO_DEFAULT 0x0100000000ULL + #define MSR_INVALID 0xffffffffU -static struct svm_direct_access_msrs { +static const struct svm_direct_access_msrs { u32 index; /* Index of the MSR */ bool always; /* True if intercept is always on */ } direct_access_msrs[] = { @@ -173,11 +192,13 @@ static bool npt_enabled = true; #else static bool npt_enabled; #endif -static int npt = 1; +/* allow nested paging (virtualized MMU) for all guests */ +static int npt = true; module_param(npt, int, S_IRUGO); -static int nested = 1; +/* allow nested virtualization in KVM/SVM */ +static int nested = true; module_param(nested, int, S_IRUGO); static void svm_flush_tlb(struct kvm_vcpu *vcpu); @@ -188,6 +209,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm); static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); +static u64 __scale_tsc(u64 ratio, u64 tsc); enum { VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, @@ -282,20 +304,35 @@ static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit) return vmcb->control.intercept_cr & (1U << bit); } -static inline void set_dr_intercept(struct vcpu_svm *svm, int bit) +static inline void set_dr_intercepts(struct vcpu_svm *svm) { struct vmcb *vmcb = get_host_vmcb(svm); - vmcb->control.intercept_dr |= (1U << bit); + vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ) + | (1 << INTERCEPT_DR1_READ) + | (1 << INTERCEPT_DR2_READ) + | (1 << INTERCEPT_DR3_READ) + | (1 << INTERCEPT_DR4_READ) + | (1 << INTERCEPT_DR5_READ) + | (1 << INTERCEPT_DR6_READ) + | (1 << INTERCEPT_DR7_READ) + | (1 << INTERCEPT_DR0_WRITE) + | (1 << INTERCEPT_DR1_WRITE) + | (1 << INTERCEPT_DR2_WRITE) + | (1 << INTERCEPT_DR3_WRITE) + | (1 << INTERCEPT_DR4_WRITE) + | (1 << INTERCEPT_DR5_WRITE) + | (1 << INTERCEPT_DR6_WRITE) + | (1 << INTERCEPT_DR7_WRITE); recalc_intercepts(svm); } -static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit) +static inline void clr_dr_intercepts(struct vcpu_svm *svm) { struct vmcb *vmcb = get_host_vmcb(svm); - vmcb->control.intercept_dr &= ~(1U << bit); + vmcb->control.intercept_dr = 0; recalc_intercepts(svm); } @@ -374,14 +411,13 @@ struct svm_cpu_data { }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); -static uint32_t svm_features; struct svm_init_data { int cpu; int r; }; -static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; +static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges) #define MSRS_RANGE_SIZE 2048 @@ -535,7 +571,7 @@ static void svm_init_erratum_383(void) int err; u64 val; - if (!cpu_has_amd_erratum(amd_erratum_383)) + if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH)) return; /* Use _safe variants to not break nested virtualization */ @@ -553,6 +589,27 @@ static void svm_init_erratum_383(void) erratum_383_found = true; } +static void svm_init_osvw(struct kvm_vcpu *vcpu) +{ + /* + * Guests should see errata 400 and 415 as fixed (assuming that + * HLT and IO instructions are intercepted). + */ + vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3; + vcpu->arch.osvw.status = osvw_status & ~(6ULL); + + /* + * By increasing VCPU's osvw.length to 3 we are telling the guest that + * all osvw.status bits inside that length, including bit 0 (which is + * reserved for erratum 298), are valid. However, if host processor's + * osvw_len is 0 then osvw_status[0] carries no information. We need to + * be conservative here and therefore we tell the guest that erratum 298 + * is present (because we really don't know). + */ + if (osvw_len == 0 && boot_cpu_data.x86 == 0x10) + vcpu->arch.osvw.status |= 1; +} + static int has_svm(void) { const char *msg; @@ -567,7 +624,13 @@ static int has_svm(void) static void svm_hardware_disable(void *garbage) { + /* Make sure we clean up behind us */ + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) + wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); + cpu_svm_disable(); + + amd_pmu_disable_virt(); } static int svm_hardware_enable(void *garbage) @@ -584,15 +647,12 @@ static int svm_hardware_enable(void *garbage) return -EBUSY; if (!has_svm()) { - printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n", - me); + pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me); return -EINVAL; } sd = per_cpu(svm_data, me); - if (!sd) { - printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n", - me); + pr_err("%s: svm_data is NULL on %d\n", __func__, me); return -EINVAL; } @@ -608,8 +668,45 @@ static int svm_hardware_enable(void *garbage) wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { + wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); + __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; + } + + + /* + * Get OSVW bits. + * + * Note that it is possible to have a system with mixed processor + * revisions and therefore different OSVW bits. If bits are not the same + * on different processors then choose the worst case (i.e. if erratum + * is present on one processor and not on another then assume that the + * erratum is present everywhere). + */ + if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) { + uint64_t len, status = 0; + int err; + + len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err); + if (!err) + status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS, + &err); + + if (err) + osvw_status = osvw_len = 0; + else { + if (len < osvw_len) + osvw_len = len; + osvw_status |= status; + osvw_status &= (1ULL << osvw_len) - 1; + } + } else + osvw_status = osvw_len = 0; + svm_init_erratum_383(); + amd_pmu_enable_virt(); + return 0; } @@ -789,6 +886,23 @@ static __init int svm_hardware_setup(void) if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) kvm_enable_efer_bits(EFER_FFXSR); + if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { + u64 max; + + kvm_has_tsc_control = true; + + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated needed because it will always + * be 1 on all machines and a value of 0 is used to disable + * tsc-scaling for the vcpu. + */ + max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); + + kvm_max_guest_tsc_khz = max; + } + if (nested) { printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); @@ -800,8 +914,6 @@ static __init int svm_hardware_setup(void) goto err; } - svm_features = cpuid_edx(SVM_CPUID_FUNC); - if (!boot_cpu_has(X86_FEATURE_NPT)) npt_enabled = false; @@ -852,6 +964,75 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } +static u64 __scale_tsc(u64 ratio, u64 tsc) +{ + u64 mult, frac, _tsc; + + mult = ratio >> 32; + frac = ratio & ((1ULL << 32) - 1); + + _tsc = tsc; + _tsc *= mult; + _tsc += (tsc >> 32) * frac; + _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; + + return _tsc; +} + +static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 _tsc = tsc; + + if (svm->tsc_ratio != TSC_RATIO_DEFAULT) + _tsc = __scale_tsc(svm->tsc_ratio, tsc); + + return _tsc; +} + +static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 ratio; + u64 khz; + + /* Guest TSC same frequency as host TSC? */ + if (!scale) { + svm->tsc_ratio = TSC_RATIO_DEFAULT; + return; + } + + /* TSC scaling supported? */ + if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { + if (user_tsc_khz > tsc_khz) { + vcpu->arch.tsc_catchup = 1; + vcpu->arch.tsc_always_catchup = 1; + } else + WARN(1, "user requested TSC rate below hardware speed\n"); + return; + } + + khz = user_tsc_khz; + + /* TSC scaling required - calculate ratio */ + ratio = khz << 32; + do_div(ratio, tsc_khz); + + if (ratio == 0 || ratio & TSC_RATIO_RSVD) { + WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", + user_tsc_khz); + return; + } + svm->tsc_ratio = ratio; +} + +static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + return svm->vmcb->control.tsc_offset; +} + static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); @@ -861,23 +1042,44 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) g_tsc_offset = svm->vmcb->control.tsc_offset - svm->nested.hsave->control.tsc_offset; svm->nested.hsave->control.tsc_offset = offset; - } + } else + trace_kvm_write_tsc_offset(vcpu->vcpu_id, + svm->vmcb->control.tsc_offset, + offset); svm->vmcb->control.tsc_offset = offset + g_tsc_offset; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } -static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) +static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) { struct vcpu_svm *svm = to_svm(vcpu); + WARN_ON(adjustment < 0); + if (host) + adjustment = svm_scale_tsc(vcpu, adjustment); + svm->vmcb->control.tsc_offset += adjustment; if (is_guest_mode(vcpu)) svm->nested.hsave->control.tsc_offset += adjustment; + else + trace_kvm_write_tsc_offset(vcpu->vcpu_id, + svm->vmcb->control.tsc_offset - adjustment, + svm->vmcb->control.tsc_offset); + mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } +static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + u64 tsc; + + tsc = svm_scale_tsc(vcpu, native_read_tsc()); + + return target_tsc - tsc; +} + static void init_vmcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -894,23 +1096,7 @@ static void init_vmcb(struct vcpu_svm *svm) set_cr_intercept(svm, INTERCEPT_CR4_WRITE); set_cr_intercept(svm, INTERCEPT_CR8_WRITE); - set_dr_intercept(svm, INTERCEPT_DR0_READ); - set_dr_intercept(svm, INTERCEPT_DR1_READ); - set_dr_intercept(svm, INTERCEPT_DR2_READ); - set_dr_intercept(svm, INTERCEPT_DR3_READ); - set_dr_intercept(svm, INTERCEPT_DR4_READ); - set_dr_intercept(svm, INTERCEPT_DR5_READ); - set_dr_intercept(svm, INTERCEPT_DR6_READ); - set_dr_intercept(svm, INTERCEPT_DR7_READ); - - set_dr_intercept(svm, INTERCEPT_DR0_WRITE); - set_dr_intercept(svm, INTERCEPT_DR1_WRITE); - set_dr_intercept(svm, INTERCEPT_DR2_WRITE); - set_dr_intercept(svm, INTERCEPT_DR3_WRITE); - set_dr_intercept(svm, INTERCEPT_DR4_WRITE); - set_dr_intercept(svm, INTERCEPT_DR5_WRITE); - set_dr_intercept(svm, INTERCEPT_DR6_WRITE); - set_dr_intercept(svm, INTERCEPT_DR7_WRITE); + set_dr_intercepts(svm); set_exception_intercept(svm, PF_VECTOR); set_exception_intercept(svm, UD_VECTOR); @@ -920,6 +1106,7 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_NMI); set_intercept(svm, INTERCEPT_SMI); set_intercept(svm, INTERCEPT_SELECTIVE_CR0); + set_intercept(svm, INTERCEPT_RDPMC); set_intercept(svm, INTERCEPT_CPUID); set_intercept(svm, INTERCEPT_INVD); set_intercept(svm, INTERCEPT_HLT); @@ -952,17 +1139,11 @@ static void init_vmcb(struct vcpu_svm *svm) init_seg(&save->gs); save->cs.selector = 0xf000; + save->cs.base = 0xffff0000; /* Executable/Readable Code Segment */ save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; save->cs.limit = 0xffff; - /* - * cs.base should really be 0xffff0000, but vmx can't handle that, so - * be consistent with it. - * - * Replace when we have real mode working for vmx. - */ - save->cs.base = 0xf0000; save->gdtr.limit = 0xffff; save->idtr.limit = 0xffff; @@ -972,8 +1153,7 @@ static void init_vmcb(struct vcpu_svm *svm) svm_set_efer(&svm->vcpu, 0); save->dr6 = 0xffff0ff0; - save->dr7 = 0x400; - save->rflags = 2; + kvm_set_rflags(&svm->vcpu, 2); save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; @@ -990,7 +1170,6 @@ static void init_vmcb(struct vcpu_svm *svm) if (npt_enabled) { /* Setup VMCB for Nested Paging */ control->nested_ctl = 1; - clr_intercept(svm, INTERCEPT_TASK_SWITCH); clr_intercept(svm, INTERCEPT_INVLPG); clr_exception_intercept(svm, PF_VECTOR); clr_cr_intercept(svm, INTERCEPT_CR3_READ); @@ -1014,21 +1193,16 @@ static void init_vmcb(struct vcpu_svm *svm) enable_gif(svm); } -static int svm_vcpu_reset(struct kvm_vcpu *vcpu) +static void svm_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + u32 dummy; + u32 eax = 1; init_vmcb(svm); - if (!kvm_vcpu_is_bsp(vcpu)) { - kvm_rip_write(vcpu, 0); - svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; - svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; - } - vcpu->arch.regs_avail = ~0; - vcpu->arch.regs_dirty = ~0; - - return 0; + kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); + kvm_register_write(vcpu, VCPU_REGS_RDX, eax); } static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) @@ -1046,6 +1220,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto out; } + svm->tsc_ratio = TSC_RATIO_DEFAULT; + err = kvm_vcpu_init(&svm->vcpu, kvm, id); if (err) goto free_svm; @@ -1080,20 +1256,15 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; svm->asid_generation = 0; init_vmcb(svm); - kvm_write_tsc(&svm->vcpu, 0); - - err = fx_init(&svm->vcpu); - if (err) - goto free_page4; svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_bsp(&svm->vcpu)) svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; + svm_init_osvw(&svm->vcpu); + return &svm->vcpu; -free_page4: - __free_page(hsave_page); free_page3: __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); free_page2: @@ -1139,6 +1310,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); + + if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && + svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) { + __get_cpu_var(current_tsc_ratio) = svm->tsc_ratio; + wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); + } } static void svm_vcpu_put(struct kvm_vcpu *vcpu) @@ -1153,8 +1330,10 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); load_gs_index(svm->host.gs); #else +#ifdef CONFIG_X86_32_LAZY_GS loadsegment(gs, svm->host.gs); #endif +#endif for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); } @@ -1166,6 +1345,11 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { + /* + * Any change of EFLAGS.VM is accompained by a reload of SS + * (caused by either a task switch or an inter-privilege IRET), + * so we do not need to update the CPL here. + */ to_svm(vcpu)->vmcb->save.rflags = rflags; } @@ -1278,6 +1462,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, */ if (var->unusable) var->db = 0; + var->dpl = to_svm(vcpu)->vmcb->save.cpl; break; } } @@ -1361,31 +1546,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_svm *svm = to_svm(vcpu); - if (is_guest_mode(vcpu)) { - /* - * We are here because we run in nested mode, the host kvm - * intercepts cr0 writes but the l1 hypervisor does not. - * But the L1 hypervisor may intercept selective cr0 writes. - * This needs to be checked here. - */ - unsigned long old, new; - - /* Remove bits that would trigger a real cr0 write intercept */ - old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; - new = cr0 & SVM_CR0_SELECTIVE_MASK; - - if (old == new) { - /* cr0 write with ts and mp unchanged */ - svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; - if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) { - svm->nested.vmexit_rip = kvm_rip_read(vcpu); - svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); - svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX); - return; - } - } - } - #ifdef CONFIG_X86_64 if (vcpu->arch.efer & EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { @@ -1417,11 +1577,14 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) update_cr0_intercept(svm); } -static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE; unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; + if (cr4 & X86_CR4_VMXE) + return 1; + if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) svm_flush_tlb(vcpu); @@ -1431,6 +1594,7 @@ static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) cr4 |= host_cr4_mce; to_svm(vcpu)->vmcb->save.cr4 = cr4; mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); + return 0; } static void svm_set_segment(struct kvm_vcpu *vcpu, @@ -1454,15 +1618,20 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; } - if (seg == VCPU_SREG_CS) - svm->vmcb->save.cpl - = (svm->vmcb->save.cs.attrib - >> SVM_SELECTOR_DPL_SHIFT) & 3; + + /* + * This is always accurate, except if SYSRET returned to a segment + * with SS.DPL != 3. Intel does not have this quirk, and always + * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it + * would entail passing the CPL to userspace and back. + */ + if (seg == VCPU_SREG_SS) + svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; mark_dirty(svm->vmcb, VMCB_SEG); } -static void update_db_intercept(struct kvm_vcpu *vcpu) +static void update_db_bp_intercept(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1482,20 +1651,6 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) vcpu->guest_debug = 0; } -static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) - svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; - else - svm->vmcb->save.dr7 = vcpu->arch.dr7; - - mark_dirty(svm->vmcb, VMCB_DR); - - update_db_intercept(vcpu); -} - static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) { if (sd->next_asid > sd->max_asid) { @@ -1510,6 +1665,34 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) mark_dirty(svm->vmcb, VMCB_ASID); } +static u64 svm_get_dr6(struct kvm_vcpu *vcpu) +{ + return to_svm(vcpu)->vmcb->save.dr6; +} + +static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->save.dr6 = value; + mark_dirty(svm->vmcb, VMCB_DR); +} + +static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + get_debugreg(vcpu->arch.db[0], 0); + get_debugreg(vcpu->arch.db[1], 1); + get_debugreg(vcpu->arch.db[2], 2); + get_debugreg(vcpu->arch.db[3], 3); + vcpu->arch.dr6 = svm_get_dr6(vcpu); + vcpu->arch.dr7 = svm->vmcb->save.dr7; + + vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; + set_dr_intercepts(svm); +} + static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1567,7 +1750,7 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_intercept(&svm->vcpu); + update_db_bp_intercept(&svm->vcpu); } if (svm->vcpu.guest_debug & @@ -1761,6 +1944,20 @@ static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) return svm->nested.nested_cr3; } +static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 cr3 = svm->nested.nested_cr3; + u64 pdpte; + int ret; + + ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte, + offset_in_page(cr3) + index * 8, 8); + if (ret) + return 0; + return pdpte; +} + static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) { @@ -1784,19 +1981,16 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, nested_svm_vmexit(svm); } -static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) +static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) { - int r; - - r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); + kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; + vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; vcpu->arch.mmu.shadow_root_level = get_npt_level(); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; - - return r; } static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) @@ -1867,7 +2061,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm) if (svm->nested.intercept & 1ULL) { /* * The #vmexit can't be emulated here directly because this - * code path runs with irqs and preemtion disabled. A + * code path runs with irqs and preemption disabled. A * #vmexit emulation might sleep. Only signal request for * the #vmexit here. */ @@ -1909,7 +2103,6 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) return kmap(page); error: - kvm_release_page_clean(page); kvm_inject_gp(&svm->vcpu, 0); return NULL; @@ -2099,7 +2292,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) vmcb->control.exit_info_1, vmcb->control.exit_info_2, vmcb->control.exit_int_info, - vmcb->control.exit_int_info_err); + vmcb->control.exit_int_info_err, + KVM_ISA_SVM); nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); if (!nested_vmcb) @@ -2123,7 +2317,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); nested_vmcb->save.cr2 = vmcb->save.cr2; nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; - nested_vmcb->save.rflags = vmcb->save.rflags; + nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); nested_vmcb->save.rip = vmcb->save.rip; nested_vmcb->save.rsp = vmcb->save.rsp; nested_vmcb->save.rax = vmcb->save.rax; @@ -2180,7 +2374,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) svm->vmcb->save.ds = hsave->save.ds; svm->vmcb->save.gdtr = hsave->save.gdtr; svm->vmcb->save.idtr = hsave->save.idtr; - svm->vmcb->save.rflags = hsave->save.rflags; + kvm_set_rflags(&svm->vcpu, hsave->save.rflags); svm_set_efer(&svm->vcpu, hsave->save.efer); svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); svm_set_cr4(&svm->vcpu, hsave->save.cr4); @@ -2212,7 +2406,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) { /* * This function merges the msr permission bitmaps of kvm and the - * nested vmcb. It is omptimized in that it only merges the parts where + * nested vmcb. It is optimized in that it only merges the parts where * the kvm msr permission bitmap may contain zero bits */ int i; @@ -2308,7 +2502,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) hsave->save.efer = svm->vcpu.arch.efer; hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); hsave->save.cr4 = svm->vcpu.arch.cr4; - hsave->save.rflags = vmcb->save.rflags; + hsave->save.rflags = kvm_get_rflags(&svm->vcpu); hsave->save.rip = kvm_rip_read(&svm->vcpu); hsave->save.rsp = vmcb->save.rsp; hsave->save.rax = vmcb->save.rax; @@ -2319,7 +2513,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) copy_vmcb_control_area(hsave, vmcb); - if (svm->vmcb->save.rflags & X86_EFLAGS_IF) + if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) svm->vcpu.arch.hflags |= HF_HIF_MASK; else svm->vcpu.arch.hflags &= ~HF_HIF_MASK; @@ -2337,7 +2531,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->vmcb->save.ds = nested_vmcb->save.ds; svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; svm->vmcb->save.idtr = nested_vmcb->save.idtr; - svm->vmcb->save.rflags = nested_vmcb->save.rflags; + kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags); svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); @@ -2439,13 +2633,13 @@ static int vmload_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return 1; + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + nested_svm_vmloadsave(nested_vmcb, svm->vmcb); nested_svm_unmap(page); @@ -2460,13 +2654,13 @@ static int vmsave_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return 1; + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + nested_svm_vmloadsave(svm->vmcb, nested_vmcb); nested_svm_unmap(page); @@ -2570,12 +2764,6 @@ static int xsetbv_interception(struct vcpu_svm *svm) return 1; } -static int invalid_op_interception(struct vcpu_svm *svm) -{ - kvm_queue_exception(&svm->vcpu, UD_VECTOR); - return 1; -} - static int task_switch_interception(struct vcpu_svm *svm) { u16 tss_selector; @@ -2631,7 +2819,10 @@ static int task_switch_interception(struct vcpu_svm *svm) (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) skip_emulated_instruction(&svm->vcpu); - if (kvm_task_switch(&svm->vcpu, tss_selector, reason, + if (int_type != SVM_EXITINTINFO_TYPE_SOFT) + int_vec = -1; + + if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, has_error_code, error_code) == EMULATE_FAIL) { svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; @@ -2653,6 +2844,8 @@ static int iret_interception(struct vcpu_svm *svm) ++svm->vcpu.stat.nmi_window_exits; clr_intercept(svm, INTERCEPT_IRET); svm->vcpu.arch.hflags |= HF_IRET_MASK; + svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); + kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); return 1; } @@ -2671,6 +2864,42 @@ static int emulate_on_interception(struct vcpu_svm *svm) return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } +static int rdpmc_interception(struct vcpu_svm *svm) +{ + int err; + + if (!static_cpu_has(X86_FEATURE_NRIPS)) + return emulate_on_interception(svm); + + err = kvm_rdpmc(&svm->vcpu); + kvm_complete_insn_gp(&svm->vcpu, err); + + return 1; +} + +bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) +{ + unsigned long cr0 = svm->vcpu.arch.cr0; + bool ret = false; + u64 intercept; + + intercept = svm->nested.intercept; + + if (!is_guest_mode(&svm->vcpu) || + (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) + return false; + + cr0 &= ~SVM_CR0_SELECTIVE_MASK; + val &= ~SVM_CR0_SELECTIVE_MASK; + + if (cr0 ^ val) { + svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; + ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); + } + + return ret; +} + #define CR_VALID (1ULL << 63) static int cr_interception(struct vcpu_svm *svm) @@ -2694,7 +2923,11 @@ static int cr_interception(struct vcpu_svm *svm) val = kvm_register_read(&svm->vcpu, reg); switch (cr) { case 0: - err = kvm_set_cr0(&svm->vcpu, val); + if (!check_selective_cr0_intercepted(svm, val)) + err = kvm_set_cr0(&svm->vcpu, val); + else + return 1; + break; case 3: err = kvm_set_cr3(&svm->vcpu, val); @@ -2739,29 +2972,23 @@ static int cr_interception(struct vcpu_svm *svm) return 1; } -static int cr0_write_interception(struct vcpu_svm *svm) -{ - struct kvm_vcpu *vcpu = &svm->vcpu; - int r; - - r = cr_interception(svm); - - if (svm->nested.vmexit_rip) { - kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); - kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp); - kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax); - svm->nested.vmexit_rip = 0; - } - - return r; -} - static int dr_interception(struct vcpu_svm *svm) { int reg, dr; unsigned long val; int err; + if (svm->vcpu.guest_debug == 0) { + /* + * No more DR vmexits; force a reload of the debug registers + * and reenter on this instruction. The next vmexit will + * retrieve the full state of the debug registers. + */ + clr_dr_intercepts(svm); + svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; + return 1; + } + if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) return emulate_on_interception(svm); @@ -2790,25 +3017,30 @@ static int cr8_write_interception(struct vcpu_svm *svm) u8 cr8_prev = kvm_get_cr8(&svm->vcpu); /* instruction emulation calls kvm_set_cr8() */ r = cr_interception(svm); - if (irqchip_in_kernel(svm->vcpu.kvm)) { - clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); + if (irqchip_in_kernel(svm->vcpu.kvm)) return r; - } if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) return r; kvm_run->exit_reason = KVM_EXIT_SET_TPR; return 0; } +u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) +{ + struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); + return vmcb->control.tsc_offset + + svm_scale_tsc(vcpu, host_tsc); +} + static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) { struct vcpu_svm *svm = to_svm(vcpu); switch (ecx) { case MSR_IA32_TSC: { - struct vmcb *vmcb = get_host_vmcb(svm); + *data = svm->vmcb->control.tsc_offset + + svm_scale_tsc(vcpu, native_read_tsc()); - *data = vmcb->control.tsc_offset + native_read_tsc(); break; } case MSR_STAR: @@ -2916,13 +3148,15 @@ static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) return 0; } -static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) +static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) { struct vcpu_svm *svm = to_svm(vcpu); + u32 ecx = msr->index; + u64 data = msr->data; switch (ecx) { case MSR_IA32_TSC: - kvm_write_tsc(vcpu, data); + kvm_write_tsc(vcpu, msr); break; case MSR_STAR: svm->vmcb->save.star = data; @@ -2954,8 +3188,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) break; case MSR_IA32_DEBUGCTLMSR: if (!boot_cpu_has(X86_FEATURE_LBRV)) { - pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", - __func__, data); + vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", + __func__, data); break; } if (data & DEBUGCTL_RESERVED_BITS) @@ -2974,23 +3208,27 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) case MSR_VM_CR: return svm_set_vm_cr(vcpu, data); case MSR_VM_IGNNE: - pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); + vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; default: - return kvm_set_msr_common(vcpu, ecx, data); + return kvm_set_msr_common(vcpu, msr); } return 0; } static int wrmsr_interception(struct vcpu_svm *svm) { + struct msr_data msr; u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); + msr.data = data; + msr.index = ecx; + msr.host_initiated = false; svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; - if (svm_set_msr(&svm->vcpu, ecx, data)) { + if (svm_set_msr(&svm->vcpu, &msr)) { trace_kvm_msr_write_ex(ecx, data); kvm_inject_gp(&svm->vcpu, 0); } else { @@ -3016,6 +3254,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm) svm_clear_vintr(svm); svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; mark_dirty(svm->vmcb, VMCB_INTR); + ++svm->vcpu.stat.irq_window_exits; /* * If the user space waits to inject interrupts, exit as soon as * possible @@ -3023,7 +3262,6 @@ static int interrupt_window_interception(struct vcpu_svm *svm) if (!irqchip_in_kernel(svm->vcpu.kvm) && kvm_run->request_interrupt_window && !kvm_cpu_has_interrupt(&svm->vcpu)) { - ++svm->vcpu.stat.irq_window_exits; kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; return 0; } @@ -3037,13 +3275,31 @@ static int pause_interception(struct vcpu_svm *svm) return 1; } -static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { +static int nop_interception(struct vcpu_svm *svm) +{ + skip_emulated_instruction(&(svm->vcpu)); + return 1; +} + +static int monitor_interception(struct vcpu_svm *svm) +{ + printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); + return nop_interception(svm); +} + +static int mwait_interception(struct vcpu_svm *svm) +{ + printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n"); + return nop_interception(svm); +} + +static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR0] = cr_interception, [SVM_EXIT_READ_CR3] = cr_interception, [SVM_EXIT_READ_CR4] = cr_interception, [SVM_EXIT_READ_CR8] = cr_interception, [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, - [SVM_EXIT_WRITE_CR0] = cr0_write_interception, + [SVM_EXIT_WRITE_CR0] = cr_interception, [SVM_EXIT_WRITE_CR3] = cr_interception, [SVM_EXIT_WRITE_CR4] = cr_interception, [SVM_EXIT_WRITE_CR8] = cr8_write_interception, @@ -3074,6 +3330,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_SMI] = nop_on_interception, [SVM_EXIT_INIT] = nop_on_interception, [SVM_EXIT_VINTR] = interrupt_window_interception, + [SVM_EXIT_RDPMC] = rdpmc_interception, [SVM_EXIT_CPUID] = cpuid_interception, [SVM_EXIT_IRET] = iret_interception, [SVM_EXIT_INVD] = emulate_on_interception, @@ -3093,103 +3350,115 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_CLGI] = clgi_interception, [SVM_EXIT_SKINIT] = skinit_interception, [SVM_EXIT_WBINVD] = emulate_on_interception, - [SVM_EXIT_MONITOR] = invalid_op_interception, - [SVM_EXIT_MWAIT] = invalid_op_interception, + [SVM_EXIT_MONITOR] = monitor_interception, + [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, [SVM_EXIT_NPF] = pf_interception, }; -void dump_vmcb(struct kvm_vcpu *vcpu) +static void dump_vmcb(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; pr_err("VMCB Control Area:\n"); - pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff); - pr_err("cr_write: %04x\n", control->intercept_cr >> 16); - pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff); - pr_err("dr_write: %04x\n", control->intercept_dr >> 16); - pr_err("exceptions: %08x\n", control->intercept_exceptions); - pr_err("intercepts: %016llx\n", control->intercept); - pr_err("pause filter count: %d\n", control->pause_filter_count); - pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa); - pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa); - pr_err("tsc_offset: %016llx\n", control->tsc_offset); - pr_err("asid: %d\n", control->asid); - pr_err("tlb_ctl: %d\n", control->tlb_ctl); - pr_err("int_ctl: %08x\n", control->int_ctl); - pr_err("int_vector: %08x\n", control->int_vector); - pr_err("int_state: %08x\n", control->int_state); - pr_err("exit_code: %08x\n", control->exit_code); - pr_err("exit_info1: %016llx\n", control->exit_info_1); - pr_err("exit_info2: %016llx\n", control->exit_info_2); - pr_err("exit_int_info: %08x\n", control->exit_int_info); - pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err); - pr_err("nested_ctl: %lld\n", control->nested_ctl); - pr_err("nested_cr3: %016llx\n", control->nested_cr3); - pr_err("event_inj: %08x\n", control->event_inj); - pr_err("event_inj_err: %08x\n", control->event_inj_err); - pr_err("lbr_ctl: %lld\n", control->lbr_ctl); - pr_err("next_rip: %016llx\n", control->next_rip); + pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); + pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); + pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); + pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); + pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); + pr_err("%-20s%016llx\n", "intercepts:", control->intercept); + pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); + pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); + pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); + pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); + pr_err("%-20s%d\n", "asid:", control->asid); + pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl); + pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); + pr_err("%-20s%08x\n", "int_vector:", control->int_vector); + pr_err("%-20s%08x\n", "int_state:", control->int_state); + pr_err("%-20s%08x\n", "exit_code:", control->exit_code); + pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); + pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); + pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info); + pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); + pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); + pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); + pr_err("%-20s%08x\n", "event_inj:", control->event_inj); + pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); + pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); + pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); pr_err("VMCB State Save Area:\n"); - pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n", - save->es.selector, save->es.attrib, - save->es.limit, save->es.base); - pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n", - save->cs.selector, save->cs.attrib, - save->cs.limit, save->cs.base); - pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n", - save->ss.selector, save->ss.attrib, - save->ss.limit, save->ss.base); - pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n", - save->ds.selector, save->ds.attrib, - save->ds.limit, save->ds.base); - pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n", - save->fs.selector, save->fs.attrib, - save->fs.limit, save->fs.base); - pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n", - save->gs.selector, save->gs.attrib, - save->gs.limit, save->gs.base); - pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->gdtr.selector, save->gdtr.attrib, - save->gdtr.limit, save->gdtr.base); - pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->ldtr.selector, save->ldtr.attrib, - save->ldtr.limit, save->ldtr.base); - pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", - save->idtr.selector, save->idtr.attrib, - save->idtr.limit, save->idtr.base); - pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n", - save->tr.selector, save->tr.attrib, - save->tr.limit, save->tr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "es:", + save->es.selector, save->es.attrib, + save->es.limit, save->es.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "cs:", + save->cs.selector, save->cs.attrib, + save->cs.limit, save->cs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ss:", + save->ss.selector, save->ss.attrib, + save->ss.limit, save->ss.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ds:", + save->ds.selector, save->ds.attrib, + save->ds.limit, save->ds.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "fs:", + save->fs.selector, save->fs.attrib, + save->fs.limit, save->fs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "gs:", + save->gs.selector, save->gs.attrib, + save->gs.limit, save->gs.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "gdtr:", + save->gdtr.selector, save->gdtr.attrib, + save->gdtr.limit, save->gdtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "ldtr:", + save->ldtr.selector, save->ldtr.attrib, + save->ldtr.limit, save->ldtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "idtr:", + save->idtr.selector, save->idtr.attrib, + save->idtr.limit, save->idtr.base); + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", + "tr:", + save->tr.selector, save->tr.attrib, + save->tr.limit, save->tr.base); pr_err("cpl: %d efer: %016llx\n", save->cpl, save->efer); - pr_err("cr0: %016llx cr2: %016llx\n", - save->cr0, save->cr2); - pr_err("cr3: %016llx cr4: %016llx\n", - save->cr3, save->cr4); - pr_err("dr6: %016llx dr7: %016llx\n", - save->dr6, save->dr7); - pr_err("rip: %016llx rflags: %016llx\n", - save->rip, save->rflags); - pr_err("rsp: %016llx rax: %016llx\n", - save->rsp, save->rax); - pr_err("star: %016llx lstar: %016llx\n", - save->star, save->lstar); - pr_err("cstar: %016llx sfmask: %016llx\n", - save->cstar, save->sfmask); - pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n", - save->kernel_gs_base, save->sysenter_cs); - pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n", - save->sysenter_esp, save->sysenter_eip); - pr_err("gpat: %016llx dbgctl: %016llx\n", - save->g_pat, save->dbgctl); - pr_err("br_from: %016llx br_to: %016llx\n", - save->br_from, save->br_to); - pr_err("excp_from: %016llx excp_to: %016llx\n", - save->last_excp_from, save->last_excp_to); - + pr_err("%-15s %016llx %-13s %016llx\n", + "cr0:", save->cr0, "cr2:", save->cr2); + pr_err("%-15s %016llx %-13s %016llx\n", + "cr3:", save->cr3, "cr4:", save->cr4); + pr_err("%-15s %016llx %-13s %016llx\n", + "dr6:", save->dr6, "dr7:", save->dr7); + pr_err("%-15s %016llx %-13s %016llx\n", + "rip:", save->rip, "rflags:", save->rflags); + pr_err("%-15s %016llx %-13s %016llx\n", + "rsp:", save->rsp, "rax:", save->rax); + pr_err("%-15s %016llx %-13s %016llx\n", + "star:", save->star, "lstar:", save->lstar); + pr_err("%-15s %016llx %-13s %016llx\n", + "cstar:", save->cstar, "sfmask:", save->sfmask); + pr_err("%-15s %016llx %-13s %016llx\n", + "kernel_gs_base:", save->kernel_gs_base, + "sysenter_cs:", save->sysenter_cs); + pr_err("%-15s %016llx %-13s %016llx\n", + "sysenter_esp:", save->sysenter_esp, + "sysenter_eip:", save->sysenter_eip); + pr_err("%-15s %016llx %-13s %016llx\n", + "gpat:", save->g_pat, "dbgctl:", save->dbgctl); + pr_err("%-15s %016llx %-13s %016llx\n", + "br_from:", save->br_from, "br_to:", save->br_to); + pr_err("%-15s %016llx %-13s %016llx\n", + "excp_from:", save->last_excp_from, + "excp_to:", save->last_excp_to); } static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) @@ -3206,8 +3475,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; u32 exit_code = svm->vmcb->control.exit_code; - trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); - if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) vcpu->arch.cr0 = svm->vmcb->save.cr0; if (npt_enabled) @@ -3227,7 +3494,8 @@ static int handle_exit(struct kvm_vcpu *vcpu) svm->vmcb->control.exit_info_1, svm->vmcb->control.exit_info_2, svm->vmcb->control.exit_int_info, - svm->vmcb->control.exit_int_info_err); + svm->vmcb->control.exit_int_info_err, + KVM_ISA_SVM); vmexit = nested_svm_exit_special(svm); @@ -3253,7 +3521,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) - printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " + printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " "exit_code 0x%x\n", __func__, svm->vmcb->control.exit_int_info, exit_code); @@ -3330,6 +3598,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) return; + clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); + if (irr == -1) return; @@ -3337,6 +3607,31 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) set_cr_intercept(svm, INTERCEPT_CR8_WRITE); } +static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) +{ + return; +} + +static int svm_vm_has_apicv(struct kvm *kvm) +{ + return 0; +} + +static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +{ + return; +} + +static void svm_hwapic_isr_update(struct kvm *kvm, int isr) +{ + return; +} + +static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + return; +} + static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3379,7 +3674,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) return 0; - ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); + ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); if (is_guest_mode(vcpu)) return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); @@ -3417,7 +3712,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) */ svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_intercept(vcpu); + update_db_bp_intercept(vcpu); } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -3474,7 +3769,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) svm->int3_injected = 0; - if (svm->vcpu.arch.hflags & HF_IRET_MASK) { + /* + * If we've made progress since setting HF_IRET_MASK, we've + * executed an IRET and can allow NMI injection. + */ + if ((svm->vcpu.arch.hflags & HF_IRET_MASK) + && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) { svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); } @@ -3535,12 +3835,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) svm_complete_interrupts(svm); } -#ifdef CONFIG_X86_64 -#define R "r" -#else -#define R "e" -#endif - static void svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3567,13 +3861,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); asm volatile ( - "push %%"R"bp; \n\t" - "mov %c[rbx](%[svm]), %%"R"bx \n\t" - "mov %c[rcx](%[svm]), %%"R"cx \n\t" - "mov %c[rdx](%[svm]), %%"R"dx \n\t" - "mov %c[rsi](%[svm]), %%"R"si \n\t" - "mov %c[rdi](%[svm]), %%"R"di \n\t" - "mov %c[rbp](%[svm]), %%"R"bp \n\t" + "push %%" _ASM_BP "; \n\t" + "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" + "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t" + "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t" + "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t" + "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t" + "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t" #ifdef CONFIG_X86_64 "mov %c[r8](%[svm]), %%r8 \n\t" "mov %c[r9](%[svm]), %%r9 \n\t" @@ -3586,20 +3880,20 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif /* Enter guest mode */ - "push %%"R"ax \n\t" - "mov %c[vmcb](%[svm]), %%"R"ax \n\t" + "push %%" _ASM_AX " \n\t" + "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t" __ex(SVM_VMLOAD) "\n\t" __ex(SVM_VMRUN) "\n\t" __ex(SVM_VMSAVE) "\n\t" - "pop %%"R"ax \n\t" + "pop %%" _ASM_AX " \n\t" /* Save guest registers, load host registers */ - "mov %%"R"bx, %c[rbx](%[svm]) \n\t" - "mov %%"R"cx, %c[rcx](%[svm]) \n\t" - "mov %%"R"dx, %c[rdx](%[svm]) \n\t" - "mov %%"R"si, %c[rsi](%[svm]) \n\t" - "mov %%"R"di, %c[rdi](%[svm]) \n\t" - "mov %%"R"bp, %c[rbp](%[svm]) \n\t" + "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t" + "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t" + "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t" + "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t" + "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t" + "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t" #ifdef CONFIG_X86_64 "mov %%r8, %c[r8](%[svm]) \n\t" "mov %%r9, %c[r9](%[svm]) \n\t" @@ -3610,7 +3904,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r14, %c[r14](%[svm]) \n\t" "mov %%r15, %c[r15](%[svm]) \n\t" #endif - "pop %%"R"bp" + "pop %%" _ASM_BP : : [svm]"a"(svm), [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), @@ -3631,9 +3925,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) #endif : "cc", "memory" - , R"bx", R"cx", R"dx", R"si", R"di" #ifdef CONFIG_X86_64 + , "rbx", "rcx", "rdx", "rsi", "rdi" , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" +#else + , "ebx", "ecx", "edx", "esi", "edi" #endif ); @@ -3641,19 +3937,32 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) wrmsrl(MSR_GS_BASE, svm->host.gs_base); #else loadsegment(fs, svm->host.fs); +#ifndef CONFIG_X86_32_LAZY_GS + loadsegment(gs, svm->host.gs); +#endif #endif reload_tss(vcpu); local_irq_disable(); - stgi(); - vcpu->arch.cr2 = svm->vmcb->save.cr2; vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; + trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM); + + if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) + kvm_before_handle_nmi(&svm->vcpu); + + stgi(); + + /* Any pending NMI will happen here */ + + if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) + kvm_after_handle_nmi(&svm->vcpu); + sync_cr8_to_lapic(vcpu); svm->next_rip = 0; @@ -3680,8 +3989,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) mark_all_clean(svm->vmcb); } -#undef R - static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3773,60 +4080,6 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) } } -static const struct trace_print_flags svm_exit_reasons_str[] = { - { SVM_EXIT_READ_CR0, "read_cr0" }, - { SVM_EXIT_READ_CR3, "read_cr3" }, - { SVM_EXIT_READ_CR4, "read_cr4" }, - { SVM_EXIT_READ_CR8, "read_cr8" }, - { SVM_EXIT_WRITE_CR0, "write_cr0" }, - { SVM_EXIT_WRITE_CR3, "write_cr3" }, - { SVM_EXIT_WRITE_CR4, "write_cr4" }, - { SVM_EXIT_WRITE_CR8, "write_cr8" }, - { SVM_EXIT_READ_DR0, "read_dr0" }, - { SVM_EXIT_READ_DR1, "read_dr1" }, - { SVM_EXIT_READ_DR2, "read_dr2" }, - { SVM_EXIT_READ_DR3, "read_dr3" }, - { SVM_EXIT_WRITE_DR0, "write_dr0" }, - { SVM_EXIT_WRITE_DR1, "write_dr1" }, - { SVM_EXIT_WRITE_DR2, "write_dr2" }, - { SVM_EXIT_WRITE_DR3, "write_dr3" }, - { SVM_EXIT_WRITE_DR5, "write_dr5" }, - { SVM_EXIT_WRITE_DR7, "write_dr7" }, - { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, - { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, - { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, - { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, - { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, - { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, - { SVM_EXIT_INTR, "interrupt" }, - { SVM_EXIT_NMI, "nmi" }, - { SVM_EXIT_SMI, "smi" }, - { SVM_EXIT_INIT, "init" }, - { SVM_EXIT_VINTR, "vintr" }, - { SVM_EXIT_CPUID, "cpuid" }, - { SVM_EXIT_INVD, "invd" }, - { SVM_EXIT_HLT, "hlt" }, - { SVM_EXIT_INVLPG, "invlpg" }, - { SVM_EXIT_INVLPGA, "invlpga" }, - { SVM_EXIT_IOIO, "io" }, - { SVM_EXIT_MSR, "msr" }, - { SVM_EXIT_TASK_SWITCH, "task_switch" }, - { SVM_EXIT_SHUTDOWN, "shutdown" }, - { SVM_EXIT_VMRUN, "vmrun" }, - { SVM_EXIT_VMMCALL, "hypercall" }, - { SVM_EXIT_VMLOAD, "vmload" }, - { SVM_EXIT_VMSAVE, "vmsave" }, - { SVM_EXIT_STGI, "stgi" }, - { SVM_EXIT_CLGI, "clgi" }, - { SVM_EXIT_SKINIT, "skinit" }, - { SVM_EXIT_WBINVD, "wbinvd" }, - { SVM_EXIT_MONITOR, "monitor" }, - { SVM_EXIT_MWAIT, "mwait" }, - { SVM_EXIT_XSETBV, "xsetbv" }, - { SVM_EXIT_NPF, "npf" }, - { -1, NULL } -}; - static int svm_get_lpage_level(void) { return PT_PDPE_LEVEL; @@ -3837,6 +4090,16 @@ static bool svm_rdtscp_supported(void) return false; } +static bool svm_invpcid_supported(void) +{ + return false; +} + +static bool svm_mpx_supported(void) +{ + return false; +} + static bool svm_has_wbinvd_exit(void) { return true; @@ -3850,6 +4113,191 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) update_cr0_intercept(svm); } +#define PRE_EX(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_PRE_EXCEPT, } +#define POST_EX(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_POST_EXCEPT, } +#define POST_MEM(exit) { .exit_code = (exit), \ + .stage = X86_ICPT_POST_MEMACCESS, } + +static const struct __x86_intercept { + u32 exit_code; + enum x86_intercept_stage stage; +} x86_intercept_map[] = { + [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0), + [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0), + [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), + [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0), + [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0), + [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ), + [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ), + [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE), + [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE), + [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ), + [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ), + [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE), + [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE), + [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN), + [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL), + [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD), + [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE), + [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI), + [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI), + [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT), + [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA), + [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP), + [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR), + [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT), + [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG), + [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD), + [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD), + [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR), + [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC), + [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR), + [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC), + [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID), + [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM), + [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE), + [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF), + [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF), + [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT), + [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET), + [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP), + [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT), + [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO), + [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO), +}; + +#undef PRE_EX +#undef POST_EX +#undef POST_MEM + +static int svm_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage) +{ + struct vcpu_svm *svm = to_svm(vcpu); + int vmexit, ret = X86EMUL_CONTINUE; + struct __x86_intercept icpt_info; + struct vmcb *vmcb = svm->vmcb; + + if (info->intercept >= ARRAY_SIZE(x86_intercept_map)) + goto out; + + icpt_info = x86_intercept_map[info->intercept]; + + if (stage != icpt_info.stage) + goto out; + + switch (icpt_info.exit_code) { + case SVM_EXIT_READ_CR0: + if (info->intercept == x86_intercept_cr_read) + icpt_info.exit_code += info->modrm_reg; + break; + case SVM_EXIT_WRITE_CR0: { + unsigned long cr0, val; + u64 intercept; + + if (info->intercept == x86_intercept_cr_write) + icpt_info.exit_code += info->modrm_reg; + + if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0) + break; + + intercept = svm->nested.intercept; + + if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) + break; + + cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; + val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; + + if (info->intercept == x86_intercept_lmsw) { + cr0 &= 0xfUL; + val &= 0xfUL; + /* lmsw can't clear PE - catch this here */ + if (cr0 & X86_CR0_PE) + val |= X86_CR0_PE; + } + + if (cr0 ^ val) + icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; + + break; + } + case SVM_EXIT_READ_DR0: + case SVM_EXIT_WRITE_DR0: + icpt_info.exit_code += info->modrm_reg; + break; + case SVM_EXIT_MSR: + if (info->intercept == x86_intercept_wrmsr) + vmcb->control.exit_info_1 = 1; + else + vmcb->control.exit_info_1 = 0; + break; + case SVM_EXIT_PAUSE: + /* + * We get this for NOP only, but pause + * is rep not, check this here + */ + if (info->rep_prefix != REPE_PREFIX) + goto out; + case SVM_EXIT_IOIO: { + u64 exit_info; + u32 bytes; + + exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16; + + if (info->intercept == x86_intercept_in || + info->intercept == x86_intercept_ins) { + exit_info |= SVM_IOIO_TYPE_MASK; + bytes = info->src_bytes; + } else { + bytes = info->dst_bytes; + } + + if (info->intercept == x86_intercept_outs || + info->intercept == x86_intercept_ins) + exit_info |= SVM_IOIO_STR_MASK; + + if (info->rep_prefix) + exit_info |= SVM_IOIO_REP_MASK; + + bytes = min(bytes, 4u); + + exit_info |= bytes << SVM_IOIO_SIZE_SHIFT; + + exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1); + + vmcb->control.exit_info_1 = exit_info; + vmcb->control.exit_info_2 = info->next_rip; + + break; + } + default: + break; + } + + vmcb->control.next_rip = info->next_rip; + vmcb->control.exit_code = icpt_info.exit_code; + vmexit = nested_svm_exit_handled(svm); + + ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED + : X86EMUL_CONTINUE; + +out: + return ret; +} + +static void svm_handle_external_intr(struct kvm_vcpu *vcpu) +{ + local_irq_enable(); +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -3868,7 +4316,7 @@ static struct kvm_x86_ops svm_x86_ops = { .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, - .set_guest_debug = svm_guest_debug, + .update_db_bp_intercept = update_db_bp_intercept, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, @@ -3887,7 +4335,10 @@ static struct kvm_x86_ops svm_x86_ops = { .set_idt = svm_set_idt, .get_gdt = svm_get_gdt, .set_gdt = svm_set_gdt, + .get_dr6 = svm_get_dr6, + .set_dr6 = svm_set_dr6, .set_dr7 = svm_set_dr7, + .sync_dirty_debug_regs = svm_sync_dirty_debug_regs, .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, @@ -3913,28 +4364,41 @@ static struct kvm_x86_ops svm_x86_ops = { .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, + .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, + .vm_has_apicv = svm_vm_has_apicv, + .load_eoi_exitmap = svm_load_eoi_exitmap, + .hwapic_isr_update = svm_hwapic_isr_update, + .sync_pir_to_irr = svm_sync_pir_to_irr, .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, .get_mt_mask = svm_get_mt_mask, .get_exit_info = svm_get_exit_info, - .exit_reasons_str = svm_exit_reasons_str, .get_lpage_level = svm_get_lpage_level, .cpuid_update = svm_cpuid_update, .rdtscp_supported = svm_rdtscp_supported, + .invpcid_supported = svm_invpcid_supported, + .mpx_supported = svm_mpx_supported, .set_supported_cpuid = svm_set_supported_cpuid, .has_wbinvd_exit = svm_has_wbinvd_exit, + .set_tsc_khz = svm_set_tsc_khz, + .read_tsc_offset = svm_read_tsc_offset, .write_tsc_offset = svm_write_tsc_offset, .adjust_tsc_offset = svm_adjust_tsc_offset, + .compute_tsc_offset = svm_compute_tsc_offset, + .read_l1_tsc = svm_read_l1_tsc, .set_tdp_cr3 = set_tdp_cr3, + + .check_intercept = svm_check_intercept, + .handle_external_intr = svm_handle_external_intr, }; static int __init svm_init(void) |
