diff options
Diffstat (limited to 'arch/x86/kvm/svm.c')
| -rw-r--r-- | arch/x86/kvm/svm.c | 470 |
1 files changed, 322 insertions, 148 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 5fa553babe5..b5e994ad013 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -20,8 +20,10 @@ #include "mmu.h" #include "kvm_cache_regs.h" #include "x86.h" +#include "cpuid.h" #include <linux/module.h> +#include <linux/mod_devicetable.h> #include <linux/kernel.h> #include <linux/vmalloc.h> #include <linux/highmem.h> @@ -29,8 +31,10 @@ #include <linux/ftrace_event.h> #include <linux/slab.h> +#include <asm/perf_event.h> #include <asm/tlbflush.h> #include <asm/desc.h> +#include <asm/debugreg.h> #include <asm/kvm_para.h> #include <asm/virtext.h> @@ -41,6 +45,12 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +static const struct x86_cpu_id svm_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_SVM), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); + #define IOPM_ALLOC_ORDER 2 #define MSRPM_ALLOC_ORDER 1 @@ -110,6 +120,12 @@ struct nested_state { #define MSRPM_OFFSETS 16 static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; +/* + * Set osvw_len to higher value when updated Revision Guides + * are published and we know what the new status bits are + */ +static uint64_t osvw_len = 4, osvw_status; + struct vcpu_svm { struct kvm_vcpu vcpu; struct vmcb *vmcb; @@ -149,7 +165,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio); #define MSR_INVALID 0xffffffffU -static struct svm_direct_access_msrs { +static const struct svm_direct_access_msrs { u32 index; /* Index of the MSR */ bool always; /* True if intercept is always on */ } direct_access_msrs[] = { @@ -176,11 +192,13 @@ static bool npt_enabled = true; #else static bool npt_enabled; #endif -static int npt = 1; +/* allow nested paging (virtualized MMU) for all guests */ +static int npt = true; module_param(npt, int, S_IRUGO); -static int nested = 1; +/* allow nested virtualization in KVM/SVM */ +static int nested = true; module_param(nested, int, S_IRUGO); static void svm_flush_tlb(struct kvm_vcpu *vcpu); @@ -286,20 +304,35 @@ static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit) return vmcb->control.intercept_cr & (1U << bit); } -static inline void set_dr_intercept(struct vcpu_svm *svm, int bit) +static inline void set_dr_intercepts(struct vcpu_svm *svm) { struct vmcb *vmcb = get_host_vmcb(svm); - vmcb->control.intercept_dr |= (1U << bit); + vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ) + | (1 << INTERCEPT_DR1_READ) + | (1 << INTERCEPT_DR2_READ) + | (1 << INTERCEPT_DR3_READ) + | (1 << INTERCEPT_DR4_READ) + | (1 << INTERCEPT_DR5_READ) + | (1 << INTERCEPT_DR6_READ) + | (1 << INTERCEPT_DR7_READ) + | (1 << INTERCEPT_DR0_WRITE) + | (1 << INTERCEPT_DR1_WRITE) + | (1 << INTERCEPT_DR2_WRITE) + | (1 << INTERCEPT_DR3_WRITE) + | (1 << INTERCEPT_DR4_WRITE) + | (1 << INTERCEPT_DR5_WRITE) + | (1 << INTERCEPT_DR6_WRITE) + | (1 << INTERCEPT_DR7_WRITE); recalc_intercepts(svm); } -static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit) +static inline void clr_dr_intercepts(struct vcpu_svm *svm) { struct vmcb *vmcb = get_host_vmcb(svm); - vmcb->control.intercept_dr &= ~(1U << bit); + vmcb->control.intercept_dr = 0; recalc_intercepts(svm); } @@ -384,7 +417,7 @@ struct svm_init_data { int r; }; -static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; +static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges) #define MSRS_RANGE_SIZE 2048 @@ -538,7 +571,7 @@ static void svm_init_erratum_383(void) int err; u64 val; - if (!cpu_has_amd_erratum(amd_erratum_383)) + if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH)) return; /* Use _safe variants to not break nested virtualization */ @@ -556,6 +589,27 @@ static void svm_init_erratum_383(void) erratum_383_found = true; } +static void svm_init_osvw(struct kvm_vcpu *vcpu) +{ + /* + * Guests should see errata 400 and 415 as fixed (assuming that + * HLT and IO instructions are intercepted). + */ + vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3; + vcpu->arch.osvw.status = osvw_status & ~(6ULL); + + /* + * By increasing VCPU's osvw.length to 3 we are telling the guest that + * all osvw.status bits inside that length, including bit 0 (which is + * reserved for erratum 298), are valid. However, if host processor's + * osvw_len is 0 then osvw_status[0] carries no information. We need to + * be conservative here and therefore we tell the guest that erratum 298 + * is present (because we really don't know). + */ + if (osvw_len == 0 && boot_cpu_data.x86 == 0x10) + vcpu->arch.osvw.status |= 1; +} + static int has_svm(void) { const char *msg; @@ -575,6 +629,8 @@ static void svm_hardware_disable(void *garbage) wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); cpu_svm_disable(); + + amd_pmu_disable_virt(); } static int svm_hardware_enable(void *garbage) @@ -591,15 +647,12 @@ static int svm_hardware_enable(void *garbage) return -EBUSY; if (!has_svm()) { - printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n", - me); + pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me); return -EINVAL; } sd = per_cpu(svm_data, me); - if (!sd) { - printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n", - me); + pr_err("%s: svm_data is NULL on %d\n", __func__, me); return -EINVAL; } @@ -620,8 +673,40 @@ static int svm_hardware_enable(void *garbage) __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; } + + /* + * Get OSVW bits. + * + * Note that it is possible to have a system with mixed processor + * revisions and therefore different OSVW bits. If bits are not the same + * on different processors then choose the worst case (i.e. if erratum + * is present on one processor and not on another then assume that the + * erratum is present everywhere). + */ + if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) { + uint64_t len, status = 0; + int err; + + len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err); + if (!err) + status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS, + &err); + + if (err) + osvw_status = osvw_len = 0; + else { + if (len < osvw_len) + osvw_len = len; + osvw_status |= status; + osvw_status &= (1ULL << osvw_len) - 1; + } + } else + osvw_status = osvw_len = 0; + svm_init_erratum_383(); + amd_pmu_enable_virt(); + return 0; } @@ -905,20 +990,25 @@ static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) return _tsc; } -static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) +static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) { struct vcpu_svm *svm = to_svm(vcpu); u64 ratio; u64 khz; - /* TSC scaling supported? */ - if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) + /* Guest TSC same frequency as host TSC? */ + if (!scale) { + svm->tsc_ratio = TSC_RATIO_DEFAULT; return; + } - /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */ - if (user_tsc_khz == 0) { - vcpu->arch.virtual_tsc_khz = 0; - svm->tsc_ratio = TSC_RATIO_DEFAULT; + /* TSC scaling supported? */ + if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { + if (user_tsc_khz > tsc_khz) { + vcpu->arch.tsc_catchup = 1; + vcpu->arch.tsc_always_catchup = 1; + } else + WARN(1, "user requested TSC rate below hardware speed\n"); return; } @@ -933,10 +1023,16 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) user_tsc_khz); return; } - vcpu->arch.virtual_tsc_khz = user_tsc_khz; svm->tsc_ratio = ratio; } +static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + return svm->vmcb->control.tsc_offset; +} + static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); @@ -946,20 +1042,32 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) g_tsc_offset = svm->vmcb->control.tsc_offset - svm->nested.hsave->control.tsc_offset; svm->nested.hsave->control.tsc_offset = offset; - } + } else + trace_kvm_write_tsc_offset(vcpu->vcpu_id, + svm->vmcb->control.tsc_offset, + offset); svm->vmcb->control.tsc_offset = offset + g_tsc_offset; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } -static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) +static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) { struct vcpu_svm *svm = to_svm(vcpu); + WARN_ON(adjustment < 0); + if (host) + adjustment = svm_scale_tsc(vcpu, adjustment); + svm->vmcb->control.tsc_offset += adjustment; if (is_guest_mode(vcpu)) svm->nested.hsave->control.tsc_offset += adjustment; + else + trace_kvm_write_tsc_offset(vcpu->vcpu_id, + svm->vmcb->control.tsc_offset - adjustment, + svm->vmcb->control.tsc_offset); + mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } @@ -988,23 +1096,7 @@ static void init_vmcb(struct vcpu_svm *svm) set_cr_intercept(svm, INTERCEPT_CR4_WRITE); set_cr_intercept(svm, INTERCEPT_CR8_WRITE); - set_dr_intercept(svm, INTERCEPT_DR0_READ); - set_dr_intercept(svm, INTERCEPT_DR1_READ); - set_dr_intercept(svm, INTERCEPT_DR2_READ); - set_dr_intercept(svm, INTERCEPT_DR3_READ); - set_dr_intercept(svm, INTERCEPT_DR4_READ); - set_dr_intercept(svm, INTERCEPT_DR5_READ); - set_dr_intercept(svm, INTERCEPT_DR6_READ); - set_dr_intercept(svm, INTERCEPT_DR7_READ); - - set_dr_intercept(svm, INTERCEPT_DR0_WRITE); - set_dr_intercept(svm, INTERCEPT_DR1_WRITE); - set_dr_intercept(svm, INTERCEPT_DR2_WRITE); - set_dr_intercept(svm, INTERCEPT_DR3_WRITE); - set_dr_intercept(svm, INTERCEPT_DR4_WRITE); - set_dr_intercept(svm, INTERCEPT_DR5_WRITE); - set_dr_intercept(svm, INTERCEPT_DR6_WRITE); - set_dr_intercept(svm, INTERCEPT_DR7_WRITE); + set_dr_intercepts(svm); set_exception_intercept(svm, PF_VECTOR); set_exception_intercept(svm, UD_VECTOR); @@ -1047,17 +1139,11 @@ static void init_vmcb(struct vcpu_svm *svm) init_seg(&save->gs); save->cs.selector = 0xf000; + save->cs.base = 0xffff0000; /* Executable/Readable Code Segment */ save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; save->cs.limit = 0xffff; - /* - * cs.base should really be 0xffff0000, but vmx can't handle that, so - * be consistent with it. - * - * Replace when we have real mode working for vmx. - */ - save->cs.base = 0xf0000; save->gdtr.limit = 0xffff; save->idtr.limit = 0xffff; @@ -1067,7 +1153,6 @@ static void init_vmcb(struct vcpu_svm *svm) svm_set_efer(&svm->vcpu, 0); save->dr6 = 0xffff0ff0; - save->dr7 = 0x400; kvm_set_rflags(&svm->vcpu, 2); save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; @@ -1108,21 +1193,16 @@ static void init_vmcb(struct vcpu_svm *svm) enable_gif(svm); } -static int svm_vcpu_reset(struct kvm_vcpu *vcpu) +static void svm_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + u32 dummy; + u32 eax = 1; init_vmcb(svm); - if (!kvm_vcpu_is_bsp(vcpu)) { - kvm_rip_write(vcpu, 0); - svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; - svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; - } - vcpu->arch.regs_avail = ~0; - vcpu->arch.regs_dirty = ~0; - - return 0; + kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); + kvm_register_write(vcpu, VCPU_REGS_RDX, eax); } static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) @@ -1176,20 +1256,15 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; svm->asid_generation = 0; init_vmcb(svm); - kvm_write_tsc(&svm->vcpu, 0); - - err = fx_init(&svm->vcpu); - if (err) - goto free_page4; svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_bsp(&svm->vcpu)) svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; + svm_init_osvw(&svm->vcpu); + return &svm->vcpu; -free_page4: - __free_page(hsave_page); free_page3: __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); free_page2: @@ -1270,6 +1345,11 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { + /* + * Any change of EFLAGS.VM is accompained by a reload of SS + * (caused by either a task switch or an inter-privilege IRET), + * so we do not need to update the CPL here. + */ to_svm(vcpu)->vmcb->save.rflags = rflags; } @@ -1382,6 +1462,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, */ if (var->unusable) var->db = 0; + var->dpl = to_svm(vcpu)->vmcb->save.cpl; break; } } @@ -1537,15 +1618,20 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; } - if (seg == VCPU_SREG_CS) - svm->vmcb->save.cpl - = (svm->vmcb->save.cs.attrib - >> SVM_SELECTOR_DPL_SHIFT) & 3; + + /* + * This is always accurate, except if SYSRET returned to a segment + * with SS.DPL != 3. Intel does not have this quirk, and always + * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it + * would entail passing the CPL to userspace and back. + */ + if (seg == VCPU_SREG_SS) + svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; mark_dirty(svm->vmcb, VMCB_SEG); } -static void update_db_intercept(struct kvm_vcpu *vcpu) +static void update_db_bp_intercept(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1565,20 +1651,6 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) vcpu->guest_debug = 0; } -static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) - svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; - else - svm->vmcb->save.dr7 = vcpu->arch.dr7; - - mark_dirty(svm->vmcb, VMCB_DR); - - update_db_intercept(vcpu); -} - static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) { if (sd->next_asid > sd->max_asid) { @@ -1593,6 +1665,34 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) mark_dirty(svm->vmcb, VMCB_ASID); } +static u64 svm_get_dr6(struct kvm_vcpu *vcpu) +{ + return to_svm(vcpu)->vmcb->save.dr6; +} + +static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->save.dr6 = value; + mark_dirty(svm->vmcb, VMCB_DR); +} + +static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + get_debugreg(vcpu->arch.db[0], 0); + get_debugreg(vcpu->arch.db[1], 1); + get_debugreg(vcpu->arch.db[2], 2); + get_debugreg(vcpu->arch.db[3], 3); + vcpu->arch.dr6 = svm_get_dr6(vcpu); + vcpu->arch.dr7 = svm->vmcb->save.dr7; + + vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; + set_dr_intercepts(svm); +} + static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1650,7 +1750,7 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_intercept(&svm->vcpu); + update_db_bp_intercept(&svm->vcpu); } if (svm->vcpu.guest_debug & @@ -1881,11 +1981,9 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, nested_svm_vmexit(svm); } -static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) +static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) { - int r; - - r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); + kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; @@ -1893,8 +1991,6 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; vcpu->arch.mmu.shadow_root_level = get_npt_level(); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; - - return r; } static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) @@ -1965,7 +2061,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm) if (svm->nested.intercept & 1ULL) { /* * The #vmexit can't be emulated here directly because this - * code path runs with irqs and preemtion disabled. A + * code path runs with irqs and preemption disabled. A * #vmexit emulation might sleep. Only signal request for * the #vmexit here. */ @@ -2007,7 +2103,6 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) return kmap(page); error: - kvm_release_page_clean(page); kvm_inject_gp(&svm->vcpu, 0); return NULL; @@ -2311,7 +2406,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) { /* * This function merges the msr permission bitmaps of kvm and the - * nested vmcb. It is omptimized in that it only merges the parts where + * nested vmcb. It is optimized in that it only merges the parts where * the kvm msr permission bitmap may contain zero bits */ int i; @@ -2669,12 +2764,6 @@ static int xsetbv_interception(struct vcpu_svm *svm) return 1; } -static int invalid_op_interception(struct vcpu_svm *svm) -{ - kvm_queue_exception(&svm->vcpu, UD_VECTOR); - return 1; -} - static int task_switch_interception(struct vcpu_svm *svm) { u16 tss_selector; @@ -2730,7 +2819,10 @@ static int task_switch_interception(struct vcpu_svm *svm) (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) skip_emulated_instruction(&svm->vcpu); - if (kvm_task_switch(&svm->vcpu, tss_selector, reason, + if (int_type != SVM_EXITINTINFO_TYPE_SOFT) + int_vec = -1; + + if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, has_error_code, error_code) == EMULATE_FAIL) { svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; @@ -2753,6 +2845,7 @@ static int iret_interception(struct vcpu_svm *svm) clr_intercept(svm, INTERCEPT_IRET); svm->vcpu.arch.hflags |= HF_IRET_MASK; svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); + kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); return 1; } @@ -2885,6 +2978,17 @@ static int dr_interception(struct vcpu_svm *svm) unsigned long val; int err; + if (svm->vcpu.guest_debug == 0) { + /* + * No more DR vmexits; force a reload of the debug registers + * and reenter on this instruction. The next vmexit will + * retrieve the full state of the debug registers. + */ + clr_dr_intercepts(svm); + svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; + return 1; + } + if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) return emulate_on_interception(svm); @@ -2913,21 +3017,19 @@ static int cr8_write_interception(struct vcpu_svm *svm) u8 cr8_prev = kvm_get_cr8(&svm->vcpu); /* instruction emulation calls kvm_set_cr8() */ r = cr_interception(svm); - if (irqchip_in_kernel(svm->vcpu.kvm)) { - clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); + if (irqchip_in_kernel(svm->vcpu.kvm)) return r; - } if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) return r; kvm_run->exit_reason = KVM_EXIT_SET_TPR; return 0; } -u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu) +u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); return vmcb->control.tsc_offset + - svm_scale_tsc(vcpu, native_read_tsc()); + svm_scale_tsc(vcpu, host_tsc); } static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) @@ -3046,13 +3148,15 @@ static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) return 0; } -static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) +static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) { struct vcpu_svm *svm = to_svm(vcpu); + u32 ecx = msr->index; + u64 data = msr->data; switch (ecx) { case MSR_IA32_TSC: - kvm_write_tsc(vcpu, data); + kvm_write_tsc(vcpu, msr); break; case MSR_STAR: svm->vmcb->save.star = data; @@ -3084,8 +3188,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) break; case MSR_IA32_DEBUGCTLMSR: if (!boot_cpu_has(X86_FEATURE_LBRV)) { - pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", - __func__, data); + vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", + __func__, data); break; } if (data & DEBUGCTL_RESERVED_BITS) @@ -3104,23 +3208,27 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) case MSR_VM_CR: return svm_set_vm_cr(vcpu, data); case MSR_VM_IGNNE: - pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); + vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; default: - return kvm_set_msr_common(vcpu, ecx, data); + return kvm_set_msr_common(vcpu, msr); } return 0; } static int wrmsr_interception(struct vcpu_svm *svm) { + struct msr_data msr; u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); + msr.data = data; + msr.index = ecx; + msr.host_initiated = false; svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; - if (svm_set_msr(&svm->vcpu, ecx, data)) { + if (svm_set_msr(&svm->vcpu, &msr)) { trace_kvm_msr_write_ex(ecx, data); kvm_inject_gp(&svm->vcpu, 0); } else { @@ -3146,6 +3254,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm) svm_clear_vintr(svm); svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; mark_dirty(svm->vmcb, VMCB_INTR); + ++svm->vcpu.stat.irq_window_exits; /* * If the user space waits to inject interrupts, exit as soon as * possible @@ -3153,7 +3262,6 @@ static int interrupt_window_interception(struct vcpu_svm *svm) if (!irqchip_in_kernel(svm->vcpu.kvm) && kvm_run->request_interrupt_window && !kvm_cpu_has_interrupt(&svm->vcpu)) { - ++svm->vcpu.stat.irq_window_exits; kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; return 0; } @@ -3167,7 +3275,25 @@ static int pause_interception(struct vcpu_svm *svm) return 1; } -static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { +static int nop_interception(struct vcpu_svm *svm) +{ + skip_emulated_instruction(&(svm->vcpu)); + return 1; +} + +static int monitor_interception(struct vcpu_svm *svm) +{ + printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); + return nop_interception(svm); +} + +static int mwait_interception(struct vcpu_svm *svm) +{ + printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n"); + return nop_interception(svm); +} + +static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR0] = cr_interception, [SVM_EXIT_READ_CR3] = cr_interception, [SVM_EXIT_READ_CR4] = cr_interception, @@ -3224,8 +3350,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_CLGI] = clgi_interception, [SVM_EXIT_SKINIT] = skinit_interception, [SVM_EXIT_WBINVD] = emulate_on_interception, - [SVM_EXIT_MONITOR] = invalid_op_interception, - [SVM_EXIT_MWAIT] = invalid_op_interception, + [SVM_EXIT_MONITOR] = monitor_interception, + [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, [SVM_EXIT_NPF] = pf_interception, }; @@ -3395,7 +3521,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) - printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " + printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " "exit_code 0x%x\n", __func__, svm->vmcb->control.exit_int_info, exit_code); @@ -3472,6 +3598,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) return; + clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); + if (irr == -1) return; @@ -3479,6 +3607,31 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) set_cr_intercept(svm, INTERCEPT_CR8_WRITE); } +static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) +{ + return; +} + +static int svm_vm_has_apicv(struct kvm *kvm) +{ + return 0; +} + +static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +{ + return; +} + +static void svm_hwapic_isr_update(struct kvm *kvm, int isr) +{ + return; +} + +static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + return; +} + static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3559,7 +3712,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) */ svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_intercept(vcpu); + update_db_bp_intercept(vcpu); } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -3682,12 +3835,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) svm_complete_interrupts(svm); } -#ifdef CONFIG_X86_64 -#define R "r" -#else -#define R "e" -#endif - static void svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3714,13 +3861,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); asm volatile ( - "push %%"R"bp; \n\t" - "mov %c[rbx](%[svm]), %%"R"bx \n\t" - "mov %c[rcx](%[svm]), %%"R"cx \n\t" - "mov %c[rdx](%[svm]), %%"R"dx \n\t" - "mov %c[rsi](%[svm]), %%"R"si \n\t" - "mov %c[rdi](%[svm]), %%"R"di \n\t" - "mov %c[rbp](%[svm]), %%"R"bp \n\t" + "push %%" _ASM_BP "; \n\t" + "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" + "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t" + "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t" + "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t" + "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t" + "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t" #ifdef CONFIG_X86_64 "mov %c[r8](%[svm]), %%r8 \n\t" "mov %c[r9](%[svm]), %%r9 \n\t" @@ -3733,20 +3880,20 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif /* Enter guest mode */ - "push %%"R"ax \n\t" - "mov %c[vmcb](%[svm]), %%"R"ax \n\t" + "push %%" _ASM_AX " \n\t" + "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t" __ex(SVM_VMLOAD) "\n\t" __ex(SVM_VMRUN) "\n\t" __ex(SVM_VMSAVE) "\n\t" - "pop %%"R"ax \n\t" + "pop %%" _ASM_AX " \n\t" /* Save guest registers, load host registers */ - "mov %%"R"bx, %c[rbx](%[svm]) \n\t" - "mov %%"R"cx, %c[rcx](%[svm]) \n\t" - "mov %%"R"dx, %c[rdx](%[svm]) \n\t" - "mov %%"R"si, %c[rsi](%[svm]) \n\t" - "mov %%"R"di, %c[rdi](%[svm]) \n\t" - "mov %%"R"bp, %c[rbp](%[svm]) \n\t" + "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t" + "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t" + "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t" + "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t" + "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t" + "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t" #ifdef CONFIG_X86_64 "mov %%r8, %c[r8](%[svm]) \n\t" "mov %%r9, %c[r9](%[svm]) \n\t" @@ -3757,7 +3904,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r14, %c[r14](%[svm]) \n\t" "mov %%r15, %c[r15](%[svm]) \n\t" #endif - "pop %%"R"bp" + "pop %%" _ASM_BP : : [svm]"a"(svm), [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), @@ -3778,9 +3925,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) #endif : "cc", "memory" - , R"bx", R"cx", R"dx", R"si", R"di" #ifdef CONFIG_X86_64 + , "rbx", "rcx", "rdx", "rsi", "rdi" , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" +#else + , "ebx", "ecx", "edx", "esi", "edi" #endif ); @@ -3840,8 +3989,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) mark_all_clean(svm->vmcb); } -#undef R - static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3943,6 +4090,16 @@ static bool svm_rdtscp_supported(void) return false; } +static bool svm_invpcid_supported(void) +{ + return false; +} + +static bool svm_mpx_supported(void) +{ + return false; +} + static bool svm_has_wbinvd_exit(void) { return true; @@ -3963,7 +4120,7 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) #define POST_MEM(exit) { .exit_code = (exit), \ .stage = X86_ICPT_POST_MEMACCESS, } -static struct __x86_intercept { +static const struct __x86_intercept { u32 exit_code; enum x86_intercept_stage stage; } x86_intercept_map[] = { @@ -4136,6 +4293,11 @@ out: return ret; } +static void svm_handle_external_intr(struct kvm_vcpu *vcpu) +{ + local_irq_enable(); +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -4154,7 +4316,7 @@ static struct kvm_x86_ops svm_x86_ops = { .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, - .set_guest_debug = svm_guest_debug, + .update_db_bp_intercept = update_db_bp_intercept, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, @@ -4173,7 +4335,10 @@ static struct kvm_x86_ops svm_x86_ops = { .set_idt = svm_set_idt, .get_gdt = svm_get_gdt, .set_gdt = svm_set_gdt, + .get_dr6 = svm_get_dr6, + .set_dr6 = svm_set_dr6, .set_dr7 = svm_set_dr7, + .sync_dirty_debug_regs = svm_sync_dirty_debug_regs, .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, @@ -4199,6 +4364,11 @@ static struct kvm_x86_ops svm_x86_ops = { .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, + .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, + .vm_has_apicv = svm_vm_has_apicv, + .load_eoi_exitmap = svm_load_eoi_exitmap, + .hwapic_isr_update = svm_hwapic_isr_update, + .sync_pir_to_irr = svm_sync_pir_to_irr, .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, @@ -4211,12 +4381,15 @@ static struct kvm_x86_ops svm_x86_ops = { .cpuid_update = svm_cpuid_update, .rdtscp_supported = svm_rdtscp_supported, + .invpcid_supported = svm_invpcid_supported, + .mpx_supported = svm_mpx_supported, .set_supported_cpuid = svm_set_supported_cpuid, .has_wbinvd_exit = svm_has_wbinvd_exit, .set_tsc_khz = svm_set_tsc_khz, + .read_tsc_offset = svm_read_tsc_offset, .write_tsc_offset = svm_write_tsc_offset, .adjust_tsc_offset = svm_adjust_tsc_offset, .compute_tsc_offset = svm_compute_tsc_offset, @@ -4225,6 +4398,7 @@ static struct kvm_x86_ops svm_x86_ops = { .set_tdp_cr3 = set_tdp_cr3, .check_intercept = svm_check_intercept, + .handle_external_intr = svm_handle_external_intr, }; static int __init svm_init(void) |
