aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kvm/svm.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-03-28 14:35:31 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-28 14:35:31 -0700
commit2e7580b0e75d771d93e24e681031a165b1d31071 (patch)
treed9449702609eeaab28913a43b5a4434667e09d43 /arch/x86/kvm/svm.c
parentd25413efa9536e2f425ea45c7720598035c597bc (diff)
parentcf9eeac46350b8b43730b7dc5e999757bed089a4 (diff)
Merge branch 'kvm-updates/3.4' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Avi Kivity: "Changes include timekeeping improvements, support for assigning host PCI devices that share interrupt lines, s390 user-controlled guests, a large ppc update, and random fixes." This is with the sign-off's fixed, hopefully next merge window we won't have rebased commits. * 'kvm-updates/3.4' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (130 commits) KVM: Convert intx_mask_lock to spin lock KVM: x86: fix kvm_write_tsc() TSC matching thinko x86: kvmclock: abstract save/restore sched_clock_state KVM: nVMX: Fix erroneous exception bitmap check KVM: Ignore the writes to MSR_K7_HWCR(3) KVM: MMU: make use of ->root_level in reset_rsvds_bits_mask KVM: PMU: add proper support for fixed counter 2 KVM: PMU: Fix raw event check KVM: PMU: warn when pin control is set in eventsel msr KVM: VMX: Fix delayed load of shared MSRs KVM: use correct tlbs dirty type in cmpxchg KVM: Allow host IRQ sharing for assigned PCI 2.3 devices KVM: Ensure all vcpus are consistent with in-kernel irqchip settings KVM: x86 emulator: Allow PM/VM86 switch during task switch KVM: SVM: Fix CPL updates KVM: x86 emulator: VM86 segments must have DPL 3 KVM: x86 emulator: Fix task switch privilege checks arch/powerpc/kvm/book3s_hv.c: included linux/sched.h twice KVM: x86 emulator: correctly mask pmc index bits in RDPMC instruction emulation KVM: mmu_notifier: Flush TLBs before releasing mmu_lock ...
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r--arch/x86/kvm/svm.c119
1 files changed, 104 insertions, 15 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e385214711c..e334389e1c7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -111,6 +111,12 @@ struct nested_state {
#define MSRPM_OFFSETS 16
static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
+/*
+ * Set osvw_len to higher value when updated Revision Guides
+ * are published and we know what the new status bits are
+ */
+static uint64_t osvw_len = 4, osvw_status;
+
struct vcpu_svm {
struct kvm_vcpu vcpu;
struct vmcb *vmcb;
@@ -177,11 +183,13 @@ static bool npt_enabled = true;
#else
static bool npt_enabled;
#endif
-static int npt = 1;
+/* allow nested paging (virtualized MMU) for all guests */
+static int npt = true;
module_param(npt, int, S_IRUGO);
-static int nested = 1;
+/* allow nested virtualization in KVM/SVM */
+static int nested = true;
module_param(nested, int, S_IRUGO);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
@@ -557,6 +565,27 @@ static void svm_init_erratum_383(void)
erratum_383_found = true;
}
+static void svm_init_osvw(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Guests should see errata 400 and 415 as fixed (assuming that
+ * HLT and IO instructions are intercepted).
+ */
+ vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
+ vcpu->arch.osvw.status = osvw_status & ~(6ULL);
+
+ /*
+ * By increasing VCPU's osvw.length to 3 we are telling the guest that
+ * all osvw.status bits inside that length, including bit 0 (which is
+ * reserved for erratum 298), are valid. However, if host processor's
+ * osvw_len is 0 then osvw_status[0] carries no information. We need to
+ * be conservative here and therefore we tell the guest that erratum 298
+ * is present (because we really don't know).
+ */
+ if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
+ vcpu->arch.osvw.status |= 1;
+}
+
static int has_svm(void)
{
const char *msg;
@@ -623,6 +652,36 @@ static int svm_hardware_enable(void *garbage)
__get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
}
+
+ /*
+ * Get OSVW bits.
+ *
+ * Note that it is possible to have a system with mixed processor
+ * revisions and therefore different OSVW bits. If bits are not the same
+ * on different processors then choose the worst case (i.e. if erratum
+ * is present on one processor and not on another then assume that the
+ * erratum is present everywhere).
+ */
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
+ uint64_t len, status = 0;
+ int err;
+
+ len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
+ if (!err)
+ status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
+ &err);
+
+ if (err)
+ osvw_status = osvw_len = 0;
+ else {
+ if (len < osvw_len)
+ osvw_len = len;
+ osvw_status |= status;
+ osvw_status &= (1ULL << osvw_len) - 1;
+ }
+ } else
+ osvw_status = osvw_len = 0;
+
svm_init_erratum_383();
amd_pmu_enable_virt();
@@ -910,20 +969,25 @@ static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
return _tsc;
}
-static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 ratio;
u64 khz;
- /* TSC scaling supported? */
- if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR))
+ /* Guest TSC same frequency as host TSC? */
+ if (!scale) {
+ svm->tsc_ratio = TSC_RATIO_DEFAULT;
return;
+ }
- /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */
- if (user_tsc_khz == 0) {
- vcpu->arch.virtual_tsc_khz = 0;
- svm->tsc_ratio = TSC_RATIO_DEFAULT;
+ /* TSC scaling supported? */
+ if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+ if (user_tsc_khz > tsc_khz) {
+ vcpu->arch.tsc_catchup = 1;
+ vcpu->arch.tsc_always_catchup = 1;
+ } else
+ WARN(1, "user requested TSC rate below hardware speed\n");
return;
}
@@ -938,7 +1002,6 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
user_tsc_khz);
return;
}
- vcpu->arch.virtual_tsc_khz = user_tsc_khz;
svm->tsc_ratio = ratio;
}
@@ -958,10 +1021,14 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
-static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
+static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ WARN_ON(adjustment < 0);
+ if (host)
+ adjustment = svm_scale_tsc(vcpu, adjustment);
+
svm->vmcb->control.tsc_offset += adjustment;
if (is_guest_mode(vcpu))
svm->nested.hsave->control.tsc_offset += adjustment;
@@ -1191,6 +1258,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
if (kvm_vcpu_is_bsp(&svm->vcpu))
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
+ svm_init_osvw(&svm->vcpu);
+
return &svm->vcpu;
free_page4:
@@ -1268,6 +1337,21 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
}
+static void svm_update_cpl(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ int cpl;
+
+ if (!is_protmode(vcpu))
+ cpl = 0;
+ else if (svm->vmcb->save.rflags & X86_EFLAGS_VM)
+ cpl = 3;
+ else
+ cpl = svm->vmcb->save.cs.selector & 0x3;
+
+ svm->vmcb->save.cpl = cpl;
+}
+
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
{
return to_svm(vcpu)->vmcb->save.rflags;
@@ -1275,7 +1359,11 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
+ unsigned long old_rflags = to_svm(vcpu)->vmcb->save.rflags;
+
to_svm(vcpu)->vmcb->save.rflags = rflags;
+ if ((old_rflags ^ rflags) & X86_EFLAGS_VM)
+ svm_update_cpl(vcpu);
}
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
@@ -1543,9 +1631,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
}
if (seg == VCPU_SREG_CS)
- svm->vmcb->save.cpl
- = (svm->vmcb->save.cs.attrib
- >> SVM_SELECTOR_DPL_SHIFT) & 3;
+ svm_update_cpl(vcpu);
mark_dirty(svm->vmcb, VMCB_SEG);
}
@@ -2735,7 +2821,10 @@ static int task_switch_interception(struct vcpu_svm *svm)
(int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
skip_emulated_instruction(&svm->vcpu);
- if (kvm_task_switch(&svm->vcpu, tss_selector, reason,
+ if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
+ int_vec = -1;
+
+ if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
has_error_code, error_code) == EMULATE_FAIL) {
svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;