aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-03-26 15:47:52 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2009-03-26 15:47:52 -0700
commitd3f12d36f148f101c568bdbce795e41cd9ceadf3 (patch)
tree4d58ff7605a530fb052c95378d507e2350755825 /arch/x86
parent39b566eedbe9e35d38502cc5e62ef7abf1aff9c9 (diff)
parent16175a796d061833aacfbd9672235f2d2725df65 (diff)
Merge branch 'kvm-updates/2.6.30' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.30' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (113 commits) KVM: VMX: Don't allow uninhibited access to EFER on i386 KVM: Correct deassign device ioctl to IOW KVM: ppc: e500: Fix the bug that KVM is unstable in SMP KVM: ppc: e500: Fix the bug that mas0 update to wrong value when read TLB entry KVM: Fix missing smp tlb flush in invlpg KVM: Get support IRQ routing entry counts KVM: fix sparse warnings: Should it be static? KVM: fix sparse warnings: context imbalance KVM: is_long_mode() should check for EFER.LMA KVM: VMX: Update necessary state when guest enters long mode KVM: ia64: Fix the build errors due to lack of macros related to MSI. ia64: Move the macro definitions related to MSI to one header file. KVM: fix kvm_vm_ioctl_deassign_device KVM: define KVM_CAP_DEVICE_DEASSIGNMENT KVM: ppc: Add emulation of E500 register mmucsr0 KVM: Report IRQ injection status for MSI delivered interrupts KVM: MMU: Fix another largepage memory leak KVM: SVM: set accessed bit for VMCB segment selectors KVM: Report IRQ injection status to userspace. KVM: MMU: remove assertion in kvm_mmu_alloc_page ...
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/kvm.h24
-rw-r--r--arch/x86/include/asm/kvm_host.h61
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/svm.h4
-rw-r--r--arch/x86/include/asm/virtext.h2
-rw-r--r--arch/x86/include/asm/vmx.h5
-rw-r--r--arch/x86/kvm/Kconfig4
-rw-r--r--arch/x86/kvm/i8254.c21
-rw-r--r--arch/x86/kvm/i8254.h2
-rw-r--r--arch/x86/kvm/i8259.c25
-rw-r--r--arch/x86/kvm/irq.h2
-rw-r--r--arch/x86/kvm/kvm_svm.h16
-rw-r--r--arch/x86/kvm/mmu.c237
-rw-r--r--arch/x86/kvm/mmu.h2
-rw-r--r--arch/x86/kvm/paging_tmpl.h219
-rw-r--r--arch/x86/kvm/svm.c916
-rw-r--r--arch/x86/kvm/vmx.c393
-rw-r--r--arch/x86/kvm/x86.c432
-rw-r--r--arch/x86/kvm/x86_emulate.c56
19 files changed, 1744 insertions, 686 deletions
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 886c9402ec4..dc3f6cf1170 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -15,6 +15,7 @@
#define __KVM_HAVE_DEVICE_ASSIGNMENT
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
+#define __KVM_HAVE_GUEST_DEBUG
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
@@ -212,7 +213,30 @@ struct kvm_pit_channel_state {
__s64 count_load_time;
};
+struct kvm_debug_exit_arch {
+ __u32 exception;
+ __u32 pad;
+ __u64 pc;
+ __u64 dr6;
+ __u64 dr7;
+};
+
+#define KVM_GUESTDBG_USE_SW_BP 0x00010000
+#define KVM_GUESTDBG_USE_HW_BP 0x00020000
+#define KVM_GUESTDBG_INJECT_DB 0x00040000
+#define KVM_GUESTDBG_INJECT_BP 0x00080000
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+ __u64 debugreg[8];
+};
+
struct kvm_pit_state {
struct kvm_pit_channel_state channels[3];
};
+
+struct kvm_reinject_control {
+ __u8 pit_reinject;
+ __u8 reserved[31];
+};
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 730843d1d2f..f0faf58044f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -22,6 +22,7 @@
#include <asm/pvclock-abi.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
+#include <asm/msr-index.h>
#define KVM_MAX_VCPUS 16
#define KVM_MEMORY_SLOTS 32
@@ -134,11 +135,18 @@ enum {
#define KVM_NR_MEM_OBJS 40
-struct kvm_guest_debug {
- int enabled;
- unsigned long bp[4];
- int singlestep;
-};
+#define KVM_NR_DB_REGS 4
+
+#define DR6_BD (1 << 13)
+#define DR6_BS (1 << 14)
+#define DR6_FIXED_1 0xffff0ff0
+#define DR6_VOLATILE 0x0000e00f
+
+#define DR7_BP_EN_MASK 0x000000ff
+#define DR7_GE (1 << 9)
+#define DR7_GD (1 << 13)
+#define DR7_FIXED_1 0x00000400
+#define DR7_VOLATILE 0xffff23ff
/*
* We don't want allocation failures within the mmu code, so we preallocate
@@ -162,7 +170,8 @@ struct kvm_pte_chain {
* bits 0:3 - total guest paging levels (2-4, or zero for real mode)
* bits 4:7 - page table level for this shadow (1-4)
* bits 8:9 - page table quadrant for 2-level guests
- * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode)
+ * bit 16 - direct mapping of virtual to physical mapping at gfn
+ * used for real mode and two-dimensional paging
* bits 17:19 - common access permissions for all ptes in this shadow page
*/
union kvm_mmu_page_role {
@@ -172,9 +181,10 @@ union kvm_mmu_page_role {
unsigned level:4;
unsigned quadrant:2;
unsigned pad_for_nice_hex_output:6;
- unsigned metaphysical:1;
+ unsigned direct:1;
unsigned access:3;
unsigned invalid:1;
+ unsigned cr4_pge:1;
};
};
@@ -218,6 +228,18 @@ struct kvm_pv_mmu_op_buffer {
char buf[512] __aligned(sizeof(long));
};
+struct kvm_pio_request {
+ unsigned long count;
+ int cur_count;
+ gva_t guest_gva;
+ int in;
+ int port;
+ int size;
+ int string;
+ int down;
+ int rep;
+};
+
/*
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
* 32-bit). The kvm_mmu structure abstracts the details of the current mmu
@@ -236,6 +258,7 @@ struct kvm_mmu {
hpa_t root_hpa;
int root_level;
int shadow_root_level;
+ union kvm_mmu_page_role base_role;
u64 *pae_root;
};
@@ -258,6 +281,7 @@ struct kvm_vcpu_arch {
unsigned long cr3;
unsigned long cr4;
unsigned long cr8;
+ u32 hflags;
u64 pdptrs[4]; /* pae */
u64 shadow_efer;
u64 apic_base;
@@ -338,6 +362,15 @@ struct kvm_vcpu_arch {
struct mtrr_state_type mtrr_state;
u32 pat;
+
+ int switch_db_regs;
+ unsigned long host_db[KVM_NR_DB_REGS];
+ unsigned long host_dr6;
+ unsigned long host_dr7;
+ unsigned long db[KVM_NR_DB_REGS];
+ unsigned long dr6;
+ unsigned long dr7;
+ unsigned long eff_db[KVM_NR_DB_REGS];
};
struct kvm_mem_alias {
@@ -378,6 +411,7 @@ struct kvm_arch{
unsigned long irq_sources_bitmap;
unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
+ u64 vm_init_tsc;
};
struct kvm_vm_stat {
@@ -446,8 +480,7 @@ struct kvm_x86_ops {
void (*vcpu_put)(struct kvm_vcpu *vcpu);
int (*set_guest_debug)(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg);
- void (*guest_debug_pre)(struct kvm_vcpu *vcpu);
+ struct kvm_guest_debug *dbg);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -583,16 +616,12 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
u32 error_code);
-void kvm_pic_set_irq(void *opaque, int irq, int level);
+int kvm_pic_set_irq(void *opaque, int irq, int level);
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
void fx_init(struct kvm_vcpu *vcpu);
-int emulator_read_std(unsigned long addr,
- void *val,
- unsigned int bytes,
- struct kvm_vcpu *vcpu);
int emulator_write_emulated(unsigned long addr,
const void *val,
unsigned int bytes,
@@ -737,6 +766,10 @@ enum {
TASK_SWITCH_GATE = 3,
};
+#define HF_GIF_MASK (1 << 0)
+#define HF_HIF_MASK (1 << 1)
+#define HF_VINTR_MASK (1 << 2)
+
/*
* Hardware virtualization extension instructions may fault if a
* reboot turns off virtualization while processes are running.
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 358acc59ae0..f4e505f286b 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -18,11 +18,15 @@
#define _EFER_LME 8 /* Long mode enable */
#define _EFER_LMA 10 /* Long mode active (read-only) */
#define _EFER_NX 11 /* No execute enable */
+#define _EFER_SVME 12 /* Enable virtualization */
+#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
#define EFER_SCE (1<<_EFER_SCE)
#define EFER_LME (1<<_EFER_LME)
#define EFER_LMA (1<<_EFER_LMA)
#define EFER_NX (1<<_EFER_NX)
+#define EFER_SVME (1<<_EFER_SVME)
+#define EFER_FFXSR (1<<_EFER_FFXSR)
/* Intel MSRs. Some also available on other CPUs */
#define MSR_IA32_PERFCTR0 0x000000c1
@@ -360,4 +364,9 @@
#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
+/* AMD-V MSRs */
+
+#define MSR_VM_CR 0xc0010114
+#define MSR_VM_HSAVE_PA 0xc0010117
+
#endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1b8afa78e86..82ada75f3eb 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -174,10 +174,6 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_CPUID_FEATURE_SHIFT 2
#define SVM_CPUID_FUNC 0x8000000a
-#define MSR_EFER_SVME_MASK (1ULL << 12)
-#define MSR_VM_CR 0xc0010114
-#define MSR_VM_HSAVE_PA 0xc0010117ULL
-
#define SVM_VM_CR_SVM_DISABLE 4
#define SVM_SELECTOR_S_SHIFT 4
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 59363627523..e0f9aa16358 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -118,7 +118,7 @@ static inline void cpu_svm_disable(void)
wrmsrl(MSR_VM_HSAVE_PA, 0);
rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+ wrmsrl(MSR_EFER, efer & ~EFER_SVME);
}
/** Makes sure SVM is disabled, if it is supported on the CPU
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index d0238e6151d..498f944010b 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -270,8 +270,9 @@ enum vmcs_field {
#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
-#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */
+#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
+#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define GUEST_INTR_STATE_STI 0x00000001
@@ -311,7 +312,7 @@ enum vmcs_field {
#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */
#define TYPE_MOV_TO_DR (0 << 4)
#define TYPE_MOV_FROM_DR (1 << 4)
-#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. */
+#define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */
/* segment AR */
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b81125f0bde..0a303c3ed11 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -4,6 +4,10 @@
config HAVE_KVM
bool
+config HAVE_KVM_IRQCHIP
+ bool
+ default y
+
menuconfig VIRTUALIZATION
bool "Virtualization"
depends on HAVE_KVM || X86
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 72bd275a9b5..c13bb92d315 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -201,6 +201,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
if (!atomic_inc_and_test(&pt->pending))
set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
+ if (!pt->reinject)
+ atomic_set(&pt->pending, 1);
+
if (vcpu0 && waitqueue_active(&vcpu0->wq))
wake_up_interruptible(&vcpu0->wq);
@@ -536,6 +539,16 @@ void kvm_pit_reset(struct kvm_pit *pit)
pit->pit_state.irq_ack = 1;
}
+static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
+{
+ struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
+
+ if (!mask) {
+ atomic_set(&pit->pit_state.pit_timer.pending, 0);
+ pit->pit_state.irq_ack = 1;
+ }
+}
+
struct kvm_pit *kvm_create_pit(struct kvm *kvm)
{
struct kvm_pit *pit;
@@ -545,9 +558,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
if (!pit)
return NULL;
- mutex_lock(&kvm->lock);
pit->irq_source_id = kvm_request_irq_source_id(kvm);
- mutex_unlock(&kvm->lock);
if (pit->irq_source_id < 0) {
kfree(pit);
return NULL;
@@ -580,10 +591,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
pit_state->irq_ack_notifier.gsi = 0;
pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
+ pit_state->pit_timer.reinject = true;
mutex_unlock(&pit->pit_state.lock);
kvm_pit_reset(pit);
+ pit->mask_notifier.func = pit_mask_notifer;
+ kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
+
return pit;
}
@@ -592,6 +607,8 @@ void kvm_free_pit(struct kvm *kvm)
struct hrtimer *timer;
if (kvm->arch.vpit) {
+ kvm_unregister_irq_mask_notifier(kvm, 0,
+ &kvm->arch.vpit->mask_notifier);
mutex_lock(&kvm->arch.vpit->pit_state.lock);
timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
hrtimer_cancel(timer);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 4178022b97a..6acbe4b505d 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -9,6 +9,7 @@ struct kvm_kpit_timer {
s64 period; /* unit: ns */
s64 scheduled;
atomic_t pending;
+ bool reinject;
};
struct kvm_kpit_channel_state {
@@ -45,6 +46,7 @@ struct kvm_pit {
struct kvm *kvm;
struct kvm_kpit_state pit_state;
int irq_source_id;
+ struct kvm_irq_mask_notifier mask_notifier;
};
#define KVM_PIT_BASE_ADDRESS 0x40
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 179dcb0103f..1ccb50c74f1 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -32,11 +32,13 @@
#include <linux/kvm_host.h>
static void pic_lock(struct kvm_pic *s)
+ __acquires(&s->lock)
{
spin_lock(&s->lock);
}
static void pic_unlock(struct kvm_pic *s)
+ __releases(&s->lock)
{
struct kvm *kvm = s->kvm;
unsigned acks = s->pending_acks;
@@ -49,7 +51,8 @@ static void pic_unlock(struct kvm_pic *s)
spin_unlock(&s->lock);
while (acks) {
- kvm_notify_acked_irq(kvm, __ffs(acks));
+ kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)),
+ __ffs(acks));
acks &= acks - 1;
}
@@ -76,12 +79,13 @@ void kvm_pic_clear_isr_ack(struct kvm *kvm)
/*
* set irq level. If an edge is detected, then the IRR is set to 1
*/
-static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
+static inline int pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
{
- int mask;
+ int mask, ret = 1;
mask = 1 << irq;
if (s->elcr & mask) /* level triggered */
if (level) {
+ ret = !(s->irr & mask);
s->irr |= mask;
s->last_irr |= mask;
} else {
@@ -90,11 +94,15 @@ static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
}
else /* edge triggered */
if (level) {
- if ((s->last_irr & mask) == 0)
+ if ((s->last_irr & mask) == 0) {
+ ret = !(s->irr & mask);
s->irr |= mask;
+ }
s->last_irr |= mask;
} else
s->last_irr &= ~mask;
+
+ return (s->imr & mask) ? -1 : ret;
}
/*
@@ -171,16 +179,19 @@ void kvm_pic_update_irq(struct kvm_pic *s)
pic_unlock(s);
}
-void kvm_pic_set_irq(void *opaque, int irq, int level)
+int kvm_pic_set_irq(void *opaque, int irq, int level)
{
struct kvm_pic *s = opaque;
+ int ret = -1;
pic_lock(s);
if (irq >= 0 && irq < PIC_NUM_PINS) {
- pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
+ ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
pic_update_irq(s);
}
pic_unlock(s);
+
+ return ret;
}
/*
@@ -232,7 +243,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
}
pic_update_irq(s);
pic_unlock(s);
- kvm_notify_acked_irq(kvm, irq);
+ kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq);
return intno;
}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 82579ee538d..9f593188129 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -32,6 +32,8 @@
#include "lapic.h"
#define PIC_NUM_PINS 16
+#define SELECT_PIC(irq) \
+ ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
struct kvm;
struct kvm_vcpu;
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 8e5ee99551f..ed66e4c078d 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -18,7 +18,6 @@ static const u32 host_save_user_msrs[] = {
};
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
-#define NUM_DB_REGS 4
struct kvm_vcpu;
@@ -29,18 +28,23 @@ struct vcpu_svm {
struct svm_cpu_data *svm_data;
uint64_t asid_generation;
- unsigned long db_regs[NUM_DB_REGS];
-
u64 next_rip;
u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
u64 host_gs_base;
unsigned long host_cr2;
- unsigned long host_db_regs[NUM_DB_REGS];
- unsigned long host_dr6;
- unsigned long host_dr7;
u32 *msrpm;
+ struct vmcb *hsave;
+ u64 hsave_msr;
+
+ u64 nested_vmcb;
+
+ /* These are the merged vectors */
+ u32 *nested_msrpm;
+
+ /* gpa pointers to the real vectors */
+ u64 nested_vmcb_msrpm;
};
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2d4477c7147..2a36f7f7c4c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -145,11 +145,20 @@ struct kvm_rmap_desc {
struct kvm_rmap_desc *more;
};
-struct kvm_shadow_walk {
- int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu,
- u64 addr, u64 *spte, int level);
+struct kvm_shadow_walk_iterator {
+ u64 addr;
+ hpa_t shadow_addr;
+ int level;
+ u64 *sptep;
+ unsigned index;
};
+#define for_each_shadow_entry(_vcpu, _addr, _walker) \
+ for (shadow_walk_init(&(_walker), _vcpu, _addr); \
+ shadow_walk_okay(&(_walker)); \
+ shadow_walk_next(&(_walker)))
+
+
struct kvm_unsync_walk {
int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
};
@@ -343,7 +352,6 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
BUG_ON(!mc->nobjs);
p = mc->objects[--mc->nobjs];
- memset(p, 0, size);
return p;
}
@@ -794,10 +802,8 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&sp->oos_link);
- ASSERT(is_empty_shadow_page(sp->spt));
bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
sp->multimapped = 0;
- sp->global = 1;
sp->parent_pte = parent_pte;
--vcpu->kvm->arch.n_free_mmu_pages;
return sp;
@@ -983,8 +989,8 @@ struct kvm_mmu_pages {
idx < 512; \
idx = find_next_bit(bitmap, 512, idx+1))
-int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
- int idx)
+static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
+ int idx)
{
int i;
@@ -1059,7 +1065,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
hlist_for_each_entry(sp, node, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.metaphysical
+ if (sp->gfn == gfn && !sp->role.direct
&& !sp->role.invalid) {
pgprintk("%s: found role %x\n",
__func__, sp->role.word);
@@ -1115,8 +1121,9 @@ struct mmu_page_path {
i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
i = mmu_pages_next(&pvec, &parents, i))
-int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
- int i)
+static int mmu_pages_next(struct kvm_mmu_pages *pvec,
+ struct mmu_page_path *parents,
+ int i)
{
int n;
@@ -1135,7 +1142,7 @@ int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
return n;
}
-void mmu_pages_clear_parents(struct mmu_page_path *parents)
+static void mmu_pages_clear_parents(struct mmu_page_path *parents)
{
struct kvm_mmu_page *sp;
unsigned int level = 0;
@@ -1193,7 +1200,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gfn_t gfn,
gva_t gaddr,
unsigned level,
- int metaphysical,
+ int direct,
unsigned access,
u64 *parent_pte)
{
@@ -1204,10 +1211,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp;
struct hlist_node *node, *tmp;
- role.word = 0;
- role.glevels = vcpu->arch.mmu.root_level;
+ role = vcpu->arch.mmu.base_role;
role.level = level;
- role.metaphysical = metaphysical;
+ role.direct = direct;
role.access = access;
if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
@@ -1242,8 +1248,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
sp->gfn = gfn;
sp->role = role;
+ sp->global = role.cr4_pge;
hlist_add_head(&sp->hash_link, bucket);
- if (!metaphysical) {
+ if (!direct) {
if (rmap_write_protect(vcpu->kvm, gfn))
kvm_flush_remote_tlbs(vcpu->kvm);
account_shadowed(vcpu->kvm, gfn);
@@ -1255,35 +1262,35 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
return sp;
}
-static int walk_shadow(struct kvm_shadow_walk *walker,
- struct kvm_vcpu *vcpu, u64 addr)
+static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
+ struct kvm_vcpu *vcpu, u64 addr)
{
- hpa_t shadow_addr;
- int level;
- int r;
- u64 *sptep;
- unsigned index;
-
- shadow_addr = vcpu->arch.mmu.root_hpa;
- level = vcpu->arch.mmu.shadow_root_level;
- if (level == PT32E_ROOT_LEVEL) {
- shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
- shadow_addr &= PT64_BASE_ADDR_MASK;
- if (!shadow_addr)
- return 1;
- --level;
+ iterator->addr = addr;
+ iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
+ iterator->level = vcpu->arch.mmu.shadow_root_level;
+ if (iterator->level == PT32E_ROOT_LEVEL) {
+ iterator->shadow_addr
+ = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
+ iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
+ --iterator->level;
+ if (!iterator->shadow_addr)
+ iterator->level = 0;
}
+}
- while (level >= PT_PAGE_TABLE_LEVEL) {
- index = SHADOW_PT_INDEX(addr, level);
- sptep = ((u64 *)__va(shadow_addr)) + index;
- r = walker->entry(walker, vcpu, addr, sptep, level);
- if (r)
- return r;
- shadow_addr = *sptep & PT64_BASE_ADDR_MASK;
- --level;
- }
- return 0;
+static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
+{
+ if (iterator->level < PT_PAGE_TABLE_LEVEL)
+ return false;
+ iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level);
+ iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index;
+ return true;
+}
+
+static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
+{
+ iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK;
+ --iterator->level;
}
static void kvm_mmu_page_unlink_children(struct kvm *kvm,
@@ -1388,7 +1395,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_page_unlink_children(kvm, sp);
kvm_mmu_unlink_parents(kvm, sp);
kvm_flush_remote_tlbs(kvm);
- if (!sp->role.invalid && !sp->role.metaphysical)
+ if (!sp->role.invalid && !sp->role.direct)
unaccount_shadowed(kvm, sp->gfn);
if (sp->unsync)
kvm_unlink_unsync_page(kvm, sp);
@@ -1451,7 +1458,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.metaphysical) {
+ if (sp->gfn == gfn && !sp->role.direct) {
pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
@@ -1463,11 +1470,20 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
{
+ unsigned index;
+ struct hlist_head *bucket;
struct kvm_mmu_page *sp;
+ struct hlist_node *node, *nn;
- while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
- pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word);
- kvm_mmu_zap_page(kvm, sp);
+ index = kvm_page_table_hashfn(gfn);
+ bucket = &kvm->arch.mmu_page_hash[index];
+ hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
+ if (sp->gfn == gfn && !sp->role.direct
+ && !sp->role.invalid) {
+ pgprintk("%s: zap %lx %x\n",
+ __func__, gfn, sp->role.word);
+ kvm_mmu_zap_page(kvm, sp);
+ }
}
}
@@ -1622,7 +1638,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
/* don't unsync if pagetable is shadowed with multiple roles */
hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
- if (s->gfn != sp->gfn || s->role.metaphysical)
+ if (s->gfn != sp->gfn || s->role.direct)
continue;
if (s->role.word != sp->role.word)
return 1;
@@ -1669,8 +1685,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
u64 mt_mask = shadow_mt_mask;
struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
- if (!(vcpu->arch.cr4 & X86_CR4_PGE))
- global = 0;
if (!global && sp->global) {
sp->global = 0;
if (sp->unsync) {
@@ -1777,12 +1791,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
pgprintk("hfn old %lx new %lx\n",
spte_to_pfn(*shadow_pte), pfn);
rmap_remove(vcpu->kvm, shadow_pte);
- } else {
- if (largepage)
- was_rmapped = is_large_pte(*shadow_pte);
- else
- was_rmapped = 1;
- }
+ } else
+ was_rmapped = 1;
}
if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
dirty, largepage, global, gfn, pfn, speculative, true)) {
@@ -1820,67 +1830,42 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}
-struct direct_shadow_walk {
- struct kvm_shadow_walk walker;
- pfn_t pfn;
- int write;
- int largepage;
- int pt_write;
-};
-
-static int direct_map_entry(struct kvm_shadow_walk *_walk,
- struct kvm_vcpu *vcpu,
- u64 addr, u64 *sptep, int level)
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
+ int largepage, gfn_t gfn, pfn_t pfn)
{
- struct direct_shadow_walk *walk =
- container_of(_walk, struct direct_shadow_walk, walker);
+ struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
+ int pt_write = 0;
gfn_t pseudo_gfn;
- gfn_t gfn = addr >> PAGE_SHIFT;
-
- if (level == PT_PAGE_TABLE_LEVEL
- || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
- mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
- 0, walk->write, 1, &walk->pt_write,
- walk->largepage, 0, gfn, walk->pfn, false);
- ++vcpu->stat.pf_fixed;
- return 1;
- }
- if (*sptep == shadow_trap_nonpresent_pte) {
- pseudo_gfn = (addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
- sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1,
- 1, ACC_ALL, sptep);
- if (!sp) {
- pgprintk("nonpaging_map: ENOMEM\n");
- kvm_release_pfn_clean(walk->pfn);
- return -ENOMEM;
+ for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
+ if (iterator.level == PT_PAGE_TABLE_LEVEL
+ || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
+ mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
+ 0, write, 1, &pt_write,
+ largepage, 0, gfn, pfn, false);
+ ++vcpu->stat.pf_fixed;
+ break;
}
- set_shadow_pte(sptep,
- __pa(sp->spt)
- | PT_PRESENT_MASK | PT_WRITABLE_MASK
- | shadow_user_mask | shadow_x_mask);
- }
- return 0;
-}
+ if (*iterator.sptep == shadow_trap_nonpresent_pte) {
+ pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
+ sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
+ iterator.level - 1,
+ 1, ACC_ALL, iterator.sptep);
+ if (!sp) {
+ pgprintk("nonpaging_map: ENOMEM\n");
+ kvm_release_pfn_clean(pfn);
+ return -ENOMEM;
+ }
-static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
- int largepage, gfn_t gfn, pfn_t pfn)
-{
- int r;
- struct direct_shadow_walk walker = {
- .walker = { .entry = direct_map_entry, },
- .pfn = pfn,
- .largepage = largepage,
- .write = write,
- .pt_write = 0,
- };
-
- r = walk_shadow(&walker.walker, vcpu, gfn << PAGE_SHIFT);
- if (r < 0)
- return r;
- return walker.pt_write;
+ set_shadow_pte(iterator.sptep,
+ __pa(sp->spt)
+ | PT_PRESENT_MASK | PT_WRITABLE_MASK
+ | shadow_user_mask | shadow_x_mask);
+ }
+ }
+ return pt_write;
}
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
@@ -1962,7 +1947,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
int i;
gfn_t root_gfn;
struct kvm_mmu_page *sp;
- int metaphysical = 0;
+ int direct = 0;
root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
@@ -1971,18 +1956,18 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
if (tdp_enabled)
- metaphysical = 1;
+ direct = 1;
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- PT64_ROOT_LEVEL, metaphysical,
+ PT64_ROOT_LEVEL, direct,
ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
vcpu->arch.mmu.root_hpa = root;
return;
}