aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 10:14:24 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 10:14:24 -0800
commit55065bc52795faae549abfb912aacc622dd63876 (patch)
tree63683547e41ed459a2a8747eeafb5e969633d54f
parent008d23e4852d78bb2618f2035f8b2110b6a6b968 (diff)
parente5c301428294cb8925667c9ee39f817c4ab1c2c9 (diff)
Merge branch 'kvm-updates/2.6.38' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.38' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (142 commits) KVM: Initialize fpu state in preemptible context KVM: VMX: when entering real mode align segment base to 16 bytes KVM: MMU: handle 'map_writable' in set_spte() function KVM: MMU: audit: allow audit more guests at the same time KVM: Fetch guest cr3 from hardware on demand KVM: Replace reads of vcpu->arch.cr3 by an accessor KVM: MMU: only write protect mappings at pagetable level KVM: VMX: Correct asm constraint in vmcs_load()/vmcs_clear() KVM: MMU: Initialize base_role for tdp mmus KVM: VMX: Optimize atomic EFER load KVM: VMX: Add definitions for more vm entry/exit control bits KVM: SVM: copy instruction bytes from VMCB KVM: SVM: implement enhanced INVLPG intercept KVM: SVM: enhance mov DR intercept handler KVM: SVM: enhance MOV CR intercept handler KVM: SVM: add new SVM feature bit names KVM: cleanup emulate_instruction KVM: move complete_insn_gp() into x86.c KVM: x86: fix CR8 handling KVM guest: Fix kvm clock initialization when it's configured out ...
-rw-r--r--Documentation/kernel-parameters.txt3
-rw-r--r--Documentation/kvm/api.txt178
-rw-r--r--Documentation/kvm/cpuid.txt3
-rw-r--r--Documentation/kvm/msr.txt36
-rw-r--r--arch/ia64/include/asm/kvm_host.h4
-rw-r--r--arch/ia64/kvm/kvm-ia64.c30
-rw-r--r--arch/powerpc/kvm/book3s.c4
-rw-r--r--arch/powerpc/kvm/powerpc.c20
-rw-r--r--arch/s390/kvm/kvm-s390.c23
-rw-r--r--arch/x86/include/asm/kvm_emulate.h35
-rw-r--r--arch/x86/include/asm/kvm_host.h99
-rw-r--r--arch/x86/include/asm/kvm_para.h24
-rw-r--r--arch/x86/include/asm/svm.h57
-rw-r--r--arch/x86/include/asm/traps.h1
-rw-r--r--arch/x86/include/asm/vmx.h15
-rw-r--r--arch/x86/kernel/entry_32.S10
-rw-r--r--arch/x86/kernel/entry_64.S3
-rw-r--r--arch/x86/kernel/i387.c1
-rw-r--r--arch/x86/kernel/kvm.c317
-rw-r--r--arch/x86/kernel/kvmclock.c13
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/Makefile3
-rw-r--r--arch/x86/kvm/emulate.c367
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h22
-rw-r--r--arch/x86/kvm/lapic.c3
-rw-r--r--arch/x86/kvm/mmu.c251
-rw-r--r--arch/x86/kvm/mmu_audit.c39
-rw-r--r--arch/x86/kvm/paging_tmpl.h147
-rw-r--r--arch/x86/kvm/svm.c865
-rw-r--r--arch/x86/kvm/trace.h17
-rw-r--r--arch/x86/kvm/vmx.c156
-rw-r--r--arch/x86/kvm/x86.c474
-rw-r--r--include/linux/kvm.h1
-rw-r--r--include/linux/kvm_host.h101
-rw-r--r--include/linux/kvm_types.h7
-rw-r--r--include/trace/events/kvm.h121
-rw-r--r--virt/kvm/Kconfig3
-rw-r--r--virt/kvm/assigned-dev.c125
-rw-r--r--virt/kvm/async_pf.c216
-rw-r--r--virt/kvm/async_pf.h36
-rw-r--r--virt/kvm/eventfd.c91
-rw-r--r--virt/kvm/irq_comm.c7
-rw-r--r--virt/kvm/kvm_main.c334
43 files changed, 3078 insertions, 1185 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 338c96ea085..55fe7599bc8 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1705,6 +1705,9 @@ and is between 256 and 4096 characters. It is defined in the file
no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
+ no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
+ fault handling.
+
nolapic [X86-32,APIC] Do not enable or use the local APIC.
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 50713e37c69..ad85797c1cf 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -1085,6 +1085,184 @@ of 4 instructions that make up a hypercall.
If any additional field gets added to this structure later on, a bit for that
additional piece of information will be set in the flags bitmap.
+4.47 KVM_ASSIGN_PCI_DEVICE
+
+Capability: KVM_CAP_DEVICE_ASSIGNMENT
+Architectures: x86 ia64
+Type: vm ioctl
+Parameters: struct kvm_assigned_pci_dev (in)
+Returns: 0 on success, -1 on error
+
+Assigns a host PCI device to the VM.
+
+struct kvm_assigned_pci_dev {
+ __u32 assigned_dev_id;
+ __u32 busnr;
+ __u32 devfn;
+ __u32 flags;
+ __u32 segnr;
+ union {
+ __u32 reserved[11];
+ };
+};
+
+The PCI device is specified by the triple segnr, busnr, and devfn.
+Identification in succeeding service requests is done via assigned_dev_id. The
+following flags are specified:
+
+/* Depends on KVM_CAP_IOMMU */
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
+
+4.48 KVM_DEASSIGN_PCI_DEVICE
+
+Capability: KVM_CAP_DEVICE_DEASSIGNMENT
+Architectures: x86 ia64
+Type: vm ioctl
+Parameters: struct kvm_assigned_pci_dev (in)
+Returns: 0 on success, -1 on error
+
+Ends PCI device assignment, releasing all associated resources.
+
+See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is
+used in kvm_assigned_pci_dev to identify the device.
+
+4.49 KVM_ASSIGN_DEV_IRQ
+
+Capability: KVM_CAP_ASSIGN_DEV_IRQ
+Architectures: x86 ia64
+Type: vm ioctl
+Parameters: struct kvm_assigned_irq (in)
+Returns: 0 on success, -1 on error
+
+Assigns an IRQ to a passed-through device.
+
+struct kvm_assigned_irq {
+ __u32 assigned_dev_id;
+ __u32 host_irq;
+ __u32 guest_irq;
+ __u32 flags;
+ union {
+ struct {
+ __u32 addr_lo;
+ __u32 addr_hi;
+ __u32 data;
+ } guest_msi;
+ __u32 reserved[12];
+ };
+};
+
+The following flags are defined:
+
+#define KVM_DEV_IRQ_HOST_INTX (1 << 0)
+#define KVM_DEV_IRQ_HOST_MSI (1 << 1)
+#define KVM_DEV_IRQ_HOST_MSIX (1 << 2)
+
+#define KVM_DEV_IRQ_GUEST_INTX (1 << 8)
+#define KVM_DEV_IRQ_GUEST_MSI (1 << 9)
+#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10)
+
+It is not valid to specify multiple types per host or guest IRQ. However, the
+IRQ type of host and guest can differ or can even be null.
+
+4.50 KVM_DEASSIGN_DEV_IRQ
+
+Capability: KVM_CAP_ASSIGN_DEV_IRQ
+Architectures: x86 ia64
+Type: vm ioctl
+Parameters: struct kvm_assigned_irq (in)
+Returns: 0 on success, -1 on error
+
+Ends an IRQ assignment to a passed-through device.
+
+See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
+by assigned_dev_id, flags must correspond to the IRQ type specified on
+KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.
+
+4.51 KVM_SET_GSI_ROUTING
+
+Capability: KVM_CAP_IRQ_ROUTING
+Architectures: x86 ia64
+Type: vm ioctl
+Parameters: struct kvm_irq_routing (in)
+Returns: 0 on success, -1 on error
+
+Sets the GSI routing table entries, overwriting any previously set entries.
+
+struct kvm_irq_routing {
+ __u32 nr;
+ __u32 flags;
+ struct kvm_irq_routing_entry entries[0];
+};
+
+No flags are specified so far, the corresponding field must be set to zero.
+
+struct kvm_irq_routing_entry {
+ __u32 gsi;
+ __u32 type;
+ __u32 flags;
+ __u32 pad;
+ union {
+ struct kvm_irq_routing_irqchip irqchip;
+ struct kvm_irq_routing_msi msi;
+ __u32 pad[8];
+ } u;
+};
+
+/* gsi routing entry types */
+#define KVM_IRQ_ROUTING_IRQCHIP 1
+#define KVM_IRQ_ROUTING_MSI 2
+
+No flags are specified so far, the corresponding field must be set to zero.
+
+struct kvm_irq_routing_irqchip {
+ __u32 irqchip;
+ __u32 pin;
+};
+
+struct kvm_irq_routing_msi {
+ __u32 address_lo;
+ __u32 address_hi;
+ __u32 data;
+ __u32 pad;
+};
+
+4.52 KVM_ASSIGN_SET_MSIX_NR
+
+Capability: KVM_CAP_DEVICE_MSIX
+Architectures: x86 ia64
+Type: vm ioctl
+Parameters: struct kvm_assigned_msix_nr (in)
+Returns: 0 on success, -1 on error
+
+Set the number of MSI-X interrupts for an assigned device. This service can
+only be called once in the lifetime of an assigned device.
+
+struct kvm_assigned_msix_nr {
+ __u32 assigned_dev_id;
+ __u16 entry_nr;
+ __u16 padding;
+};
+
+#define KVM_MAX_MSIX_PER_DEV 256
+
+4.53 KVM_ASSIGN_SET_MSIX_ENTRY
+
+Capability: KVM_CAP_DEVICE_MSIX
+Architectures: x86 ia64
+Type: vm ioctl
+Parameters: struct kvm_assigned_msix_entry (in)
+Returns: 0 on success, -1 on error
+
+Specifies the routing of an MSI-X assigned device interrupt to a GSI. Setting
+the GSI vector to zero means disabling the interrupt.
+
+struct kvm_assigned_msix_entry {
+ __u32 assigned_dev_id;
+ __u32 gsi;
+ __u16 entry; /* The index of entry in the MSI-X table */
+ __u16 padding[3];
+};
+
5. The kvm_run structure
Application code obtains a pointer to the kvm_run structure by
diff --git a/Documentation/kvm/cpuid.txt b/Documentation/kvm/cpuid.txt
index 14a12ea92b7..882068538c9 100644
--- a/Documentation/kvm/cpuid.txt
+++ b/Documentation/kvm/cpuid.txt
@@ -36,6 +36,9 @@ KVM_FEATURE_MMU_OP || 2 || deprecated.
KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs
|| || 0x4b564d00 and 0x4b564d01
------------------------------------------------------------------------------
+KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by
+ || || writing to msr 0x4b564d02
+------------------------------------------------------------------------------
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
|| || per-cpu warps are expected in
|| || kvmclock.
diff --git a/Documentation/kvm/msr.txt b/Documentation/kvm/msr.txt
index 8ddcfe84c09..d079aed27e0 100644
--- a/Documentation/kvm/msr.txt
+++ b/Documentation/kvm/msr.txt
@@ -3,7 +3,6 @@ Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
=====================================================
KVM makes use of some custom MSRs to service some requests.
-At present, this facility is only used by kvmclock.
Custom MSRs have a range reserved for them, that goes from
0x4b564d00 to 0x4b564dff. There are MSRs outside this area,
@@ -151,3 +150,38 @@ MSR_KVM_SYSTEM_TIME: 0x12
return PRESENT;
} else
return NON_PRESENT;
+
+MSR_KVM_ASYNC_PF_EN: 0x4b564d02
+ data: Bits 63-6 hold 64-byte aligned physical address of a
+ 64 byte memory area which must be in guest RAM and must be
+ zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
+ when asynchronous page faults are enabled on the vcpu 0 when
+ disabled. Bit 2 is 1 if asynchronous page faults can be injected
+ when vcpu is in cpl == 0.
+
+ First 4 byte of 64 byte memory location will be written to by
+ the hypervisor at the time of asynchronous page fault (APF)
+ injection to indicate type of asynchronous page fault. Value
+ of 1 means that the page referred to by the page fault is not
+ present. Value 2 means that the page is now available. Disabling
+ interrupt inhibits APFs. Guest must not enable interrupt
+ before the reason is read, or it may be overwritten by another
+ APF. Since APF uses the same exception vector as regular page
+ fault guest must reset the reason to 0 before it does
+ something that can generate normal page fault. If during page
+ fault APF reason is 0 it means that this is regular page
+ fault.
+
+ During delivery of type 1 APF cr2 contains a token that will
+ be used to notify a guest when missing page becomes
+ available. When page becomes available type 2 APF is sent with
+ cr2 set to the token associated with the page. There is special
+ kind of token 0xffffffff which tells vcpu that it should wake
+ up all processes waiting for APFs and no individual type 2 APFs
+ will be sent.
+
+ If APF is disabled while there are outstanding APFs, they will
+ not be delivered.
+
+ Currently type 2 APF will be always delivered on the same vcpu as
+ type 1 was, but guest should not rely on that.
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 2f229e5de49..2689ee54a1c 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -590,6 +590,10 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu);
int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
void kvm_sal_emul(struct kvm_vcpu *vcpu);
+#define __KVM_HAVE_ARCH_VM_ALLOC 1
+struct kvm *kvm_arch_alloc_vm(void);
+void kvm_arch_free_vm(struct kvm *kvm);
+
#endif /* __ASSEMBLY__*/
#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index f56a6316e13..70d224d4264 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -749,7 +749,7 @@ out:
return r;
}
-static struct kvm *kvm_alloc_kvm(void)
+struct kvm *kvm_arch_alloc_vm(void)
{
struct kvm *kvm;
@@ -760,7 +760,7 @@ static struct kvm *kvm_alloc_kvm(void)
vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
if (!vm_base)
- return ERR_PTR(-ENOMEM);
+ return NULL;
memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
kvm = (struct kvm *)(vm_base +
@@ -806,10 +806,12 @@ static void kvm_build_io_pmt(struct kvm *kvm)
#define GUEST_PHYSICAL_RR4 0x2739
#define VMM_INIT_RR 0x1660
-static void kvm_init_vm(struct kvm *kvm)
+int kvm_arch_init_vm(struct kvm *kvm)
{
BUG_ON(!kvm);
+ kvm->arch.is_sn2 = ia64_platform_is("sn2");
+
kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
kvm->arch.vmm_init_rr = VMM_INIT_RR;
@@ -823,21 +825,8 @@ static void kvm_init_vm(struct kvm *kvm)
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
-}
-
-struct kvm *kvm_arch_create_vm(void)
-{
- struct kvm *kvm = kvm_alloc_kvm();
-
- if (IS_ERR(kvm))
- return ERR_PTR(-ENOMEM);
-
- kvm->arch.is_sn2 = ia64_platform_is("sn2");
-
- kvm_init_vm(kvm);
-
- return kvm;
+ return 0;
}
static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
@@ -962,7 +951,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
r = kvm_setup_default_irq_routing(kvm);
if (r) {
+ mutex_lock(&kvm->slots_lock);
kvm_ioapic_destroy(kvm);
+ mutex_unlock(&kvm->slots_lock);
goto out;
}
break;
@@ -1357,7 +1348,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
return -EINVAL;
}
-static void free_kvm(struct kvm *kvm)
+void kvm_arch_free_vm(struct kvm *kvm)
{
unsigned long vm_base = kvm->arch.vm_base;
@@ -1399,9 +1390,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
#endif
kfree(kvm->arch.vioapic);
kvm_release_vm_pages(kvm);
- kvm_free_physmem(kvm);
- cleanup_srcu_struct(&kvm->srcu);
- free_kvm(kvm);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index e316847c08c..badc983031b 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -1307,12 +1307,10 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
int err = -ENOMEM;
unsigned long p;
- vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s));
+ vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
if (!vcpu_book3s)
goto out;
- memset(vcpu_book3s, 0, sizeof(struct kvmppc_vcpu_book3s));
-
vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *)
kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
if (!vcpu_book3s->shadow_vcpu)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 38f756f2505..99758460efd 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -145,18 +145,12 @@ void kvm_arch_check_processor_compat(void *rtn)
*(int *)rtn = kvmppc_core_check_processor_compat();
}
-struct kvm *kvm_arch_create_vm(void)
+int kvm_arch_init_vm(struct kvm *kvm)
{
- struct kvm *kvm;
-
- kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
- if (!kvm)
- return ERR_PTR(-ENOMEM);
-
- return kvm;
+ return 0;
}
-static void kvmppc_free_vcpus(struct kvm *kvm)
+void kvm_arch_destroy_vm(struct kvm *kvm)
{
unsigned int i;
struct kvm_vcpu *vcpu;
@@ -176,14 +170,6 @@ void kvm_arch_sync_events(struct kvm *kvm)
{
}
-void kvm_arch_destroy_vm(struct kvm *kvm)
-{
- kvmppc_free_vcpus(kvm);
- kvm_free_physmem(kvm);
- cleanup_srcu_struct(&kvm->srcu);
- kfree(kvm);
-}
-
int kvm_dev_ioctl_check_extension(long ext)
{
int r;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 985d825494f..bade533ba28 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -164,24 +164,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
return r;
}
-struct kvm *kvm_arch_create_vm(void)
+int kvm_arch_init_vm(struct kvm *kvm)
{
- struct kvm *kvm;
int rc;
char debug_name[16];
rc = s390_enable_sie();
if (rc)
- goto out_nokvm;
-
- rc = -ENOMEM;
- kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
- if (!kvm)
- goto out_nokvm;
+ goto out_err;
kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
if (!kvm->arch.sca)
- goto out_nosca;
+ goto out_err;
sprintf(debug_name, "kvm-%u", current->pid);
@@ -195,13 +189,11 @@ struct kvm *kvm_arch_create_vm(void)
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "%s", "vm created");
- return kvm;
+ return 0;
out_nodbf:
free_page((unsigned long)(kvm->arch.sca));
-out_nosca:
- kfree(kvm);
-out_nokvm:
- return ERR_PTR(rc);
+out_err:
+ return rc;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -240,11 +232,8 @@ void kvm_arch_sync_events(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
- kvm_free_physmem(kvm);
free_page((unsigned long)(kvm->arch.sca));
debug_unregister(kvm->arch.dbf);
- cleanup_srcu_struct(&kvm->srcu);
- kfree(kvm);
}
/* Section: vcpu related */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index b36c6b3fe14..8e37deb1eb3 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -15,6 +15,14 @@
struct x86_emulate_ctxt;
+struct x86_exception {
+ u8 vector;
+ bool error_code_valid;
+ u16 error_code;
+ bool nested_page_fault;
+ u64 address; /* cr2 or nested page fault gpa */
+};
+
/*
* x86_emulate_ops:
*
@@ -64,7 +72,8 @@ struct x86_emulate_ops {
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*read_std)(unsigned long addr, void *val,
- unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
+ unsigned int bytes, struct kvm_vcpu *vcpu,
+ struct x86_exception *fault);
/*
* write_std: Write bytes of standard (non-emulated/special) memory.
@@ -74,7 +83,8 @@ struct x86_emulate_ops {
* @bytes: [IN ] Number of bytes to write to memory.
*/
int (*write_std)(unsigned long addr, void *val,
- unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
+ unsigned int bytes, struct kvm_vcpu *vcpu,
+ struct x86_exception *fault);
/*
* fetch: Read bytes of standard (non-emulated/special) memory.
* Used for instruction fetch.
@@ -83,7 +93,8 @@ struct x86_emulate_ops {
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*fetch)(unsigned long addr, void *val,
- unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
+ unsigned int bytes, struct kvm_vcpu *vcpu,
+ struct x86_exception *fault);
/*
* read_emulated: Read bytes from emulated/special memory area.
@@ -94,7 +105,7 @@ struct x86_emulate_ops {
int (*read_emulated)(unsigned long addr,
void *val,
unsigned int bytes,
- unsigned int *error,
+ struct x86_exception *fault,
struct kvm_vcpu *vcpu);
/*
@@ -107,7 +118,7 @@ struct x86_emulate_ops {
int (*write_emulated)(unsigned long addr,
const void *val,
unsigned int bytes,
- unsigned int *error,
+ struct x86_exception *fault,
struct kvm_vcpu *vcpu);
/*
@@ -122,7 +133,7 @@ struct x86_emulate_ops {
const void *old,
const void *new,
unsigned int bytes,
- unsigned int *error,
+ struct x86_exception *fault,
struct kvm_vcpu *vcpu);
int (*pio_in_emulated)(int size, unsigned short port, void *val,
@@ -159,7 +170,10 @@ struct operand {
};
union {
unsigned long *reg;
- unsigned long mem;
+ struct segmented_address {
+ ulong ea;
+ unsigned seg;
+ } mem;
} addr;
union {
unsigned long val;
@@ -226,9 +240,8 @@ struct x86_emulate_ctxt {
bool perm_ok; /* do not check permissions if true */
- int exception; /* exception that happens during emulation or -1 */
- u32 error_code; /* error code for exception */
- bool error_code_valid;
+ bool have_exception;
+ struct x86_exception exception;
/* decode cache */
struct decode_cache decode;
@@ -252,7 +265,7 @@ struct x86_emulate_ctxt {
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
#endif
-int x86_decode_insn(struct x86_emulate_ctxt *ctxt);
+int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
#define EMULATION_FAILED -1
#define EMULATION_OK 0
#define EMULATION_RESTART 1
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f702f82aa1e..aa75f21a9fb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -83,11 +83,14 @@
#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8
+#define ASYNC_PF_PER_VCPU 64
+
extern spinlock_t kvm_lock;
extern struct list_head vm_list;
struct kvm_vcpu;
struct kvm;
+struct kvm_async_pf;
enum kvm_reg {
VCPU_REGS_RAX = 0,
@@ -114,6 +117,7 @@ enum kvm_reg {
enum kvm_reg_ex {
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
+ VCPU_EXREG_CR3,
};
enum {
@@ -238,16 +242,18 @@ struct kvm_mmu {
void (*new_cr3)(struct kvm_vcpu *vcpu);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
- int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
- void (*inject_page_fault)(struct kvm_vcpu *vcpu);
+ int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err,
+ bool prefault);
+ void (*inject_page_fault)(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault);
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
- u32 *error);
+ struct x86_exception *exception);
gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
void (*prefetch_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *page);
int (*sync_page)(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp, bool clear_unsync);
+ struct kvm_mmu_page *sp);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
hpa_t root_hpa;
int root_level;
@@ -315,16 +321,6 @@ struct kvm_vcpu_arch {
*/
struct kvm_mmu *walk_mmu;
- /*
- * This struct is filled with the necessary information to propagate a
- * page fault into the guest
- */
- struct {
- u64 address;
- unsigned error_code;
- bool nested;
- } fault;
-
/* only needed in kvm_pv_mmu_op() path, but it's hot so
* put it here to avoid allocation */
struct kvm_pv_mmu_op_buffer mmu_op_buffer;
@@ -412,6 +408,15 @@ struct kvm_vcpu_arch {
u64 hv_vapic;
cpumask_var_t wbinvd_dirty_mask;
+
+ struct {
+ bool halted;
+ gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
+ struct gfn_to_hva_cache data;
+ u64 msr_val;
+ u32 id;
+ bool send_user_only;
+ } apf;
};
struct kvm_arch {
@@ -456,6 +461,10 @@ struct kvm_arch {
/* fields used by HYPER-V emulation */
u64 hv_guest_os_id;
u64 hv_hypercall;
+
+ #ifdef CONFIG_KVM_MMU_AUDIT
+ int audit_point;
+ #endif
};
struct kvm_vm_stat {
@@ -529,6 +538,7 @@ struct kvm_x86_ops {
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
+ void (*decache_cr3)(struct kvm_vcpu *vcpu);
void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
@@ -582,9 +592,17 @@ struct kvm_x86_ops {
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+ void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
const struct trace_print_flags *exit_reasons_str;
};
+struct kvm_arch_async_pf {
+ u32 token;
+ gfn_t gfn;
+ unsigned long cr3;
+ bool direct_map;
+};
+
extern struct kvm_x86_ops *kvm_x86_ops;
int kvm_mmu_module_init(void);
@@ -594,7 +612,6 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
int kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
-void kvm_mmu_set_base_ptes(u64 base_pte);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask);
@@ -623,8 +640,15 @@ enum emulation_result {
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
-int emulate_instruction(struct kvm_vcpu *vcpu,
- unsigned long cr2, u16 error_code, int emulation_type);
+int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
+ int emulation_type, void *insn, int insn_len);
+
+static inline int emulate_instruction(struct kvm_vcpu *vcpu,
+ int emulation_type)
+{
+ return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
+}
+
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
<