aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-05-06 13:21:18 -0700
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-05-06 13:21:18 -0700
commit6de410c2b0cc055ae9ee640c84331f6a70878d9b (patch)
tree49dfc7df2f1977c2d665c99266ded92afc98734b
parentc6799ade4ae04b53a5f677e5289116155ff01574 (diff)
parent2ff81f70b56dc1cdd3bf2f08414608069db6ef1a (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (66 commits) KVM: Remove unused 'instruction_length' KVM: Don't require explicit indication of completion of mmio or pio KVM: Remove extraneous guest entry on mmio read KVM: SVM: Only save/restore MSRs when needed KVM: fix an if() condition KVM: VMX: Add lazy FPU support for VT KVM: VMX: Properly shadow the CR0 register in the vcpu struct KVM: Don't complain about cpu erratum AA15 KVM: Lazy FPU support for SVM KVM: Allow passing 64-bit values to the emulated read/write API KVM: Per-vcpu statistics KVM: VMX: Avoid unnecessary vcpu_load()/vcpu_put() cycles KVM: MMU: Avoid heavy ASSERT at non debug mode. KVM: VMX: Only save/restore MSR_K6_STAR if necessary KVM: Fold drivers/kvm/kvm_vmx.h into drivers/kvm/vmx.c KVM: VMX: Don't switch 64-bit msrs for 32-bit guests KVM: VMX: Reduce unnecessary saving of host msrs KVM: Handle guest page faults when emulating mmio KVM: SVM: Report hardware exit reason to userspace instead of dmesg KVM: Retry sleeping allocation if atomic allocation fails ...
-rw-r--r--drivers/kvm/kvm.h100
-rw-r--r--drivers/kvm/kvm_main.c792
-rw-r--r--drivers/kvm/kvm_svm.h13
-rw-r--r--drivers/kvm/kvm_vmx.h14
-rw-r--r--drivers/kvm/mmu.c154
-rw-r--r--drivers/kvm/paging_tmpl.h12
-rw-r--r--drivers/kvm/svm.c197
-rw-r--r--drivers/kvm/svm.h6
-rw-r--r--drivers/kvm/vmx.c273
-rw-r--r--drivers/kvm/x86_emulate.c51
-rw-r--r--drivers/kvm/x86_emulate.h32
-rw-r--r--include/linux/Kbuild1
-rw-r--r--include/linux/kvm.h133
-rw-r--r--include/linux/miscdevice.h1
14 files changed, 1301 insertions, 478 deletions
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 0d122bf889d..41634fde8e1 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -51,16 +51,19 @@
#define UNMAPPED_GVA (~(gpa_t)0)
#define KVM_MAX_VCPUS 1
+#define KVM_ALIAS_SLOTS 4
#define KVM_MEMORY_SLOTS 4
#define KVM_NUM_MMU_PAGES 256
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
+#define KVM_MAX_CPUID_ENTRIES 40
#define FX_IMAGE_SIZE 512
#define FX_IMAGE_ALIGN 16
#define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN)
#define DE_VECTOR 0
+#define NM_VECTOR 7
#define DF_VECTOR 8
#define TS_VECTOR 10
#define NP_VECTOR 11
@@ -73,6 +76,8 @@
#define IOPL_SHIFT 12
+#define KVM_PIO_PAGE_OFFSET 1
+
/*
* Address types:
*
@@ -106,6 +111,7 @@ struct kvm_pte_chain {
* bits 4:7 - page table level for this shadow (1-4)
* bits 8:9 - page table quadrant for 2-level guests
* bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode)
+ * bits 17:18 - "access" - the user and writable bits of a huge page pde
*/
union kvm_mmu_page_role {
unsigned word;
@@ -115,6 +121,7 @@ union kvm_mmu_page_role {
unsigned quadrant : 2;
unsigned pad_for_nice_hex_output : 6;
unsigned metaphysical : 1;
+ unsigned hugepage_access : 2;
};
};
@@ -133,7 +140,6 @@ struct kvm_mmu_page {
unsigned long slot_bitmap; /* One bit set per slot which has memory
* in this shadow page.
*/
- int global; /* Set if all ptes in this page are global */
int multimapped; /* More than one parent_pte? */
int root_count; /* Currently serving as active root */
union {
@@ -219,6 +225,34 @@ enum {
VCPU_SREG_LDTR,
};
+struct kvm_pio_request {
+ unsigned long count;
+ int cur_count;
+ struct page *guest_pages[2];
+ unsigned guest_page_offset;
+ int in;
+ int size;
+ int string;
+ int down;
+ int rep;
+};
+
+struct kvm_stat {
+ u32 pf_fixed;
+ u32 pf_guest;
+ u32 tlb_flush;
+ u32 invlpg;
+
+ u32 exits;
+ u32 io_exits;
+ u32 mmio_exits;
+ u32 signal_exits;
+ u32 irq_window_exits;
+ u32 halt_exits;
+ u32 request_irq_exits;
+ u32 irq_exits;
+};
+
struct kvm_vcpu {
struct kvm *kvm;
union {
@@ -228,6 +262,8 @@ struct kvm_vcpu {
struct mutex mutex;
int cpu;
int launched;
+ u64 host_tsc;
+ struct kvm_run *run;
int interrupt_window_open;
unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
@@ -266,6 +302,7 @@ struct kvm_vcpu {
char fx_buf[FX_BUF_SIZE];
char *host_fx_image;
char *guest_fx_image;
+ int fpu_active;
int mmio_needed;
int mmio_read_completed;
@@ -273,6 +310,14 @@ struct kvm_vcpu {
int mmio_size;
unsigned char mmio_data[8];
gpa_t mmio_phys_addr;
+ gva_t mmio_fault_cr2;
+ struct kvm_pio_request pio;
+ void *pio_data;
+
+ int sigset_active;
+ sigset_t sigset;
+
+ struct kvm_stat stat;
struct {
int active;
@@ -284,6 +329,15 @@ struct kvm_vcpu {
u32 ar;
} tr, es, ds, fs, gs;
} rmode;
+
+ int cpuid_nent;
+ struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+};
+
+struct kvm_mem_alias {
+ gfn_t base_gfn;
+ unsigned long npages;
+ gfn_t target_gfn;
};
struct kvm_memory_slot {
@@ -296,6 +350,8 @@ struct kvm_memory_slot {
struct kvm {
spinlock_t lock; /* protects everything except vcpus */
+ int naliases;
+ struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
int nmemslots;
struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS];
/*
@@ -312,22 +368,6 @@ struct kvm {
struct file *filp;
};
-struct kvm_stat {
- u32 pf_fixed;
- u32 pf_guest;
- u32 tlb_flush;
- u32 invlpg;
-
- u32 exits;
- u32 io_exits;
- u32 mmio_exits;
- u32 signal_exits;
- u32 irq_window_exits;
- u32 halt_exits;
- u32 request_irq_exits;
- u32 irq_exits;
-};
-
struct descriptor_table {
u16 limit;
unsigned long base;
@@ -358,10 +398,8 @@ struct kvm_arch_ops {
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
- void (*decache_cr0_cr4_guest_bits)(struct kvm_vcpu *vcpu);
+ void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
- void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu,
- unsigned long cr0);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
@@ -391,7 +429,6 @@ struct kvm_arch_ops {
unsigned char *hypercall_addr);
};
-extern struct kvm_stat kvm_stat;
extern struct kvm_arch_ops *kvm_arch_ops;
#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
@@ -400,28 +437,29 @@ extern struct kvm_arch_ops *kvm_arch_ops;
int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module);
void kvm_exit_arch(void);
+int kvm_mmu_module_init(void);
+void kvm_mmu_module_exit(void);
+
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
int kvm_mmu_setup(struct kvm_vcpu *vcpu);
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot);
+void kvm_mmu_zap_all(struct kvm_vcpu *vcpu);
hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva);
+struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_emulator_want_group7_invlpg(void);
extern hpa_t bad_page_address;
-static inline struct page *gfn_to_page(struct kvm_memory_slot *slot, gfn_t gfn)
-{
- return slot->phys_mem[gfn - slot->base_gfn];
-}
-
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
@@ -444,6 +482,10 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value,
struct x86_emulate_ctxt;
+int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+ int size, unsigned long count, int string, int down,
+ gva_t address, int rep, unsigned port);
+void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
int emulate_clts(struct kvm_vcpu *vcpu);
int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr,
@@ -493,12 +535,6 @@ static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
return vcpu->mmu.page_fault(vcpu, gva, error_code);
}
-static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn)
-{
- struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
- return (slot) ? slot->phys_mem[gfn - slot->base_gfn] : NULL;
-}
-
static inline int is_long_mode(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index dc7a8c78cbf..c8b8cfa332b 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -51,27 +51,27 @@ static DEFINE_SPINLOCK(kvm_lock);
static LIST_HEAD(vm_list);
struct kvm_arch_ops *kvm_arch_ops;
-struct kvm_stat kvm_stat;
-EXPORT_SYMBOL_GPL(kvm_stat);
+
+#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
static struct kvm_stats_debugfs_item {
const char *name;
- u32 *data;
+ int offset;
struct dentry *dentry;
} debugfs_entries[] = {
- { "pf_fixed", &kvm_stat.pf_fixed },
- { "pf_guest", &kvm_stat.pf_guest },
- { "tlb_flush", &kvm_stat.tlb_flush },
- { "invlpg", &kvm_stat.invlpg },
- { "exits", &kvm_stat.exits },
- { "io_exits", &kvm_stat.io_exits },
- { "mmio_exits", &kvm_stat.mmio_exits },
- { "signal_exits", &kvm_stat.signal_exits },
- { "irq_window", &kvm_stat.irq_window_exits },
- { "halt_exits", &kvm_stat.halt_exits },
- { "request_irq", &kvm_stat.request_irq_exits },
- { "irq_exits", &kvm_stat.irq_exits },
- { NULL, NULL }
+ { "pf_fixed", STAT_OFFSET(pf_fixed) },
+ { "pf_guest", STAT_OFFSET(pf_guest) },
+ { "tlb_flush", STAT_OFFSET(tlb_flush) },
+ { "invlpg", STAT_OFFSET(invlpg) },
+ { "exits", STAT_OFFSET(exits) },
+ { "io_exits", STAT_OFFSET(io_exits) },
+ { "mmio_exits", STAT_OFFSET(mmio_exits) },
+ { "signal_exits", STAT_OFFSET(signal_exits) },
+ { "irq_window", STAT_OFFSET(irq_window_exits) },
+ { "halt_exits", STAT_OFFSET(halt_exits) },
+ { "request_irq", STAT_OFFSET(request_irq_exits) },
+ { "irq_exits", STAT_OFFSET(irq_exits) },
+ { NULL }
};
static struct dentry *debugfs_dir;
@@ -346,6 +346,17 @@ static void kvm_free_physmem(struct kvm *kvm)
kvm_free_physmem_slot(&kvm->memslots[i], NULL);
}
+static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ for (i = 0; i < 2; ++i)
+ if (vcpu->pio.guest_pages[i]) {
+ __free_page(vcpu->pio.guest_pages[i]);
+ vcpu->pio.guest_pages[i] = NULL;
+ }
+}
+
static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
{
if (!vcpu->vmcs)
@@ -355,6 +366,11 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
kvm_mmu_destroy(vcpu);
vcpu_put(vcpu);
kvm_arch_ops->vcpu_free(vcpu);
+ free_page((unsigned long)vcpu->run);
+ vcpu->run = NULL;
+ free_page((unsigned long)vcpu->pio_data);
+ vcpu->pio_data = NULL;
+ free_pio_guest_pages(vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
@@ -404,12 +420,12 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
u64 pdpte;
u64 *pdpt;
int ret;
- struct kvm_memory_slot *memslot;
+ struct page *page;
spin_lock(&vcpu->kvm->lock);
- memslot = gfn_to_memslot(vcpu->kvm, pdpt_gfn);
- /* FIXME: !memslot - emulate? 0xff? */
- pdpt = kmap_atomic(gfn_to_page(memslot, pdpt_gfn), KM_USER0);
+ page = gfn_to_page(vcpu->kvm, pdpt_gfn);
+ /* FIXME: !page - emulate? 0xff? */
+ pdpt = kmap_atomic(page, KM_USER0);
ret = 1;
for (i = 0; i < 4; ++i) {
@@ -494,7 +510,6 @@ EXPORT_SYMBOL_GPL(set_cr0);
void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
- kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
}
EXPORT_SYMBOL_GPL(lmsw);
@@ -830,7 +845,73 @@ out:
return r;
}
-struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
+/*
+ * Set a new alias region. Aliases map a portion of physical memory into
+ * another portion. This is useful for memory windows, for example the PC
+ * VGA region.
+ */
+static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
+ struct kvm_memory_alias *alias)
+{
+ int r, n;
+ struct kvm_mem_alias *p;
+
+ r = -EINVAL;
+ /* General sanity checks */
+ if (alias->memory_size & (PAGE_SIZE - 1))
+ goto out;
+ if (alias->guest_phys_addr & (PAGE_SIZE - 1))
+ goto out;
+ if (alias->slot >= KVM_ALIAS_SLOTS)
+ goto out;
+ if (alias->guest_phys_addr + alias->memory_size
+ < alias->guest_phys_addr)
+ goto out;
+ if (alias->target_phys_addr + alias->memory_size
+ < alias->target_phys_addr)
+ goto out;
+
+ spin_lock(&kvm->lock);
+
+ p = &kvm->aliases[alias->slot];
+ p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
+ p->npages = alias->memory_size >> PAGE_SHIFT;
+ p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
+
+ for (n = KVM_ALIAS_SLOTS; n > 0; --n)
+ if (kvm->aliases[n - 1].npages)
+ break;
+ kvm->naliases = n;
+
+ spin_unlock(&kvm->lock);
+
+ vcpu_load(&kvm->vcpus[0]);
+ spin_lock(&kvm->lock);
+ kvm_mmu_zap_all(&kvm->vcpus[0]);
+ spin_unlock(&kvm->lock);
+ vcpu_put(&kvm->vcpus[0]);
+
+ return 0;
+
+out:
+ return r;
+}
+
+static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+{
+ int i;
+ struct kvm_mem_alias *alias;
+
+ for (i = 0; i < kvm->naliases; ++i) {
+ alias = &kvm->aliases[i];
+ if (gfn >= alias->base_gfn
+ && gfn < alias->base_gfn + alias->npages)
+ return alias->target_gfn + gfn - alias->base_gfn;
+ }
+ return gfn;
+}
+
+static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
int i;
@@ -843,7 +924,24 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
}
return NULL;
}
-EXPORT_SYMBOL_GPL(gfn_to_memslot);
+
+struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
+{
+ gfn = unalias_gfn(kvm, gfn);
+ return __gfn_to_memslot(kvm, gfn);
+}
+
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+{
+ struct kvm_memory_slot *slot;
+
+ gfn = unalias_gfn(kvm, gfn);
+ slot = __gfn_to_memslot(kvm, gfn);
+ if (!slot)
+ return NULL;
+ return slot->phys_mem[gfn - slot->base_gfn];
+}
+EXPORT_SYMBOL_GPL(gfn_to_page);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
{
@@ -871,7 +969,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
}
static int emulator_read_std(unsigned long addr,
- unsigned long *val,
+ void *val,
unsigned int bytes,
struct x86_emulate_ctxt *ctxt)
{
@@ -883,20 +981,20 @@ static int emulator_read_std(unsigned long addr,
unsigned offset = addr & (PAGE_SIZE-1);
unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
unsigned long pfn;
- struct kvm_memory_slot *memslot;
- void *page;
+ struct page *page;
+ void *page_virt;
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
pfn = gpa >> PAGE_SHIFT;
- memslot = gfn_to_memslot(vcpu->kvm, pfn);
- if (!memslot)
+ page = gfn_to_page(vcpu->kvm, pfn);
+ if (!page)
return X86EMUL_UNHANDLEABLE;
- page = kmap_atomic(gfn_to_page(memslot, pfn), KM_USER0);
+ page_virt = kmap_atomic(page, KM_USER0);
- memcpy(data, page + offset, tocopy);
+ memcpy(data, page_virt + offset, tocopy);
- kunmap_atomic(page, KM_USER0);
+ kunmap_atomic(page_virt, KM_USER0);
bytes -= tocopy;
data += tocopy;
@@ -907,7 +1005,7 @@ static int emulator_read_std(unsigned long addr,
}
static int emulator_write_std(unsigned long addr,
- unsigned long val,
+ const void *val,
unsigned int bytes,
struct x86_emulate_ctxt *ctxt)
{
@@ -917,7 +1015,7 @@ static int emulator_write_std(unsigned long addr,
}
static int emulator_read_emulated(unsigned long addr,
- unsigned long *val,
+ void *val,
unsigned int bytes,
struct x86_emulate_ctxt *ctxt)
{
@@ -945,37 +1043,37 @@ static int emulator_read_emulated(unsigned long addr,
}
static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
- unsigned long val, int bytes)
+ const void *val, int bytes)
{
- struct kvm_memory_slot *m;
struct page *page;
void *virt;
if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
return 0;
- m = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT);
- if (!m)
+ page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+ if (!page)
return 0;
- page = gfn_to_page(m, gpa >> PAGE_SHIFT);
kvm_mmu_pre_write(vcpu, gpa, bytes);
mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
virt = kmap_atomic(page, KM_USER0);
- memcpy(virt + offset_in_page(gpa), &val, bytes);
+ memcpy(virt + offset_in_page(gpa), val, bytes);
kunmap_atomic(virt, KM_USER0);
kvm_mmu_post_write(vcpu, gpa, bytes);
return 1;
}
static int emulator_write_emulated(unsigned long addr,
- unsigned long val,
+ const void *val,
unsigned int bytes,
struct x86_emulate_ctxt *ctxt)
{
struct kvm_vcpu *vcpu = ctxt->vcpu;
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
- if (gpa == UNMAPPED_GVA)
+ if (gpa == UNMAPPED_GVA) {
+ kvm_arch_ops->inject_page_fault(vcpu, addr, 2);
return X86EMUL_PROPAGATE_FAULT;
+ }
if (emulator_write_phys(vcpu, gpa, val, bytes))
return X86EMUL_CONTINUE;
@@ -984,14 +1082,14 @@ static int emulator_write_emulated(unsigned long addr,
vcpu->mmio_phys_addr = gpa;
vcpu->mmio_size = bytes;
vcpu->mmio_is_write = 1;
- memcpy(vcpu->mmio_data, &val, bytes);
+ memcpy(vcpu->mmio_data, val, bytes);
return X86EMUL_CONTINUE;
}
static int emulator_cmpxchg_emulated(unsigned long addr,
- unsigned long old,
- unsigned long new,
+ const void *old,
+ const void *new,
unsigned int bytes,
struct x86_emulate_ctxt *ctxt)
{
@@ -1004,30 +1102,6 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
return emulator_write_emulated(addr, new, bytes, ctxt);
}
-#ifdef CONFIG_X86_32
-
-static int emulator_cmpxchg8b_emulated(unsigned long addr,
- unsigned long old_lo,
- unsigned long old_hi,
- unsigned long new_lo,
- unsigned long new_hi,
- struct x86_emulate_ctxt *ctxt)
-{
- static int reported;
- int r;
-
- if (!reported) {
- reported = 1;
- printk(KERN_WARNING "kvm: emulating exchange8b as write\n");
- }
- r = emulator_write_emulated(addr, new_lo, 4, ctxt);
- if (r != X86EMUL_CONTINUE)
- return r;
- return emulator_write_emulated(addr+4, new_hi, 4, ctxt);
-}
-
-#endif
-
static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
return kvm_arch_ops->get_segment_base(vcpu, seg);
@@ -1042,7 +1116,6 @@ int emulate_clts(struct kvm_vcpu *vcpu)
{
unsigned long cr0;
- kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
cr0 = vcpu->cr0 & ~CR0_TS_MASK;
kvm_arch_ops->set_cr0(vcpu, cr0);
return X86EMUL_CONTINUE;
@@ -1102,9 +1175,6 @@ struct x86_emulate_ops emulate_ops = {
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
.cmpxchg_emulated = emulator_cmpxchg_emulated,
-#ifdef CONFIG_X86_32
- .cmpxchg8b_emulated = emulator_cmpxchg8b_emulated,
-#endif
};
int emulate_instruction(struct kvm_vcpu *vcpu,
@@ -1116,6 +1186,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
int r;
int cs_db, cs_l;
+ vcpu->mmio_fault_cr2 = cr2;
kvm_arch_ops->cache_regs(vcpu);
kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
@@ -1166,8 +1237,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
kvm_arch_ops->decache_regs(vcpu);
kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags);
- if (vcpu->mmio_is_write)
+ if (vcpu->mmio_is_write) {
+ vcpu->mmio_needed = 0;
return EMULATE_DO_MMIO;
+ }
return EMULATE_DONE;
}
@@ -1177,7 +1250,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
- kvm_arch_ops->decache_regs(vcpu);
+ kvm_arch_ops->cache_regs(vcpu);
ret = -KVM_EINVAL;
#ifdef CONFIG_X86_64
if (is_long_mode(vcpu)) {
@@ -1201,10 +1274,19 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
}
switch (nr) {
default:
- ;
+ run->hypercall.args[0] = a0;
+ run->hypercall.args[1] = a1;
+ run->hypercall.args[2] = a2;
+ run->hypercall.args[3] = a3;
+ run->hypercall.args[4] = a4;
+ run->hypercall.args[5] = a5;
+ run->hypercall.ret = ret;
+ run->hypercall.longmode = is_long_mode(vcpu);
+ kvm_arch_ops->decache_regs(vcpu);
+ return 0;
}
vcpu->regs[VCPU_REGS_RAX] = ret;
- kvm_arch_ops->cache_regs(vcpu);
+ kvm_arch_ops->decache_regs(vcpu);
return 1;
}
EXPORT_SYMBOL_GPL(kvm_hypercall);
@@ -1237,7 +1319,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
{
- kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
+ kvm_arch_ops->decache_cr4_guest_bits(vcpu);
switch (cr) {
case 0:
return vcpu->cr0;
@@ -1442,6 +1524,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
__FUNCTION__, data);
break;
+ case MSR_IA32_MCG_STATUS:
+ printk(KERN_WARNING "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
+ __FUNCTION__, data);
+ break;
case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE:
case 0x200 ... 0x2ff: /* MTRRs */
@@ -1478,6 +1564,8 @@ static int set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
void kvm_resched(struct kvm_vcpu *vcpu)
{
+ if (!need_resched())
+ return;
vcpu_put(vcpu);
cond_resched();
vcpu_load(vcpu);
@@ -1502,29 +1590,250 @@ void save_msrs(struct vmx_msr_entry *e, int n)
}
EXPORT_SYMBOL_GPL(save_msrs);
+void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+{
+ int i;
+ u32 function;
+ struct kvm_cpuid_entry *e, *best;
+
+ kvm_arch_ops->cache_regs(vcpu);
+ function = vcpu->regs[VCPU_REGS_RAX];
+ vcpu->regs[VCPU_REGS_RAX] = 0;
+ vcpu->regs[VCPU_REGS_RBX] = 0;
+ vcpu->regs[VCPU_REGS_RCX] = 0;
+ vcpu->regs[VCPU_REGS_RDX] = 0;
+ best = NULL;
+ for (i = 0; i < vcpu->cpuid_nent; ++i) {
+ e = &vcpu->cpuid_entries[i];
+ if (e->function == function) {
+ best = e;
+ break;
+ }
+ /*
+ * Both basic or both extended?
+ */
+ if (((e->function ^ function) & 0x80000000) == 0)
+ if (!best || e->function > best->function)
+ best = e;
+ }
+ if (best) {
+ vcpu->regs[VCPU_REGS_RAX] = best->eax;
+ vcpu->regs[VCPU_REGS_RBX] = best->ebx;
+ vcpu->regs[VCPU_REGS_RCX] = best->ecx;
+ vcpu->regs[VCPU_REGS_RDX] = best->edx;
+ }
+ kvm_arch_ops->decache_regs(vcpu);
+ kvm_arch_ops->skip_emulated_instruction(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
+
+static int pio_copy_data(struct kvm_vcpu *vcpu)
+{
+ void *p = vcpu->pio_data;
+ void *q;
+ unsigned bytes;
+ int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
+
+ kvm_arch_ops->vcpu_put(vcpu);
+ q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
+ PAGE_KERNEL);
+ if (!q) {
+ kvm_arch_ops->vcpu_load(vcpu);
+ free_pio_guest_pages(vcpu);
+ return -ENOMEM;
+ }
+ q += vcpu->pio.guest_page_offset;
+ bytes = vcpu->pio.size * vcpu->pio.cur_count;
+ if (vcpu->pio.in)
+ memcpy(q, p, bytes);
+ else
+ memcpy(p, q, bytes);
+ q -= vcpu->pio.guest_page_offset;
+ vunmap(q);
+ kvm_arch_ops->vcpu_load(vcpu);
+ free_pio_guest_pages(vcpu);
+ return 0;
+}
+
+static int complete_pio(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pio_request *io = &vcpu->pio;
+ long delta;
+ int r;
+
+ kvm_arch_ops->cache_regs(vcpu);
+
+ if (!io->string) {
+ if (io->in)
+ memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data,
+ io->size);
+ } else {
+ if (io->in) {
+ r = pio_copy_data(vcpu);
+ if (r) {
+ kvm_arch_ops->cache_regs(vcpu);
+ return r;
+ }
+ }
+
+ delta = 1;
+ if (io->rep) {
+ delta *= io->cur_count;
+ /*
+ * The size of the register should really depend on
+ * current address size.
+ */
+ vcpu->regs[VCPU_REGS_RCX] -= delta;
+ }
+ if (io->down)
+ delta = -delta;
+ delta *= io->size;
+ if (io->in)
+ vcpu->regs[VCPU_REGS_RDI] += delta;
+ else
+ vcpu->regs[VCPU_REGS_RSI] += delta;
+ }
+
+ kvm_arch_ops->decache_regs(vcpu);
+
+ io->count -= io->cur_count;
+ io->cur_count = 0;
+
+ if (!io->count)
+ kvm_arch_ops->skip_emulated_instruction(vcpu);
+ return 0;
+}
+
+int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+ int size, unsigned long count, int string, int down,
+ gva_t address, int rep, unsigned port)
+{
+ unsigned now, in_page;
+ int i;
+ int nr_pages = 1;
+ struct page *page;
+
+ vcpu->run->exit_reason = KVM_EXIT_IO;
+ vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
+ vcpu->run->io.size = size;
+ vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
+ vcpu->run->io.count = count;
+ vcpu->run->io.port = port;
+ vcpu->pio.count = count;
+ vcpu->pio.cur_count = count;
+ vcpu->pio.size = size;
+ vcpu->pio.in = in;
+ vcpu->pio.string = string;
+ vcpu->pio.down = down;
+ vcpu->pio.guest_page_offset = offset_in_page(address);
+ vcpu->pio.rep = rep;
+
+ if (!string) {
+ kvm_arch_ops->cache_regs(vcpu);
+ memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
+ kvm_arch_ops->decache_regs(vcpu);
+ return 0;
+ }
+
+ if (!count) {
+ kvm_arch_ops->skip_emulated_instruction(vcpu);
+ return 1;
+ }
+
+ now = min(count, PAGE_SIZE / size);
+
+ if (!down)
+ in_page = PAGE_SIZE - offset_in_page(address);
+ else
+ in_page = offset_in_page(address) + size;
+ now = min(count, (unsigned long)in_page / size);
+ if (!now) {
+ /*
+ * String I/O straddles page boundary. Pin two guest pages
+ * so that we satisfy atomicity constraints. Do just one
+ * transaction to avoid complexity.
+ */
+ nr_pages = 2;
+ now = 1;
+ }
+ if (down) {
+ /*
+ * String I/O in reverse. Yuck. Kill the guest, fix later.
+ */
+ printk(KERN_ERR "kvm: guest string pio down\n");
+ inject_gp(vcpu);
+ return 1;
+ }
+ vcpu->run->io.count = now;
+ vcpu->pio.cur_count = now;
+
+ for (i = 0; i < nr_pages; ++i) {
+ spin_lock(&vcpu->kvm->lock);
+ page = gva_to_page(vcpu, address + i * PAGE_SIZE);
+ if (page)
+ get_page(page);
+ vcpu->pio.guest_pages[i] = page;
+ spin_unlock(&vcpu->kvm->lock);
+ if (!page) {
+ inject_gp(vcpu);
+ free_pio_guest_pages(vcpu);
+ return 1;
+ }
+ }
+
+ if (!vcpu->pio.in)
+ return pio_copy_data(vcpu);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_setup_pio);
+
static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
+ sigset_t sigsaved;
vcpu_load(vcpu);
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
/* re-sync apic's tpr */
vcpu->cr8 = kvm_run->cr8;
- if (kvm_run->emulated) {
- kvm_arch_ops->skip_emulated_instruction(vcpu);
- kvm_run->emulated = 0;
+ if (vcpu->pio.cur_count) {
+ r = complete_pio(vcpu);
+ if (r)
+ goto out;
}
- if (kvm_run->mmio_completed) {
+ if (vcpu->mmio_needed) {
memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
vcpu->mmio_read_completed = 1;
+ vcpu->mmio_needed = 0;
+ r = emulate_instruction(vcpu, kvm_run,
+ vcpu->mmio_fault_cr2, 0);
+ if (r == EMULATE_DO_MMIO) {
+ /*
+ * Read-modify-write. Back to userspace.
+ */
+ kvm_run->exit_reason = KVM_EXIT_MMIO;
+ r = 0;
+ goto out;
+ }
}
- vcpu->mmio_needed = 0;
+ if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
+ kvm_arch_ops->cache_regs(vcpu);
+ vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
+ kvm_arch_ops->decache_regs(vcpu);
+ }
r = kvm_arch_ops->run(vcpu, kvm_run);
+out:
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
vcpu_put(vcpu);
return r;
}
@@ -1633,7 +1942,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
sregs->gdt.limit = dt.limit;
sregs->gdt.base = dt.base;
- kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
+ kvm_arch_ops->decache_cr4_guest_bits(vcpu);
sregs->cr0 = vcpu->cr0;
sregs->cr2 = vcpu->cr2;
sregs->cr3 = vcpu->cr3;
@@ -1665,16 +1974,6 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
vcpu_load(vcpu);
- set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
- set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
- set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
- set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
- set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
- set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
-
- set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
- set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
-
dt.limit = sregs->idt.limit;
dt.base = sregs->idt.base;
kvm_arch_ops->set_idt(vcpu, &dt);
@@ -1694,10 +1993,10 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
#endif
vcpu->apic_base = sregs->apic_base;
- kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
+ kvm_arch_ops->decache_cr4_guest_bits(vcpu);
mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
- kvm_arch_ops->set_cr0_no_modeswitch(vcpu, sregs->cr0);
+ kvm_arch_ops->set_cr0(vcpu, sregs->cr0);
mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
kvm_arch_ops->set_cr4(vcpu, sregs->cr4);
@@ -1714,6 +2013,16 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
if (vcpu->irq_pending[i])
__set_bit(i, &vcpu->irq_summary);
+ set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
+ set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
+ set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
+ set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
+ set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
+ set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
+
+ set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
+ set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
+
vcpu_put(vcpu);
return 0;
@@ -1887,6 +2196,36 @@ static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
return r;
}
+static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
+ unsigned long address,
+ int *type)
+{
+ struct kvm_vcpu *vcpu = vma->vm_file->private_data;
+ unsigned long pgoff;
+ struct page *page;
+
+ *type = VM_FAULT_MINOR;
+ pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+ if (pgoff == 0)
+ page = virt_to_page(vcpu->run);
+ else if (pgoff == KVM_PIO_PAGE_OFFSET)
+ page = virt_to_page(vcpu->pio_data);
+ else
+ return NOPAGE_SIGBUS;
+ get_page(page);
+ return page;
+}
+
+static struct vm_operations_struct kvm_vcpu_vm_ops = {
+ .nopage = kvm_vcpu_nopage,
+};
+
+static int kvm_vcpu_mmap(struct file *file