aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-04 09:30:33 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-04 09:30:33 -0700
commitecefbd94b834fa32559d854646d777c56749ef1c (patch)
treeca8958900ad9e208a8e5fb7704f1b66dc76131b4 /arch/x86
parentce57e981f2b996aaca2031003b3f866368307766 (diff)
parent3d11df7abbff013b811d5615320580cd5d9d7d31 (diff)
Merge tag 'kvm-3.7-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Avi Kivity: "Highlights of the changes for this release include support for vfio level triggered interrupts, improved big real mode support on older Intels, a streamlines guest page table walker, guest APIC speedups, PIO optimizations, better overcommit handling, and read-only memory." * tag 'kvm-3.7-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (138 commits) KVM: s390: Fix vcpu_load handling in interrupt code KVM: x86: Fix guest debug across vcpu INIT reset KVM: Add resampling irqfds for level triggered interrupts KVM: optimize apic interrupt delivery KVM: MMU: Eliminate pointless temporary 'ac' KVM: MMU: Avoid access/dirty update loop if all is well KVM: MMU: Eliminate eperm temporary KVM: MMU: Optimize is_last_gpte() KVM: MMU: Simplify walk_addr_generic() loop KVM: MMU: Optimize pte permission checks KVM: MMU: Update accessed and dirty bits after guest pagetable walk KVM: MMU: Move gpte_access() out of paging_tmpl.h KVM: MMU: Optimize gpte_access() slightly KVM: MMU: Push clean gpte write protection out of gpte_access() KVM: clarify kvmclock documentation KVM: make processes waiting on vcpu mutex killable KVM: SVM: Make use of asm.h KVM: VMX: Make use of asm.h KVM: VMX: Make lto-friendly KVM: x86: lapic: Clean up find_highest_vector() and count_vectors() ... Conflicts: arch/s390/include/asm/processor.h arch/x86/kvm/i8259.c
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig21
-rw-r--r--arch/x86/include/asm/kvm.h1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h48
-rw-r--r--arch/x86/include/asm/kvm_host.h36
-rw-r--r--arch/x86/include/asm/kvm_para.h6
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/kvm.c3
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kvm/Kconfig2
-rw-r--r--arch/x86/kvm/Makefile2
-rw-r--r--arch/x86/kvm/cpuid.c14
-rw-r--r--arch/x86/kvm/emulate.c538
-rw-r--r--arch/x86/kvm/i8254.c64
-rw-r--r--arch/x86/kvm/i8254.h6
-rw-r--r--arch/x86/kvm/i8259.c70
-rw-r--r--arch/x86/kvm/irq.h2
-rw-r--r--arch/x86/kvm/kvm_timer.h18
-rw-r--r--arch/x86/kvm/lapic.c484
-rw-r--r--arch/x86/kvm/lapic.h61
-rw-r--r--arch/x86/kvm/mmu.c240
-rw-r--r--arch/x86/kvm/mmu.h25
-rw-r--r--arch/x86/kvm/mmu_audit.c8
-rw-r--r--arch/x86/kvm/paging_tmpl.h199
-rw-r--r--arch/x86/kvm/pmu.c2
-rw-r--r--arch/x86/kvm/svm.c82
-rw-r--r--arch/x86/kvm/timer.c47
-rw-r--r--arch/x86/kvm/vmx.c233
-rw-r--r--arch/x86/kvm/x86.c384
-rw-r--r--arch/x86/kvm/x86.h1
29 files changed, 1489 insertions, 1113 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7f9a395c525..b72777ff32a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -586,23 +586,18 @@ config PARAVIRT_TIME_ACCOUNTING
source "arch/x86/xen/Kconfig"
-config KVM_CLOCK
- bool "KVM paravirtualized clock"
- select PARAVIRT
- select PARAVIRT_CLOCK
- ---help---
- Turning on this option will allow you to run a paravirtualized clock
- when running over the KVM hypervisor. Instead of relying on a PIT
- (or probably other) emulation by the underlying device model, the host
- provides the guest with timing infrastructure such as time of day, and
- system time
-
config KVM_GUEST
- bool "KVM Guest support"
+ bool "KVM Guest support (including kvmclock)"
+ select PARAVIRT
select PARAVIRT
+ select PARAVIRT_CLOCK
+ default y if PARAVIRT_GUEST
---help---
This option enables various optimizations for running under the KVM
- hypervisor.
+ hypervisor. It includes a paravirtualized clock, so that instead
+ of relying on a PIT (or probably other) emulation by the
+ underlying device model, the host provides the guest with
+ timing infrastructure such as time of day, and system time
source "arch/x86/lguest/Kconfig"
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 41e08cb6a09..a65ec29e6ff 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -41,6 +41,7 @@
#define __KVM_HAVE_DEBUGREGS
#define __KVM_HAVE_XSAVE
#define __KVM_HAVE_XCRS
+#define __KVM_HAVE_READONLY_MEM
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index c764f43b71c..15f960c06ff 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -86,6 +86,19 @@ struct x86_instruction_info {
struct x86_emulate_ops {
/*
+ * read_gpr: read a general purpose register (rax - r15)
+ *
+ * @reg: gpr number.
+ */
+ ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg);
+ /*
+ * write_gpr: write a general purpose register (rax - r15)
+ *
+ * @reg: gpr number.
+ * @val: value to write.
+ */
+ void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val);
+ /*
* read_std: Read bytes of standard (non-emulated/special) memory.
* Used for descriptor reading.
* @addr: [IN ] Linear address from which to read.
@@ -200,8 +213,9 @@ typedef u32 __attribute__((vector_size(16))) sse128_t;
/* Type, address-of, and value of an instruction's operand. */
struct operand {
- enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
+ enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
unsigned int bytes;
+ unsigned int count;
union {
unsigned long orig_val;
u64 orig_val64;
@@ -221,6 +235,7 @@ struct operand {
char valptr[sizeof(unsigned long) + 2];
sse128_t vec_val;
u64 mm_val;
+ void *data;
};
};
@@ -236,14 +251,23 @@ struct read_cache {
unsigned long end;
};
+/* Execution mode, passed to the emulator. */
+enum x86emul_mode {
+ X86EMUL_MODE_REAL, /* Real mode. */
+ X86EMUL_MODE_VM86, /* Virtual 8086 mode. */
+ X86EMUL_MODE_PROT16, /* 16-bit protected mode. */
+ X86EMUL_MODE_PROT32, /* 32-bit protected mode. */
+ X86EMUL_MODE_PROT64, /* 64-bit (long) mode. */
+};
+
struct x86_emulate_ctxt {
- struct x86_emulate_ops *ops;
+ const struct x86_emulate_ops *ops;
/* Register state before/after emulation. */
unsigned long eflags;
unsigned long eip; /* eip before instruction emulation */
/* Emulated execution mode, represented by an X86EMUL_MODE value. */
- int mode;
+ enum x86emul_mode mode;
/* interruptibility state, as a result of execution of STI or MOV SS */
int interruptibility;
@@ -281,8 +305,10 @@ struct x86_emulate_ctxt {
bool rip_relative;
unsigned long _eip;
struct operand memop;
+ u32 regs_valid; /* bitmaps of registers in _regs[] that can be read */
+ u32 regs_dirty; /* bitmaps of registers in _regs[] that have been written */
/* Fields above regs are cleared together. */
- unsigned long regs[NR_VCPU_REGS];
+ unsigned long _regs[NR_VCPU_REGS];
struct operand *memopp;
struct fetch_cache fetch;
struct read_cache io_read;
@@ -293,17 +319,6 @@ struct x86_emulate_ctxt {
#define REPE_PREFIX 0xf3
#define REPNE_PREFIX 0xf2
-/* Execution mode, passed to the emulator. */
-#define X86EMUL_MODE_REAL 0 /* Real mode. */
-#define X86EMUL_MODE_VM86 1 /* Virtual 8086 mode. */
-#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */
-#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */
-#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
-
-/* any protected mode */
-#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
- X86EMUL_MODE_PROT64)
-
/* CPUID vendors */
#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
@@ -394,4 +409,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
u16 tss_selector, int idt_index, int reason,
bool has_error_code, u32 error_code);
int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq);
+void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt);
+void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt);
+
#endif /* _ASM_X86_KVM_X86_EMULATE_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1eaa6b05667..b2e11f45243 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -271,10 +271,24 @@ struct kvm_mmu {
union kvm_mmu_page_role base_role;
bool direct_map;
+ /*
+ * Bitmap; bit set = permission fault
+ * Byte index: page fault error code [4:1]
+ * Bit index: pte permissions in ACC_* format
+ */
+ u8 permissions[16];
+
u64 *pae_root;
u64 *lm_root;
u64 rsvd_bits_mask[2][4];
+ /*
+ * Bitmap: bit set = last pte in walk
+ * index[0:1]: level (zero-based)
+ * index[2]: pte.ps
+ */
+ u8 last_pte_bitmap;
+
bool nx;
u64 pdptrs[4]; /* pae */
@@ -398,12 +412,15 @@ struct kvm_vcpu_arch {
struct x86_emulate_ctxt emulate_ctxt;
bool emulate_regs_need_sync_to_vcpu;
bool emulate_regs_need_sync_from_vcpu;
+ int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
gpa_t time;
struct pvclock_vcpu_time_info hv_clock;
unsigned int hw_tsc_khz;
unsigned int time_offset;
struct page *time_page;
+ /* set guest stopped flag in pvclock flags field */
+ bool pvclock_set_guest_stopped_request;
struct {
u64 msr_val;
@@ -438,6 +455,7 @@ struct kvm_vcpu_arch {
unsigned long dr6;
unsigned long dr7;
unsigned long eff_db[KVM_NR_DB_REGS];
+ unsigned long guest_debug_dr7;
u64 mcg_cap;
u64 mcg_status;
@@ -484,14 +502,24 @@ struct kvm_vcpu_arch {
};
struct kvm_lpage_info {
- unsigned long rmap_pde;
int write_count;
};
struct kvm_arch_memory_slot {
+ unsigned long *rmap[KVM_NR_PAGE_SIZES];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
};
+struct kvm_apic_map {
+ struct rcu_head rcu;
+ u8 ldr_bits;
+ /* fields bellow are used to decode ldr values in different modes */
+ u32 cid_shift, cid_mask, lid_mask;
+ struct kvm_lapic *phys_map[256];
+ /* first index is cluster id second is cpu id in a cluster */
+ struct kvm_lapic *logical_map[16][16];
+};
+
struct kvm_arch {
unsigned int n_used_mmu_pages;
unsigned int n_requested_mmu_pages;
@@ -509,6 +537,8 @@ struct kvm_arch {
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
int vapics_in_nmi_mode;
+ struct mutex apic_map_lock;
+ struct kvm_apic_map *apic_map;
unsigned int tss_addr;
struct page *apic_access_page;
@@ -602,8 +632,7 @@ struct kvm_x86_ops {
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
- void (*set_guest_debug)(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg);
+ void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -941,6 +970,7 @@ extern bool kvm_rebooting;
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_age_hva(struct kvm *kvm, unsigned long hva);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 2f7712e08b1..eb3e9d85e1f 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -102,21 +102,21 @@ struct kvm_vcpu_pv_apf_data {
extern void kvmclock_init(void);
extern int kvm_register_clock(char *txt);
-#ifdef CONFIG_KVM_CLOCK
+#ifdef CONFIG_KVM_GUEST
bool kvm_check_and_clear_guest_paused(void);
#else
static inline bool kvm_check_and_clear_guest_paused(void)
{
return false;
}
-#endif /* CONFIG_KVMCLOCK */
+#endif /* CONFIG_KVM_GUEST */
/* This instruction is vmcall. On non-VT architectures, it will generate a
* trap that we will then rewrite to the appropriate instruction.
*/
#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
-/* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun
+/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
* instruction. The hypervisor may replace it with something else but only the
* instructions are guaranteed to be supported.
*
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8d7a619718b..a48ea05157d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -81,8 +81,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
-obj-$(CONFIG_KVM_GUEST) += kvm.o
-obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
+obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index c1d61ee4b4f..b3e5e51bc90 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -354,6 +354,7 @@ static void kvm_pv_guest_cpu_reboot(void *unused)
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
kvm_pv_disable_apf();
+ kvm_disable_steal_time();
}
static int kvm_pv_reboot_notify(struct notifier_block *nb,
@@ -396,9 +397,7 @@ void kvm_disable_steal_time(void)
#ifdef CONFIG_SMP
static void __init kvm_smp_prepare_boot_cpu(void)
{
-#ifdef CONFIG_KVM_CLOCK
WARN_ON(kvm_register_clock("primary cpu clock"));
-#endif
kvm_guest_cpu_init();
native_smp_prepare_boot_cpu();
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4f165479c45..d609be046b5 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -957,7 +957,7 @@ void __init setup_arch(char **cmdline_p)
initmem_init();
memblock_find_dma_reserve();
-#ifdef CONFIG_KVM_CLOCK
+#ifdef CONFIG_KVM_GUEST
kvmclock_init();
#endif
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a28f338843e..586f0005980 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -20,6 +20,7 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
+ depends on HIGH_RES_TIMERS
# for device assignment:
depends on PCI
# for TASKSTATS/TASK_DELAY_ACCT:
@@ -37,6 +38,7 @@ config KVM
select TASK_DELAY_ACCT
select PERF_EVENTS
select HAVE_KVM_MSI
+ select HAVE_KVM_CPU_RELAX_INTERCEPT
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 4f579e8dcac..04d30401c5c 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)
kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o)
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
- i8254.o timer.o cpuid.o pmu.o
+ i8254.o cpuid.o pmu.o
kvm-intel-y += vmx.o
kvm-amd-y += svm.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0595f1397b7..ec79e773342 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -316,7 +316,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
case 7: {
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
- /* Mask ebx against host capbability word 9 */
+ /* Mask ebx against host capability word 9 */
if (index == 0) {
entry->ebx &= kvm_supported_word9_x86_features;
cpuid_mask(&entry->ebx, 9);
@@ -397,8 +397,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
break;
}
case KVM_CPUID_SIGNATURE: {
- char signature[12] = "KVMKVMKVM\0\0";
- u32 *sigptr = (u32 *)signature;
+ static const char signature[12] = "KVMKVMKVM\0\0";
+ const u32 *sigptr = (const u32 *)signature;
entry->eax = KVM_CPUID_FEATURES;
entry->ebx = sigptr[0];
entry->ecx = sigptr[1];
@@ -484,10 +484,10 @@ struct kvm_cpuid_param {
u32 func;
u32 idx;
bool has_leaf_count;
- bool (*qualifier)(struct kvm_cpuid_param *param);
+ bool (*qualifier)(const struct kvm_cpuid_param *param);
};
-static bool is_centaur_cpu(struct kvm_cpuid_param *param)
+static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
{
return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
}
@@ -498,7 +498,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 *cpuid_entries;
int limit, nent = 0, r = -E2BIG, i;
u32 func;
- static struct kvm_cpuid_param param[] = {
+ static const struct kvm_cpuid_param param[] = {
{ .func = 0, .has_leaf_count = true },
{ .func = 0x80000000, .has_leaf_count = true },
{ .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true },
@@ -517,7 +517,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
r = 0;
for (i = 0; i < ARRAY_SIZE(param); i++) {
- struct kvm_cpuid_param *ent = &param[i];
+ const struct kvm_cpuid_param *ent = &param[i];
if (ent->qualifier && !ent->qualifier(ent))
continue;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a3b57a27be8..39171cb307e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -161,9 +161,9 @@ struct opcode {
u64 intercept : 8;
union {
int (*execute)(struct x86_emulate_ctxt *ctxt);
- struct opcode *group;
- struct group_dual *gdual;
- struct gprefix *gprefix;
+ const struct opcode *group;
+ const struct group_dual *gdual;
+ const struct gprefix *gprefix;
} u;
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};
@@ -202,6 +202,42 @@ struct gprefix {
#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
#define EFLG_RESERVED_ONE_MASK 2
+static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+ if (!(ctxt->regs_valid & (1 << nr))) {
+ ctxt->regs_valid |= 1 << nr;
+ ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
+ }
+ return ctxt->_regs[nr];
+}
+
+static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+ ctxt->regs_valid |= 1 << nr;
+ ctxt->regs_dirty |= 1 << nr;
+ return &ctxt->_regs[nr];
+}
+
+static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+ reg_read(ctxt, nr);
+ return reg_write(ctxt, nr);
+}
+
+static void writeback_registers(struct x86_emulate_ctxt *ctxt)
+{
+ unsigned reg;
+
+ for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
+ ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
+}
+
+static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
+{
+ ctxt->regs_dirty = 0;
+ ctxt->regs_valid = 0;
+}
+
/*
* Instruction emulation:
* Most instructions are emulated directly via a fragment of inline assembly
@@ -374,8 +410,8 @@ struct gprefix {
#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \
do { \
unsigned long _tmp; \
- ulong *rax = &(ctxt)->regs[VCPU_REGS_RAX]; \
- ulong *rdx = &(ctxt)->regs[VCPU_REGS_RDX]; \
+ ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX); \
+ ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX); \
\
__asm__ __volatile__ ( \
_PRE_EFLAGS("0", "5", "1") \
@@ -494,7 +530,7 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in
static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
{
- masked_increment(&ctxt->regs[VCPU_REGS_RSP], stack_mask(ctxt), inc);
+ masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}
static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -632,8 +668,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
la = seg_base(ctxt, addr.seg) + addr.ea;
switch (ctxt->mode) {
- case X86EMUL_MODE_REAL:
- break;
case X86EMUL_MODE_PROT64:
if (((signed long)la << 16) >> 16 != la)
return emulate_gp(ctxt, 0);
@@ -655,7 +689,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
goto bad;
} else {
- /* exapand-down segment */
+ /* expand-down segment */
if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
goto bad;
lim = desc.d ? 0xffffffff : 0xffff;
@@ -663,7 +697,10 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
goto bad;
}
cpl = ctxt->ops->cpl(ctxt);
- rpl = sel & 3;
+ if (ctxt->mode == X86EMUL_MODE_REAL)
+ rpl = 0;
+ else
+ rpl = sel & 3;
cpl = max(cpl, rpl);
if (!(desc.type & 8)) {
/* data segment */
@@ -688,9 +725,9 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
return X86EMUL_CONTINUE;
bad:
if (addr.seg == VCPU_SREG_SS)
- return emulate_ss(ctxt, addr.seg);
+ return emulate_ss(ctxt, sel);
else
- return emulate_gp(ctxt, addr.seg);
+ return emulate_gp(ctxt, sel);
}
static int linearize(struct x86_emulate_ctxt *ctxt,
@@ -786,14 +823,15 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
* pointer into the block that addresses the relevant register.
* @highbyte_regs specifies whether to decode AH,CH,DH,BH.
*/
-static void *decode_register(u8 modrm_reg, unsigned long *regs,
+static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
int highbyte_regs)
{
void *p;
- p = &regs[modrm_reg];
if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
- p = (unsigned char *)&regs[modrm_reg & 3] + 1;
+ p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
+ else
+ p = reg_rmw(ctxt, modrm_reg);
return p;
}
@@ -871,23 +909,23 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
ctxt->ops->get_fpu(ctxt);
switch (reg) {
- case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
- case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
- case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break;
- case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break;
- case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break;
- case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break;
- case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break;
- case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break;
+ case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
+ case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
+ case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
+ case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
+ case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
+ case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
+ case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
+ case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
#ifdef CONFIG_X86_64
- case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break;
- case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break;
- case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break;
- case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break;
- case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break;
- case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break;
- case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break;
- case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break;
+ case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
+ case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
+ case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
+ case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
+ case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
+ case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
+ case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
+ case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
#endif
default: BUG();
}
@@ -899,23 +937,23 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
{
ctxt->ops->get_fpu(ctxt);
switch (reg) {
- case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break;
- case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break;
- case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break;
- case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break;
- case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break;
- case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break;
- case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break;
- case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break;
+ case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
+ case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
+ case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
+ case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
+ case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
+ case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
+ case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
+ case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
#ifdef CONFIG_X86_64
- case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break;
- case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break;
- case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break;
- case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break;
- case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break;
- case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break;
- case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break;
- case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break;
+ case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
+ case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
+ case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
+ case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
+ case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
+ case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
+ case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
+ case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
#endif
default: BUG();
}
@@ -982,10 +1020,10 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
op->type = OP_REG;
if (ctxt->d & ByteOp) {
- op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs);
+ op->addr.reg = decode_register(ctxt, reg, highbyte_regs);
op->bytes = 1;
} else {
- op->addr.reg = decode_register(reg, ctxt->regs, 0);
+ op->addr.reg = decode_register(ctxt, reg, 0);
op->bytes = ctxt->op_bytes;
}
fetch_register_operand(op);
@@ -1020,8 +1058,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
if (ctxt->modrm_mod == 3) {
op->type = OP_REG;
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
- op->addr.reg = decode_register(ctxt->modrm_rm,
- ctxt->regs, ctxt->d & ByteOp);
+ op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, ctxt->d & ByteOp);
if (ctxt->d & Sse) {
op->type = OP_XMM;
op->bytes = 16;
@@ -1042,10 +1079,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
op->type = OP_MEM;
if (ctxt->ad_bytes == 2) {
- unsigned bx = ctxt->regs[VCPU_REGS_RBX];
- unsigned bp = ctxt->regs[VCPU_REGS_RBP];
- unsigned si = ctxt->regs[VCPU_REGS_RSI];
- unsigned di = ctxt->regs[VCPU_REGS_RDI];
+ unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
+ unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
+ unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
+ unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
/* 16-bit ModR/M decode. */
switch (ctxt->modrm_mod) {
@@ -1102,17 +1139,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
modrm_ea += insn_fetch(s32, ctxt);
else {
- modrm_ea += ctxt->regs[base_reg];
+ modrm_ea += reg_read(ctxt, base_reg);
adjust_modrm_seg(ctxt, base_reg);
}
if (index_reg != 4)
- modrm_ea += ctxt->regs[index_reg] << scale;
+ modrm_ea += reg_read(ctxt, index_reg) << scale;
} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
if (ctxt->mode == X86EMUL_MODE_PROT64)
ctxt->rip_relative = 1;
} else {
base_reg = ctxt->modrm_rm;
- modrm_ea += ctxt->regs[base_reg];
+ modrm_ea += reg_read(ctxt, base_reg);
adjust_modrm_seg(ctxt, base_reg);
}
switch (ctxt->modrm_mod) {
@@ -1179,24 +1216,21 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
int rc;
struct read_cache *mc = &ctxt->mem_read;
- while (size) {
- int n = min(size, 8u);
- size -= n;
- if (mc->pos < mc->end)
- goto read_cached;
+ if (mc->pos < mc->end)
+ goto read_cached;
- rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, n,
- &ctxt->exception);
- if (rc != X86EMUL_CONTINUE)
- return rc;
- mc->end += n;
+ WARN_ON((mc->end + size) >= sizeof(mc->data));
- read_cached:
- memcpy(dest, mc->data + mc->pos, n);
- mc->pos += n;
- dest += n;
- addr += n;
- }
+ rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
+ &ctxt->exception);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ mc->end += size;
+
+read_cached:
+ memcpy(dest, mc->data + mc->pos, size);
+ mc->pos += size;
return X86EMUL_CONTINUE;
}
@@ -1253,10 +1287,10 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
if (rc->pos == rc->end) { /* refill pio read ahead */
unsigned int in_page, n;
unsigned int count = ctxt->rep_prefix ?
- address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) : 1;
+ address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
in_page = (ctxt->eflags & EFLG_DF) ?
- offset_in_page(ctxt->regs[VCPU_REGS_RDI]) :
- PAGE_SIZE - offset_in_page(ctxt->regs[VCPU_REGS_RDI]);
+ offset_in_page(reg_read(ctxt, VCP