Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                    |    1
-rw-r--r--  arch/x86/include/asm/kvm.h          |   30
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h  |    2
-rw-r--r--  arch/x86/include/asm/kvm_host.h     |   34
-rw-r--r--  arch/x86/include/asm/svm.h          |    3
-rw-r--r--  arch/x86/include/asm/thread_info.h  |    7
-rw-r--r--  arch/x86/include/asm/vmx.h          |    4
-rw-r--r--  arch/x86/kernel/process.c           |    2
-rw-r--r--  arch/x86/kernel/signal.c            |    3
-rw-r--r--  arch/x86/kvm/Kconfig                |    1
-rw-r--r--  arch/x86/kvm/Makefile               |    3
-rw-r--r--  arch/x86/kvm/emulate.c              |  159
-rw-r--r--  arch/x86/kvm/i8254.c                |    2
-rw-r--r--  arch/x86/kvm/i8259.c                |   44
-rw-r--r--  arch/x86/kvm/irq.h                  |    7
-rw-r--r--  arch/x86/kvm/lapic.c                |    8
-rw-r--r--  arch/x86/kvm/mmu.c                  |    3
-rw-r--r--  arch/x86/kvm/paging_tmpl.h          |    1
-rw-r--r--  arch/x86/kvm/svm.c                  |  331
-rw-r--r--  arch/x86/kvm/trace.h                |  165
-rw-r--r--  arch/x86/kvm/vmx.c                  |  448
-rw-r--r--  arch/x86/kvm/x86.c                  |  550
22 files changed, 1310 insertions(+), 498 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 178084b4377..1b2182b4d5c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -51,6 +51,7 @@ config X86
select HAVE_KERNEL_LZMA
select HAVE_HW_BREAKPOINT
select HAVE_ARCH_KMEMCHECK
+ select HAVE_USER_RETURN_NOTIFIER
config OUTPUT_FORMAT
string
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 4a5fe914dc5..950df434763 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -19,6 +19,8 @@
#define __KVM_HAVE_MSIX
#define __KVM_HAVE_MCE
#define __KVM_HAVE_PIT_STATE2
+#define __KVM_HAVE_XEN_HVM
+#define __KVM_HAVE_VCPU_EVENTS
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
@@ -79,6 +81,7 @@ struct kvm_ioapic_state {
#define KVM_IRQCHIP_PIC_MASTER 0
#define KVM_IRQCHIP_PIC_SLAVE 1
#define KVM_IRQCHIP_IOAPIC 2
+#define KVM_NR_IRQCHIPS 3
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
@@ -250,4 +253,31 @@ struct kvm_reinject_control {
__u8 pit_reinject;
__u8 reserved[31];
};
+
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+ struct {
+ __u8 injected;
+ __u8 nr;
+ __u8 has_error_code;
+ __u8 pad;
+ __u32 error_code;
+ } exception;
+ struct {
+ __u8 injected;
+ __u8 nr;
+ __u8 soft;
+ __u8 pad;
+ } interrupt;
+ struct {
+ __u8 injected;
+ __u8 pending;
+ __u8 masked;
+ __u8 pad;
+ } nmi;
+ __u32 sipi_vector;
+ __u32 flags;
+ __u32 reserved[10];
+};
+
#endif /* _ASM_X86_KVM_H */
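The kvm_vcpu_events layout above is what userspace reads and writes through the new KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS ioctls. A minimal sketch of how a VMM might snapshot and restore pending exception/interrupt/NMI state, assuming vcpu_fd comes from a prior KVM_CREATE_VCPU; error handling and the actual serialization are elided:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int snapshot_vcpu_events(int vcpu_fd, struct kvm_vcpu_events *ev)
    {
        /* Read pending event state out of the kernel... */
        if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, ev) < 0)
            return -1;
        /* ...and later write it back, e.g. after migration. */
        return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, ev);
    }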
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index b7ed2c42311..7c18e1230f5 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -129,7 +129,7 @@ struct decode_cache {
u8 seg_override;
unsigned int d;
unsigned long regs[NR_VCPU_REGS];
- unsigned long eip;
+ unsigned long eip, eip_orig;
/* modrm */
u8 modrm;
u8 modrm_mod;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d83892226f7..4f865e8b854 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -354,7 +354,6 @@ struct kvm_vcpu_arch {
unsigned int time_offset;
struct page *time_page;
- bool singlestep; /* guest is single stepped by KVM */
bool nmi_pending;
bool nmi_injected;
@@ -371,6 +370,10 @@ struct kvm_vcpu_arch {
u64 mcg_status;
u64 mcg_ctl;
u64 *mce_banks;
+
+ /* used for guest single stepping over the given code position */
+ u16 singlestep_cs;
+ unsigned long singlestep_rip;
};
struct kvm_mem_alias {
@@ -397,7 +400,6 @@ struct kvm_arch{
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
- struct hlist_head irq_ack_notifier_list;
int vapics_in_nmi_mode;
unsigned int tss_addr;
@@ -410,8 +412,10 @@ struct kvm_arch{
gpa_t ept_identity_map_addr;
unsigned long irq_sources_bitmap;
- unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
u64 vm_init_tsc;
+ s64 kvmclock_offset;
+
+ struct kvm_xen_hvm_config xen_hvm_config;
};
struct kvm_vm_stat {
@@ -461,7 +465,7 @@ struct descriptor_table {
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
- void (*hardware_enable)(void *dummy); /* __init */
+ int (*hardware_enable)(void *dummy);
void (*hardware_disable)(void *dummy);
void (*check_processor_compatibility)(void *rtn);
int (*hardware_setup)(void); /* __init */
@@ -477,8 +481,8 @@ struct kvm_x86_ops {
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
- int (*set_guest_debug)(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg);
+ void (*set_guest_debug)(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -506,8 +510,8 @@ struct kvm_x86_ops {
void (*tlb_flush)(struct kvm_vcpu *vcpu);
- void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
- int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+ void (*run)(struct kvm_vcpu *vcpu);
+ int (*handle_exit)(struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
@@ -519,6 +523,8 @@ struct kvm_x86_ops {
bool has_error_code, u32 error_code);
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+ bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
+ void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
@@ -568,7 +574,7 @@ enum emulation_result {
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
-int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
+int emulate_instruction(struct kvm_vcpu *vcpu,
unsigned long cr2, u16 error_code, int emulation_type);
void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context);
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
@@ -585,9 +591,9 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
struct x86_emulate_ctxt;
-int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in,
int size, unsigned port);
-int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
int size, unsigned long count, int down,
gva_t address, int rep, unsigned port);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
@@ -616,6 +622,9 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
+void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
+
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
@@ -802,4 +811,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
+void kvm_define_shared_msr(unsigned index, u32 msr);
+void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+
#endif /* _ASM_X86_KVM_HOST_H */
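kvm_define_shared_msr() and kvm_set_shared_msr() are the entry points to the shared-MSR machinery driven by the user-return notifier selected above. A sketch of the assumed calling pattern from a vendor module; the index/MSR pairing here is illustrative, not the real vmx.c table:

    /* Illustrative only: declare which MSRs are shared at setup time... */
    static const u32 demo_shared_msrs[] = { MSR_STAR, MSR_LSTAR, MSR_CSTAR };

    static void __init demo_setup_shared_msrs(void)
    {
        unsigned i;

        for (i = 0; i < ARRAY_SIZE(demo_shared_msrs); i++)
            kvm_define_shared_msr(i, demo_shared_msrs[i]);
    }

    /* ...then load guest values cheaply on vcpu entry; host values are
     * restored lazily, only when the CPU really returns to userspace. */
    static void demo_load_guest_msr(unsigned index, u64 guest_val)
    {
        kvm_set_shared_msr(index, guest_val, -1ull);
    }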
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 85574b7c1bc..1fecb7e6113 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -57,7 +57,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u16 intercept_dr_write;
u32 intercept_exceptions;
u64 intercept;
- u8 reserved_1[44];
+ u8 reserved_1[42];
+ u16 pause_filter_count;
u64 iopm_base_pa;
u64 msrpm_base_pa;
u64 tsc_offset;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index d27d0a2fec4..375c917c37d 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -83,6 +83,7 @@ struct thread_info {
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
+#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* 32bit process */
#define TIF_FORK 18 /* ret_from_fork */
@@ -107,6 +108,7 @@ struct thread_info {
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
+#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_FORK (1 << TIF_FORK)
@@ -142,13 +144,14 @@ struct thread_info {
/* Only used for 64 bit */
#define _TIF_DO_NOTIFY_MASK \
- (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
+ (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \
+ _TIF_USER_RETURN_NOTIFY)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
-#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
+#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
#define PREEMPT_ACTIVE 0x10000000
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 272514c2d45..2b4945419a8 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -56,6 +56,7 @@
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define PIN_BASED_EXT_INTR_MASK 0x00000001
@@ -144,6 +145,8 @@ enum vmcs_field {
VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
TPR_THRESHOLD = 0x0000401c,
SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
+ PLE_GAP = 0x00004020,
+ PLE_WINDOW = 0x00004022,
VM_INSTRUCTION_ERROR = 0x00004400,
VM_EXIT_REASON = 0x00004402,
VM_EXIT_INTR_INFO = 0x00004404,
@@ -248,6 +251,7 @@ enum vmcs_field {
#define EXIT_REASON_MSR_READ 31
#define EXIT_REASON_MSR_WRITE 32
#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_PAUSE_INSTRUCTION 40
#define EXIT_REASON_MCE_DURING_VMENTRY 41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
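SECONDARY_EXEC_PAUSE_LOOP_EXITING together with the new PLE_GAP and PLE_WINDOW fields implements pause-loop exiting: a guest spinning on PAUSE for longer than the window triggers EXIT_REASON_PAUSE_INSTRUCTION. A sketch of how the control could be programmed; the constants are illustrative, the real defaults are module parameters in vmx.c:

    #define DEMO_PLE_GAP    41      /* max TSC delta between PAUSEs in one loop */
    #define DEMO_PLE_WINDOW 4096    /* max TSC ticks spent spinning before exit */

    static void demo_enable_ple(u32 *secondary_exec_ctl)
    {
        *secondary_exec_ctl |= SECONDARY_EXEC_PAUSE_LOOP_EXITING;
        vmcs_write32(PLE_GAP, DEMO_PLE_GAP);
        vmcs_write32(PLE_WINDOW, DEMO_PLE_WINDOW);
    }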
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 744508e7cfd..5e2ba634ea1 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -9,6 +9,7 @@
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
+#include <linux/user-return-notifier.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/system.h>
@@ -209,6 +210,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
*/
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
+ propagate_user_return_notify(prev_p, next_p);
}
int sys_fork(struct pt_regs *regs)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index fbf3b07c856..74fe6d86dc5 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -19,6 +19,7 @@
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/uaccess.h>
+#include <linux/user-return-notifier.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
@@ -863,6 +864,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (current->replacement_session_keyring)
key_replace_session_keyring();
}
+ if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
+ fire_user_return_notifiers();
#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
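fire_user_return_notifiers() runs callbacks registered through the new user-return-notifier API; together with TIF_USER_RETURN_NOTIFY above, this lets a subsystem defer restoring host state until the thread actually heads back to userspace. A sketch of a consumer; in real use the notifier would live in per-cpu state, as in KVM's shared-MSR block:

    #include <linux/user-return-notifier.h>

    static void demo_on_user_return(struct user_return_notifier *urn)
    {
        /* Restore host-side state here, then disarm. */
        user_return_notifier_unregister(urn);
    }

    static struct user_return_notifier demo_urn = {
        .on_user_return = demo_on_user_return,
    };

    static void demo_arm(void)
    {
        /* Sets TIF_USER_RETURN_NOTIFY on current, so do_notify_resume()
         * calls demo_on_user_return() before reaching userspace. */
        user_return_notifier_register(&demo_urn);
    }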
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b84e571f417..4cd49833246 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -28,6 +28,7 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
+ select USER_RETURN_NOTIFIER
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 0e7fe78d0f7..31a7035c4bd 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,7 +6,8 @@ CFLAGS_svm.o := -I.
CFLAGS_vmx.o := -I.
kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
- coalesced_mmio.o irq_comm.o eventfd.o)
+ coalesced_mmio.o irq_comm.o eventfd.o \
+ assigned-dev.o)
kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1be5cd640e9..7e8faea4651 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -75,6 +75,8 @@
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
#define GroupMask 0xff /* Group number stored in bits 0:7 */
+/* Misc flags */
+#define No64 (1<<28)
/* Source 2 operand type */
#define Src2None (0<<29)
#define Src2CL (1<<29)
@@ -92,19 +94,23 @@ static u32 opcode_table[256] = {
/* 0x00 - 0x07 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
+ ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
+ ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x08 - 0x0F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- 0, 0, 0, 0,
+ ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
+ ImplicitOps | Stack | No64, 0,
/* 0x10 - 0x17 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
+ ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
+ ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x18 - 0x1F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
+ ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
+ ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x20 - 0x27 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -133,7 +139,8 @@ static u32 opcode_table[256] = {
DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
/* 0x60 - 0x67 */
- 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
+ ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
+ 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
0, 0, 0, 0,
/* 0x68 - 0x6F */
SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
@@ -158,7 +165,7 @@ static u32 opcode_table[256] = {
/* 0x90 - 0x97 */
DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
/* 0x98 - 0x9F */
- 0, 0, SrcImm | Src2Imm16, 0,
+ 0, 0, SrcImm | Src2Imm16 | No64, 0,
ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
/* 0xA0 - 0xA7 */
ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
@@ -185,7 +192,7 @@ static u32 opcode_table[256] = {
ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
/* 0xC8 - 0xCF */
0, 0, 0, ImplicitOps | Stack,
- ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps,
+ ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
/* 0xD0 - 0xD7 */
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@@ -198,7 +205,7 @@ static u32 opcode_table[256] = {
ByteOp | SrcImmUByte, SrcImmUByte,
/* 0xE8 - 0xEF */
SrcImm | Stack, SrcImm | ImplicitOps,
- SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps,
+ SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
/* 0xF0 - 0xF7 */
@@ -244,11 +251,13 @@ static u32 twobyte_table[256] = {
/* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xA0 - 0xA7 */
- 0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+ ImplicitOps | Stack, ImplicitOps | Stack,
+ 0, DstMem | SrcReg | ModRM | BitOp,
DstMem | SrcReg | Src2ImmByte | ModRM,
DstMem | SrcReg | Src2CL | ModRM, 0, 0,
/* 0xA8 - 0xAF */
- 0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+ ImplicitOps | Stack, ImplicitOps | Stack,
+ 0, DstMem | SrcReg | ModRM | BitOp,
DstMem | SrcReg | Src2ImmByte | ModRM,
DstMem | SrcReg | Src2CL | ModRM,
ModRM, 0,
@@ -613,6 +622,9 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
{
int rc = 0;
+ /* x86 instructions are limited to 15 bytes. */
+ if (eip + size - ctxt->decode.eip_orig > 15)
+ return X86EMUL_UNHANDLEABLE;
eip += ctxt->cs_base;
while (size--) {
rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
@@ -871,7 +883,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
/* Shadow copy of register state. Committed on successful emulation. */
memset(c, 0, sizeof(struct decode_cache));
- c->eip = kvm_rip_read(ctxt->vcpu);
+ c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu);
ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
@@ -962,6 +974,11 @@ done_prefixes:
}
}
+ if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
+ kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");
+ return -1;
+ }
+
if (c->d & Group) {
group = c->d & GroupMask;
c->modrm = insn_fetch(u8, 1, c->eip);
@@ -1186,6 +1203,69 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
return rc;
}
+static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
+{
+ struct decode_cache *c = &ctxt->decode;
+ struct kvm_segment segment;
+
+ kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);
+
+ c->src.val = segment.selector;
+ emulate_push(ctxt);
+}
+
+static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops, int seg)
+{
+ struct decode_cache *c = &ctxt->decode;
+ unsigned long selector;
+ int rc;
+
+ rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
+ if (rc != 0)
+ return rc;
+
+ rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg);
+ return rc;
+}
+
+static void emulate_pusha(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ unsigned long old_esp = c->regs[VCPU_REGS_RSP];
+ int reg = VCPU_REGS_RAX;
+
+ while (reg <= VCPU_REGS_RDI) {
+ (reg == VCPU_REGS_RSP) ?
+ (c->src.val = old_esp) : (c->src.val = c->regs[reg]);
+
+ emulate_push(ctxt);
+ ++reg;
+ }
+}
+
+static int emulate_popa(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int rc = 0;
+ int reg = VCPU_REGS_RDI;
+
+ while (reg >= VCPU_REGS_RAX) {
+ if (reg == VCPU_REGS_RSP) {
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP],
+ c->op_bytes);
+ --reg;
+ }
+
+ rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
+ if (rc != 0)
+ break;
+ --reg;
+ }
+ return rc;
+}
+
static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
@@ -1707,18 +1787,45 @@ special_insn:
add: /* add */
emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
break;
+ case 0x06: /* push es */
+ emulate_push_sreg(ctxt, VCPU_SREG_ES);
+ break;
+ case 0x07: /* pop es */
+ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
+ if (rc != 0)
+ goto done;
+ break;
case 0x08 ... 0x0d:
or: /* or */
emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
break;
+ case 0x0e: /* push cs */
+ emulate_push_sreg(ctxt, VCPU_SREG_CS);
+ break;
case 0x10 ... 0x15:
adc: /* adc */
emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
break;
+ case 0x16: /* push ss */
+ emulate_push_sreg(ctxt, VCPU_SREG_SS);
+ break;
+ case 0x17: /* pop ss */
+ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
+ if (rc != 0)
+ goto done;
+ break;
case 0x18 ... 0x1d:
sbb: /* sbb */
emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
break;
+ case 0x1e: /* push ds */
+ emulate_push_sreg(ctxt, VCPU_SREG_DS);
+ break;
+ case 0x1f: /* pop ds */
+ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
+ if (rc != 0)
+ goto done;
+ break;
case 0x20 ... 0x25:
and: /* and */
emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
@@ -1750,6 +1857,14 @@ special_insn:
if (rc != 0)
goto done;
break;
+ case 0x60: /* pusha */
+ emulate_pusha(ctxt);
+ break;
+ case 0x61: /* popa */
+ rc = emulate_popa(ctxt, ops);
+ if (rc != 0)
+ goto done;
+ break;
case 0x63: /* movsxd */
if (ctxt->mode != X86EMUL_MODE_PROT64)
goto cannot_emulate;
@@ -1761,7 +1876,7 @@ special_insn:
break;
case 0x6c: /* insb */
case 0x6d: /* insw/insd */
- if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
+ if (kvm_emulate_pio_string(ctxt->vcpu,
1,
(c->d & ByteOp) ? 1 : c->op_bytes,
c->rep_prefix ?
@@ -1777,7 +1892,7 @@ special_insn:
return 0;
case 0x6e: /* outsb */
case 0x6f: /* outsw/outsd */
- if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
+ if (kvm_emulate_pio_string(ctxt->vcpu,
0,
(c->d & ByteOp) ? 1 : c->op_bytes,
c->rep_prefix ?
@@ -2070,7 +2185,7 @@ special_insn:
case 0xef: /* out (e/r)ax,dx */
port = c->regs[VCPU_REGS_RDX];
io_dir_in = 0;
- do_io: if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in,
+ do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
(c->d & ByteOp) ? 1 : c->op_bytes,
port) != 0) {
c->eip = saved_eip;
@@ -2297,6 +2412,14 @@ twobyte_insn:
jmp_rel(c, c->src.val);
c->dst.type = OP_NONE;
break;
+ case 0xa0: /* push fs */
+ emulate_push_sreg(ctxt, VCPU_SREG_FS);
+ break;
+ case 0xa1: /* pop fs */
+ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
+ if (rc != 0)
+ goto done;
+ break;
case 0xa3:
bt: /* bt */
c->dst.type = OP_NONE;
@@ -2308,6 +2431,14 @@ twobyte_insn:
case 0xa5: /* shld cl, r, r/m */
emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
break;
+ case 0xa8: /* push gs */
+ emulate_push_sreg(ctxt, VCPU_SREG_GS);
+ break;
+ case 0xa9: /* pop gs */
+ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
+ if (rc != 0)
+ goto done;
+ break;
case 0xab:
bts: /* bts */
/* only subword offset */
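Of the emulator changes above, the do_insn_fetch() length check is the security-relevant one: x86 instructions cannot exceed 15 bytes, and without the check a guest could stall the fetch loop with an endless run of prefixes. A hypothetical byte sequence the new check rejects:

    /* 15 operand-size prefixes followed by a NOP: 16 bytes total, so
     * do_insn_fetch() now returns X86EMUL_UNHANDLEABLE instead of decoding. */
    static const u8 overlong_insn[16] = {
        0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
        0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,  /* prefixes */
        0x90                                       /* nop */
    };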
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 144e7f60b5e..fab7440c9bb 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -688,10 +688,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
struct kvm_vcpu *vcpu;
int i;
- mutex_lock(&kvm->irq_lock);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
- mutex_unlock(&kvm->irq_lock);
/*
* Provides NMI watchdog support via Virtual Wire mode.
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 01f15168280..d057c0cbd24 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -38,7 +38,15 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
s->isr_ack |= (1 << irq);
if (s != &s->pics_state->pics[0])
irq += 8;
+ /*
+ * We are dropping the lock while calling ack notifiers because ack
+ * notifier callbacks for assigned devices call into the PIC recursively.
+ * Other interrupts may be delivered to the PIC while the lock is dropped,
+ * but that should be safe since PIC state is already updated at this stage.
+ */
+ spin_unlock(&s->pics_state->lock);
kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
+ spin_lock(&s->pics_state->lock);
}
void kvm_pic_clear_isr_ack(struct kvm *kvm)
@@ -176,16 +184,18 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
static inline void pic_intack(struct kvm_kpic_state *s, int irq)
{
s->isr |= 1 << irq;
- if (s->auto_eoi) {
- if (s->rotate_on_auto_eoi)
- s->priority_add = (irq + 1) & 7;
- pic_clear_isr(s, irq);
- }
/*
* We don't clear a level sensitive interrupt here
*/
if (!(s->elcr & (1 << irq)))
s->irr &= ~(1 << irq);
+
+ if (s->auto_eoi) {
+ if (s->rotate_on_auto_eoi)
+ s->priority_add = (irq + 1) & 7;
+ pic_clear_isr(s, irq);
+ }
+
}
int kvm_pic_read_irq(struct kvm *kvm)
@@ -225,22 +235,11 @@ int kvm_pic_read_irq(struct kvm *kvm)
void kvm_pic_reset(struct kvm_kpic_state *s)
{
- int irq, irqbase, n;
+ int irq;
struct kvm *kvm = s->pics_state->irq_request_opaque;
struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu;
+ u8 irr = s->irr, isr = s->isr;
- if (s == &s->pics_state->pics[0])
- irqbase = 0;
- else
- irqbase = 8;
-
- for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
- if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
- if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
- n = irq + irqbase;
- kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
- }
- }
s->last_irr = 0;
s->irr = 0;
s->imr = 0;
@@ -256,6 +255,13 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
s->rotate_on_auto_eoi = 0;
s->special_fully_nested_mode = 0;
s->init4 = 0;
+
+ for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
+ if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
+ if (irr & (1 << irq) || isr & (1 << irq)) {
+ pic_clear_isr(s, irq);
+ }
+ }
}
static void pic_ioport_write(void *opaque, u32 addr, u32 val)
@@ -298,9 +304,9 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
priority = get_priority(s, s->isr);
if (priority != 8) {
irq = (priority + s->priority_add) & 7;
- pic_clear_isr(s, irq);
if (cmd == 5)
s->priority_add = (irq + 1) & 7;
+ pic_clear_isr(s, irq);
pic_update_irq(s->pics_state);
}
break;
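The pic_clear_isr() change above encodes a re-entrancy rule: the function is entered with pics_state->lock held, brings PIC state to a consistent point, and only then drops the lock around the ack notifiers, which may call back into the PIC. A minimal restatement of that shape, with hypothetical names:

    #include <linux/spinlock.h>

    struct demo_pic {
        spinlock_t lock;
        u32 isr;
    };

    /* Called with s->lock held; returns with it held again. */
    static void demo_clear_isr(struct demo_pic *s, int irq,
                               void (*notify)(struct demo_pic *, int))
    {
        s->isr &= ~(1u << irq);    /* state consistent before unlock */
        spin_unlock(&s->lock);
        notify(s, irq);            /* may re-enter and take s->lock */
        spin_lock(&s->lock);
    }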
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 7d6058a2fd3..be399e207d5 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -71,6 +71,7 @@ struct kvm_pic {
int output; /* intr from master PIC */
struct kvm_io_device dev;
void (*ack_notifier)(void *opaque, int irq);
+ unsigned long irq_states[16];
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
@@ -85,7 +86,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
static inline int irqchip_in_kernel(struct kvm *kvm)
{
- return pic_irqchip(kvm) != NULL;
+ int ret;
+
+ ret = (pic_irqchip(kvm) != NULL);
+ smp_rmb();
+ return ret;
}
void kvm_pic_reset(struct kvm_kpic_state *s);
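The smp_rmb() added to irqchip_in_kernel() only helps if it pairs with a write barrier on the creation side, so a reader who sees a non-NULL vpic also sees a fully initialized one. The matching write presumably sits in the KVM_CREATE_IRQCHIP path; the assumed shape, as a sketch rather than the verbatim x86.c code:

    static int demo_create_irqchip(struct kvm *kvm)
    {
        struct kvm_pic *vpic = kvm_create_pic(kvm);

        if (!vpic)
            return -ENOMEM;
        /* Publish only after the PIC is fully initialized; pairs with
         * the smp_rmb() in irqchip_in_kernel(). */
        smp_wmb();
        kvm->arch.vpic = vpic;
        return 0;
    }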
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 23c217692ea..cd60c0bd1b3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -32,7 +32,6 @@
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/atomic.h>
-#include <asm/apicdef.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "trace.h"
@@ -471,11 +470,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
trigger_mode = IOAPIC_LEVEL_TRIG;
else
trigger_mode = IOAPIC_EDGE_TRIG;
- if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
- mutex_lock(&apic->vcpu->kvm->irq_lock);
+ if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
- mutex_unlock(&apic->vcpu->kvm->irq_lock);
- }
}
static void apic_send_ipi(struct kvm_lapic *apic)
@@ -504,9 +500,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
irq.vector);
- mutex_lock(&apic->vcpu->kvm->irq_lock);
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
- mutex_unlock(&apic->vcpu->kvm->irq_lock);
}
static u32 apic_get_tmcct(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 818b92ad82c..4c3e5b2314c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2789,7 +2789,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
if (r)
goto out;
- er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
+ er = emulate_instruction(vcpu, cr2, error_code, 0);
switch (er) {
case EMULATE_DONE:
@@ -2800,6 +2800,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
case EMULATE_FAIL:
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
return 0;
default:
BUG();
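Initializing ndata matters on the userspace side, which would otherwise read an uninitialized count. A hypothetical VMM-side helper that relies on it:

    #include <stdio.h>
    #include <linux/kvm.h>

    static void demo_dump_emulation_error(const struct kvm_run *run)
    {
        __u32 i;

        if (run->exit_reason != KVM_EXIT_INTERNAL_ERROR ||
            run->internal.suberror != KVM_INTERNAL_ERROR_EMULATION)
            return;
        /* ndata is now guaranteed to be initialized (0 here). */
        for (i = 0; i < run->internal.ndata; i++)
            fprintf(stderr, "data[%u] = 0x%llx\n", i,
                    (unsigned long long)run->internal.data[i]);
    }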
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 72558f8ff3f..a6017132fba 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -467,7 +467,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
level = iterator.level;
sptep = iterator.sptep;
- /* FIXME: properly handle invlpg on large guest pages */
if (level == PT_PAGE_TABLE_LEVEL ||
((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c17404add91..3de0b37ec03 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c