diff options
| author | Sage Weil <sage@inktank.com> | 2013-08-15 11:11:45 -0700 | 
|---|---|---|
| committer | Sage Weil <sage@inktank.com> | 2013-08-15 11:11:45 -0700 | 
| commit | ee3e542fec6e69bc9fb668698889a37d93950ddf (patch) | |
| tree | e74ee766a4764769ef1d3d45d266b4dea64101d3 /arch/x86/kvm/emulate.c | |
| parent | fe2a801b50c0bb8039d627e5ae1fec249d10ff39 (diff) | |
| parent | f1d6e17f540af37bb1891480143669ba7636c4cf (diff) | |
Merge remote-tracking branch 'linus/master' into testing
Diffstat (limited to 'arch/x86/kvm/emulate.c')
| -rw-r--r-- | arch/x86/kvm/emulate.c | 391 | 
1 files changed, 109 insertions, 282 deletions
| diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5953dcea752..2bc1e81045b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -61,6 +61,8 @@  #define OpMem8            26ull  /* 8-bit zero extended memory operand */  #define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */  #define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */ +#define OpAccLo           29ull  /* Low part of extended acc (AX/AX/EAX/RAX) */ +#define OpAccHi           30ull  /* High part of extended acc (-/DX/EDX/RDX) */  #define OpBits             5  /* Width of operand field */  #define OpMask             ((1ull << OpBits) - 1) @@ -86,6 +88,7 @@  #define DstMem64    (OpMem64 << DstShift)  #define DstImmUByte (OpImmUByte << DstShift)  #define DstDX       (OpDX << DstShift) +#define DstAccLo    (OpAccLo << DstShift)  #define DstMask     (OpMask << DstShift)  /* Source operand type. */  #define SrcShift    6 @@ -108,6 +111,7 @@  #define SrcImm64    (OpImm64 << SrcShift)  #define SrcDX       (OpDX << SrcShift)  #define SrcMem8     (OpMem8 << SrcShift) +#define SrcAccHi    (OpAccHi << SrcShift)  #define SrcMask     (OpMask << SrcShift)  #define BitOp       (1<<11)  #define MemAbs      (1<<12)      /* Memory operand is absolute displacement */ @@ -138,6 +142,7 @@  /* Source 2 operand type */  #define Src2Shift   (31)  #define Src2None    (OpNone << Src2Shift) +#define Src2Mem     (OpMem << Src2Shift)  #define Src2CL      (OpCL << Src2Shift)  #define Src2ImmByte (OpImmByte << Src2Shift)  #define Src2One     (OpOne << Src2Shift) @@ -155,6 +160,9 @@  #define Avx         ((u64)1 << 43)  /* Advanced Vector Extensions */  #define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */  #define NoWrite     ((u64)1 << 45)  /* No writeback */ +#define SrcWrite    ((u64)1 << 46)  /* Write back src operand */ + +#define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)  #define X2(x...) x, x  #define X3(x...) X2(x), x @@ -171,10 +179,11 @@  /*   * fastop functions have a special calling convention:   * - * dst:    [rdx]:rax  (in/out) - * src:    rbx        (in/out) + * dst:    rax        (in/out) + * src:    rdx        (in/out)   * src2:   rcx        (in)   * flags:  rflags     (in/out) + * ex:     rsi        (in:fastop pointer, out:zero if exception)   *   * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for   * different operand sizes can be reached by calculation, rather than a jump @@ -276,174 +285,17 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)  }  /* - * Instruction emulation: - * Most instructions are emulated directly via a fragment of inline assembly - * code. This allows us to save/restore EFLAGS and thus very easily pick up - * any modified flags. - */ - -#if defined(CONFIG_X86_64) -#define _LO32 "k"		/* force 32-bit operand */ -#define _STK  "%%rsp"		/* stack pointer */ -#elif defined(__i386__) -#define _LO32 ""		/* force 32-bit operand */ -#define _STK  "%%esp"		/* stack pointer */ -#endif - -/*   * These EFLAGS bits are restored from saved value during emulation, and   * any changes are written back to the saved value after emulation.   */  #define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF) -/* Before executing instruction: restore necessary bits in EFLAGS. */ -#define _PRE_EFLAGS(_sav, _msk, _tmp)					\ -	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \ -	"movl %"_sav",%"_LO32 _tmp"; "                                  \ -	"push %"_tmp"; "                                                \ -	"push %"_tmp"; "                                                \ -	"movl %"_msk",%"_LO32 _tmp"; "                                  \ -	"andl %"_LO32 _tmp",("_STK"); "                                 \ -	"pushf; "                                                       \ -	"notl %"_LO32 _tmp"; "                                          \ -	"andl %"_LO32 _tmp",("_STK"); "                                 \ -	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\ -	"pop  %"_tmp"; "                                                \ -	"orl  %"_LO32 _tmp",("_STK"); "                                 \ -	"popf; "                                                        \ -	"pop  %"_sav"; " - -/* After executing instruction: write-back necessary bits in EFLAGS. */ -#define _POST_EFLAGS(_sav, _msk, _tmp) \ -	/* _sav |= EFLAGS & _msk; */		\ -	"pushf; "				\ -	"pop  %"_tmp"; "			\ -	"andl %"_msk",%"_LO32 _tmp"; "		\ -	"orl  %"_LO32 _tmp",%"_sav"; " -  #ifdef CONFIG_X86_64  #define ON64(x) x  #else  #define ON64(x)  #endif -#define ____emulate_2op(ctxt, _op, _x, _y, _suffix, _dsttype)	\ -	do {								\ -		__asm__ __volatile__ (					\ -			_PRE_EFLAGS("0", "4", "2")			\ -			_op _suffix " %"_x"3,%1; "			\ -			_POST_EFLAGS("0", "4", "2")			\ -			: "=m" ((ctxt)->eflags),			\ -			  "+q" (*(_dsttype*)&(ctxt)->dst.val),		\ -			  "=&r" (_tmp)					\ -			: _y ((ctxt)->src.val), "i" (EFLAGS_MASK));	\ -	} while (0) - - -/* Raw emulation: instruction has two explicit operands. */ -#define __emulate_2op_nobyte(ctxt,_op,_wx,_wy,_lx,_ly,_qx,_qy)		\ -	do {								\ -		unsigned long _tmp;					\ -									\ -		switch ((ctxt)->dst.bytes) {				\ -		case 2:							\ -			____emulate_2op(ctxt,_op,_wx,_wy,"w",u16);	\ -			break;						\ -		case 4:							\ -			____emulate_2op(ctxt,_op,_lx,_ly,"l",u32);	\ -			break;						\ -		case 8:							\ -			ON64(____emulate_2op(ctxt,_op,_qx,_qy,"q",u64)); \ -			break;						\ -		}							\ -	} while (0) - -#define __emulate_2op(ctxt,_op,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy)		     \ -	do {								     \ -		unsigned long _tmp;					     \ -		switch ((ctxt)->dst.bytes) {				     \ -		case 1:							     \ -			____emulate_2op(ctxt,_op,_bx,_by,"b",u8);	     \ -			break;						     \ -		default:						     \ -			__emulate_2op_nobyte(ctxt, _op,			     \ -					     _wx, _wy, _lx, _ly, _qx, _qy);  \ -			break;						     \ -		}							     \ -	} while (0) - -/* Source operand is byte-sized and may be restricted to just %cl. */ -#define emulate_2op_SrcB(ctxt, _op)					\ -	__emulate_2op(ctxt, _op, "b", "c", "b", "c", "b", "c", "b", "c") - -/* Source operand is byte, word, long or quad sized. */ -#define emulate_2op_SrcV(ctxt, _op)					\ -	__emulate_2op(ctxt, _op, "b", "q", "w", "r", _LO32, "r", "", "r") - -/* Source operand is word, long or quad sized. */ -#define emulate_2op_SrcV_nobyte(ctxt, _op)				\ -	__emulate_2op_nobyte(ctxt, _op, "w", "r", _LO32, "r", "", "r") - -/* Instruction has three operands and one operand is stored in ECX register */ -#define __emulate_2op_cl(ctxt, _op, _suffix, _type)		\ -	do {								\ -		unsigned long _tmp;					\ -		_type _clv  = (ctxt)->src2.val;				\ -		_type _srcv = (ctxt)->src.val;				\ -		_type _dstv = (ctxt)->dst.val;				\ -									\ -		__asm__ __volatile__ (					\ -			_PRE_EFLAGS("0", "5", "2")			\ -			_op _suffix " %4,%1 \n"				\ -			_POST_EFLAGS("0", "5", "2")			\ -			: "=m" ((ctxt)->eflags), "+r" (_dstv), "=&r" (_tmp) \ -			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)	\ -			);						\ -									\ -		(ctxt)->src2.val  = (unsigned long) _clv;		\ -		(ctxt)->src2.val = (unsigned long) _srcv;		\ -		(ctxt)->dst.val = (unsigned long) _dstv;		\ -	} while (0) - -#define emulate_2op_cl(ctxt, _op)					\ -	do {								\ -		switch ((ctxt)->dst.bytes) {				\ -		case 2:							\ -			__emulate_2op_cl(ctxt, _op, "w", u16);		\ -			break;						\ -		case 4:							\ -			__emulate_2op_cl(ctxt, _op, "l", u32);		\ -			break;						\ -		case 8:							\ -			ON64(__emulate_2op_cl(ctxt, _op, "q", ulong));	\ -			break;						\ -		}							\ -	} while (0) - -#define __emulate_1op(ctxt, _op, _suffix)				\ -	do {								\ -		unsigned long _tmp;					\ -									\ -		__asm__ __volatile__ (					\ -			_PRE_EFLAGS("0", "3", "2")			\ -			_op _suffix " %1; "				\ -			_POST_EFLAGS("0", "3", "2")			\ -			: "=m" ((ctxt)->eflags), "+m" ((ctxt)->dst.val), \ -			  "=&r" (_tmp)					\ -			: "i" (EFLAGS_MASK));				\ -	} while (0) - -/* Instruction has only one explicit operand (no source operand). */ -#define emulate_1op(ctxt, _op)						\ -	do {								\ -		switch ((ctxt)->dst.bytes) {				\ -		case 1:	__emulate_1op(ctxt, _op, "b"); break;		\ -		case 2:	__emulate_1op(ctxt, _op, "w"); break;		\ -		case 4:	__emulate_1op(ctxt, _op, "l"); break;		\ -		case 8:	ON64(__emulate_1op(ctxt, _op, "q")); break;	\ -		}							\ -	} while (0) -  static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));  #define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t" @@ -462,7 +314,10 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));  #define FOPNOP() FOP_ALIGN FOP_RET  #define FOP1E(op,  dst) \ -	FOP_ALIGN #op " %" #dst " \n\t" FOP_RET +	FOP_ALIGN "10: " #op " %" #dst " \n\t" FOP_RET + +#define FOP1EEX(op,  dst) \ +	FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)  #define FASTOP1(op) \  	FOP_START(op) \ @@ -472,24 +327,42 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));  	ON64(FOP1E(op##q, rax))	\  	FOP_END +/* 1-operand, using src2 (for MUL/DIV r/m) */ +#define FASTOP1SRC2(op, name) \ +	FOP_START(name) \ +	FOP1E(op, cl) \ +	FOP1E(op, cx) \ +	FOP1E(op, ecx) \ +	ON64(FOP1E(op, rcx)) \ +	FOP_END + +/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */ +#define FASTOP1SRC2EX(op, name) \ +	FOP_START(name) \ +	FOP1EEX(op, cl) \ +	FOP1EEX(op, cx) \ +	FOP1EEX(op, ecx) \ +	ON64(FOP1EEX(op, rcx)) \ +	FOP_END +  #define FOP2E(op,  dst, src)	   \  	FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET  #define FASTOP2(op) \  	FOP_START(op) \ -	FOP2E(op##b, al, bl) \ -	FOP2E(op##w, ax, bx) \ -	FOP2E(op##l, eax, ebx) \ -	ON64(FOP2E(op##q, rax, rbx)) \ +	FOP2E(op##b, al, dl) \ +	FOP2E(op##w, ax, dx) \ +	FOP2E(op##l, eax, edx) \ +	ON64(FOP2E(op##q, rax, rdx)) \  	FOP_END  /* 2 operand, word only */  #define FASTOP2W(op) \  	FOP_START(op) \  	FOPNOP() \ -	FOP2E(op##w, ax, bx) \ -	FOP2E(op##l, eax, ebx) \ -	ON64(FOP2E(op##q, rax, rbx)) \ +	FOP2E(op##w, ax, dx) \ +	FOP2E(op##l, eax, edx) \ +	ON64(FOP2E(op##q, rax, rdx)) \  	FOP_END  /* 2 operand, src is CL */ @@ -508,14 +381,17 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));  #define FASTOP3WCL(op) \  	FOP_START(op) \  	FOPNOP() \ -	FOP3E(op##w, ax, bx, cl) \ -	FOP3E(op##l, eax, ebx, cl) \ -	ON64(FOP3E(op##q, rax, rbx, cl)) \ +	FOP3E(op##w, ax, dx, cl) \ +	FOP3E(op##l, eax, edx, cl) \ +	ON64(FOP3E(op##q, rax, rdx, cl)) \  	FOP_END  /* Special case for SETcc - 1 instruction per cc */  #define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t" +asm(".global kvm_fastop_exception \n" +    "kvm_fastop_exception: xor %esi, %esi; ret"); +  FOP_START(setcc)  FOP_SETCC(seto)  FOP_SETCC(setno) @@ -538,47 +414,6 @@ FOP_END;  FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET  FOP_END; -#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex)			\ -	do {								\ -		unsigned long _tmp;					\ -		ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX);		\ -		ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX);		\ -									\ -		__asm__ __volatile__ (					\ -			_PRE_EFLAGS("0", "5", "1")			\ -			"1: \n\t"					\ -			_op _suffix " %6; "				\ -			"2: \n\t"					\ -			_POST_EFLAGS("0", "5", "1")			\ -			".pushsection .fixup,\"ax\" \n\t"		\ -			"3: movb $1, %4 \n\t"				\ -			"jmp 2b \n\t"					\ -			".popsection \n\t"				\ -			_ASM_EXTABLE(1b, 3b)				\ -			: "=m" ((ctxt)->eflags), "=&r" (_tmp),		\ -			  "+a" (*rax), "+d" (*rdx), "+qm"(_ex)		\ -			: "i" (EFLAGS_MASK), "m" ((ctxt)->src.val));	\ -	} while (0) - -/* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ -#define emulate_1op_rax_rdx(ctxt, _op, _ex)	\ -	do {								\ -		switch((ctxt)->src.bytes) {				\ -		case 1:							\ -			__emulate_1op_rax_rdx(ctxt, _op, "b", _ex);	\ -			break;						\ -		case 2:							\ -			__emulate_1op_rax_rdx(ctxt, _op, "w", _ex);	\ -			break;						\ -		case 4:							\ -			__emulate_1op_rax_rdx(ctxt, _op, "l", _ex);	\ -			break;						\ -		case 8: ON64(						\ -			__emulate_1op_rax_rdx(ctxt, _op, "q", _ex));	\ -			break;						\ -		}							\ -	} while (0) -  static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,  				    enum x86_intercept intercept,  				    enum x86_intercept_stage stage) @@ -988,6 +823,11 @@ FASTOP2(xor);  FASTOP2(cmp);  FASTOP2(test); +FASTOP1SRC2(mul, mul_ex); +FASTOP1SRC2(imul, imul_ex); +FASTOP1SRC2EX(div, div_ex); +FASTOP1SRC2EX(idiv, idiv_ex); +  FASTOP3WCL(shld);  FASTOP3WCL(shrd); @@ -1013,6 +853,8 @@ FASTOP2W(bts);  FASTOP2W(btr);  FASTOP2W(btc); +FASTOP2(xadd); +  static u8 test_cc(unsigned int condition, unsigned long flags)  {  	u8 rc; @@ -1726,45 +1568,42 @@ static void write_register_operand(struct operand *op)  	}  } -static int writeback(struct x86_emulate_ctxt *ctxt) +static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)  {  	int rc; -	if (ctxt->d & NoWrite) -		return X86EMUL_CONTINUE; - -	switch (ctxt->dst.type) { +	switch (op->type) {  	case OP_REG: -		write_register_operand(&ctxt->dst); +		write_register_operand(op);  		break;  	case OP_MEM:  		if (ctxt->lock_prefix)  			rc = segmented_cmpxchg(ctxt, -					       ctxt->dst.addr.mem, -					       &ctxt->dst.orig_val, -					       &ctxt->dst.val, -					       ctxt->dst.bytes); +					       op->addr.mem, +					       &op->orig_val, +					       &op->val, +					       op->bytes);  		else  			rc = segmented_write(ctxt, -					     ctxt->dst.addr.mem, -					     &ctxt->dst.val, -					     ctxt->dst.bytes); +					     op->addr.mem, +					     &op->val, +					     op->bytes);  		if (rc != X86EMUL_CONTINUE)  			return rc;  		break;  	case OP_MEM_STR:  		rc = segmented_write(ctxt, -				ctxt->dst.addr.mem, -				ctxt->dst.data, -				ctxt->dst.bytes * ctxt->dst.count); +				op->addr.mem, +				op->data, +				op->bytes * op->count);  		if (rc != X86EMUL_CONTINUE)  			return rc;  		break;  	case OP_XMM: -		write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm); +		write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);  		break;  	case OP_MM: -		write_mmx_reg(ctxt, &ctxt->dst.mm_val, ctxt->dst.addr.mm); +		write_mmx_reg(ctxt, &op->mm_val, op->addr.mm);  		break;  	case OP_NONE:  		/* no writeback */ @@ -2117,42 +1956,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)  	return X86EMUL_CONTINUE;  } -static int em_mul_ex(struct x86_emulate_ctxt *ctxt) -{ -	u8 ex = 0; - -	emulate_1op_rax_rdx(ctxt, "mul", ex); -	return X86EMUL_CONTINUE; -} - -static int em_imul_ex(struct x86_emulate_ctxt *ctxt) -{ -	u8 ex = 0; - -	emulate_1op_rax_rdx(ctxt, "imul", ex); -	return X86EMUL_CONTINUE; -} - -static int em_div_ex(struct x86_emulate_ctxt *ctxt) -{ -	u8 de = 0; - -	emulate_1op_rax_rdx(ctxt, "div", de); -	if (de) -		return emulate_de(ctxt); -	return X86EMUL_CONTINUE; -} - -static int em_idiv_ex(struct x86_emulate_ctxt *ctxt) -{ -	u8 de = 0; - -	emulate_1op_rax_rdx(ctxt, "idiv", de); -	if (de) -		return emulate_de(ctxt); -	return X86EMUL_CONTINUE; -} -  static int em_grp45(struct x86_emulate_ctxt *ctxt)  {  	int rc = X86EMUL_CONTINUE; @@ -3734,10 +3537,10 @@ static const struct opcode group3[] = {  	F(DstMem | SrcImm | NoWrite, em_test),  	F(DstMem | SrcNone | Lock, em_not),  	F(DstMem | SrcNone | Lock, em_neg), -	I(SrcMem, em_mul_ex), -	I(SrcMem, em_imul_ex), -	I(SrcMem, em_div_ex), -	I(SrcMem, em_idiv_ex), +	F(DstXacc | Src2Mem, em_mul_ex), +	F(DstXacc | Src2Mem, em_imul_ex), +	F(DstXacc | Src2Mem, em_div_ex), +	F(DstXacc | Src2Mem, em_idiv_ex),  };  static const struct opcode group4[] = { @@ -4064,7 +3867,7 @@ static const struct opcode twobyte_table[256] = {  	F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),  	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),  	/* 0xC0 - 0xC7 */ -	D2bv(DstMem | SrcReg | ModRM | Lock), +	F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),  	N, D(DstMem | SrcReg | ModRM | Mov),  	N, N, N, GD(0, &group9),  	/* 0xC8 - 0xCF */ @@ -4172,6 +3975,24 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,  		fetch_register_operand(op);  		op->orig_val = op->val;  		break; +	case OpAccLo: +		op->type = OP_REG; +		op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes; +		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); +		fetch_register_operand(op); +		op->orig_val = op->val; +		break; +	case OpAccHi: +		if (ctxt->d & ByteOp) { +			op->type = OP_NONE; +			break; +		} +		op->type = OP_REG; +		op->bytes = ctxt->op_bytes; +		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX); +		fetch_register_operand(op); +		op->orig_val = op->val; +		break;  	case OpDI:  		op->type = OP_MEM;  		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; @@ -4553,11 +4374,15 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,  static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))  {  	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; -	fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; +	if (!(ctxt->d & ByteOp)) +		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;  	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" -	    : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags) -	: "c"(ctxt->src2.val), [fastop]"S"(fop)); +	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), +	      [fastop]"+S"(fop) +	    : "c"(ctxt->src2.val));  	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); +	if (!fop) /* exception is returned in fop variable */ +		return emulate_de(ctxt);  	return X86EMUL_CONTINUE;  } @@ -4773,9 +4598,17 @@ special_insn:  		goto done;  writeback: -	rc = writeback(ctxt); -	if (rc != X86EMUL_CONTINUE) -		goto done; +	if (!(ctxt->d & NoWrite)) { +		rc = writeback(ctxt, &ctxt->dst); +		if (rc != X86EMUL_CONTINUE) +			goto done; +	} +	if (ctxt->d & SrcWrite) { +		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR); +		rc = writeback(ctxt, &ctxt->src); +		if (rc != X86EMUL_CONTINUE) +			goto done; +	}  	/*  	 * restore dst type in case the decoding will be reused @@ -4872,12 +4705,6 @@ twobyte_insn:  		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :  							(s16) ctxt->src.val;  		break; -	case 0xc0 ... 0xc1:	/* xadd */ -		fastop(ctxt, em_add); -		/* Write back the register source. */ -		ctxt->src.val = ctxt->dst.orig_val; -		write_register_operand(&ctxt->src); -		break;  	case 0xc3:		/* movnti */  		ctxt->dst.bytes = ctxt->op_bytes;  		ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : | 
