Diffstat (limited to 'arch/x86/kvm/emulate.c')
-rw-r--r--   arch/x86/kvm/emulate.c | 669
1 file changed, 400 insertions(+), 269 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a27e7637110..a335cc6cde7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -24,6 +24,7 @@  #include "kvm_cache_regs.h"  #include <linux/module.h>  #include <asm/kvm_emulate.h> +#include <linux/stringify.h>  #include "x86.h"  #include "tss.h" @@ -43,7 +44,7 @@  #define OpCL               9ull  /* CL register (for shifts) */  #define OpImmByte         10ull  /* 8-bit sign extended immediate */  #define OpOne             11ull  /* Implied 1 */ -#define OpImm             12ull  /* Sign extended immediate */ +#define OpImm             12ull  /* Sign extended up to 32-bit immediate */  #define OpMem16           13ull  /* Memory operand (16-bit). */  #define OpMem32           14ull  /* Memory operand (32-bit). */  #define OpImmU            15ull  /* Immediate operand, zero extended */ @@ -58,6 +59,7 @@  #define OpFS              24ull  /* FS */  #define OpGS              25ull  /* GS */  #define OpMem8            26ull  /* 8-bit zero extended memory operand */ +#define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */  #define OpBits             5  /* Width of operand field */  #define OpMask             ((1ull << OpBits) - 1) @@ -101,6 +103,7 @@  #define SrcMemFAddr (OpMemFAddr << SrcShift)  #define SrcAcc      (OpAcc << SrcShift)  #define SrcImmU16   (OpImmU16 << SrcShift) +#define SrcImm64    (OpImm64 << SrcShift)  #define SrcDX       (OpDX << SrcShift)  #define SrcMem8     (OpMem8 << SrcShift)  #define SrcMask     (OpMask << SrcShift) @@ -113,6 +116,7 @@  #define GroupDual   (2<<15)     /* Alternate decoding of mod == 3 */  #define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */  #define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */ +#define Escape      (5<<15)     /* Escape to coprocessor instruction */  #define Sse         (1<<18)     /* SSE Vector instruction */  /* Generic ModRM decode. */  #define ModRM       (1<<19) @@ -146,6 +150,8 @@  #define Aligned     ((u64)1 << 41)  /* Explicitly aligned (e.g. MOVDQA) */  #define Unaligned   ((u64)1 << 42)  /* Explicitly unaligned (e.g. MOVDQU) */  #define Avx         ((u64)1 << 43)  /* Advanced Vector Extensions */ +#define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */ +#define NoWrite     ((u64)1 << 45)  /* No writeback */  #define X2(x...) x, x  #define X3(x...) X2(x), x @@ -156,6 +162,27 @@  #define X8(x...) X4(x), X4(x)  #define X16(x...) X8(x), X8(x) +#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) +#define FASTOP_SIZE 8 + +/* + * fastop functions have a special calling convention: + * + * dst:    [rdx]:rax  (in/out) + * src:    rbx        (in/out) + * src2:   rcx        (in) + * flags:  rflags     (in/out) + * + * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for + * different operand sizes can be reached by calculation, rather than a jump + * table (which would be bigger than the code). + * + * fastop functions are declared as taking a never-defined fastop parameter, + * so they can't be called from C directly. 
+ */ + +struct fastop; +  struct opcode {  	u64 flags : 56;  	u64 intercept : 8; @@ -164,6 +191,8 @@ struct opcode {  		const struct opcode *group;  		const struct group_dual *gdual;  		const struct gprefix *gprefix; +		const struct escape *esc; +		void (*fastop)(struct fastop *fake);  	} u;  	int (*check_perm)(struct x86_emulate_ctxt *ctxt);  }; @@ -180,6 +209,11 @@ struct gprefix {  	struct opcode pfx_f3;  }; +struct escape { +	struct opcode op[8]; +	struct opcode high[64]; +}; +  /* EFLAGS bit definitions. */  #define EFLG_ID (1<<21)  #define EFLG_VIP (1<<20) @@ -407,6 +441,97 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)  		}							\  	} while (0) +static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); + +#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t" +#define FOP_RET   "ret \n\t" + +#define FOP_START(op) \ +	extern void em_##op(struct fastop *fake); \ +	asm(".pushsection .text, \"ax\" \n\t" \ +	    ".global em_" #op " \n\t" \ +            FOP_ALIGN \ +	    "em_" #op ": \n\t" + +#define FOP_END \ +	    ".popsection") + +#define FOPNOP() FOP_ALIGN FOP_RET + +#define FOP1E(op,  dst) \ +	FOP_ALIGN #op " %" #dst " \n\t" FOP_RET + +#define FASTOP1(op) \ +	FOP_START(op) \ +	FOP1E(op##b, al) \ +	FOP1E(op##w, ax) \ +	FOP1E(op##l, eax) \ +	ON64(FOP1E(op##q, rax))	\ +	FOP_END + +#define FOP2E(op,  dst, src)	   \ +	FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET + +#define FASTOP2(op) \ +	FOP_START(op) \ +	FOP2E(op##b, al, bl) \ +	FOP2E(op##w, ax, bx) \ +	FOP2E(op##l, eax, ebx) \ +	ON64(FOP2E(op##q, rax, rbx)) \ +	FOP_END + +/* 2 operand, word only */ +#define FASTOP2W(op) \ +	FOP_START(op) \ +	FOPNOP() \ +	FOP2E(op##w, ax, bx) \ +	FOP2E(op##l, eax, ebx) \ +	ON64(FOP2E(op##q, rax, rbx)) \ +	FOP_END + +/* 2 operand, src is CL */ +#define FASTOP2CL(op) \ +	FOP_START(op) \ +	FOP2E(op##b, al, cl) \ +	FOP2E(op##w, ax, cl) \ +	FOP2E(op##l, eax, cl) \ +	ON64(FOP2E(op##q, rax, cl)) \ +	FOP_END + +#define FOP3E(op,  dst, src, src2) \ +	FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET + +/* 3-operand, word-only, src2=cl */ +#define FASTOP3WCL(op) \ +	FOP_START(op) \ +	FOPNOP() \ +	FOP3E(op##w, ax, bx, cl) \ +	FOP3E(op##l, eax, ebx, cl) \ +	ON64(FOP3E(op##q, rax, rbx, cl)) \ +	FOP_END + +/* Special case for SETcc - 1 instruction per cc */ +#define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t" + +FOP_START(setcc) +FOP_SETCC(seto) +FOP_SETCC(setno) +FOP_SETCC(setc) +FOP_SETCC(setnc) +FOP_SETCC(setz) +FOP_SETCC(setnz) +FOP_SETCC(setbe) +FOP_SETCC(setnbe) +FOP_SETCC(sets) +FOP_SETCC(setns) +FOP_SETCC(setp) +FOP_SETCC(setnp) +FOP_SETCC(setl) +FOP_SETCC(setnl) +FOP_SETCC(setle) +FOP_SETCC(setnle) +FOP_END; +  #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex)			\  	do {								\  		unsigned long _tmp;					\ @@ -663,7 +788,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,  	ulong la;  	u32 lim;  	u16 sel; -	unsigned cpl, rpl; +	unsigned cpl;  	la = seg_base(ctxt, addr.seg) + addr.ea;  	switch (ctxt->mode) { @@ -697,11 +822,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,  				goto bad;  		}  		cpl = ctxt->ops->cpl(ctxt); -		if (ctxt->mode == X86EMUL_MODE_REAL) -			rpl = 0; -		else -			rpl = sel & 3; -		cpl = max(cpl, rpl);  		if (!(desc.type & 8)) {  			/* data segment */  			if (cpl > desc.dpl) @@ -852,39 +972,50 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,  	return rc;  } -static int test_cc(unsigned int condition, unsigned int flags) -{ -	int rc = 0; +FASTOP2(add); +FASTOP2(or); +FASTOP2(adc); 
+FASTOP2(sbb); +FASTOP2(and); +FASTOP2(sub); +FASTOP2(xor); +FASTOP2(cmp); +FASTOP2(test); -	switch ((condition & 15) >> 1) { -	case 0: /* o */ -		rc |= (flags & EFLG_OF); -		break; -	case 1: /* b/c/nae */ -		rc |= (flags & EFLG_CF); -		break; -	case 2: /* z/e */ -		rc |= (flags & EFLG_ZF); -		break; -	case 3: /* be/na */ -		rc |= (flags & (EFLG_CF|EFLG_ZF)); -		break; -	case 4: /* s */ -		rc |= (flags & EFLG_SF); -		break; -	case 5: /* p/pe */ -		rc |= (flags & EFLG_PF); -		break; -	case 7: /* le/ng */ -		rc |= (flags & EFLG_ZF); -		/* fall through */ -	case 6: /* l/nge */ -		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF)); -		break; -	} +FASTOP3WCL(shld); +FASTOP3WCL(shrd); + +FASTOP2W(imul); + +FASTOP1(not); +FASTOP1(neg); +FASTOP1(inc); +FASTOP1(dec); + +FASTOP2CL(rol); +FASTOP2CL(ror); +FASTOP2CL(rcl); +FASTOP2CL(rcr); +FASTOP2CL(shl); +FASTOP2CL(shr); +FASTOP2CL(sar); + +FASTOP2W(bsf); +FASTOP2W(bsr); +FASTOP2W(bt); +FASTOP2W(bts); +FASTOP2W(btr); +FASTOP2W(btc); + +static u8 test_cc(unsigned int condition, unsigned long flags) +{ +	u8 rc; +	void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); -	/* Odd condition identifiers (lsb == 1) have inverted sense. */ -	return (!!rc ^ (condition & 1)); +	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; +	asm("push %[flags]; popf; call *%[fastop]" +	    : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); +	return rc;  }  static void fetch_register_operand(struct operand *op) @@ -994,6 +1125,53 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)  	ctxt->ops->put_fpu(ctxt);  } +static int em_fninit(struct x86_emulate_ctxt *ctxt) +{ +	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) +		return emulate_nm(ctxt); + +	ctxt->ops->get_fpu(ctxt); +	asm volatile("fninit"); +	ctxt->ops->put_fpu(ctxt); +	return X86EMUL_CONTINUE; +} + +static int em_fnstcw(struct x86_emulate_ctxt *ctxt) +{ +	u16 fcw; + +	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) +		return emulate_nm(ctxt); + +	ctxt->ops->get_fpu(ctxt); +	asm volatile("fnstcw %0": "+m"(fcw)); +	ctxt->ops->put_fpu(ctxt); + +	/* force 2 byte destination */ +	ctxt->dst.bytes = 2; +	ctxt->dst.val = fcw; + +	return X86EMUL_CONTINUE; +} + +static int em_fnstsw(struct x86_emulate_ctxt *ctxt) +{ +	u16 fsw; + +	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) +		return emulate_nm(ctxt); + +	ctxt->ops->get_fpu(ctxt); +	asm volatile("fnstsw %0": "+m"(fsw)); +	ctxt->ops->put_fpu(ctxt); + +	/* force 2 byte destination */ +	ctxt->dst.bytes = 2; +	ctxt->dst.val = fsw; + +	return X86EMUL_CONTINUE; +} +  static void decode_register_operand(struct x86_emulate_ctxt *ctxt,  				    struct operand *op)  { @@ -1534,6 +1712,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt)  {  	int rc; +	if (ctxt->d & NoWrite) +		return X86EMUL_CONTINUE; +  	switch (ctxt->dst.type) {  	case OP_REG:  		write_register_operand(&ctxt->dst); @@ -1918,47 +2099,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)  	return X86EMUL_CONTINUE;  } -static int em_grp2(struct x86_emulate_ctxt *ctxt) -{ -	switch (ctxt->modrm_reg) { -	case 0:	/* rol */ -		emulate_2op_SrcB(ctxt, "rol"); -		break; -	case 1:	/* ror */ -		emulate_2op_SrcB(ctxt, "ror"); -		break; -	case 2:	/* rcl */ -		emulate_2op_SrcB(ctxt, "rcl"); -		break; -	case 3:	/* rcr */ -		emulate_2op_SrcB(ctxt, "rcr"); -		break; -	case 4:	/* sal/shl */ -	case 6:	/* sal/shl */ -		emulate_2op_SrcB(ctxt, "sal"); -		break; -	case 5:	/* shr */ -		emulate_2op_SrcB(ctxt, "shr"); -		break; -	case 7:	/* sar */ -		
emulate_2op_SrcB(ctxt, "sar"); -		break; -	} -	return X86EMUL_CONTINUE; -} - -static int em_not(struct x86_emulate_ctxt *ctxt) -{ -	ctxt->dst.val = ~ctxt->dst.val; -	return X86EMUL_CONTINUE; -} - -static int em_neg(struct x86_emulate_ctxt *ctxt) -{ -	emulate_1op(ctxt, "neg"); -	return X86EMUL_CONTINUE; -} -  static int em_mul_ex(struct x86_emulate_ctxt *ctxt)  {  	u8 ex = 0; @@ -2000,12 +2140,6 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)  	int rc = X86EMUL_CONTINUE;  	switch (ctxt->modrm_reg) { -	case 0:	/* inc */ -		emulate_1op(ctxt, "inc"); -		break; -	case 1:	/* dec */ -		emulate_1op(ctxt, "dec"); -		break;  	case 2: /* call near abs */ {  		long int old_eip;  		old_eip = ctxt->_eip; @@ -2075,7 +2209,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)  	/* Save real source value, then compare EAX against destination. */  	ctxt->src.orig_val = ctxt->src.val;  	ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); -	emulate_2op_SrcV(ctxt, "cmp"); +	fastop(ctxt, em_cmp);  	if (ctxt->eflags & EFLG_ZF) {  		/* Success: write back to memory. */ @@ -2843,7 +2977,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt)  	ctxt->src.type = OP_IMM;  	ctxt->src.val = 0;  	ctxt->src.bytes = 1; -	emulate_2op_SrcV(ctxt, "or"); +	fastop(ctxt, em_or);  	ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);  	if (cf)  		ctxt->eflags |= X86_EFLAGS_CF; @@ -2852,6 +2986,24 @@ static int em_das(struct x86_emulate_ctxt *ctxt)  	return X86EMUL_CONTINUE;  } +static int em_aad(struct x86_emulate_ctxt *ctxt) +{ +	u8 al = ctxt->dst.val & 0xff; +	u8 ah = (ctxt->dst.val >> 8) & 0xff; + +	al = (al + (ah * ctxt->src.val)) & 0xff; + +	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al; + +	/* Set PF, ZF, SF */ +	ctxt->src.type = OP_IMM; +	ctxt->src.val = 0; +	ctxt->src.bytes = 1; +	fastop(ctxt, em_or); + +	return X86EMUL_CONTINUE; +} +  static int em_call(struct x86_emulate_ctxt *ctxt)  {  	long rel = ctxt->src.val; @@ -2900,64 +3052,6 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)  	return X86EMUL_CONTINUE;  } -static int em_add(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV(ctxt, "add"); -	return X86EMUL_CONTINUE; -} - -static int em_or(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV(ctxt, "or"); -	return X86EMUL_CONTINUE; -} - -static int em_adc(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV(ctxt, "adc"); -	return X86EMUL_CONTINUE; -} - -static int em_sbb(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV(ctxt, "sbb"); -	return X86EMUL_CONTINUE; -} - -static int em_and(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV(ctxt, "and"); -	return X86EMUL_CONTINUE; -} - -static int em_sub(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV(ctxt, "sub"); -	return X86EMUL_CONTINUE; -} - -static int em_xor(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV(ctxt, "xor"); -	return X86EMUL_CONTINUE; -} - -static int em_cmp(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV(ctxt, "cmp"); -	/* Disable writeback. */ -	ctxt->dst.type = OP_NONE; -	return X86EMUL_CONTINUE; -} - -static int em_test(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV(ctxt, "test"); -	/* Disable writeback. */ -	ctxt->dst.type = OP_NONE; -	return X86EMUL_CONTINUE; -} -  static int em_xchg(struct x86_emulate_ctxt *ctxt)  {  	/* Write back the register source. 
*/ @@ -2970,16 +3064,10 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt)  	return X86EMUL_CONTINUE;  } -static int em_imul(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV_nobyte(ctxt, "imul"); -	return X86EMUL_CONTINUE; -} -  static int em_imul_3op(struct x86_emulate_ctxt *ctxt)  {  	ctxt->dst.val = ctxt->src2.val; -	return em_imul(ctxt); +	return fastop(ctxt, em_imul);  }  static int em_cwd(struct x86_emulate_ctxt *ctxt) @@ -3300,47 +3388,6 @@ static int em_sti(struct x86_emulate_ctxt *ctxt)  	return X86EMUL_CONTINUE;  } -static int em_bt(struct x86_emulate_ctxt *ctxt) -{ -	/* Disable writeback. */ -	ctxt->dst.type = OP_NONE; -	/* only subword offset */ -	ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; - -	emulate_2op_SrcV_nobyte(ctxt, "bt"); -	return X86EMUL_CONTINUE; -} - -static int em_bts(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV_nobyte(ctxt, "bts"); -	return X86EMUL_CONTINUE; -} - -static int em_btr(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV_nobyte(ctxt, "btr"); -	return X86EMUL_CONTINUE; -} - -static int em_btc(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV_nobyte(ctxt, "btc"); -	return X86EMUL_CONTINUE; -} - -static int em_bsf(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV_nobyte(ctxt, "bsf"); -	return X86EMUL_CONTINUE; -} - -static int em_bsr(struct x86_emulate_ctxt *ctxt) -{ -	emulate_2op_SrcV_nobyte(ctxt, "bsr"); -	return X86EMUL_CONTINUE; -} -  static int em_cpuid(struct x86_emulate_ctxt *ctxt)  {  	u32 eax, ebx, ecx, edx; @@ -3572,7 +3619,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)  #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }  #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }  #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } +#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }  #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } +#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }  #define II(_f, _e, _i) \  	{ .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }  #define IIP(_f, _e, _i, _p) \ @@ -3583,12 +3632,13 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)  #define D2bv(_f)      D((_f) | ByteOp), D(_f)  #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)  #define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e) +#define F2bv(_f, _e)  F((_f) | ByteOp, _e), F(_f, _e)  #define I2bvIP(_f, _e, _i, _p) \  	IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) -#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e),		\ -		I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e),	\ -		I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) +#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e),		\ +		F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e),	\ +		F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)  static const struct opcode group7_rm1[] = {  	DI(SrcNone | Priv, monitor), @@ -3614,25 +3664,36 @@ static const struct opcode group7_rm7[] = {  };  static const struct opcode group1[] = { -	I(Lock, em_add), -	I(Lock | PageTable, em_or), -	I(Lock, em_adc), -	I(Lock, em_sbb), -	I(Lock | PageTable, em_and), -	I(Lock, em_sub), -	I(Lock, em_xor), -	I(0, em_cmp), +	F(Lock, em_add), +	F(Lock | PageTable, em_or), +	F(Lock, em_adc), +	F(Lock, em_sbb), +	F(Lock | PageTable, em_and), +	F(Lock, em_sub), +	F(Lock, em_xor), +	F(NoWrite, em_cmp),  };  static const struct opcode group1A[] = {  	I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,  }; +static const struct opcode group2[] = { +	
F(DstMem | ModRM, em_rol), +	F(DstMem | ModRM, em_ror), +	F(DstMem | ModRM, em_rcl), +	F(DstMem | ModRM, em_rcr), +	F(DstMem | ModRM, em_shl), +	F(DstMem | ModRM, em_shr), +	F(DstMem | ModRM, em_shl), +	F(DstMem | ModRM, em_sar), +}; +  static const struct opcode group3[] = { -	I(DstMem | SrcImm, em_test), -	I(DstMem | SrcImm, em_test), -	I(DstMem | SrcNone | Lock, em_not), -	I(DstMem | SrcNone | Lock, em_neg), +	F(DstMem | SrcImm | NoWrite, em_test), +	F(DstMem | SrcImm | NoWrite, em_test), +	F(DstMem | SrcNone | Lock, em_not), +	F(DstMem | SrcNone | Lock, em_neg),  	I(SrcMem, em_mul_ex),  	I(SrcMem, em_imul_ex),  	I(SrcMem, em_div_ex), @@ -3640,14 +3701,14 @@ static const struct opcode group3[] = {  };  static const struct opcode group4[] = { -	I(ByteOp | DstMem | SrcNone | Lock, em_grp45), -	I(ByteOp | DstMem | SrcNone | Lock, em_grp45), +	F(ByteOp | DstMem | SrcNone | Lock, em_inc), +	F(ByteOp | DstMem | SrcNone | Lock, em_dec),  	N, N, N, N, N, N,  };  static const struct opcode group5[] = { -	I(DstMem | SrcNone | Lock,		em_grp45), -	I(DstMem | SrcNone | Lock,		em_grp45), +	F(DstMem | SrcNone | Lock,		em_inc), +	F(DstMem | SrcNone | Lock,		em_dec),  	I(SrcMem | Stack,			em_grp45),  	I(SrcMemFAddr | ImplicitOps | Stack,	em_call_far),  	I(SrcMem | Stack,			em_grp45), @@ -3682,10 +3743,10 @@ static const struct group_dual group7 = { {  static const struct opcode group8[] = {  	N, N, N, N, -	I(DstMem | SrcImmByte,				em_bt), -	I(DstMem | SrcImmByte | Lock | PageTable,	em_bts), -	I(DstMem | SrcImmByte | Lock,			em_btr), -	I(DstMem | SrcImmByte | Lock | PageTable,	em_btc), +	F(DstMem | SrcImmByte | NoWrite,		em_bt), +	F(DstMem | SrcImmByte | Lock | PageTable,	em_bts), +	F(DstMem | SrcImmByte | Lock,			em_btr), +	F(DstMem | SrcImmByte | Lock | PageTable,	em_btc),  };  static const struct group_dual group9 = { { @@ -3707,33 +3768,96 @@ static const struct gprefix pfx_vmovntpx = {  	I(0, em_mov), N, N, N,  }; +static const struct escape escape_d9 = { { +	N, N, N, N, N, N, N, I(DstMem, em_fnstcw), +}, { +	/* 0xC0 - 0xC7 */ +	N, N, N, N, N, N, N, N, +	/* 0xC8 - 0xCF */ +	N, N, N, N, N, N, N, N, +	/* 0xD0 - 0xC7 */ +	N, N, N, N, N, N, N, N, +	/* 0xD8 - 0xDF */ +	N, N, N, N, N, N, N, N, +	/* 0xE0 - 0xE7 */ +	N, N, N, N, N, N, N, N, +	/* 0xE8 - 0xEF */ +	N, N, N, N, N, N, N, N, +	/* 0xF0 - 0xF7 */ +	N, N, N, N, N, N, N, N, +	/* 0xF8 - 0xFF */ +	N, N, N, N, N, N, N, N, +} }; + +static const struct escape escape_db = { { +	N, N, N, N, N, N, N, N, +}, { +	/* 0xC0 - 0xC7 */ +	N, N, N, N, N, N, N, N, +	/* 0xC8 - 0xCF */ +	N, N, N, N, N, N, N, N, +	/* 0xD0 - 0xC7 */ +	N, N, N, N, N, N, N, N, +	/* 0xD8 - 0xDF */ +	N, N, N, N, N, N, N, N, +	/* 0xE0 - 0xE7 */ +	N, N, N, I(ImplicitOps, em_fninit), N, N, N, N, +	/* 0xE8 - 0xEF */ +	N, N, N, N, N, N, N, N, +	/* 0xF0 - 0xF7 */ +	N, N, N, N, N, N, N, N, +	/* 0xF8 - 0xFF */ +	N, N, N, N, N, N, N, N, +} }; + +static const struct escape escape_dd = { { +	N, N, N, N, N, N, N, I(DstMem, em_fnstsw), +}, { +	/* 0xC0 - 0xC7 */ +	N, N, N, N, N, N, N, N, +	/* 0xC8 - 0xCF */ +	N, N, N, N, N, N, N, N, +	/* 0xD0 - 0xC7 */ +	N, N, N, N, N, N, N, N, +	/* 0xD8 - 0xDF */ +	N, N, N, N, N, N, N, N, +	/* 0xE0 - 0xE7 */ +	N, N, N, N, N, N, N, N, +	/* 0xE8 - 0xEF */ +	N, N, N, N, N, N, N, N, +	/* 0xF0 - 0xF7 */ +	N, N, N, N, N, N, N, N, +	/* 0xF8 - 0xFF */ +	N, N, N, N, N, N, N, N, +} }; +  static const struct opcode opcode_table[256] = {  	/* 0x00 - 0x07 */ -	I6ALU(Lock, em_add), +	F6ALU(Lock, em_add),  	I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),  	I(ImplicitOps | Stack | 
No64 | Src2ES, em_pop_sreg),  	/* 0x08 - 0x0F */ -	I6ALU(Lock | PageTable, em_or), +	F6ALU(Lock | PageTable, em_or),  	I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),  	N,  	/* 0x10 - 0x17 */ -	I6ALU(Lock, em_adc), +	F6ALU(Lock, em_adc),  	I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),  	I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),  	/* 0x18 - 0x1F */ -	I6ALU(Lock, em_sbb), +	F6ALU(Lock, em_sbb),  	I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),  	I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),  	/* 0x20 - 0x27 */ -	I6ALU(Lock | PageTable, em_and), N, N, +	F6ALU(Lock | PageTable, em_and), N, N,  	/* 0x28 - 0x2F */ -	I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), +	F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),  	/* 0x30 - 0x37 */ -	I6ALU(Lock, em_xor), N, N, +	F6ALU(Lock, em_xor), N, N,  	/* 0x38 - 0x3F */ -	I6ALU(0, em_cmp), N, N, +	F6ALU(NoWrite, em_cmp), N, N,  	/* 0x40 - 0x4F */ -	X16(D(DstReg)), +	X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),  	/* 0x50 - 0x57 */  	X8(I(SrcReg | Stack, em_push)),  	/* 0x58 - 0x5F */ @@ -3757,7 +3881,7 @@ static const struct opcode opcode_table[256] = {  	G(DstMem | SrcImm, group1),  	G(ByteOp | DstMem | SrcImm | No64, group1),  	G(DstMem | SrcImmByte, group1), -	I2bv(DstMem | SrcReg | ModRM, em_test), +	F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),  	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),  	/* 0x88 - 0x8F */  	I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), @@ -3777,18 +3901,18 @@ static const struct opcode opcode_table[256] = {  	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),  	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),  	I2bv(SrcSI | DstDI | Mov | String, em_mov), -	I2bv(SrcSI | DstDI | String, em_cmp), +	F2bv(SrcSI | DstDI | String | NoWrite, em_cmp),  	/* 0xA8 - 0xAF */ -	I2bv(DstAcc | SrcImm, em_test), +	F2bv(DstAcc | SrcImm | NoWrite, em_test),  	I2bv(SrcAcc | DstDI | Mov | String, em_mov),  	I2bv(SrcSI | DstAcc | Mov | String, em_mov), -	I2bv(SrcAcc | DstDI | String, em_cmp), +	F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp),  	/* 0xB0 - 0xB7 */  	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),  	/* 0xB8 - 0xBF */ -	X8(I(DstReg | SrcImm | Mov, em_mov)), +	X8(I(DstReg | SrcImm64 | Mov, em_mov)),  	/* 0xC0 - 0xC7 */ -	D2bv(DstMem | SrcImmByte | ModRM), +	G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),  	I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),  	I(ImplicitOps | Stack, em_ret),  	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), @@ -3800,10 +3924,11 @@ static const struct opcode opcode_table[256] = {  	D(ImplicitOps), DI(SrcImmByte, intn),  	D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),  	/* 0xD0 - 0xD7 */ -	D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), -	N, N, N, N, +	G(Src2One | ByteOp, group2), G(Src2One, group2), +	G(Src2CL | ByteOp, group2), G(Src2CL, group2), +	N, I(DstAcc | SrcImmByte | No64, em_aad), N, N,  	/* 0xD8 - 0xDF */ -	N, N, N, N, N, N, N, N, +	N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,  	/* 0xE0 - 0xE7 */  	X3(I(SrcImmByte, em_loop)),  	I(SrcImmByte, em_jcxz), @@ -3870,28 +3995,29 @@ static const struct opcode twobyte_table[256] = {  	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),  	/* 0xA0 - 0xA7 */  	I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), -	II(ImplicitOps, em_cpuid, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), -	D(DstMem | SrcReg | Src2ImmByte | ModRM), -	D(DstMem | SrcReg | Src2CL | ModRM), N, N, +	II(ImplicitOps, em_cpuid, 
cpuid), +	F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt), +	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), +	F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,  	/* 0xA8 - 0xAF */  	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),  	DI(ImplicitOps, rsm), -	I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), -	D(DstMem | SrcReg | Src2ImmByte | ModRM), -	D(DstMem | SrcReg | Src2CL | ModRM), -	D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), +	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), +	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), +	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), +	D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),  	/* 0xB0 - 0xB7 */  	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),  	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), -	I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), +	F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),  	I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),  	I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),  	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),  	/* 0xB8 - 0xBF */  	N, N,  	G(BitOp, group8), -	I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), -	I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), +	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), +	F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),  	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),  	/* 0xC0 - 0xC7 */  	D2bv(DstMem | SrcReg | ModRM | Lock), @@ -3950,6 +4076,9 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,  	case 4:  		op->val = insn_fetch(s32, ctxt);  		break; +	case 8: +		op->val = insn_fetch(s64, ctxt); +		break;  	}  	if (!sign_extension) {  		switch (op->bytes) { @@ -4028,6 +4157,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,  	case OpImm:  		rc = decode_imm(ctxt, op, imm_size(ctxt), true);  		break; +	case OpImm64: +		rc = decode_imm(ctxt, op, ctxt->op_bytes, true); +		break;  	case OpMem8:  		ctxt->memop.bytes = 1;  		goto mem_common; @@ -4222,6 +4354,12 @@ done_prefixes:  			case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;  			}  			break; +		case Escape: +			if (ctxt->modrm > 0xbf) +				opcode = opcode.u.esc->high[ctxt->modrm - 0xc0]; +			else +				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; +			break;  		default:  			return EMULATION_FAILED;  		} @@ -4354,6 +4492,16 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,  		read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);  } +static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) +{ +	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; +	fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; +	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" +	    : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags) +	: "c"(ctxt->src2.val), [fastop]"S"(fop)); +	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); +	return X86EMUL_CONTINUE; +}  int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)  { @@ -4483,6 +4631,13 @@ special_insn:  	}  	if (ctxt->execute) { +		if (ctxt->d & Fastop) { +			void (*fop)(struct fastop *) = (void *)ctxt->execute; +			rc = fastop(ctxt, fop); +			if (rc != X86EMUL_CONTINUE) +				goto done; +			goto writeback; +		}  		rc = ctxt->execute(ctxt);  		if (rc != X86EMUL_CONTINUE)  			goto done; @@ -4493,12 +4648,6 @@ special_insn:  		goto twobyte_insn;  	switch 
(ctxt->b) { -	case 0x40 ... 0x47: /* inc r16/r32 */ -		emulate_1op(ctxt, "inc"); -		break; -	case 0x48 ... 0x4f: /* dec r16/r32 */ -		emulate_1op(ctxt, "dec"); -		break;  	case 0x63:		/* movsxd */  		if (ctxt->mode != X86EMUL_MODE_PROT64)  			goto cannot_emulate; @@ -4523,9 +4672,6 @@ special_insn:  		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;  		}  		break; -	case 0xc0 ... 0xc1: -		rc = em_grp2(ctxt); -		break;  	case 0xcc:		/* int3 */  		rc = emulate_int(ctxt, 3);  		break; @@ -4536,13 +4682,6 @@ special_insn:  		if (ctxt->eflags & EFLG_OF)  			rc = emulate_int(ctxt, 4);  		break; -	case 0xd0 ... 0xd1:	/* Grp2 */ -		rc = em_grp2(ctxt); -		break; -	case 0xd2 ... 0xd3:	/* Grp2 */ -		ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX); -		rc = em_grp2(ctxt); -		break;  	case 0xe9: /* jmp rel */  	case 0xeb: /* jmp rel short */  		jmp_rel(ctxt, ctxt->src.val); @@ -4661,14 +4800,6 @@ twobyte_insn:  	case 0x90 ... 0x9f:     /* setcc r/m8 */  		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);  		break; -	case 0xa4: /* shld imm8, r, r/m */ -	case 0xa5: /* shld cl, r, r/m */ -		emulate_2op_cl(ctxt, "shld"); -		break; -	case 0xac: /* shrd imm8, r, r/m */ -	case 0xad: /* shrd cl, r, r/m */ -		emulate_2op_cl(ctxt, "shrd"); -		break;  	case 0xae:              /* clflush */  		break;  	case 0xb6 ... 0xb7:	/* movzx */ @@ -4682,7 +4813,7 @@ twobyte_insn:  							(s16) ctxt->src.val;  		break;  	case 0xc0 ... 0xc1:	/* xadd */ -		emulate_2op_SrcV(ctxt, "add"); +		fastop(ctxt, em_add);  		/* Write back the register source. */  		ctxt->src.val = ctxt->dst.orig_val;  		write_register_operand(&ctxt->src);  |
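
The dispatch trick this patch introduces is easiest to see outside the emulator. Below is a minimal user-space sketch of the fastop idea — my own illustration, not code from the patch. It assumes x86-64 with GCC or Clang, and it must be built with -mno-red-zone (the kernel already builds that way) because the inline asm pushes below %rsp. The names demo_add, demo_fastop and DEMO_FASTOP_SIZE are hypothetical; the real code uses the FASTOP2()/fastop() definitions shown in the diff above.

/* build: gcc -O2 -mno-red-zone fastop_demo.c */
#include <stdio.h>
#include <stdint.h>

#define DEMO_FASTOP_SIZE 8	/* every stub is padded to 8 bytes */

struct fastop;			/* never defined: stubs are not C-callable */

/*
 * One stub per operand size, laid out back to back, 8 bytes apart,
 * like the FASTOP2() expansion of em_add in the patch.
 */
asm(".pushsection .text, \"ax\"\n\t"
    ".global demo_add\n\t"
    ".align 8\n"
    "demo_add:\n\t"
    "addb %bl, %al\n\t"   "ret\n\t"	/* 1-byte operands */
    ".align 8\n\t"
    "addw %bx, %ax\n\t"   "ret\n\t"	/* 2-byte operands */
    ".align 8\n\t"
    "addl %ebx, %eax\n\t" "ret\n\t"	/* 4-byte operands */
    ".align 8\n\t"
    "addq %rbx, %rax\n\t" "ret\n\t"	/* 8-byte operands */
    ".popsection");
extern void demo_add(struct fastop *fake);

static unsigned long demo_fastop(void (*base)(struct fastop *), int op_bytes,
				 unsigned long dst, unsigned long src,
				 unsigned long *rflags)
{
	/*
	 * Pick the stub for this operand size by address arithmetic:
	 * base + log2(bytes) * 8, the same computation fastop() does
	 * with __ffs(ctxt->dst.bytes) * FASTOP_SIZE.
	 */
	void (*fop)(struct fastop *) = (void (*)(struct fastop *))
		((uintptr_t)base + __builtin_ctz(op_bytes) * DEMO_FASTOP_SIZE);
	unsigned long flags = *rflags;

	/* dst in rax, src in rbx, flags passed in and out through EFLAGS */
	asm("push %[flags]; popf; call *%[fop]; pushf; pop %[flags]"
	    : "+a"(dst), "+b"(src), [flags] "+r"(flags)
	    : [fop] "r"(fop)
	    : "cc");

	*rflags = flags;
	return dst;
}

int main(void)
{
	unsigned long flags = 0x2;	/* bit 1 of EFLAGS is always set */
	unsigned long r;

	r = demo_fastop(demo_add, 1, 0xff, 0x01, &flags);	/* 8-bit add wraps, sets CF */
	printf("addb: result=%#lx CF=%lu\n", r & 0xff, flags & 1);

	r = demo_fastop(demo_add, 4, 5, 7, &flags);		/* 32-bit add */
	printf("addl: result=%lu\n", r);
	return 0;
}

The rewritten test_cc() in the diff relies on the same layout trick: each SETcc stub is aligned to 4 bytes, so the handler for a given condition code sits at em_setcc + 4 * (condition & 0xf) and can be reached by arithmetic instead of a jump table.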