1 // SPDX-License-Identifier: GPL-2.0-only
2 /******************************************************************************
5 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
7 * Copyright (c) 2005 Keir Fraser
9 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
10 * privileged instructions:
12 * Copyright (C) 2006 Qumranet
13 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
15 * Avi Kivity <avi@qumranet.com>
16 * Yaniv Kamay <yaniv@qumranet.com>
18 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
21 #include <linux/kvm_host.h>
22 #include "kvm_cache_regs.h"
23 #include "kvm_emulate.h"
24 #include <linux/stringify.h>
25 #include <asm/debugreg.h>
26 #include <asm/nospec-branch.h>
38 #define OpImplicit 1ull /* No generic decode */
39 #define OpReg 2ull /* Register */
40 #define OpMem 3ull /* Memory */
41 #define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */
42 #define OpDI 5ull /* ES:DI/EDI/RDI */
43 #define OpMem64 6ull /* Memory, 64-bit */
44 #define OpImmUByte 7ull /* Zero-extended 8-bit immediate */
45 #define OpDX 8ull /* DX register */
46 #define OpCL 9ull /* CL register (for shifts) */
47 #define OpImmByte 10ull /* 8-bit sign extended immediate */
48 #define OpOne 11ull /* Implied 1 */
49 #define OpImm 12ull /* Sign extended up to 32-bit immediate */
50 #define OpMem16 13ull /* Memory operand (16-bit). */
51 #define OpMem32 14ull /* Memory operand (32-bit). */
52 #define OpImmU 15ull /* Immediate operand, zero extended */
53 #define OpSI 16ull /* SI/ESI/RSI */
54 #define OpImmFAddr 17ull /* Immediate far address */
55 #define OpMemFAddr 18ull /* Far address in memory */
56 #define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */
57 #define OpES 20ull /* ES */
58 #define OpCS 21ull /* CS */
59 #define OpSS 22ull /* SS */
60 #define OpDS 23ull /* DS */
61 #define OpFS 24ull /* FS */
62 #define OpGS 25ull /* GS */
63 #define OpMem8 26ull /* 8-bit zero extended memory operand */
64 #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
65 #define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
66 #define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */
67 #define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */
69 #define OpBits 5 /* Width of operand field */
70 #define OpMask ((1ull << OpBits) - 1)
73 * Opcode effective-address decode tables.
74 * Note that we only emulate instructions that have at least one memory
75 * operand (excluding implicit stack references). We assume that stack
76 * references and instruction fetches will never occur in special memory
77 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
81 /* Operand sizes: 8-bit operands or specified/overridden size. */
82 #define ByteOp (1<<0) /* 8-bit operands. */
83 /* Destination operand type. */
85 #define ImplicitOps (OpImplicit << DstShift)
86 #define DstReg (OpReg << DstShift)
87 #define DstMem (OpMem << DstShift)
88 #define DstAcc (OpAcc << DstShift)
89 #define DstDI (OpDI << DstShift)
90 #define DstMem64 (OpMem64 << DstShift)
91 #define DstMem16 (OpMem16 << DstShift)
92 #define DstImmUByte (OpImmUByte << DstShift)
93 #define DstDX (OpDX << DstShift)
94 #define DstAccLo (OpAccLo << DstShift)
95 #define DstMask (OpMask << DstShift)
96 /* Source operand type. */
98 #define SrcNone (OpNone << SrcShift)
99 #define SrcReg (OpReg << SrcShift)
100 #define SrcMem (OpMem << SrcShift)
101 #define SrcMem16 (OpMem16 << SrcShift)
102 #define SrcMem32 (OpMem32 << SrcShift)
103 #define SrcImm (OpImm << SrcShift)
104 #define SrcImmByte (OpImmByte << SrcShift)
105 #define SrcOne (OpOne << SrcShift)
106 #define SrcImmUByte (OpImmUByte << SrcShift)
107 #define SrcImmU (OpImmU << SrcShift)
108 #define SrcSI (OpSI << SrcShift)
109 #define SrcXLat (OpXLat << SrcShift)
110 #define SrcImmFAddr (OpImmFAddr << SrcShift)
111 #define SrcMemFAddr (OpMemFAddr << SrcShift)
112 #define SrcAcc (OpAcc << SrcShift)
113 #define SrcImmU16 (OpImmU16 << SrcShift)
114 #define SrcImm64 (OpImm64 << SrcShift)
115 #define SrcDX (OpDX << SrcShift)
116 #define SrcMem8 (OpMem8 << SrcShift)
117 #define SrcAccHi (OpAccHi << SrcShift)
118 #define SrcMask (OpMask << SrcShift)
119 #define BitOp (1<<11)
120 #define MemAbs (1<<12) /* Memory operand is absolute displacement */
121 #define String (1<<13) /* String instruction (rep capable) */
122 #define Stack (1<<14) /* Stack instruction (push/pop) */
123 #define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */
124 #define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */
125 #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
126 #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
127 #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
128 #define Escape (5<<15) /* Escape to coprocessor instruction */
129 #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
130 #define ModeDual (7<<15) /* Different instruction for 32/64 bit */
131 #define Sse (1<<18) /* SSE Vector instruction */
132 /* Generic ModRM decode. */
133 #define ModRM (1<<19)
134 /* Destination is only written; never read. */
137 #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
138 #define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
139 #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
140 #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
141 #define Undefined (1<<25) /* No Such Instruction */
142 #define Lock (1<<26) /* lock prefix is allowed for the instruction */
143 #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
145 #define PageTable (1 << 29) /* instruction used to write page table */
146 #define NotImpl (1 << 30) /* instruction is not implemented */
147 /* Source 2 operand type */
148 #define Src2Shift (31)
149 #define Src2None (OpNone << Src2Shift)
150 #define Src2Mem (OpMem << Src2Shift)
151 #define Src2CL (OpCL << Src2Shift)
152 #define Src2ImmByte (OpImmByte << Src2Shift)
153 #define Src2One (OpOne << Src2Shift)
154 #define Src2Imm (OpImm << Src2Shift)
155 #define Src2ES (OpES << Src2Shift)
156 #define Src2CS (OpCS << Src2Shift)
157 #define Src2SS (OpSS << Src2Shift)
158 #define Src2DS (OpDS << Src2Shift)
159 #define Src2FS (OpFS << Src2Shift)
160 #define Src2GS (OpGS << Src2Shift)
161 #define Src2Mask (OpMask << Src2Shift)
162 #define Mmx ((u64)1 << 40) /* MMX Vector instruction */
163 #define AlignMask ((u64)7 << 41)
164 #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
165 #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
166 #define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
167 #define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
168 #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
169 #define NoWrite ((u64)1 << 45) /* No writeback */
170 #define SrcWrite ((u64)1 << 46) /* Write back src operand */
171 #define NoMod ((u64)1 << 47) /* Mod field is ignored */
172 #define Intercept ((u64)1 << 48) /* Has valid intercept field */
173 #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
174 #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
175 #define NearBranch ((u64)1 << 52) /* Near branches */
176 #define No16 ((u64)1 << 53) /* No 16 bit operand */
177 #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
178 #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */
179 #define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */
181 #define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
183 #define X2(x...) x, x
184 #define X3(x...) X2(x), x
185 #define X4(x...) X2(x), X2(x)
186 #define X5(x...) X4(x), x
187 #define X6(x...) X4(x), X2(x)
188 #define X7(x...) X4(x), X3(x)
189 #define X8(x...) X4(x), X4(x)
190 #define X16(x...) X8(x), X8(x)
192 #define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
193 #define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT))
200 int (*execute
)(struct x86_emulate_ctxt
*ctxt
);
201 const struct opcode
*group
;
202 const struct group_dual
*gdual
;
203 const struct gprefix
*gprefix
;
204 const struct escape
*esc
;
205 const struct instr_dual
*idual
;
206 const struct mode_dual
*mdual
;
207 void (*fastop
)(struct fastop
*fake
);
209 int (*check_perm
)(struct x86_emulate_ctxt
*ctxt
);
213 struct opcode mod012
[8];
214 struct opcode mod3
[8];
218 struct opcode pfx_no
;
219 struct opcode pfx_66
;
220 struct opcode pfx_f2
;
221 struct opcode pfx_f3
;
226 struct opcode high
[64];
230 struct opcode mod012
;
235 struct opcode mode32
;
236 struct opcode mode64
;
239 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
241 enum x86_transfer_type
{
243 X86_TRANSFER_CALL_JMP
,
245 X86_TRANSFER_TASK_SWITCH
,
/*
 * Read guest GPR @nr, filling the emulator's local register cache on
 * first use.  Bit @nr of ->regs_valid says whether ->_regs[nr] already
 * holds the value; only invalid entries are fetched via ->read_gpr().
 * NOTE(review): assumes @nr < width of the regs_valid bitmap (callers
 * pass VCPU_REGS_* indices) — confirm no caller can exceed it.
 */
static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	if (!(ctxt->regs_valid & (1 << nr))) {
		ctxt->regs_valid |= 1 << nr;
		ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
	}
	return ctxt->_regs[nr];
}
/*
 * Return a pointer for a full (blind) overwrite of GPR @nr.  The entry is
 * marked valid (the cache is now authoritative) and dirty (it must be
 * flushed by writeback_registers()); the current guest value is
 * deliberately NOT read first.
 */
static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	ctxt->regs_valid |= 1 << nr;
	ctxt->regs_dirty |= 1 << nr;
	return &ctxt->_regs[nr];
}
/*
 * Return a pointer for a read-modify-write of GPR @nr: first pull the
 * current value into the cache (reg_read), then mark it dirty
 * (reg_write) so the modified value is written back.
 */
static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	reg_read(ctxt, nr);
	return reg_write(ctxt, nr);
}
/* Flush every dirty cached GPR back to the vCPU via ->write_gpr(). */
static void writeback_registers(struct x86_emulate_ctxt *ctxt)
{
	unsigned reg;

	/* NOTE(review): the cast assumes regs_dirty is safely readable as
	 * an unsigned long for the bitmap walk — fine on x86, verify the
	 * field's declared type in kvm_emulate.h. */
	for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
		ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
}
278 static void invalidate_registers(struct x86_emulate_ctxt
*ctxt
)
280 ctxt
->regs_dirty
= 0;
281 ctxt
->regs_valid
= 0;
285 * These EFLAGS bits are restored from saved value during emulation, and
286 * any changes are written back to the saved value after emulation.
288 #define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
289 X86_EFLAGS_PF|X86_EFLAGS_CF)
298 * fastop functions have a special calling convention:
303 * flags: rflags (in/out)
304 * ex: rsi (in:fastop pointer, out:zero if exception)
306 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
307 * different operand sizes can be reached by calculation, rather than a jump
308 * table (which would be bigger than the code).
310 static int fastop(struct x86_emulate_ctxt
*ctxt
, fastop_t fop
);
312 #define __FOP_FUNC(name) \
313 ".align " __stringify(FASTOP_SIZE) " \n\t" \
314 ".type " name ", @function \n\t" \
318 #define FOP_FUNC(name) \
321 #define __FOP_RET(name) \
323 ".size " name ", .-" name "\n\t"
325 #define FOP_RET(name) \
328 #define FOP_START(op) \
329 extern void em_##op(struct fastop *fake); \
330 asm(".pushsection .text, \"ax\" \n\t" \
331 ".global em_" #op " \n\t" \
332 ".align " __stringify(FASTOP_SIZE) " \n\t" \
338 #define __FOPNOP(name) \
343 __FOPNOP(__stringify(__UNIQUE_ID(nop)))
345 #define FOP1E(op, dst) \
346 __FOP_FUNC(#op "_" #dst) \
347 "10: " #op " %" #dst " \n\t" \
348 __FOP_RET(#op "_" #dst)
350 #define FOP1EEX(op, dst) \
351 FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi)
353 #define FASTOP1(op) \
358 ON64(FOP1E(op##q, rax)) \
361 /* 1-operand, using src2 (for MUL/DIV r/m) */
362 #define FASTOP1SRC2(op, name) \
367 ON64(FOP1E(op, rcx)) \
370 /* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
371 #define FASTOP1SRC2EX(op, name) \
376 ON64(FOP1EEX(op, rcx)) \
379 #define FOP2E(op, dst, src) \
380 __FOP_FUNC(#op "_" #dst "_" #src) \
381 #op " %" #src ", %" #dst " \n\t" \
382 __FOP_RET(#op "_" #dst "_" #src)
384 #define FASTOP2(op) \
386 FOP2E(op##b, al, dl) \
387 FOP2E(op##w, ax, dx) \
388 FOP2E(op##l, eax, edx) \
389 ON64(FOP2E(op##q, rax, rdx)) \
392 /* 2 operand, word only */
393 #define FASTOP2W(op) \
396 FOP2E(op##w, ax, dx) \
397 FOP2E(op##l, eax, edx) \
398 ON64(FOP2E(op##q, rax, rdx)) \
401 /* 2 operand, src is CL */
402 #define FASTOP2CL(op) \
404 FOP2E(op##b, al, cl) \
405 FOP2E(op##w, ax, cl) \
406 FOP2E(op##l, eax, cl) \
407 ON64(FOP2E(op##q, rax, cl)) \
410 /* 2 operand, src and dest are reversed */
411 #define FASTOP2R(op, name) \
413 FOP2E(op##b, dl, al) \
414 FOP2E(op##w, dx, ax) \
415 FOP2E(op##l, edx, eax) \
416 ON64(FOP2E(op##q, rdx, rax)) \
419 #define FOP3E(op, dst, src, src2) \
420 __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
421 #op " %" #src2 ", %" #src ", %" #dst " \n\t"\
422 __FOP_RET(#op "_" #dst "_" #src "_" #src2)
424 /* 3-operand, word-only, src2=cl */
425 #define FASTOP3WCL(op) \
428 FOP3E(op##w, ax, dx, cl) \
429 FOP3E(op##l, eax, edx, cl) \
430 ON64(FOP3E(op##q, rax, rdx, cl)) \
433 /* Special case for SETcc - 1 instruction per cc */
436 * Depending on .config the SETcc functions look like:
438 * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT]
439 * SETcc %al [3 bytes]
441 * INT3 [1 byte; CONFIG_SLS]
443 * Which gives possible sizes 4, 5, 8 or 9. When rounded up to the
444 * next power-of-two alignment they become 4, 8 or 16 resp.
446 #define SETCC_LENGTH (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS))
447 #define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT)
448 static_assert(SETCC_LENGTH
<= SETCC_ALIGN
);
450 #define FOP_SETCC(op) \
451 ".align " __stringify(SETCC_ALIGN) " \n\t" \
452 ".type " #op ", @function \n\t" \
479 "pushf; sbb %al, %al; popf \n\t"
484 * XXX: inoutclob user must know where the argument is being expanded.
485 * Relying on CONFIG_CC_HAS_ASM_GOTO would allow us to remove _fault.
487 #define asm_safe(insn, inoutclob...) \
491 asm volatile("1:" insn "\n" \
493 _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[_fault]) \
494 : [_fault] "+r"(_fault) inoutclob ); \
496 _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
/*
 * Ask the vendor backend (SVM/VMX) whether the current instruction is
 * intercepted at @stage, packaging a snapshot of the decode state into a
 * struct x86_instruction_info for the ->intercept() callback.
 */
static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
				    enum x86_intercept intercept,
				    enum x86_intercept_stage stage)
{
	struct x86_instruction_info info = {
		.intercept  = intercept,
		.rep_prefix = ctxt->rep_prefix,
		.modrm_mod  = ctxt->modrm_mod,
		.modrm_reg  = ctxt->modrm_reg,
		.modrm_rm   = ctxt->modrm_rm,
		.src_val    = ctxt->src.val64,
		.dst_val    = ctxt->dst.val64,
		.src_bytes  = ctxt->src.bytes,
		.dst_bytes  = ctxt->dst.bytes,
		.ad_bytes   = ctxt->ad_bytes,
		.next_rip   = ctxt->eip,
	};

	return ctxt->ops->intercept(ctxt, &info, stage);
}
520 static void assign_masked(ulong
*dest
, ulong src
, ulong mask
)
522 *dest
= (*dest
& ~mask
) | (src
& mask
);
/*
 * Store @val into *@reg honouring x86 partial-register rules: 1- and
 * 2-byte writes leave the upper bytes intact, a 4-byte write zero-extends
 * into the upper half (64-bit-mode semantics), and an 8-byte write
 * replaces everything.
 */
static void assign_register(unsigned long *reg, u64 val, int bytes)
{
	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
	switch (bytes) {
	case 1:
		*(u8 *)reg = (u8)val;
		break;
	case 2:
		*(u16 *)reg = (u16)val;
		break;
	case 4:
		*reg = (u32)val;
		break;	/* 64b: zero-extend */
	case 8:
		*reg = val;
		break;
	}
}
/*
 * Address-size mask for the current instruction: 0xffff or 0xffffffff
 * for 2-/4-byte address sizes.  Callers must not use this for the
 * full-width case (ad_bytes == sizeof(long)) — the shift would be
 * undefined; address_mask() short-circuits that case.
 */
static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
{
	return (1UL << (ctxt->ad_bytes << 3)) - 1;
}
/*
 * Mask describing the stack-pointer width: full width in 64-bit mode,
 * otherwise 16 or 32 bits depending on the stack segment's default-size
 * bit (SS.d).
 */
static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
{
	u16 sel;
	struct desc_struct ss;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return ~0UL;
	ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
	return ~0U >> ((ss.d ^ 1) * 16);  /* d=0: 0xffff; d=1: 0xffffffff */
}
/* Stack operand size in bytes (2, 4 or 8), derived from stack_mask(). */
static int stack_size(struct x86_emulate_ctxt *ctxt)
{
	return (__fls(stack_mask(ctxt)) + 1) >> 3;
}
/* Access/update address held in a register, based on addressing mode. */
/*
 * Truncate @reg to the current address size; a full-width address is
 * returned unchanged (which also avoids the undefined full-width shift
 * inside ad_mask()).
 */
static inline unsigned long
address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
{
	if (ctxt->ad_bytes == sizeof(unsigned long))
		return reg;
	else
		return reg & ad_mask(ctxt);
}
/* Read register @reg and truncate it to the current address size. */
static inline unsigned long
register_address(struct x86_emulate_ctxt *ctxt, int reg)
{
	return address_mask(ctxt, reg_read(ctxt, reg));
}
/* Add @inc to *@reg, updating only the bits selected by @mask. */
static void masked_increment(ulong *reg, ulong mask, int inc)
{
	assign_masked(reg, *reg + inc, mask);
}
/*
 * Add @inc to register @reg, wrapping at the current address size so the
 * untouched upper bytes keep their old value (16/32-bit semantics).
 */
static void
register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
{
	ulong *preg = reg_rmw(ctxt, reg);

	assign_register(preg, *preg + inc, ctxt->ad_bytes);
}
/* Adjust RSP by @inc bytes, honouring the stack address width (SS.d). */
static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
{
	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}
/*
 * Effective segment limit in bytes: when the granularity bit is set the
 * 20-bit raw limit counts 4K pages, so scale it up and fill the low 12
 * bits.
 */
static u32 desc_limit_scaled(struct desc_struct *desc)
{
	u32 limit = get_desc_limit(desc);

	return desc->g ? (limit << 12) | 0xfff : limit;
}
/*
 * Cached base address of segment @seg.  In 64-bit mode the ES/CS/SS/DS
 * bases (segments below FS in VCPU_SREG_* order) are treated as zero;
 * only FS/GS keep a real base.
 */
static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
		return 0;

	return ctxt->ops->get_cached_segment_base(ctxt, seg);
}
/*
 * Record exception @vec (with @error code when @valid) in the emulation
 * context and return X86EMUL_PROPAGATE_FAULT so callers can bail out.
 */
static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
			     u32 error, bool valid)
{
	ctxt->exception.vector = vec;
	ctxt->exception.error_code = error;
	ctxt->exception.error_code_valid = valid;
	return X86EMUL_PROPAGATE_FAULT;
}
/* Queue a #DB (debug) exception; no error code. */
static int emulate_db(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DB_VECTOR, 0, false);
}
/* Queue a #GP (general protection) exception with error code @err. */
static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, GP_VECTOR, err, true);
}
/* Queue a #SS (stack segment) exception with error code @err. */
static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, SS_VECTOR, err, true);
}
/* Queue a #UD (invalid opcode) exception; no error code. */
static int emulate_ud(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, UD_VECTOR, 0, false);
}
/* Queue a #TS (invalid TSS) exception with error code @err. */
static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, TS_VECTOR, err, true);
}
/* Queue a #DE (divide error) exception; no error code. */
static int emulate_de(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DE_VECTOR, 0, false);
}
/* Queue a #NM (device not available) exception; no error code. */
static int emulate_nm(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, NM_VECTOR, 0, false);
}
/* Fetch only the selector of segment @seg; the descriptor is discarded. */
static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
	u16 selector;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
	return selector;
}
/*
 * Replace only the selector of segment @seg: read back the current
 * descriptor and base3 (the upper base half of 64-bit system segments),
 * then write them back unchanged together with the new @selector.
 */
static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
				 unsigned seg)
{
	u16 dummy;
	u32 base3;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}
/* Guest virtual-address width: 57 bits with CR4.LA57 (5-level paging),
 * otherwise 48. */
static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
{
	return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
}
/* True iff @la is non-canonical for the guest's virtual-address width. */
static inline bool emul_is_noncanonical_address(u64 la,
						struct x86_emulate_ctxt *ctxt)
{
	return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt));
}
691 * x86 defines three classes of vector instructions: explicitly
692 * aligned, explicitly unaligned, and the rest, which change behaviour
693 * depending on whether they're AVX encoded or not.
695 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
696 * subject to the same check. FXSAVE and FXRSTOR are checked here too as their
697 * 512 bytes of data must be aligned to a 16 byte boundary.
/*
 * Required alignment for a @size-byte access by the current instruction,
 * per its AlignMask decode flags: explicitly-unaligned and AVX forms need
 * none, Aligned16 forms need 16 bytes, explicitly-aligned forms need
 * natural (operand-size) alignment.  Accesses under 16 bytes are never
 * alignment-checked here.
 * NOTE(review): the switch body was elided in this copy and is
 * reconstructed from the upstream layout — verify against the tree.
 */
static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
{
	u64 alignment = ctxt->d & AlignMask;

	if (likely(size < 16))
		return 1;

	switch (alignment) {
	case Unaligned:
	case Avx:
		return 1;
	case Aligned16:
		return 16;
	case Aligned:
	default:
		return size;
	}
}
/*
 * Translate a segmented address into a linear address, enforcing
 * canonicality in 64-bit mode or segment usability/type/limit checks in
 * the other modes, plus the instruction's alignment requirement.  On
 * success *@linear is set and *@max_size receives how many bytes from
 * addr.ea are accessible, so callers (e.g. the instruction fetcher) can
 * batch accesses.  Failures raise #SS for SS-relative addresses and #GP
 * otherwise.
 * NOTE(review): several check lines were elided in this copy and are
 * reconstructed following the upstream structure — verify against the
 * tree.
 */
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
				       struct segmented_address addr,
				       unsigned *max_size, unsigned size,
				       bool write, bool fetch,
				       enum x86emul_mode mode, ulong *linear)
{
	struct desc_struct desc;
	bool usable;
	ulong la;
	u32 lim;
	u16 sel;
	u8  va_bits;

	la = seg_base(ctxt, addr.seg) + addr.ea;
	*max_size = 0;
	switch (mode) {
	case X86EMUL_MODE_PROT64:
		*linear = la;
		va_bits = ctxt_virt_addr_bits(ctxt);
		if (!__is_canonical_address(la, va_bits))
			goto bad;

		/* bytes remaining below the canonical boundary */
		*max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
		if (size > *max_size)
			goto bad;
		break;
	default:
		/* 16/32-bit modes: linear addresses wrap at 4G */
		*linear = la = (u32)la;
		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
						addr.seg);
		if (!usable)
			goto bad;
		/* code segment in protected mode or read-only data segment */
		if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
		     || !(desc.type & 2)) && write)
			goto bad;
		/* unreadable code segment */
		if (!fetch && (desc.type & 8) && !(desc.type & 2))
			goto bad;
		lim = desc_limit_scaled(&desc);
		if (!(desc.type & 8) && (desc.type & 4)) {
			/* expand-down segment: valid range is (lim, top] */
			if (addr.ea <= lim)
				goto bad;
			lim = desc.d ? 0xffffffff : 0xffff;
		}
		if (addr.ea > lim)
			goto bad;
		if (lim == 0xffffffff)
			*max_size = ~0u;
		else {
			*max_size = (u64)lim + 1 - addr.ea;
			if (size > *max_size)
				goto bad;
		}
		break;
	}
	if (la & (insn_alignment(ctxt, size) - 1))
		return emulate_gp(ctxt, 0);
	return X86EMUL_CONTINUE;
bad:
	if (addr.seg == VCPU_SREG_SS)
		return emulate_ss(ctxt, 0);
	else
		return emulate_gp(ctxt, 0);
}
/*
 * Common wrapper around __linearize() for data accesses in the current
 * CPU mode; the max_size result is not needed by these callers.
 */
static int linearize(struct x86_emulate_ctxt *ctxt,
		     struct segmented_address addr,
		     unsigned size, bool write,
		     ulong *linear)
{
	unsigned max_size;

	return __linearize(ctxt, addr, &max_size, size, write, false,
			   ctxt->mode, linear);
}
/*
 * Set the emulated RIP to @dst after validating it as a 1-byte fetch
 * from CS under @mode; @dst is first truncated to the instruction's
 * operand size (16/32-bit branch targets wrap).
 */
static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
			     enum x86emul_mode mode)
{
	ulong linear;
	int rc;
	unsigned max_size;
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
					   .ea = dst };

	if (ctxt->op_bytes != sizeof(unsigned long))
		addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
	rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
	if (rc == X86EMUL_CONTINUE)
		ctxt->_eip = addr.ea;
	return rc;
}
/* Near branch: validate and assign the new RIP in the current CPU mode. */
static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
	return assign_eip(ctxt, dst, ctxt->mode);
}
/*
 * Far transfer: compute the CPU mode implied by the new CS descriptor
 * (long mode if CS.L with EFER.LMA, else 16/32-bit per CS.d), validate
 * the target RIP under that mode, and commit the mode on success.
 * NOTE(review): elided lines (CS.L/EFER.LMA path) reconstructed from the
 * upstream layout — verify against the tree.
 */
static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
			  const struct desc_struct *cs_desc)
{
	enum x86emul_mode mode = ctxt->mode;
	int rc;

#ifdef CONFIG_X86_64
	if (ctxt->mode >= X86EMUL_MODE_PROT16) {
		if (cs_desc->l) {
			u64 efer = 0;

			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
			if (efer & EFER_LMA)
				mode = X86EMUL_MODE_PROT64;
		} else
			mode = X86EMUL_MODE_PROT32; /* temporary value */
	}
#endif
	if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
		mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
	rc = assign_eip(ctxt, dst, mode);
	if (rc == X86EMUL_CONTINUE)
		ctxt->mode = mode;
	return rc;
}
/* Relative jump: add @rel to the current _eip and validate the target. */
static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
{
	return assign_eip_near(ctxt, ctxt->_eip + rel);
}
/* Read from a linear address as a system (privileged) access. */
static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
			      void *data, unsigned size)
{
	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
}
/* Write to a linear address as a system (privileged) access. */
static int linear_write_system(struct x86_emulate_ctxt *ctxt,
			       ulong linear, void *data,
			       unsigned int size)
{
	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
}
/*
 * Linearize @addr and do an ordinary (non-system, current-CPL) read of
 * @size bytes into @data.
 */
static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
			      struct segmented_address addr,
			      void *data,
			      unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, false, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
}
/*
 * Linearize @addr (write access) and do an ordinary (non-system,
 * current-CPL) write of @size bytes from @data.
 */
static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
			       struct segmented_address addr,
			       void *data,
			       unsigned int size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
}
/*
 * Prefetch the remaining bytes of the instruction without crossing page
 * boundary if they are not in fetch_cache yet.
 */
static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
{
	int rc;
	unsigned size, max_size;
	unsigned long linear;
	int cur_size = ctxt->fetch.end - ctxt->fetch.data;
	/* fetch continues right after the bytes already cached */
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
					   .ea = ctxt->eip + cur_size };

	/*
	 * We do not know exactly how many bytes will be needed, and
	 * __linearize is expensive, so fetch as much as possible.  We
	 * just have to avoid going beyond the 15 byte limit, the end
	 * of the segment, or the end of the page.
	 *
	 * __linearize is called with size 0 so that it does not do any
	 * boundary check itself.  Instead, we use max_size to check
	 * against op_size.
	 */
	rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
			 &linear);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;

	/* 15UL ^ cur_size == 15 - cur_size for cur_size in [0, 15] */
	size = min_t(unsigned, 15UL ^ cur_size, max_size);
	size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));

	/*
	 * One instruction can only straddle two pages,
	 * and one has been loaded at the beginning of
	 * x86_decode_insn.  So, if not enough bytes
	 * still, we must have hit the 15-byte boundary.
	 */
	if (unlikely(size < op_size))
		return emulate_gp(ctxt, 0);

	rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
			      size, &ctxt->exception);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;
	ctxt->fetch.end += size;
	return X86EMUL_CONTINUE;
}
/*
 * Ensure at least @size instruction bytes are buffered: fast path when
 * the fetch cache already holds them, else top up via the slow path.
 */
static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
					       unsigned size)
{
	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;

	if (unlikely(done_size < size))
		return __do_insn_fetch_bytes(ctxt, size - done_size);
	else
		return X86EMUL_CONTINUE;
}
948 /* Fetch next part of the instruction being emulated. */
949 #define insn_fetch(_type, _ctxt) \
952 rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
953 if (rc != X86EMUL_CONTINUE) \
955 ctxt->_eip += sizeof(_type); \
956 memcpy(&_x, ctxt->fetch.ptr, sizeof(_type)); \
957 ctxt->fetch.ptr += sizeof(_type); \
961 #define insn_fetch_arr(_arr, _size, _ctxt) \
963 rc = do_insn_fetch_bytes(_ctxt, _size); \
964 if (rc != X86EMUL_CONTINUE) \
966 ctxt->_eip += (_size); \
967 memcpy(_arr, ctxt->fetch.ptr, _size); \
968 ctxt->fetch.ptr += (_size); \
/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
			     int byteop)
{
	void *p;
	/* AH..BH are only encodable when no REX prefix is present */
	int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;

	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
		/* high byte lives one byte above the low byte of rAX..rBX */
		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
	else
		p = reg_rmw(ctxt, modrm_reg);
	return p;
}
/*
 * Read a pseudo-descriptor as used by lgdt/lidt: a 16-bit limit followed
 * by a base whose width depends on the operand size.
 * NOTE(review): elided lines reconstructed from the upstream layout
 * (op_bytes 2 -> 3 adjustment, *address pre-clear) — verify against the
 * tree.
 */
static int read_descriptor(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   u16 *size, unsigned long *address, int op_bytes)
{
	int rc;

	if (op_bytes == 2)
		op_bytes = 3;	/* 16-bit form still loads a 24-bit base */
	*address = 0;
	rc = segmented_read_std(ctxt, addr, size, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	addr.ea += 2;
	rc = segmented_read_std(ctxt, addr, address, op_bytes);
	return rc;
}
1016 FASTOP1SRC2(mul
, mul_ex
);
1017 FASTOP1SRC2(imul
, imul_ex
);
1018 FASTOP1SRC2EX(div
, div_ex
);
1019 FASTOP1SRC2EX(idiv
, idiv_ex
);
1048 FASTOP2R(cmp
, cmp_r
);
/*
 * BSF wrapper implementing the "destination unmodified when source is
 * zero" behaviour: suppress writeback but still let fastop update flags.
 */
static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
{
	/* If src is zero, do not writeback, but update flags */
	if (ctxt->src.val == 0)
		ctxt->dst.type = OP_NONE;
	return fastop(ctxt, em_bsf);
}
/*
 * BSR wrapper implementing the "destination unmodified when source is
 * zero" behaviour: suppress writeback but still let fastop update flags.
 */
static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
{
	/* If src is zero, do not writeback, but update flags */
	if (ctxt->src.val == 0)
		ctxt->dst.type = OP_NONE;
	return fastop(ctxt, em_bsr);
}
/*
 * Evaluate condition code @condition (low 4 bits) against @flags by
 * loading @flags into RFLAGS and dispatching into the em_setcc stub
 * table, where each SETcc stub is SETCC_ALIGN bytes apart.  Only the
 * EFLAGS_MASK bits of @flags are used, and IF is forced on so the popf
 * cannot leave host interrupts disabled.
 */
static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
{
	u8 rc;
	void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf);

	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
	asm("push %[flags]; popf; " CALL_NOSPEC
	    : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
	return rc;
}
/*
 * Load op->val from the register pointed to by op->addr.reg, reading
 * exactly op->bytes bytes (1/2/4/8).
 */
static void fetch_register_operand(struct operand *op)
{
	switch (op->bytes) {
	case 1:
		op->val = *(u8 *)op->addr.reg;
		break;
	case 2:
		op->val = *(u16 *)op->addr.reg;
		break;
	case 4:
		op->val = *(u32 *)op->addr.reg;
		break;
	case 8:
		op->val = *(u64 *)op->addr.reg;
		break;
	}
}
/*
 * FNINIT: reset the FPU.  Raises #NM if CR0.TS or CR0.EM is set,
 * mirroring hardware behaviour.
 * NOTE(review): upstream brackets the fninit with kvm_fpu_get()/
 * kvm_fpu_put(); those lines were elided in this copy — confirm against
 * the tree.
 */
static int em_fninit(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	kvm_fpu_get();
	asm volatile("fninit");
	kvm_fpu_put();
	return X86EMUL_CONTINUE;
}
/*
 * FNSTCW: store the FPU control word into the destination operand.
 * Raises #NM if CR0.TS or CR0.EM is set.
 */
static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
{
	u16 fcw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	kvm_fpu_get();
	asm volatile("fnstcw %0": "+m"(fcw));
	kvm_fpu_put();

	ctxt->dst.val = fcw;

	return X86EMUL_CONTINUE;
}
/*
 * FNSTSW: store the FPU status word into the destination operand.
 * Raises #NM if CR0.TS or CR0.EM is set.
 */
static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
{
	u16 fsw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	kvm_fpu_get();
	asm volatile("fnstsw %0": "+m"(fsw));
	kvm_fpu_put();

	ctxt->dst.val = fsw;

	return X86EMUL_CONTINUE;
}
/*
 * Decode the register operand selected by ModRM.reg (or, for opcodes
 * without ModRM, by the low 3 opcode bits plus REX.B) into @op, reading
 * GPR, XMM or MM state as dictated by the decode flags.
 * NOTE(review): elided lines (Sse/Mmx operand setup) reconstructed from
 * the upstream layout — verify against the tree.
 */
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
				    struct operand *op)
{
	unsigned reg = ctxt->modrm_reg;

	if (!(ctxt->d & ModRM))
		reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);

	if (ctxt->d & Sse) {
		op->type = OP_XMM;
		op->bytes = 16;
		op->addr.xmm = reg;
		kvm_read_sse_reg(reg, &op->vec_val);
		return;
	}
	if (ctxt->d & Mmx) {
		reg &= 7;
		op->type = OP_MM;
		op->bytes = 8;
		op->addr.mm = reg;
		return;
	}

	op->type = OP_REG;
	op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);

	fetch_register_operand(op);
	op->orig_val = op->val;
}
/*
 * BP- and SP-based memory operands default to the stack segment instead
 * of DS.
 */
static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
{
	if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
		ctxt->modrm_seg = VCPU_SREG_SS;
}
/*
 * Decode the ModRM byte (and optional SIB byte and displacement) into
 * @op: either a register operand (mod == 3 or NoMod), or a memory
 * operand whose effective address follows the 16-bit or 32/64-bit
 * addressing rules.  Side effects: fills ctxt->modrm_mod/reg/rm,
 * modrm_seg and, for RIP-relative operands, ctxt->rip_relative.
 * insn_fetch() jumps to the 'done' label on fetch failure.
 * NOTE(review): this copy arrived with many lines elided; the body
 * follows the canonical upstream structure — verify against the tree.
 */
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
			struct operand *op)
{
	u8 sib;
	int index_reg, base_reg, scale;
	int rc = X86EMUL_CONTINUE;
	ulong modrm_ea = 0;

	ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
	index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
	base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */

	ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
	ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
	ctxt->modrm_seg = VCPU_SREG_DS;

	if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
		/* register operand (or Mod field ignored) */
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
				ctxt->d & ByteOp);
		if (ctxt->d & Sse) {
			op->type = OP_XMM;
			op->bytes = 16;
			op->addr.xmm = ctxt->modrm_rm;
			kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val);
			return rc;
		}
		if (ctxt->d & Mmx) {
			op->type = OP_MM;
			op->bytes = 8;
			op->addr.mm = ctxt->modrm_rm & 7;
			return rc;
		}
		fetch_register_operand(op);
		return rc;
	}

	op->type = OP_MEM;

	if (ctxt->ad_bytes == 2) {
		unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
		unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
		unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
		unsigned di = reg_read(ctxt, VCPU_REGS_RDI);

		/* 16-bit ModR/M decode. */
		switch (ctxt->modrm_mod) {
		case 0:
			if (ctxt->modrm_rm == 6)
				modrm_ea += insn_fetch(u16, ctxt);
			break;
		case 1:
			modrm_ea += insn_fetch(s8, ctxt);
			break;
		case 2:
			modrm_ea += insn_fetch(u16, ctxt);
			break;
		}
		switch (ctxt->modrm_rm) {
		case 0:
			modrm_ea += bx + si;
			break;
		case 1:
			modrm_ea += bx + di;
			break;
		case 2:
			modrm_ea += bp + si;
			break;
		case 3:
			modrm_ea += bp + di;
			break;
		case 4:
			modrm_ea += si;
			break;
		case 5:
			modrm_ea += di;
			break;
		case 6:
			if (ctxt->modrm_mod != 0)
				modrm_ea += bp;
			break;
		case 7:
			modrm_ea += bx;
			break;
		}
		/* BP-based forms default to SS */
		if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
		    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
			ctxt->modrm_seg = VCPU_SREG_SS;
		modrm_ea = (u16)modrm_ea;
	} else {
		/* 32/64-bit ModR/M decode. */
		if ((ctxt->modrm_rm & 7) == 4) {
			/* SIB byte follows */
			sib = insn_fetch(u8, ctxt);
			index_reg |= (sib >> 3) & 7;
			base_reg |= sib & 7;
			scale = sib >> 6;

			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
				/* no base, disp32 instead */
				modrm_ea += insn_fetch(s32, ctxt);
			else {
				modrm_ea += reg_read(ctxt, base_reg);
				adjust_modrm_seg(ctxt, base_reg);
				/* Increment ESP on POP [ESP] */
				if ((ctxt->d & IncSP) &&
				    base_reg == VCPU_REGS_RSP)
					modrm_ea += ctxt->op_bytes;
			}
			if (index_reg != 4)	/* index 4 means "none" */
				modrm_ea += reg_read(ctxt, index_reg) << scale;
		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
			/* disp32, RIP-relative in 64-bit mode */
			modrm_ea += insn_fetch(s32, ctxt);
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				ctxt->rip_relative = 1;
		} else {
			base_reg = ctxt->modrm_rm;
			modrm_ea += reg_read(ctxt, base_reg);
			adjust_modrm_seg(ctxt, base_reg);
		}
		switch (ctxt->modrm_mod) {
		case 1:
			modrm_ea += insn_fetch(s8, ctxt);
			break;
		case 2:
			modrm_ea += insn_fetch(s32, ctxt);
			break;
		}
	}
	op->addr.mem.ea = modrm_ea;
	if (ctxt->ad_bytes != 8)
		ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;

done:
	return rc;
}
/*
 * Decode a moffs-style absolute memory operand (e.g. MOV AL, moffs8):
 * the effective address is an immediate of the current address size.
 * insn_fetch() jumps to 'done' on fetch failure.
 */
static int decode_abs(struct x86_emulate_ctxt *ctxt,
		      struct operand *op)
{
	int rc = X86EMUL_CONTINUE;

	op->type = OP_MEM;
	switch (ctxt->ad_bytes) {
	case 2:
		op->addr.mem.ea = insn_fetch(u16, ctxt);
		break;
	case 4:
		op->addr.mem.ea = insn_fetch(u32, ctxt);
		break;
	case 8:
		op->addr.mem.ea = insn_fetch(u64, ctxt);
		break;
	}
done:
	return rc;
}
/*
 * For bit instructions (BT/BTS/BTR/BTC) with a register bit index and a
 * memory destination, the index may address memory outside the operand:
 * fold the word-aligned part of the (sign-extended) index into the
 * effective address, then reduce the source to a sub-word bit offset.
 */
static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
{
	long sv = 0, mask;

	if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
		/* mask selects everything except the in-word bit offset */
		mask = ~((long)ctxt->dst.bytes * 8 - 1);

		if (ctxt->src.bytes == 2)
			sv = (s16)ctxt->src.val & (s16)mask;
		else if (ctxt->src.bytes == 4)
			sv = (s32)ctxt->src.val & (s32)mask;
		else
			sv = (s64)ctxt->src.val & (s64)mask;

		ctxt->dst.addr.mem.ea = address_mask(ctxt,
					ctxt->dst.addr.mem.ea + (sv >> 3));
	}

	/* only subword offset */
	ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
}
/*
 * Read @size bytes at linear address @addr through the per-instruction
 * read cache so that a re-entered instruction (e.g. after an exit)
 * observes the same data instead of re-reading memory/MMIO.
 */
static int read_emulated(struct x86_emulate_ctxt *ctxt,
			 unsigned long addr, void *dest, unsigned size)
{
	int rc;
	struct read_cache *mc = &ctxt->mem_read;

	if (mc->pos < mc->end)
		goto read_cached;

	WARN_ON((mc->end + size) >= sizeof(mc->data));

	rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
				      &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	mc->end += size;

read_cached:
	memcpy(dest, mc->data + mc->pos, size);
	mc->pos += size;
	return X86EMUL_CONTINUE;
}
1379 static int segmented_read(struct x86_emulate_ctxt
*ctxt
,
1380 struct segmented_address addr
,
1387 rc
= linearize(ctxt
, addr
, size
, false, &linear
);
1388 if (rc
!= X86EMUL_CONTINUE
)
1390 return read_emulated(ctxt
, linear
, data
, size
);
1393 static int segmented_write(struct x86_emulate_ctxt
*ctxt
,
1394 struct segmented_address addr
,
1401 rc
= linearize(ctxt
, addr
, size
, true, &linear
);
1402 if (rc
!= X86EMUL_CONTINUE
)
1404 return ctxt
->ops
->write_emulated(ctxt
, linear
, data
, size
,
1408 static int segmented_cmpxchg(struct x86_emulate_ctxt
*ctxt
,
1409 struct segmented_address addr
,
1410 const void *orig_data
, const void *data
,
1416 rc
= linearize(ctxt
, addr
, size
, true, &linear
);
1417 if (rc
!= X86EMUL_CONTINUE
)
1419 return ctxt
->ops
->cmpxchg_emulated(ctxt
, linear
, orig_data
, data
,
1420 size
, &ctxt
->exception
);
1423 static int pio_in_emulated(struct x86_emulate_ctxt
*ctxt
,
1424 unsigned int size
, unsigned short port
,
1427 struct read_cache
*rc
= &ctxt
->io_read
;
1429 if (rc
->pos
== rc
->end
) { /* refill pio read ahead */
1430 unsigned int in_page
, n
;
1431 unsigned int count
= ctxt
->rep_prefix
?
1432 address_mask(ctxt
, reg_read(ctxt
, VCPU_REGS_RCX
)) : 1;
1433 in_page
= (ctxt
->eflags
& X86_EFLAGS_DF
) ?
1434 offset_in_page(reg_read(ctxt
, VCPU_REGS_RDI
)) :
1435 PAGE_SIZE
- offset_in_page(reg_read(ctxt
, VCPU_REGS_RDI
));
1436 n
= min3(in_page
, (unsigned int)sizeof(rc
->data
) / size
, count
);
1439 rc
->pos
= rc
->end
= 0;
1440 if (!ctxt
->ops
->pio_in_emulated(ctxt
, size
, port
, rc
->data
, n
))
1445 if (ctxt
->rep_prefix
&& (ctxt
->d
& String
) &&
1446 !(ctxt
->eflags
& X86_EFLAGS_DF
)) {
1447 ctxt
->dst
.data
= rc
->data
+ rc
->pos
;
1448 ctxt
->dst
.type
= OP_MEM_STR
;
1449 ctxt
->dst
.count
= (rc
->end
- rc
->pos
) / size
;
1452 memcpy(dest
, rc
->data
+ rc
->pos
, size
);
1458 static int read_interrupt_descriptor(struct x86_emulate_ctxt
*ctxt
,
1459 u16 index
, struct desc_struct
*desc
)
1464 ctxt
->ops
->get_idt(ctxt
, &dt
);
1466 if (dt
.size
< index
* 8 + 7)
1467 return emulate_gp(ctxt
, index
<< 3 | 0x2);
1469 addr
= dt
.address
+ index
* 8;
1470 return linear_read_system(ctxt
, addr
, desc
, sizeof(*desc
));
1473 static void get_descriptor_table_ptr(struct x86_emulate_ctxt
*ctxt
,
1474 u16 selector
, struct desc_ptr
*dt
)
1476 const struct x86_emulate_ops
*ops
= ctxt
->ops
;
1479 if (selector
& 1 << 2) {
1480 struct desc_struct desc
;
1483 memset(dt
, 0, sizeof(*dt
));
1484 if (!ops
->get_segment(ctxt
, &sel
, &desc
, &base3
,
1488 dt
->size
= desc_limit_scaled(&desc
); /* what if limit > 65535? */
1489 dt
->address
= get_desc_base(&desc
) | ((u64
)base3
<< 32);
1491 ops
->get_gdt(ctxt
, dt
);
1494 static int get_descriptor_ptr(struct x86_emulate_ctxt
*ctxt
,
1495 u16 selector
, ulong
*desc_addr_p
)
1498 u16 index
= selector
>> 3;
1501 get_descriptor_table_ptr(ctxt
, selector
, &dt
);
1503 if (dt
.size
< index
* 8 + 7)
1504 return emulate_gp(ctxt
, selector
& 0xfffc);
1506 addr
= dt
.address
+ index
* 8;
1508 #ifdef CONFIG_X86_64
1509 if (addr
>> 32 != 0) {
1512 ctxt
->ops
->get_msr(ctxt
, MSR_EFER
, &efer
);
1513 if (!(efer
& EFER_LMA
))
1518 *desc_addr_p
= addr
;
1519 return X86EMUL_CONTINUE
;
1522 /* allowed just for 8 bytes segments */
1523 static int read_segment_descriptor(struct x86_emulate_ctxt
*ctxt
,
1524 u16 selector
, struct desc_struct
*desc
,
1529 rc
= get_descriptor_ptr(ctxt
, selector
, desc_addr_p
);
1530 if (rc
!= X86EMUL_CONTINUE
)
1533 return linear_read_system(ctxt
, *desc_addr_p
, desc
, sizeof(*desc
));
1536 /* allowed just for 8 bytes segments */
1537 static int write_segment_descriptor(struct x86_emulate_ctxt
*ctxt
,
1538 u16 selector
, struct desc_struct
*desc
)
1543 rc
= get_descriptor_ptr(ctxt
, selector
, &addr
);
1544 if (rc
!= X86EMUL_CONTINUE
)
1547 return linear_write_system(ctxt
, addr
, desc
, sizeof(*desc
));
1550 static int __load_segment_descriptor(struct x86_emulate_ctxt
*ctxt
,
1551 u16 selector
, int seg
, u8 cpl
,
1552 enum x86_transfer_type transfer
,
1553 struct desc_struct
*desc
)
1555 struct desc_struct seg_desc
, old_desc
;
1557 unsigned err_vec
= GP_VECTOR
;
1559 bool null_selector
= !(selector
& ~0x3); /* 0000-0003 are null */
1565 memset(&seg_desc
, 0, sizeof(seg_desc
));
1567 if (ctxt
->mode
== X86EMUL_MODE_REAL
) {
1568 /* set real mode segment descriptor (keep limit etc. for
1570 ctxt
->ops
->get_segment(ctxt
, &dummy
, &seg_desc
, NULL
, seg
);
1571 set_desc_base(&seg_desc
, selector
<< 4);
1573 } else if (seg
<= VCPU_SREG_GS
&& ctxt
->mode
== X86EMUL_MODE_VM86
) {
1574 /* VM86 needs a clean new segment descriptor */
1575 set_desc_base(&seg_desc
, selector
<< 4);
1576 set_desc_limit(&seg_desc
, 0xffff);
1586 /* TR should be in GDT only */
1587 if (seg
== VCPU_SREG_TR
&& (selector
& (1 << 2)))
1590 /* NULL selector is not valid for TR, CS and (except for long mode) SS */
1591 if (null_selector
) {
1592 if (seg
== VCPU_SREG_CS
|| seg
== VCPU_SREG_TR
)
1595 if (seg
== VCPU_SREG_SS
) {
1596 if (ctxt
->mode
!= X86EMUL_MODE_PROT64
|| rpl
!= cpl
)
1600 * ctxt->ops->set_segment expects the CPL to be in
1601 * SS.DPL, so fake an expand-up 32-bit data segment.
1611 /* Skip all following checks */
1615 ret
= read_segment_descriptor(ctxt
, selector
, &seg_desc
, &desc_addr
);
1616 if (ret
!= X86EMUL_CONTINUE
)
1619 err_code
= selector
& 0xfffc;
1620 err_vec
= (transfer
== X86_TRANSFER_TASK_SWITCH
) ? TS_VECTOR
:
1623 /* can't load system descriptor into segment selector */
1624 if (seg
<= VCPU_SREG_GS
&& !seg_desc
.s
) {
1625 if (transfer
== X86_TRANSFER_CALL_JMP
)
1626 return X86EMUL_UNHANDLEABLE
;
1635 * segment is not a writable data segment or segment
1636 * selector's RPL != CPL or segment selector's RPL != CPL
1638 if (rpl
!= cpl
|| (seg_desc
.type
& 0xa) != 0x2 || dpl
!= cpl
)
1642 if (!(seg_desc
.type
& 8))
1645 if (transfer
== X86_TRANSFER_RET
) {
1646 /* RET can never return to an inner privilege level. */
1649 /* Outer-privilege level return is not implemented */
1651 return X86EMUL_UNHANDLEABLE
;
1653 if (transfer
== X86_TRANSFER_RET
|| transfer
== X86_TRANSFER_TASK_SWITCH
) {
1654 if (seg_desc
.type
& 4) {
1663 } else { /* X86_TRANSFER_CALL_JMP */
1664 if (seg_desc
.type
& 4) {
1670 if (rpl
> cpl
|| dpl
!= cpl
)
1674 /* in long-mode d/b must be clear if l is set */
1675 if (seg_desc
.d
&& seg_desc
.l
) {
1678 ctxt
->ops
->get_msr(ctxt
, MSR_EFER
, &efer
);
1679 if (efer
& EFER_LMA
)
1683 /* CS(RPL) <- CPL */
1684 selector
= (selector
& 0xfffc) | cpl
;
1687 if (seg_desc
.s
|| (seg_desc
.type
!= 1 && seg_desc
.type
!= 9))
1690 err_vec
= NP_VECTOR
;
1693 old_desc
= seg_desc
;
1694 seg_desc
.type
|= 2; /* busy */
1695 ret
= ctxt
->ops
->cmpxchg_emulated(ctxt
, desc_addr
, &old_desc
, &seg_desc
,
1696 sizeof(seg_desc
), &ctxt
->exception
);
1697 if (ret
!= X86EMUL_CONTINUE
)
1700 case VCPU_SREG_LDTR
:
1701 if (seg_desc
.s
|| seg_desc
.type
!= 2)
1704 default: /* DS, ES, FS, or GS */
1706 * segment is not a data or readable code segment or
1707 * ((segment is a data or nonconforming code segment)
1708 * and (both RPL and CPL > DPL))
1710 if ((seg_desc
.type
& 0xa) == 0x8 ||
1711 (((seg_desc
.type
& 0xc) != 0xc) &&
1712 (rpl
> dpl
&& cpl
> dpl
)))
1718 err_vec
= (seg
== VCPU_SREG_SS
) ? SS_VECTOR
: NP_VECTOR
;
1723 /* mark segment as accessed */
1724 if (!(seg_desc
.type
& 1)) {
1726 ret
= write_segment_descriptor(ctxt
, selector
,
1728 if (ret
!= X86EMUL_CONTINUE
)
1731 } else if (ctxt
->mode
== X86EMUL_MODE_PROT64
) {
1732 ret
= linear_read_system(ctxt
, desc_addr
+8, &base3
, sizeof(base3
));
1733 if (ret
!= X86EMUL_CONTINUE
)
1735 if (emul_is_noncanonical_address(get_desc_base(&seg_desc
) |
1736 ((u64
)base3
<< 32), ctxt
))
1737 return emulate_gp(ctxt
, 0);
1740 ctxt
->ops
->set_segment(ctxt
, selector
, &seg_desc
, base3
, seg
);
1743 return X86EMUL_CONTINUE
;
1745 return emulate_exception(ctxt
, err_vec
, err_code
, true);
1748 static int load_segment_descriptor(struct x86_emulate_ctxt
*ctxt
,
1749 u16 selector
, int seg
)
1751 u8 cpl
= ctxt
->ops
->cpl(ctxt
);
1754 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
1755 * they can load it at CPL<3 (Intel's manual says only LSS can,
1758 * However, the Intel manual says that putting IST=1/DPL=3 in
1759 * an interrupt gate will result in SS=3 (the AMD manual instead
1760 * says it doesn't), so allow SS=3 in __load_segment_descriptor
1761 * and only forbid it here.
1763 if (seg
== VCPU_SREG_SS
&& selector
== 3 &&
1764 ctxt
->mode
== X86EMUL_MODE_PROT64
)
1765 return emulate_exception(ctxt
, GP_VECTOR
, 0, true);
1767 return __load_segment_descriptor(ctxt
, selector
, seg
, cpl
,
1768 X86_TRANSFER_NONE
, NULL
);
1771 static void write_register_operand(struct operand
*op
)
1773 return assign_register(op
->addr
.reg
, op
->val
, op
->bytes
);
1776 static int writeback(struct x86_emulate_ctxt
*ctxt
, struct operand
*op
)
1780 write_register_operand(op
);
1783 if (ctxt
->lock_prefix
)
1784 return segmented_cmpxchg(ctxt
,
1790 return segmented_write(ctxt
,
1796 return segmented_write(ctxt
,
1799 op
->bytes
* op
->count
);
1802 kvm_write_sse_reg(op
->addr
.xmm
, &op
->vec_val
);
1805 kvm_write_mmx_reg(op
->addr
.mm
, &op
->mm_val
);
1813 return X86EMUL_CONTINUE
;
1816 static int push(struct x86_emulate_ctxt
*ctxt
, void *data
, int bytes
)
1818 struct segmented_address addr
;
1820 rsp_increment(ctxt
, -bytes
);
1821 addr
.ea
= reg_read(ctxt
, VCPU_REGS_RSP
) & stack_mask(ctxt
);
1822 addr
.seg
= VCPU_SREG_SS
;
1824 return segmented_write(ctxt
, addr
, data
, bytes
);
1827 static int em_push(struct x86_emulate_ctxt
*ctxt
)
1829 /* Disable writeback. */
1830 ctxt
->dst
.type
= OP_NONE
;
1831 return push(ctxt
, &ctxt
->src
.val
, ctxt
->op_bytes
);
1834 static int emulate_pop(struct x86_emulate_ctxt
*ctxt
,
1835 void *dest
, int len
)
1838 struct segmented_address addr
;
1840 addr
.ea
= reg_read(ctxt
, VCPU_REGS_RSP
) & stack_mask(ctxt
);
1841 addr
.seg
= VCPU_SREG_SS
;
1842 rc
= segmented_read(ctxt
, addr
, dest
, len
);
1843 if (rc
!= X86EMUL_CONTINUE
)
1846 rsp_increment(ctxt
, len
);
1850 static int em_pop(struct x86_emulate_ctxt
*ctxt
)
1852 return emulate_pop(ctxt
, &ctxt
->dst
.val
, ctxt
->op_bytes
);
1855 static int emulate_popf(struct x86_emulate_ctxt
*ctxt
,
1856 void *dest
, int len
)
1859 unsigned long val
, change_mask
;
1860 int iopl
= (ctxt
->eflags
& X86_EFLAGS_IOPL
) >> X86_EFLAGS_IOPL_BIT
;
1861 int cpl
= ctxt
->ops
->cpl(ctxt
);
1863 rc
= emulate_pop(ctxt
, &val
, len
);
1864 if (rc
!= X86EMUL_CONTINUE
)
1867 change_mask
= X86_EFLAGS_CF
| X86_EFLAGS_PF
| X86_EFLAGS_AF
|
1868 X86_EFLAGS_ZF
| X86_EFLAGS_SF
| X86_EFLAGS_OF
|
1869 X86_EFLAGS_TF
| X86_EFLAGS_DF
| X86_EFLAGS_NT
|
1870 X86_EFLAGS_AC
| X86_EFLAGS_ID
;
1872 switch(ctxt
->mode
) {
1873 case X86EMUL_MODE_PROT64
:
1874 case X86EMUL_MODE_PROT32
:
1875 case X86EMUL_MODE_PROT16
:
1877 change_mask
|= X86_EFLAGS_IOPL
;
1879 change_mask
|= X86_EFLAGS_IF
;
1881 case X86EMUL_MODE_VM86
:
1883 return emulate_gp(ctxt
, 0);
1884 change_mask
|= X86_EFLAGS_IF
;
1886 default: /* real mode */
1887 change_mask
|= (X86_EFLAGS_IOPL
| X86_EFLAGS_IF
);
1891 *(unsigned long *)dest
=
1892 (ctxt
->eflags
& ~change_mask
) | (val
& change_mask
);
1897 static int em_popf(struct x86_emulate_ctxt
*ctxt
)
1899 ctxt
->dst
.type
= OP_REG
;
1900 ctxt
->dst
.addr
.reg
= &ctxt
->eflags
;
1901 ctxt
->dst
.bytes
= ctxt
->op_bytes
;
1902 return emulate_popf(ctxt
, &ctxt
->dst
.val
, ctxt
->op_bytes
);
1905 static int em_enter(struct x86_emulate_ctxt
*ctxt
)
1908 unsigned frame_size
= ctxt
->src
.val
;
1909 unsigned nesting_level
= ctxt
->src2
.val
& 31;
1913 return X86EMUL_UNHANDLEABLE
;
1915 rbp
= reg_read(ctxt
, VCPU_REGS_RBP
);
1916 rc
= push(ctxt
, &rbp
, stack_size(ctxt
));
1917 if (rc
!= X86EMUL_CONTINUE
)
1919 assign_masked(reg_rmw(ctxt
, VCPU_REGS_RBP
), reg_read(ctxt
, VCPU_REGS_RSP
),
1921 assign_masked(reg_rmw(ctxt
, VCPU_REGS_RSP
),
1922 reg_read(ctxt
, VCPU_REGS_RSP
) - frame_size
,
1924 return X86EMUL_CONTINUE
;
1927 static int em_leave(struct x86_emulate_ctxt
*ctxt
)
1929 assign_masked(reg_rmw(ctxt
, VCPU_REGS_RSP
), reg_read(ctxt
, VCPU_REGS_RBP
),
1931 return emulate_pop(ctxt
, reg_rmw(ctxt
, VCPU_REGS_RBP
), ctxt
->op_bytes
);
1934 static int em_push_sreg(struct x86_emulate_ctxt
*ctxt
)
1936 int seg
= ctxt
->src2
.val
;
1938 ctxt
->src
.val
= get_segment_selector(ctxt
, seg
);
1939 if (ctxt
->op_bytes
== 4) {
1940 rsp_increment(ctxt
, -2);
1944 return em_push(ctxt
);
1947 static int em_pop_sreg(struct x86_emulate_ctxt
*ctxt
)
1949 int seg
= ctxt
->src2
.val
;
1950 unsigned long selector
;
1953 rc
= emulate_pop(ctxt
, &selector
, 2);
1954 if (rc
!= X86EMUL_CONTINUE
)
1957 if (ctxt
->modrm_reg
== VCPU_SREG_SS
)
1958 ctxt
->interruptibility
= KVM_X86_SHADOW_INT_MOV_SS
;
1959 if (ctxt
->op_bytes
> 2)
1960 rsp_increment(ctxt
, ctxt
->op_bytes
- 2);
1962 rc
= load_segment_descriptor(ctxt
, (u16
)selector
, seg
);
1966 static int em_pusha(struct x86_emulate_ctxt
*ctxt
)
1968 unsigned long old_esp
= reg_read(ctxt
, VCPU_REGS_RSP
);
1969 int rc
= X86EMUL_CONTINUE
;
1970 int reg
= VCPU_REGS_RAX
;
1972 while (reg
<= VCPU_REGS_RDI
) {
1973 (reg
== VCPU_REGS_RSP
) ?
1974 (ctxt
->src
.val
= old_esp
) : (ctxt
->src
.val
= reg_read(ctxt
, reg
));
1977 if (rc
!= X86EMUL_CONTINUE
)
1986 static int em_pushf(struct x86_emulate_ctxt
*ctxt
)
1988 ctxt
->src
.val
= (unsigned long)ctxt
->eflags
& ~X86_EFLAGS_VM
;
1989 return em_push(ctxt
);
1992 static int em_popa(struct x86_emulate_ctxt
*ctxt
)
1994 int rc
= X86EMUL_CONTINUE
;
1995 int reg
= VCPU_REGS_RDI
;
1998 while (reg
>= VCPU_REGS_RAX
) {
1999 if (reg
== VCPU_REGS_RSP
) {
2000 rsp_increment(ctxt
, ctxt
->op_bytes
);
2004 rc
= emulate_pop(ctxt
, &val
, ctxt
->op_bytes
);
2005 if (rc
!= X86EMUL_CONTINUE
)
2007 assign_register(reg_rmw(ctxt
, reg
), val
, ctxt
->op_bytes
);
2013 static int __emulate_int_real(struct x86_emulate_ctxt
*ctxt
, int irq
)
2015 const struct x86_emulate_ops
*ops
= ctxt
->ops
;
2022 /* TODO: Add limit checks */
2023 ctxt
->src
.val
= ctxt
->eflags
;
2025 if (rc
!= X86EMUL_CONTINUE
)
2028 ctxt
->eflags
&= ~(X86_EFLAGS_IF
| X86_EFLAGS_TF
| X86_EFLAGS_AC
);
2030 ctxt
->src
.val
= get_segment_selector(ctxt
, VCPU_SREG_CS
);
2032 if (rc
!= X86EMUL_CONTINUE
)
2035 ctxt
->src
.val
= ctxt
->_eip
;
2037 if (rc
!= X86EMUL_CONTINUE
)
2040 ops
->get_idt(ctxt
, &dt
);
2042 eip_addr
= dt
.address
+ (irq
<< 2);
2043 cs_addr
= dt
.address
+ (irq
<< 2) + 2;
2045 rc
= linear_read_system(ctxt
, cs_addr
, &cs
, 2);
2046 if (rc
!= X86EMUL_CONTINUE
)
2049 rc
= linear_read_system(ctxt
, eip_addr
, &eip
, 2);
2050 if (rc
!= X86EMUL_CONTINUE
)
2053 rc
= load_segment_descriptor(ctxt
, cs
, VCPU_SREG_CS
);
2054 if (rc
!= X86EMUL_CONTINUE
)
2062 int emulate_int_real(struct x86_emulate_ctxt
*ctxt
, int irq
)
2066 invalidate_registers(ctxt
);
2067 rc
= __emulate_int_real(ctxt
, irq
);
2068 if (rc
== X86EMUL_CONTINUE
)
2069 writeback_registers(ctxt
);
2073 static int emulate_int(struct x86_emulate_ctxt
*ctxt
, int irq
)
2075 switch(ctxt
->mode
) {
2076 case X86EMUL_MODE_REAL
:
2077 return __emulate_int_real(ctxt
, irq
);
2078 case X86EMUL_MODE_VM86
:
2079 case X86EMUL_MODE_PROT16
:
2080 case X86EMUL_MODE_PROT32
:
2081 case X86EMUL_MODE_PROT64
:
2083 /* Protected mode interrupts unimplemented yet */
2084 return X86EMUL_UNHANDLEABLE
;
2088 static int emulate_iret_real(struct x86_emulate_ctxt
*ctxt
)
2090 int rc
= X86EMUL_CONTINUE
;
2091 unsigned long temp_eip
= 0;
2092 unsigned long temp_eflags
= 0;
2093 unsigned long cs
= 0;
2094 unsigned long mask
= X86_EFLAGS_CF
| X86_EFLAGS_PF
| X86_EFLAGS_AF
|
2095 X86_EFLAGS_ZF
| X86_EFLAGS_SF
| X86_EFLAGS_TF
|
2096 X86_EFLAGS_IF
| X86_EFLAGS_DF
| X86_EFLAGS_OF
|
2097 X86_EFLAGS_IOPL
| X86_EFLAGS_NT
| X86_EFLAGS_RF
|
2098 X86_EFLAGS_AC
| X86_EFLAGS_ID
|
2100 unsigned long vm86_mask
= X86_EFLAGS_VM
| X86_EFLAGS_VIF
|
2103 /* TODO: Add stack limit check */
2105 rc
= emulate_pop(ctxt
, &temp_eip
, ctxt
->op_bytes
);
2107 if (rc
!= X86EMUL_CONTINUE
)
2110 if (temp_eip
& ~0xffff)
2111 return emulate_gp(ctxt
, 0);
2113 rc
= emulate_pop(ctxt
, &cs
, ctxt
->op_bytes
);
2115 if (rc
!= X86EMUL_CONTINUE
)
2118 rc
= emulate_pop(ctxt
, &temp_eflags
, ctxt
->op_bytes
);
2120 if (rc
!= X86EMUL_CONTINUE
)
2123 rc
= load_segment_descriptor(ctxt
, (u16
)cs
, VCPU_SREG_CS
);
2125 if (rc
!= X86EMUL_CONTINUE
)
2128 ctxt
->_eip
= temp_eip
;
2130 if (ctxt
->op_bytes
== 4)
2131 ctxt
->eflags
= ((temp_eflags
& mask
) | (ctxt
->eflags
& vm86_mask
));
2132 else if (ctxt
->op_bytes
== 2) {
2133 ctxt
->eflags
&= ~0xffff;
2134 ctxt
->eflags
|= temp_eflags
;
2137 ctxt
->eflags
&= ~EFLG_RESERVED_ZEROS_MASK
; /* Clear reserved zeros */
2138 ctxt
->eflags
|= X86_EFLAGS_FIXED
;
2139 ctxt
->ops
->set_nmi_mask(ctxt
, false);
2144 static int em_iret(struct x86_emulate_ctxt
*ctxt
)
2146 switch(ctxt
->mode
) {
2147 case X86EMUL_MODE_REAL
:
2148 return emulate_iret_real(ctxt
);
2149 case X86EMUL_MODE_VM86
:
2150 case X86EMUL_MODE_PROT16
:
2151 case X86EMUL_MODE_PROT32
:
2152 case X86EMUL_MODE_PROT64
:
2154 /* iret from protected mode unimplemented yet */
2155 return X86EMUL_UNHANDLEABLE
;
2159 static int em_jmp_far(struct x86_emulate_ctxt
*ctxt
)
2163 struct desc_struct new_desc
;
2164 u8 cpl
= ctxt
->ops
->cpl(ctxt
);
2166 memcpy(&sel
, ctxt
->src
.valptr
+ ctxt
->op_bytes
, 2);
2168 rc
= __load_segment_descriptor(ctxt
, sel
, VCPU_SREG_CS
, cpl
,
2169 X86_TRANSFER_CALL_JMP
,
2171 if (rc
!= X86EMUL_CONTINUE
)
2174 rc
= assign_eip_far(ctxt
, ctxt
->src
.val
, &new_desc
);
2175 /* Error handling is not implemented. */
2176 if (rc
!= X86EMUL_CONTINUE
)
2177 return X86EMUL_UNHANDLEABLE
;
2182 static int em_jmp_abs(struct x86_emulate_ctxt
*ctxt
)
2184 return assign_eip_near(ctxt
, ctxt
->src
.val
);
2187 static int em_call_near_abs(struct x86_emulate_ctxt
*ctxt
)
2192 old_eip
= ctxt
->_eip
;
2193 rc
= assign_eip_near(ctxt
, ctxt
->src
.val
);
2194 if (rc
!= X86EMUL_CONTINUE
)
2196 ctxt
->src
.val
= old_eip
;
2201 static int em_cmpxchg8b(struct x86_emulate_ctxt
*ctxt
)
2203 u64 old
= ctxt
->dst
.orig_val64
;
2205 if (ctxt
->dst
.bytes
== 16)
2206 return X86EMUL_UNHANDLEABLE
;
2208 if (((u32
) (old
>> 0) != (u32
) reg_read(ctxt
, VCPU_REGS_RAX
)) ||
2209 ((u32
) (old
>> 32) != (u32
) reg_read(ctxt
, VCPU_REGS_RDX
))) {
2210 *reg_write(ctxt
, VCPU_REGS_RAX
) = (u32
) (old
>> 0);
2211 *reg_write(ctxt
, VCPU_REGS_RDX
) = (u32
) (old
>> 32);
2212 ctxt
->eflags
&= ~X86_EFLAGS_ZF
;
2214 ctxt
->dst
.val64
= ((u64
)reg_read(ctxt
, VCPU_REGS_RCX
) << 32) |
2215 (u32
) reg_read(ctxt
, VCPU_REGS_RBX
);
2217 ctxt
->eflags
|= X86_EFLAGS_ZF
;
2219 return X86EMUL_CONTINUE
;
2222 static int em_ret(struct x86_emulate_ctxt
*ctxt
)
2227 rc
= emulate_pop(ctxt
, &eip
, ctxt
->op_bytes
);
2228 if (rc
!= X86EMUL_CONTINUE
)
2231 return assign_eip_near(ctxt
, eip
);
2234 static int em_ret_far(struct x86_emulate_ctxt
*ctxt
)
2237 unsigned long eip
, cs
;
2238 int cpl
= ctxt
->ops
->cpl(ctxt
);
2239 struct desc_struct new_desc
;
2241 rc
= emulate_pop(ctxt
, &eip
, ctxt
->op_bytes
);
2242 if (rc
!= X86EMUL_CONTINUE
)
2244 rc
= emulate_pop(ctxt
, &cs
, ctxt
->op_bytes
);
2245 if (rc
!= X86EMUL_CONTINUE
)
2247 rc
= __load_segment_descriptor(ctxt
, (u16
)cs
, VCPU_SREG_CS
, cpl
,
2250 if (rc
!= X86EMUL_CONTINUE
)
2252 rc
= assign_eip_far(ctxt
, eip
, &new_desc
);
2253 /* Error handling is not implemented. */
2254 if (rc
!= X86EMUL_CONTINUE
)
2255 return X86EMUL_UNHANDLEABLE
;
2260 static int em_ret_far_imm(struct x86_emulate_ctxt
*ctxt
)
2264 rc
= em_ret_far(ctxt
);
2265 if (rc
!= X86EMUL_CONTINUE
)
2267 rsp_increment(ctxt
, ctxt
->src
.val
);
2268 return X86EMUL_CONTINUE
;
2271 static int em_cmpxchg(struct x86_emulate_ctxt
*ctxt
)
2273 /* Save real source value, then compare EAX against destination. */
2274 ctxt
->dst
.orig_val
= ctxt
->dst
.val
;
2275 ctxt
->dst
.val
= reg_read(ctxt
, VCPU_REGS_RAX
);
2276 ctxt
->src
.orig_val
= ctxt
->src
.val
;
2277 ctxt
->src
.val
= ctxt
->dst
.orig_val
;
2278 fastop(ctxt
, em_cmp
);
2280 if (ctxt
->eflags
& X86_EFLAGS_ZF
) {
2281 /* Success: write back to memory; no update of EAX */
2282 ctxt
->src
.type
= OP_NONE
;
2283 ctxt
->dst
.val
= ctxt
->src
.orig_val
;
2285 /* Failure: write the value we saw to EAX. */
2286 ctxt
->src
.type
= OP_REG
;
2287 ctxt
->src
.addr
.reg
= reg_rmw(ctxt
, VCPU_REGS_RAX
);
2288 ctxt
->src
.val
= ctxt
->dst
.orig_val
;
2289 /* Create write-cycle to dest by writing the same value */
2290 ctxt
->dst
.val
= ctxt
->dst
.orig_val
;
2292 return X86EMUL_CONTINUE
;
2295 static int em_lseg(struct x86_emulate_ctxt
*ctxt
)
2297 int seg
= ctxt
->src2
.val
;
2301 memcpy(&sel
, ctxt
->src
.valptr
+ ctxt
->op_bytes
, 2);
2303 rc
= load_segment_descriptor(ctxt
, sel
, seg
);
2304 if (rc
!= X86EMUL_CONTINUE
)
2307 ctxt
->dst
.val
= ctxt
->src
.val
;
2311 static int emulator_has_longmode(struct x86_emulate_ctxt
*ctxt
)
2313 #ifdef CONFIG_X86_64
2314 return ctxt
->ops
->guest_has_long_mode(ctxt
);
2320 static void rsm_set_desc_flags(struct desc_struct
*desc
, u32 flags
)
2322 desc
->g
= (flags
>> 23) & 1;
2323 desc
->d
= (flags
>> 22) & 1;
2324 desc
->l
= (flags
>> 21) & 1;
2325 desc
->avl
= (flags
>> 20) & 1;
2326 desc
->p
= (flags
>> 15) & 1;
2327 desc
->dpl
= (flags
>> 13) & 3;
2328 desc
->s
= (flags
>> 12) & 1;
2329 desc
->type
= (flags
>> 8) & 15;
2332 static int rsm_load_seg_32(struct x86_emulate_ctxt
*ctxt
, const char *smstate
,
2335 struct desc_struct desc
;
2339 selector
= GET_SMSTATE(u32
, smstate
, 0x7fa8 + n
* 4);
2342 offset
= 0x7f84 + n
* 12;
2344 offset
= 0x7f2c + (n
- 3) * 12;
2346 set_desc_base(&desc
, GET_SMSTATE(u32
, smstate
, offset
+ 8));
2347 set_desc_limit(&desc
, GET_SMSTATE(u32
, smstate
, offset
+ 4));
2348 rsm_set_desc_flags(&desc
, GET_SMSTATE(u32
, smstate
, offset
));
2349 ctxt
->ops
->set_segment(ctxt
, selector
, &desc
, 0, n
);
2350 return X86EMUL_CONTINUE
;
#ifdef CONFIG_X86_64
/* Restore segment register @n from the 64-bit SMM state-save area. */
static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
			   int n)
{
	struct desc_struct desc;
	int offset;
	u16 selector;
	u32 base3;

	offset = 0x7e00 + n * 16;

	selector =                GET_SMSTATE(u16, smstate, offset);
	rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
	set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
	set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
	base3 =                   GET_SMSTATE(u32, smstate, offset + 12);

	ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
	return X86EMUL_CONTINUE;
}
#endif
2375 static int rsm_enter_protected_mode(struct x86_emulate_ctxt
*ctxt
,
2376 u64 cr0
, u64 cr3
, u64 cr4
)
2381 /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
2383 if (cr4
& X86_CR4_PCIDE
) {
2388 bad
= ctxt
->ops
->set_cr(ctxt
, 3, cr3
);
2390 return X86EMUL_UNHANDLEABLE
;
2393 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
2394 * Then enable protected mode. However, PCID cannot be enabled
2395 * if EFER.LMA=0, so set it separately.
2397 bad
= ctxt
->ops
->set_cr(ctxt
, 4, cr4
& ~X86_CR4_PCIDE
);
2399 return X86EMUL_UNHANDLEABLE
;
2401 bad
= ctxt
->ops
->set_cr(ctxt
, 0, cr0
);
2403 return X86EMUL_UNHANDLEABLE
;
2405 if (cr4
& X86_CR4_PCIDE
) {
2406 bad
= ctxt
->ops
->set_cr(ctxt
, 4, cr4
);
2408 return X86EMUL_UNHANDLEABLE
;
2410 bad
= ctxt
->ops
->set_cr(ctxt
, 3, cr3
| pcid
);
2412 return X86EMUL_UNHANDLEABLE
;
2417 return X86EMUL_CONTINUE
;
2420 static int rsm_load_state_32(struct x86_emulate_ctxt
*ctxt
,
2421 const char *smstate
)
2423 struct desc_struct desc
;
2426 u32 val
, cr0
, cr3
, cr4
;
2429 cr0
= GET_SMSTATE(u32
, smstate
, 0x7ffc);
2430 cr3
= GET_SMSTATE(u32
, smstate
, 0x7ff8);
2431 ctxt
->eflags
= GET_SMSTATE(u32
, smstate
, 0x7ff4) | X86_EFLAGS_FIXED
;
2432 ctxt
->_eip
= GET_SMSTATE(u32
, smstate
, 0x7ff0);
2434 for (i
= 0; i
< 8; i
++)
2435 *reg_write(ctxt
, i
) = GET_SMSTATE(u32
, smstate
, 0x7fd0 + i
* 4);
2437 val
= GET_SMSTATE(u32
, smstate
, 0x7fcc);
2439 if (ctxt
->ops
->set_dr(ctxt
, 6, val
))
2440 return X86EMUL_UNHANDLEABLE
;
2442 val
= GET_SMSTATE(u32
, smstate
, 0x7fc8);
2444 if (ctxt
->ops
->set_dr(ctxt
, 7, val
))
2445 return X86EMUL_UNHANDLEABLE
;
2447 selector
= GET_SMSTATE(u32
, smstate
, 0x7fc4);
2448 set_desc_base(&desc
, GET_SMSTATE(u32
, smstate
, 0x7f64));
2449 set_desc_limit(&desc
, GET_SMSTATE(u32
, smstate
, 0x7f60));
2450 rsm_set_desc_flags(&desc
, GET_SMSTATE(u32
, smstate
, 0x7f5c));
2451 ctxt
->ops
->set_segment(ctxt
, selector
, &desc
, 0, VCPU_SREG_TR
);
2453 selector
= GET_SMSTATE(u32
, smstate
, 0x7fc0);
2454 set_desc_base(&desc
, GET_SMSTATE(u32
, smstate
, 0x7f80));
2455 set_desc_limit(&desc
, GET_SMSTATE(u32
, smstate
, 0x7f7c));
2456 rsm_set_desc_flags(&desc
, GET_SMSTATE(u32
, smstate
, 0x7f78));
2457 ctxt
->ops
->set_segment(ctxt
, selector
, &desc
, 0, VCPU_SREG_LDTR
);
2459 dt
.address
= GET_SMSTATE(u32
, smstate
, 0x7f74);
2460 dt
.size
= GET_SMSTATE(u32
, smstate
, 0x7f70);
2461 ctxt
->ops
->set_gdt(ctxt
, &dt
);
2463 dt
.address
= GET_SMSTATE(u32
, smstate
, 0x7f58);
2464 dt
.size
= GET_SMSTATE(u32
, smstate
, 0x7f54);
2465 ctxt
->ops
->set_idt(ctxt
, &dt
);
2467 for (i
= 0; i
< 6; i
++) {
2468 int r
= rsm_load_seg_32(ctxt
, smstate
, i
);
2469 if (r
!= X86EMUL_CONTINUE
)
2473 cr4
= GET_SMSTATE(u32
, smstate
, 0x7f14);
2475 ctxt
->ops
->set_smbase(ctxt
, GET_SMSTATE(u32
, smstate
, 0x7ef8));
2477 return rsm_enter_protected_mode(ctxt
, cr0
, cr3
, cr4
);
#ifdef CONFIG_X86_64
/*
 * Reload vCPU state from the 64-bit SMM state-save area on RSM.
 * NOTE(review): reconstructed from a mangled extraction — offsets match
 * the visible fragments; verify against the upstream file.
 */
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
			     const char *smstate)
{
	struct desc_struct desc;
	struct desc_ptr dt;
	u64 val, cr0, cr3, cr4;
	u32 base3;
	u16 selector;
	int i, r;

	for (i = 0; i < 16; i++)
		*reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);

	ctxt->_eip   = GET_SMSTATE(u64, smstate, 0x7f78);
	ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;

	val = GET_SMSTATE(u64, smstate, 0x7f68);

	if (ctxt->ops->set_dr(ctxt, 6, val))
		return X86EMUL_UNHANDLEABLE;

	val = GET_SMSTATE(u64, smstate, 0x7f60);

	if (ctxt->ops->set_dr(ctxt, 7, val))
		return X86EMUL_UNHANDLEABLE;

	cr0 =                       GET_SMSTATE(u64, smstate, 0x7f58);
	cr3 =                       GET_SMSTATE(u64, smstate, 0x7f50);
	cr4 =                       GET_SMSTATE(u64, smstate, 0x7f48);
	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
	val =                       GET_SMSTATE(u64, smstate, 0x7ed0);

	if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
		return X86EMUL_UNHANDLEABLE;

	selector =                  GET_SMSTATE(u32, smstate, 0x7e90);
	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e92) << 8);
	set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e94));
	set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e98));
	base3 =                     GET_SMSTATE(u32, smstate, 0x7e9c);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);

	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e84);
	dt.address =                GET_SMSTATE(u64, smstate, 0x7e88);
	ctxt->ops->set_idt(ctxt, &dt);

	selector =                  GET_SMSTATE(u32, smstate, 0x7e70);
	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e72) << 8);
	set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e74));
	set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e78));
	base3 =                     GET_SMSTATE(u32, smstate, 0x7e7c);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);

	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e64);
	dt.address =                GET_SMSTATE(u64, smstate, 0x7e68);
	ctxt->ops->set_gdt(ctxt, &dt);

	r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
	if (r != X86EMUL_CONTINUE)
		return r;

	for (i = 0; i < 6; i++) {
		r = rsm_load_seg_64(ctxt, smstate, i);
		if (r != X86EMUL_CONTINUE)
			return r;
	}

	return X86EMUL_CONTINUE;
}
#endif
2552 static int em_rsm(struct x86_emulate_ctxt
*ctxt
)
2554 unsigned long cr0
, cr4
, efer
;
2559 if ((ctxt
->ops
->get_hflags(ctxt
) & X86EMUL_SMM_MASK
) == 0)
2560 return emulate_ud(ctxt
);
2562 smbase
= ctxt
->ops
->get_smbase(ctxt
);
2564 ret
= ctxt
->ops
->read_phys(ctxt
, smbase
+ 0xfe00, buf
, sizeof(buf
));
2565 if (ret
!= X86EMUL_CONTINUE
)
2566 return X86EMUL_UNHANDLEABLE
;
2568 if ((ctxt
->ops
->get_hflags(ctxt
) & X86EMUL_SMM_INSIDE_NMI_MASK
) == 0)
2569 ctxt
->ops
->set_nmi_mask(ctxt
, false);
2571 ctxt
->ops
->exiting_smm(ctxt
);
2574 * Get back to real mode, to prepare a safe state in which to load
2575 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
2576 * supports long mode.
2578 if (emulator_has_longmode(ctxt
)) {
2579 struct desc_struct cs_desc
;
2581 /* Zero CR4.PCIDE before CR0.PG. */
2582 cr4
= ctxt
->ops
->get_cr(ctxt
, 4);
2583 if (cr4
& X86_CR4_PCIDE
)
2584 ctxt
->ops
->set_cr(ctxt
, 4, cr4
& ~X86_CR4_PCIDE
);
2586 /* A 32-bit code segment is required to clear EFER.LMA. */
2587 memset(&cs_desc
, 0, sizeof(cs_desc
));
2589 cs_desc
.s
= cs_desc
.g
= cs_desc
.p
= 1;
2590 ctxt
->ops
->set_segment(ctxt
, 0, &cs_desc
, 0, VCPU_SREG_CS
);
2593 /* For the 64-bit case, this will clear EFER.LMA. */
2594 cr0
= ctxt
->ops
->get_cr(ctxt
, 0);
2595 if (cr0
& X86_CR0_PE
)
2596 ctxt
->ops
->set_cr(ctxt
, 0, cr0
& ~(X86_CR0_PG
| X86_CR0_PE
));
2598 if (emulator_has_longmode(ctxt
)) {
2599 /* Clear CR4.PAE before clearing EFER.LME. */
2600 cr4
= ctxt
->ops
->get_cr(ctxt
, 4);
2601 if (cr4
& X86_CR4_PAE
)
2602 ctxt
->ops
->set_cr(ctxt
, 4, cr4
& ~X86_CR4_PAE
);
2604 /* And finally go back to 32-bit mode. */
2606 ctxt
->ops
->set_msr(ctxt
, MSR_EFER
, efer
);
2610 * Give leave_smm() a chance to make ISA-specific changes to the vCPU
2611 * state (e.g. enter guest mode) before loading state from the SMM
2614 if (ctxt
->ops
->leave_smm(ctxt
, buf
))
2615 goto emulate_shutdown
;
2617 #ifdef CONFIG_X86_64
2618 if (emulator_has_longmode(ctxt
))
2619 ret
= rsm_load_state_64(ctxt
, buf
);
2622 ret
= rsm_load_state_32(ctxt
, buf
);
2624 if (ret
!= X86EMUL_CONTINUE
)
2625 goto emulate_shutdown
;
2628 * Note, the ctxt->ops callbacks are responsible for handling side
2629 * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID
2630 * runtime updates, etc... If that changes, e.g. this flow is moved
2631 * out of the emulator to make it look more like enter_smm(), then
2632 * those side effects need to be explicitly handled for both success
2635 return X86EMUL_CONTINUE
;
2638 ctxt
->ops
->triple_fault(ctxt
);
2639 return X86EMUL_CONTINUE
;
2643 setup_syscalls_segments(struct desc_struct
*cs
, struct desc_struct
*ss
)
2645 cs
->l
= 0; /* will be adjusted later */
2646 set_desc_base(cs
, 0); /* flat segment */
2647 cs
->g
= 1; /* 4kb granularity */
2648 set_desc_limit(cs
, 0xfffff); /* 4GB limit */
2649 cs
->type
= 0x0b; /* Read, Execute, Accessed */
2651 cs
->dpl
= 0; /* will be adjusted later */
2656 set_desc_base(ss
, 0); /* flat segment */
2657 set_desc_limit(ss
, 0xfffff); /* 4GB limit */
2658 ss
->g
= 1; /* 4kb granularity */
2660 ss
->type
= 0x03; /* Read/Write, Accessed */
2661 ss
->d
= 1; /* 32bit stack segment */
2668 static bool vendor_intel(struct x86_emulate_ctxt
*ctxt
)
2670 u32 eax
, ebx
, ecx
, edx
;
2673 ctxt
->ops
->get_cpuid(ctxt
, &eax
, &ebx
, &ecx
, &edx
, true);
2674 return is_guest_vendor_intel(ebx
, ecx
, edx
);
2677 static bool em_syscall_is_enabled(struct x86_emulate_ctxt
*ctxt
)
2679 const struct x86_emulate_ops
*ops
= ctxt
->ops
;
2680 u32 eax
, ebx
, ecx
, edx
;
2683 * syscall should always be enabled in longmode - so only become
2684 * vendor specific (cpuid) if other modes are active...
2686 if (ctxt
->mode
== X86EMUL_MODE_PROT64
)
2691 ops
->get_cpuid(ctxt
, &eax
, &ebx
, &ecx
, &edx
, true);
2693 * remark: Intel CPUs only support "syscall" in 64bit longmode. Also a
2694 * 64bit guest with a 32bit compat-app running will #UD !! While this
2695 * behaviour can be fixed (by emulating) into AMD response - CPUs of
2696 * AMD can't behave like Intel.
2698 if (is_guest_vendor_intel(ebx
, ecx
, edx
))
2701 if (is_guest_vendor_amd(ebx
, ecx
, edx
) ||
2702 is_guest_vendor_hygon(ebx
, ecx
, edx
))
2706 * default: (not Intel, not AMD, not Hygon), apply Intel's
2712 static int em_syscall(struct x86_emulate_ctxt
*ctxt
)
2714 const struct x86_emulate_ops
*ops
= ctxt
->ops
;
2715 struct desc_struct cs
, ss
;
2720 /* syscall is not available in real mode */
2721 if (ctxt
->mode
== X86EMUL_MODE_REAL
||
2722 ctxt
->mode
== X86EMUL_MODE_VM86
)
2723 return emulate_ud(ctxt
);
2725 if (!(em_syscall_is_enabled(ctxt
)))
2726 return emulate_ud(ctxt
);
2728 ops
->get_msr(ctxt
, MSR_EFER
, &efer
);
2729 if (!(efer
& EFER_SCE
))
2730 return emulate_ud(ctxt
);
2732 setup_syscalls_segments(&cs
, &ss
);
2733 ops
->get_msr(ctxt
, MSR_STAR
, &msr_data
);
2735 cs_sel
= (u16
)(msr_data
& 0xfffc);
2736 ss_sel
= (u16
)(msr_data
+ 8);
2738 if (efer
& EFER_LMA
) {
2742 ops
->set_segment(ctxt
, cs_sel
, &cs
, 0, VCPU_SREG_CS
);
2743 ops
->set_segment(ctxt
, ss_sel
, &ss
, 0, VCPU_SREG_SS
);
2745 *reg_write(ctxt
, VCPU_REGS_RCX
) = ctxt
->_eip
;
2746 if (efer
& EFER_LMA
) {
2747 #ifdef CONFIG_X86_64
2748 *reg_write(ctxt
, VCPU_REGS_R11
) = ctxt
->eflags
;
2751 ctxt
->mode
== X86EMUL_MODE_PROT64
?
2752 MSR_LSTAR
: MSR_CSTAR
, &msr_data
);
2753 ctxt
->_eip
= msr_data
;
2755 ops
->get_msr(ctxt
, MSR_SYSCALL_MASK
, &msr_data
);
2756 ctxt
->eflags
&= ~msr_data
;
2757 ctxt
->eflags
|= X86_EFLAGS_FIXED
;
2761 ops
->get_msr(ctxt
, MSR_STAR
, &msr_data
);
2762 ctxt
->_eip
= (u32
)msr_data
;
2764 ctxt
->eflags
&= ~(X86_EFLAGS_VM
| X86_EFLAGS_IF
);
2767 ctxt
->tf
= (ctxt
->eflags
& X86_EFLAGS_TF
) != 0;
2768 return X86EMUL_CONTINUE
;
2771 static int em_sysenter(struct x86_emulate_ctxt
*ctxt
)
2773 const struct x86_emulate_ops
*ops
= ctxt
->ops
;
2774 struct desc_struct cs
, ss
;
2779 ops
->get_msr(ctxt
, MSR_EFER
, &efer
);
2780 /* inject #GP if in real mode */
2781 if (ctxt
->mode
== X86EMUL_MODE_REAL
)
2782 return emulate_gp(ctxt
, 0);
2785 * Not recognized on AMD in compat mode (but is recognized in legacy
2788 if ((ctxt
->mode
!= X86EMUL_MODE_PROT64
) && (efer
& EFER_LMA
)
2789 && !vendor_intel(ctxt
))
2790 return emulate_ud(ctxt
);
2792 /* sysenter/sysexit have not been tested in 64bit mode. */
2793 if (ctxt
->mode
== X86EMUL_MODE_PROT64
)
2794 return X86EMUL_UNHANDLEABLE
;
2796 ops
->get_msr(ctxt
, MSR_IA32_SYSENTER_CS
, &msr_data
);
2797 if ((msr_data
& 0xfffc) == 0x0)
2798 return emulate_gp(ctxt
, 0);
2800 setup_syscalls_segments(&cs
, &ss
);
2801 ctxt
->eflags
&= ~(X86_EFLAGS_VM
| X86_EFLAGS_IF
);
2802 cs_sel
= (u16
)msr_data
& ~SEGMENT_RPL_MASK
;
2803 ss_sel
= cs_sel
+ 8;
2804 if (efer
& EFER_LMA
) {
2809 ops
->set_segment(ctxt
, cs_sel
, &cs
, 0, VCPU_SREG_CS
);
2810 ops
->set_segment(ctxt
, ss_sel
, &ss
, 0, VCPU_SREG_SS
);
2812 ops
->get_msr(ctxt
, MSR_IA32_SYSENTER_EIP
, &msr_data
);
2813 ctxt
->_eip
= (efer
& EFER_LMA
) ? msr_data
: (u32
)msr_data
;
2815 ops
->get_msr(ctxt
, MSR_IA32_SYSENTER_ESP
, &msr_data
);
2816 *reg_write(ctxt
, VCPU_REGS_RSP
) = (efer
& EFER_LMA
) ? msr_data
:
2818 if (efer
& EFER_LMA
)
2819 ctxt
->mode
= X86EMUL_MODE_PROT64
;
2821 return X86EMUL_CONTINUE
;
2824 static int em_sysexit(struct x86_emulate_ctxt
*ctxt
)
2826 const struct x86_emulate_ops
*ops
= ctxt
->ops
;
2827 struct desc_struct cs
, ss
;
2828 u64 msr_data
, rcx
, rdx
;
2830 u16 cs_sel
= 0, ss_sel
= 0;
2832 /* inject #GP if in real mode or Virtual 8086 mode */
2833 if (ctxt
->mode
== X86EMUL_MODE_REAL
||
2834 ctxt
->mode
== X86EMUL_MODE_VM86
)
2835 return emulate_gp(ctxt
, 0);
2837 setup_syscalls_segments(&cs
, &ss
);
2839 if ((ctxt
->rex_prefix
& 0x8) != 0x0)
2840 usermode
= X86EMUL_MODE_PROT64
;
2842 usermode
= X86EMUL_MODE_PROT32
;
2844 rcx
= reg_read(ctxt
, VCPU_REGS_RCX
);
2845 rdx
= reg_read(ctxt
, VCPU_REGS_RDX
);
2849 ops
->get_msr(ctxt
, MSR_IA32_SYSENTER_CS
, &msr_data
);
2851 case X86EMUL_MODE_PROT32
:
2852 cs_sel
= (u16
)(msr_data
+ 16);
2853 if ((msr_data
& 0xfffc) == 0x0)
2854 return emulate_gp(ctxt
, 0);
2855 ss_sel
= (u16
)(msr_data
+ 24);
2859 case X86EMUL_MODE_PROT64
:
2860 cs_sel
= (u16
)(msr_data
+ 32);
2861 if (msr_data
== 0x0)
2862 return emulate_gp(ctxt
, 0);
2863 ss_sel
= cs_sel
+ 8;
2866 if (emul_is_noncanonical_address(rcx
, ctxt
) ||
2867 emul_is_noncanonical_address(rdx
, ctxt
))
2868 return emulate_gp(ctxt
, 0);
2871 cs_sel
|= SEGMENT_RPL_MASK
;
2872 ss_sel
|= SEGMENT_RPL_MASK
;
2874 ops
->set_segment(ctxt
, cs_sel
, &cs
, 0, VCPU_SREG_CS
);
2875 ops
->set_segment(ctxt
, ss_sel
, &ss
, 0, VCPU_SREG_SS
);
2878 *reg_write(ctxt
, VCPU_REGS_RSP
) = rcx
;
2880 return X86EMUL_CONTINUE
;
2883 static bool emulator_bad_iopl(struct x86_emulate_ctxt
*ctxt
)
2886 if (ctxt
->mode
== X86EMUL_MODE_REAL
)
2888 if (ctxt
->mode
== X86EMUL_MODE_VM86
)
2890 iopl
= (ctxt
->eflags
& X86_EFLAGS_IOPL
) >> X86_EFLAGS_IOPL_BIT
;
2891 return ctxt
->ops
->cpl(ctxt
) > iopl
;
2894 #define VMWARE_PORT_VMPORT (0x5658)
2895 #define VMWARE_PORT_VMRPC (0x5659)
2897 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt
*ctxt
,
2900 const struct x86_emulate_ops
*ops
= ctxt
->ops
;
2901 struct desc_struct tr_seg
;
2904 u16 tr
, io_bitmap_ptr
, perm
, bit_idx
= port
& 0x7;
2905 unsigned mask
= (1 << len
) - 1;
2909 * VMware allows access to these ports even if denied
2910 * by TSS I/O permission bitmap. Mimic behavior.
2912 if (enable_vmware_backdoor
&&
2913 ((port
== VMWARE_PORT_VMPORT
) || (port
== VMWARE_PORT_VMRPC
)))
2916 ops
->get_segment(ctxt
, &tr
, &tr_seg
, &base3
, VCPU_SREG_TR
);
2919 if (desc_limit_scaled(&tr_seg
) < 103)
2921 base
= get_desc_base(&tr_seg
);
2922 #ifdef CONFIG_X86_64
2923 base
|= ((u64
)base3
) << 32;
2925 r
= ops
->read_std(ctxt
, base
+ 102, &io_bitmap_ptr
, 2, NULL
, true);
2926 if (r
!= X86EMUL_CONTINUE
)
2928 if (io_bitmap_ptr
+ port
/8 > desc_limit_scaled(&tr_seg
))
2930 r
= ops
->read_std(ctxt
, base
+ io_bitmap_ptr
+ port
/8, &perm
, 2, NULL
, true);
2931 if (r
!= X86EMUL_CONTINUE
)
2933 if ((perm
>> bit_idx
) & mask
)
2938 static bool emulator_io_permited(struct x86_emulate_ctxt
*ctxt
,
2944 if (emulator_bad_iopl(ctxt
))
2945 if (!emulator_io_port_access_allowed(ctxt
, port
, len
))
2948 ctxt
->perm_ok
= true;
/* Replicate Intel's truncation of RSI/RDI/RCX for rep-string ops with a
 * 32-bit address size and RCX == 0. */
static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
{
	/*
	 * Intel CPUs mask the counter and pointers in quite strange
	 * manner when ECX is zero due to REP-string optimizations.
	 */
#ifdef CONFIG_X86_64
	if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
		return;

	*reg_write(ctxt, VCPU_REGS_RCX) = 0;

	switch (ctxt->b) {
	case 0xa4:	/* movsb */
	case 0xa5:	/* movsd/w */
		*reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
		fallthrough;
	case 0xaa:	/* stosb */
	case 0xab:	/* stosd/w */
		*reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
	}
#endif
}
2977 static void save_state_to_tss16(struct x86_emulate_ctxt
*ctxt
,
2978 struct tss_segment_16
*tss
)
2980 tss
->ip
= ctxt
->_eip
;
2981 tss
->flag
= ctxt
->eflags
;
2982 tss
->ax
= reg_read(ctxt
, VCPU_REGS_RAX
);
2983 tss
->cx
= reg_read(ctxt
, VCPU_REGS_RCX
);
2984 tss
->dx
= reg_read(ctxt
, VCPU_REGS_RDX
);
2985 tss
->bx
= reg_read(ctxt
, VCPU_REGS_RBX
);
2986 tss
->sp
= reg_read(ctxt
, VCPU_REGS_RSP
);
2987 tss
->bp
= reg_read(ctxt
, VCPU_REGS_RBP
);
2988 tss
->si
= reg_read(ctxt
, VCPU_REGS_RSI
);
2989 tss
->di
= reg_read(ctxt
, VCPU_REGS_RDI
);
2991 tss
->es
= get_segment_selector(ctxt
, VCPU_SREG_ES
);
2992 tss
->cs
= get_segment_selector(ctxt
, VCPU_SREG_CS
);
2993 tss
->ss
= get_segment_selector(ctxt
, VCPU_SREG_SS
);
2994 tss
->ds
= get_segment_selector(ctxt
, VCPU_SREG_DS
);
2995 tss
->ldt
= get_segment_selector(ctxt
, VCPU_SREG_LDTR
);
2998 static int load_state_from_tss16(struct x86_emulate_ctxt
*ctxt
,
2999 struct tss_segment_16
*tss
)
3004 ctxt
->_eip
= tss
->ip
;
3005 ctxt
->eflags
= tss
->flag
| 2;
3006 *reg_write(ctxt
, VCPU_REGS_RAX
) = tss
->ax
;
3007 *reg_write(ctxt
, VCPU_REGS_RCX
) = tss
->cx
;
3008 *reg_write(ctxt
, VCPU_REGS_RDX
) = tss
->dx
;
3009 *reg_write(ctxt
, VCPU_REGS_RBX
) = tss
->bx
;
3010 *reg_write(ctxt
, VCPU_REGS_RSP
) = tss
->sp
;
3011 *reg_write(ctxt
, VCPU_REGS_RBP
) = tss
->bp
;
3012 *reg_write(ctxt
, VCPU_REGS_RSI
) = tss
->si
;
3013 *reg_write(ctxt
, VCPU_REGS_RDI
) = tss
->di
;
3016 * SDM says that segment selectors are loaded before segment
3019 set_segment_selector(ctxt
, tss
->ldt
, VCPU_SREG_LDTR
);
3020 set_segment_selector(ctxt
, tss
->es
, VCPU_SREG_ES
);
3021 set_segment_selector(ctxt
, tss
->cs
, VCPU_SREG_CS
);
3022 set_segment_selector(ctxt
, tss
->ss
, VCPU_SREG_SS
);
3023 set_segment_selector(ctxt
, tss
->ds
, VCPU_SREG_DS
);
3028 * Now load segment descriptors. If fault happens at this stage
3029 * it is handled in a context of new task
3031 ret
= __load_segment_descriptor(ctxt
, tss
->ldt
, VCPU_SREG_LDTR
, cpl
,
3032 X86_TRANSFER_TASK_SWITCH
, NULL
);
3033 if (ret
!= X86EMUL_CONTINUE
)
3035 ret
= __load_segment_descriptor(ctxt
, tss
->es
, VCPU_SREG_ES
, cpl
,
3036 X86_TRANSFER_TASK_SWITCH
, NULL
);
3037 if (ret
!= X86EMUL_CONTINUE
)
3039 ret
= __load_segment_descriptor(ctxt
, tss
->cs
, VCPU_SREG_CS
, cpl
,
3040 X86_TRANSFER_TASK_SWITCH
, NULL
);
3041 if (ret
!= X86EMUL_CONTINUE
)
3043 ret
= __load_segment_descriptor(ctxt
, tss
->ss
, VCPU_SREG_SS
, cpl
,
3044 X86_TRANSFER_TASK_SWITCH
, NULL
);
3045 if (ret
!= X86EMUL_CONTINUE
)
3047 ret
= __load_segment_descriptor(ctxt
, tss
->ds
, VCPU_SREG_DS
, cpl
,
3048 X86_TRANSFER_TASK_SWITCH
, NULL
);
3049 if (ret
!= X86EMUL_CONTINUE
)
3052 return X86EMUL_CONTINUE
;
3055 static int task_switch_16(struct x86_emulate_ctxt
*ctxt
, u16 old_tss_sel
,
3056 ulong old_tss_base
, struct desc_struct
*new_desc
)
3058 struct tss_segment_16 tss_seg
;
3060 u32 new_tss_base
= get_desc_base(new_desc
);
3062 ret
= linear_read_system(ctxt
, old_tss_base
, &tss_seg
, sizeof(tss_seg
));
3063 if (ret
!= X86EMUL_CONTINUE
)
3066 save_state_to_tss16(ctxt
, &tss_seg
);
3068 ret
= linear_write_system(ctxt
, old_tss_base
, &tss_seg
, sizeof(tss_seg
));
3069 if (ret
!= X86EMUL_CONTINUE
)
3072 ret
= linear_read_system(ctxt
, new_tss_base
, &tss_seg
, sizeof(tss_seg
));
3073 if (ret
!= X86EMUL_CONTINUE
)
3076 if (old_tss_sel
!= 0xffff) {
3077 tss_seg
.prev_task_link
= old_tss_sel
;
3079 ret
= linear_write_system(ctxt
, new_tss_base
,
3080 &tss_seg
.prev_task_link
,
3081 sizeof(tss_seg
.prev_task_link
));
3082 if (ret
!= X86EMUL_CONTINUE
)
3086 return load_state_from_tss16(ctxt
, &tss_seg
);
3089 static void save_state_to_tss32(struct x86_emulate_ctxt
*ctxt
,
3090 struct tss_segment_32
*tss
)
3092 /* CR3 and ldt selector are not saved intentionally */
3093 tss
->eip
= ctxt
->_eip
;
3094 tss
->eflags
= ctxt
->eflags
;
3095 tss
->eax
= reg_read(ctxt
, VCPU_REGS_RAX
);
3096 tss
->ecx
= reg_read(ctxt
, VCPU_REGS_RCX
);
3097 tss
->edx
= reg_read(ctxt
, VCPU_REGS_RDX
);
3098 tss
->ebx
= reg_read(ctxt
, VCPU_REGS_RBX
);
3099 tss
->esp
= reg_read(ctxt
, VCPU_REGS_RSP
);
3100 tss
->ebp
= reg_read(ctxt
, VCPU_REGS_RBP
);
3101 tss
->esi
= reg_read(ctxt
, VCPU_REGS_RSI
);
3102 tss
->edi
= reg_read(ctxt
, VCPU_REGS_RDI
);
3104 tss
->es
= get_segment_selector(ctxt
, VCPU_SREG_ES
);
3105 tss
->cs
= get_segment_selector(ctxt
, VCPU_SREG_CS
);
3106 tss
->ss
= get_segment_selector(ctxt
, VCPU_SREG_SS
);
3107 tss
->ds
= get_segment_selector(ctxt
, VCPU_SREG_DS
);
3108 tss
->fs
= get_segment_selector(ctxt
, VCPU_SREG_FS
);
3109 tss
->gs
= get_segment_selector(ctxt
, VCPU_SREG_GS
);
3112 static int load_state_from_tss32(struct x86_emulate_ctxt
*ctxt
,
3113 struct tss_segment_32
*tss
)
3118 if (ctxt
->ops
->set_cr(ctxt
, 3, tss
->cr3
))
3119 return emulate_gp(ctxt
, 0);
3120 ctxt
->_eip
= tss
->eip
;
3121 ctxt
->eflags
= tss
->eflags
| 2;
3123 /* General purpose registers */
3124 *reg_write(ctxt
, VCPU_REGS_RAX
) = tss
->eax
;
3125 *reg_write(ctxt
, VCPU_REGS_RCX
) = tss
->ecx
;
3126 *reg_write(ctxt
, VCPU_REGS_RDX
) = tss
->edx
;
3127 *reg_write(ctxt
, VCPU_REGS_RBX
) = tss
->ebx
;
3128 *reg_write(ctxt
, VCPU_REGS_RSP
) = tss
->esp
;
3129 *reg_write(ctxt
, VCPU_REGS_RBP
) = tss
->ebp
;
3130 *reg_write(ctxt
, VCPU_REGS_RSI
) = tss
->esi
;
3131 *reg_write(ctxt
, VCPU_REGS_RDI
) = tss
->edi
;
3134 * SDM says that segment selectors are loaded before segment
3135 * descriptors. This is important because CPL checks will
3138 set_segment_selector(ctxt
, tss
->ldt_selector
, VCPU_SREG_LDTR
);
3139 set_segment_selector(ctxt
, tss
->es
, VCPU_SREG_ES
);
3140 set_segment_selector(ctxt
, tss
->cs
, VCPU_SREG_CS
);
3141 set_segment_selector(ctxt
, tss
->ss
, VCPU_SREG_SS
);
3142 set_segment_selector(ctxt
, tss
->ds
, VCPU_SREG_DS
);
3143 set_segment_selector(ctxt
, tss
->fs
, VCPU_SREG_FS
);
3144 set_segment_selector(ctxt
, tss
->gs
, VCPU_SREG_GS
);
3147 * If we're switching between Protected Mode and VM86, we need to make
3148 * sure to update the mode before loading the segment descriptors so
3149 * that the selectors are interpreted correctly.
3151 if (ctxt
->eflags
& X86_EFLAGS_VM
) {
3152 ctxt
->mode
= X86EMUL_MODE_VM86
;
3155 ctxt
->mode
= X86EMUL_MODE_PROT32
;
3160 * Now load segment descriptors. If fault happens at this stage
3161 * it is handled in a context of new task
3163 ret
= __load_segment_descriptor(ctxt
, tss
->ldt_selector
, VCPU_SREG_LDTR
,
3164 cpl
, X86_TRANSFER_TASK_SWITCH
, NULL
);
3165 if (ret
!= X86EMUL_CONTINUE
)
3167 ret
= __load_segment_descriptor(ctxt
, tss
->es
, VCPU_SREG_ES
, cpl
,
3168 X86_TRANSFER_TASK_SWITCH
, NULL
);
3169 if (ret
!= X86EMUL_CONTINUE
)
3171 ret
= __load_segment_descriptor(ctxt
, tss
->cs
, VCPU_SREG_CS
, cpl
,
3172 X86_TRANSFER_TASK_SWITCH
, NULL
);
3173 if (ret
!= X86EMUL_CONTINUE
)
3175 ret
= __load_segment_descriptor(ctxt
, tss
->ss
, VCPU_SREG_SS
, cpl
,
3176 X86_TRANSFER_TASK_SWITCH
, NULL
);
3177 if (ret
!= X86EMUL_CONTINUE
)
3179 ret
= __load_segment_descriptor(ctxt
, tss
->ds
, VCPU_SREG_DS
, cpl
,
3180 X86_TRANSFER_TASK_SWITCH
, NULL
);
3181 if (ret
!= X86EMUL_CONTINUE
)
3183 ret
= __load_segment_descriptor(ctxt
, tss
->fs
, VCPU_SREG_FS
, cpl
,
3184 X86_TRANSFER_TASK_SWITCH
, NULL
);
3185 if (ret
!= X86EMUL_CONTINUE
)
3187 ret
= __load_segment_descriptor(ctxt
, tss
->gs
, VCPU_SREG_GS
, cpl
,
3188 X86_TRANSFER_TASK_SWITCH
, NULL
);
3193 static int task_switch_32(struct x86_emulate_ctxt
*ctxt
, u16 old_tss_sel
,
3194 ulong old_tss_base
, struct desc_struct
*new_desc
)
3196 struct tss_segment_32 tss_seg
;
3198 u32 new_tss_base
= get_desc_base(new_desc
);
3199 u32 eip_offset
= offsetof(struct tss_segment_32
, eip
);
3200 u32 ldt_sel_offset
= offsetof(struct tss_segment_32
, ldt_selector
);
3202 ret
= linear_read_system(ctxt
, old_tss_base
, &tss_seg
, sizeof(tss_seg
));
3203 if (ret
!= X86EMUL_CONTINUE
)
3206 save_state_to_tss32(ctxt
, &tss_seg
);
3208 /* Only GP registers and segment selectors are saved */
3209 ret
= linear_write_system(ctxt
, old_tss_base
+ eip_offset
, &tss_seg
.eip
,
3210 ldt_sel_offset
- eip_offset
);
3211 if (ret
!= X86EMUL_CONTINUE
)
3214 ret
= linear_read_system(ctxt
, new_tss_base
, &tss_seg
, sizeof(tss_seg
));
3215 if (ret
!= X86EMUL_CONTINUE
)
3218 if (old_tss_sel
!= 0xffff) {
3219 tss_seg
.prev_task_link
= old_tss_sel
;
3221 ret
= linear_write_system(ctxt
, new_tss_base
,
3222 &tss_seg
.prev_task_link
,
3223 sizeof(tss_seg
.prev_task_link
));
3224 if (ret
!= X86EMUL_CONTINUE
)
3228 return load_state_from_tss32(ctxt
, &tss_seg
);
3231 static int emulator_do_task_switch(struct x86_emulate_ctxt
*ctxt
,
3232 u16 tss_selector
, int idt_index
, int reason
,
3233 bool has_error_code
, u32 error_code
)
3235 const struct x86_emulate_ops
*ops
= ctxt
->ops
;
3236 struct desc_struct curr_tss_desc
, next_tss_desc
;
3238 u16 old_tss_sel
= get_segment_selector(ctxt
, VCPU_SREG_TR
);
3239 ulong old_tss_base
=
3240 ops
->get_cached_segment_base(ctxt
, VCPU_SREG_TR
);
3242 ulong desc_addr
, dr7
;
3244 /* FIXME: old_tss_base == ~0 ? */
3246 ret
= read_segment_descriptor(ctxt
, tss_selector
, &next_tss_desc
, &desc_addr
);
3247 if (ret
!= X86EMUL_CONTINUE
)
3249 ret
= read_segment_descriptor(ctxt
, old_tss_sel
, &curr_tss_desc
, &desc_addr
);
3250 if (ret
!= X86EMUL_CONTINUE
)
3253 /* FIXME: check that next_tss_desc is tss */
3256 * Check privileges. The three cases are task switch caused by...
3258 * 1. jmp/call/int to task gate: Check against DPL of the task gate
3259 * 2. Exception/IRQ/iret: No check is performed
3260 * 3. jmp/call to TSS/task-gate: No check is performed since the
3261 * hardware checks it before exiting.
3263 if (reason
== TASK_SWITCH_GATE
) {
3264 if (idt_index
!= -1) {
3265 /* Software interrupts */
3266 struct desc_struct task_gate_desc
;
3269 ret
= read_interrupt_descriptor(ctxt
, idt_index
,
3271 if (ret
!= X86EMUL_CONTINUE
)
3274 dpl
= task_gate_desc
.dpl
;
3275 if ((tss_selector
& 3) > dpl
|| ops
->cpl(ctxt
) > dpl
)
3276 return emulate_gp(ctxt
, (idt_index
<< 3) | 0x2);
3280 desc_limit
= desc_limit_scaled(&next_tss_desc
);
3281 if (!next_tss_desc
.p
||
3282 ((desc_limit
< 0x67 && (next_tss_desc
.type
& 8)) ||
3283 desc_limit
< 0x2b)) {
3284 return emulate_ts(ctxt
, tss_selector
& 0xfffc);
3287 if (reason
== TASK_SWITCH_IRET
|| reason
== TASK_SWITCH_JMP
) {
3288 curr_tss_desc
.type
&= ~(1 << 1); /* clear busy flag */
3289 write_segment_descriptor(ctxt
, old_tss_sel
, &curr_tss_desc
);
3292 if (reason
== TASK_SWITCH_IRET
)
3293 ctxt
->eflags
= ctxt
->eflags
& ~X86_EFLAGS_NT
;
3295 /* set back link to prev task only if NT bit is set in eflags
3296 note that old_tss_sel is not used after this point */
3297 if (reason
!= TASK_SWITCH_CALL
&& reason
!= TASK_SWITCH_GATE
)
3298 old_tss_sel
= 0xffff;
3300 if (next_tss_desc
.type
& 8)
3301 ret
= task_switch_32(ctxt
, old_tss_sel
, old_tss_base
, &next_tss_desc
);
3303 ret
= task_switch_16(ctxt
, old_tss_sel
,
3304 old_tss_base
, &next_tss_desc
);
3305 if (ret
!= X86EMUL_CONTINUE
)
3308 if (reason
== TASK_SWITCH_CALL
|| reason
== TASK_SWITCH_GATE
)
3309 ctxt
->eflags
= ctxt
->eflags
| X86_EFLAGS_NT
;
3311 if (reason
!= TASK_SWITCH_IRET
) {
3312 next_tss_desc
.type
|= (1 << 1); /* set busy flag */
3313 write_segment_descriptor(ctxt
, tss_selector
, &next_tss_desc
);
3316 ops
->set_cr(ctxt
, 0, ops
->get_cr(ctxt
, 0) | X86_CR0_TS
);
3317 ops
->set_segment(ctxt
, tss_selector
, &next_tss_desc
, 0, VCPU_SREG_TR
);
3319 if (has_error_code
) {
3320 ctxt
->op_bytes
= ctxt
->ad_bytes
= (next_tss_desc
.type
& 8) ? 4 : 2;
3321 ctxt
->lock_prefix
= 0;
3322 ctxt
->src
.val
= (unsigned long) error_code
;
3323 ret
= em_push(ctxt
);
3326 ops
->get_dr(ctxt
, 7, &dr7
);
3327 ops
->set_dr(ctxt
, 7, dr7
& ~(DR_LOCAL_ENABLE_MASK
| DR_LOCAL_SLOWDOWN
));
3332 int emulator_task_switch(struct x86_emulate_ctxt
*ctxt
,
3333 u16 tss_selector
, int idt_index
, int reason
,
3334 bool has_error_code
, u32 error_code
)
3338 invalidate_registers(ctxt
);
3339 ctxt
->_eip
= ctxt
->eip
;
3340 ctxt
->dst
.type
= OP_NONE
;
3342 rc
= emulator_do_task_switch(ctxt
, tss_selector
, idt_index
, reason
,
3343 has_error_code
, error_code
);
3345 if (rc
== X86EMUL_CONTINUE
) {
3346 ctxt
->eip
= ctxt
->_eip
;
3347 writeback_registers(ctxt
);
3350 return (rc
== X86EMUL_UNHANDLEABLE
) ? EMULATION_FAILED
: EMULATION_OK
;
3353 static void string_addr_inc(struct x86_emulate_ctxt
*ctxt
, int reg
,
3356 int df
= (ctxt
->eflags
& X86_EFLAGS_DF
) ? -op
->count
: op
->count
;
3358 register_address_increment(ctxt
, reg
, df
* op
->bytes
);
3359 op
->addr
.mem
.ea
= register_address(ctxt
, reg
);
3362 static int em_das(struct x86_emulate_ctxt
*ctxt
)
3365 bool af
, cf
, old_cf
;
3367 cf
= ctxt
->eflags
& X86_EFLAGS_CF
;
3373 af
= ctxt
->eflags
& X86_EFLAGS_AF
;
3374 if ((al
& 0x0f) > 9 || af
) {
3376 cf
= old_cf
| (al
>= 250);
3381 if (old_al
> 0x99 || old_cf
) {
3387 /* Set PF, ZF, SF */
3388 ctxt
->src
.type
= OP_IMM
;
3390 ctxt
->src
.bytes
= 1;
3391 fastop(ctxt
, em_or
);
3392 ctxt
->eflags
&= ~(X86_EFLAGS_AF
| X86_EFLAGS_CF
);
3394 ctxt
->eflags
|= X86_EFLAGS_CF
;
3396 ctxt
->eflags
|= X86_EFLAGS_AF
;
3397 return X86EMUL_CONTINUE
;
3400 static int em_aam(struct x86_emulate_ctxt
*ctxt
)
3404 if (ctxt
->src
.val
== 0)
3405 return emulate_de(ctxt
);
3407 al
= ctxt
->dst
.val
& 0xff;
3408 ah
= al
/ ctxt
->src
.val
;
3409 al
%= ctxt
->src
.val
;
3411 ctxt
->dst
.val
= (ctxt
->dst
.val
& 0xffff0000) | al
| (ah
<< 8);
3413 /* Set PF, ZF, SF */
3414 ctxt
->src
.type
= OP_IMM
;
3416 ctxt
->src
.bytes
= 1;
3417 fastop(ctxt
, em_or
);
3419 return X86EMUL_CONTINUE
;
3422 static int em_aad(struct x86_emulate_ctxt
*ctxt
)
3424 u8 al
= ctxt
->dst
.val
& 0xff;
3425 u8 ah
= (ctxt
->dst
.val
>> 8) & 0xff;
3427 al
= (al
+ (ah
* ctxt
->src
.val
)) & 0xff;
3429 ctxt
->dst
.val
= (ctxt
->dst
.val
& 0xffff0000) | al
;
3431 /* Set PF, ZF, SF */
3432 ctxt
->src
.type
= OP_IMM
;
3434 ctxt
->src
.bytes
= 1;
3435 fastop(ctxt
, em_or
);
3437 return X86EMUL_CONTINUE
;
3440 static int em_call(struct x86_emulate_ctxt
*ctxt
)
3443 long rel
= ctxt
->src
.val
;
3445 ctxt
->src
.val
= (unsigned long)ctxt
->_eip
;
3446 rc
= jmp_rel(ctxt
, rel
);
3447 if (rc
!= X86EMUL_CONTINUE
)
3449 return em_push(ctxt
);
3452 static int em_call_far(struct x86_emulate_ctxt
*ctxt
)
3457 struct desc_struct old_desc
, new_desc
;
3458 const struct x86_emulate_ops
*ops
= ctxt
->ops
;
3459 int cpl
= ctxt
->ops
->cpl(ctxt
);
3460 enum x86emul_mode prev_mode
= ctxt
->mode
;
3462 old_eip
= ctxt
->_eip
;
3463 ops
->get_segment(ctxt
, &old_cs
, &old_desc
, NULL
, VCPU_SREG_CS
);
3465 memcpy(&sel
, ctxt
->src
.valptr
+ ctxt
->op_bytes
, 2);
3466 rc
= __load_segment_descriptor(ctxt
, sel
, VCPU_SREG_CS
, cpl
,
3467 X86_TRANSFER_CALL_JMP
, &new_desc
);
3468 if (rc
!= X86EMUL_CONTINUE
)
3471 rc
= assign_eip_far(ctxt
, ctxt
->src
.val
, &new_desc
);
3472 if (rc
!= X86EMUL_CONTINUE
)
3475 ctxt
->src
.val
= old_cs
;
3477 if (rc
!= X86EMUL_CONTINUE
)
3480 ctxt
->src
.val
= old_eip
;
3482 /* If we failed, we tainted the memory, but the very least we should
3484 if (rc
!= X86EMUL_CONTINUE
) {
3485 pr_warn_once("faulting far call emulation tainted memory\n");
3490 ops
->set_segment(ctxt
, old_cs
, &old_desc
, 0, VCPU_SREG_CS
);
3491 ctxt
->mode
= prev_mode
;
3496 static int em_ret_near_imm(struct x86_emulate_ctxt
*ctxt
)
3501 rc
= emulate_pop(ctxt
, &eip
, ctxt
->op_bytes
);
3502 if (rc
!= X86EMUL_CONTINUE
)
3504 rc
= assign_eip_near(ctxt
, eip
);
3505 if (rc
!= X86EMUL_CONTINUE
)
3507 rsp_increment(ctxt
, ctxt
->src
.val
);
3508 return X86EMUL_CONTINUE
;
3511 static int em_xchg(struct x86_emulate_ctxt
*ctxt
)
3513 /* Write back the register source. */
3514 ctxt
->src
.val
= ctxt
->dst
.val
;
3515 write_register_operand(&ctxt
->src
);
3517 /* Write back the memory destination with implicit LOCK prefix. */
3518 ctxt
->dst
.val
= ctxt
->src
.orig_val
;
3519 ctxt
->lock_prefix
= 1;
3520 return X86EMUL_CONTINUE
;
3523 static int em_imul_3op(struct x86_emulate_ctxt
*ctxt
)
3525 ctxt
->dst
.val
= ctxt
->src2
.val
;
3526 return fastop(ctxt
, em_imul
);
3529 static int em_cwd(struct x86_emulate_ctxt
*ctxt
)
3531 ctxt
->dst
.type
= OP_REG
;
3532 ctxt
->dst
.bytes
= ctxt
->src
.bytes
;
3533 ctxt
->dst
.addr
.reg
= reg_rmw(ctxt
, VCPU_REGS_RDX
);
3534 ctxt
->dst
.val
= ~((ctxt
->src
.val
>> (ctxt
->src
.bytes
* 8 - 1)) - 1);
3536 return X86EMUL_CONTINUE
;
3539 static int em_rdpid(struct x86_emulate_ctxt
*ctxt
)
3543 if (ctxt
->ops
->get_msr(ctxt
, MSR_TSC_AUX
, &tsc_aux
))
3544 return emulate_ud(ctxt
);
3545 ctxt
->dst
.val
= tsc_aux
;
3546 return X86EMUL_CONTINUE
;
3549 static int em_rdtsc(struct x86_emulate_ctxt
*ctxt
)
3553 ctxt
->ops
->get_msr(ctxt
, MSR_IA32_TSC
, &tsc
);
3554 *reg_write(ctxt
, VCPU_REGS_RAX
) = (u32
)tsc
;
3555 *reg_write(ctxt
, VCPU_REGS_RDX
) = tsc
>> 32;
3556 return X86EMUL_CONTINUE
;
3559 static int em_rdpmc(struct x86_emulate_ctxt
*ctxt
)
3563 if (ctxt
->ops
->read_pmc(ctxt
, reg_read(ctxt
, VCPU_REGS_RCX
), &pmc
))
3564 return emulate_gp(ctxt
, 0);
3565 *reg_write(ctxt
, VCPU_REGS_RAX
) = (u32
)pmc
;
3566 *reg_write(ctxt
, VCPU_REGS_RDX
) = pmc
>> 32;
3567 return X86EMUL_CONTINUE
;
3570 static int em_mov(struct x86_emulate_ctxt
*ctxt
)
3572 memcpy(ctxt
->dst
.valptr
, ctxt
->src
.valptr
, sizeof(ctxt
->src
.valptr
));
3573 return X86EMUL_CONTINUE
;
3576 static int em_movbe(struct x86_emulate_ctxt
*ctxt
)
3580 if (!ctxt
->ops
->guest_has_movbe(ctxt
))
3581 return emulate_ud(ctxt
);
3583 switch (ctxt
->op_bytes
) {
3586 * From MOVBE definition: "...When the operand size is 16 bits,
3587 * the upper word of the destination register remains unchanged
3590 * Both casting ->valptr and ->val to u16 breaks strict aliasing
3591 * rules so we have to do the operation almost per hand.
3593 tmp
= (u16
)ctxt
->src
.val
;
3594 ctxt
->dst
.val
&= ~0xffffUL
;
3595 ctxt
->dst
.val
|= (unsigned long)swab16(tmp
);
3598 ctxt
->dst
.val
= swab32((u32
)ctxt
->src
.val
);
3601 ctxt
->dst
.val
= swab64(ctxt
->src
.val
);
3606 return X86EMUL_CONTINUE
;
3609 static int em_cr_write(struct x86_emulate_ctxt
*ctxt
)
3611 if (ctxt
->ops
->set_cr(ctxt
, ctxt
->modrm_reg
, ctxt
->src
.val
))
3612 return emulate_gp(ctxt
, 0);
3614 /* Disable writeback. */
3615 ctxt
->dst
.type
= OP_NONE
;
3616 return X86EMUL_CONTINUE
;
3619 static int em_dr_write(struct x86_emulate_ctxt
*ctxt
)
3623 if (ctxt
->mode
== X86EMUL_MODE_PROT64
)
3624 val
= ctxt
->src
.val
& ~0ULL;
3626 val
= ctxt
->src
.val
& ~0U;
3628 /* #UD condition is already handled. */
3629 if (ctxt
->ops
->set_dr(ctxt
, ctxt
->modrm_reg
, val
) < 0)
3630 return emulate_gp(ctxt
, 0);
3632 /* Disable writeback. */
3633 ctxt
->dst
.type
= OP_NONE
;
3634 return X86EMUL_CONTINUE
;
3637 static int em_wrmsr(struct x86_emulate_ctxt
*ctxt
)
3639 u64 msr_index
= reg_read(ctxt
, VCPU_REGS_RCX
);
3643 msr_data
= (u32
)reg_read(ctxt
, VCPU_REGS_RAX
)
3644 | ((u64
)reg_read(ctxt
, VCPU_REGS_RDX
) << 32);
3645 r
= ctxt
->ops
->set_msr(ctxt
, msr_index
, msr_data
);
3647 if (r
== X86EMUL_IO_NEEDED
)
3651 return emulate_gp(ctxt
, 0);
3653 return r
< 0 ? X86EMUL_UNHANDLEABLE
: X86EMUL_CONTINUE
;
3656 static int em_rdmsr(struct x86_emulate_ctxt
*ctxt
)
3658 u64 msr_index
= reg_read(ctxt
, VCPU_REGS_RCX
);
3662 r
= ctxt
->ops
->get_msr(ctxt
, msr_index
, &msr_data
);
3664 if (r
== X86EMUL_IO_NEEDED
)
3668 return emulate_gp(ctxt
, 0);
3670 *reg_write(ctxt
, VCPU_REGS_RAX
) = (u32
)msr_data
;
3671 *reg_write(ctxt
, VCPU_REGS_RDX
) = msr_data
>> 32;
3672 return X86EMUL_CONTINUE
;
3675 static int em_store_sreg(struct x86_emulate_ctxt
*ctxt
, int segment
)
3677 if (segment
> VCPU_SREG_GS
&&
3678 (ctxt
->ops
->get_cr(ctxt
, 4) & X86_CR4_UMIP
) &&
3679 ctxt
->ops
->cpl(ctxt
) > 0)
3680 return emulate_gp(ctxt
, 0);
3682 ctxt
->dst
.val
= get_segment_selector(ctxt
, segment
);
3683 if (ctxt
->dst
.bytes
== 4 && ctxt
->dst
.type
== OP_MEM
)
3684 ctxt
->dst
.bytes
= 2;
3685 return X86EMUL_CONTINUE
;
3688 static int em_mov_rm_sreg(struct x86_emulate_ctxt
*ctxt
)
3690 if (ctxt
->modrm_reg
> VCPU_SREG_GS
)
3691 return emulate_ud(ctxt
);
3693 return em_store_sreg(ctxt
, ctxt
->modrm_reg
);
3696 static int em_mov_sreg_rm(struct x86_emulate_ctxt
*ctxt
)
3698 u16 sel
= ctxt
->src
.val
;
3700 if (ctxt
->modrm_reg
== VCPU_SREG_CS
|| ctxt
->modrm_reg
> VCPU_SREG_GS
)
3701 return emulate_ud(ctxt
);
3703 if (ctxt
->modrm_reg
== VCPU_SREG_SS
)
3704 ctxt
->interruptibility
= KVM_X86_SHADOW_INT_MOV_SS
;
3706 /* Disable writeback. */
3707 ctxt
->dst
.type
= OP_NONE
;
3708 return load_segment_descriptor(ctxt
, sel
, ctxt
->modrm_reg
);
3711 static int em_sldt(struct x86_emulate_ctxt
*ctxt
)
3713 return em_store_sreg(ctxt
, VCPU_SREG_LDTR
);
3716 static int em_lldt(struct x86_emulate_ctxt
*ctxt
)
3718 u16 sel
= ctxt
->src
.val
;
3720 /* Disable writeback. */
3721 ctxt
->dst
.type
= OP_NONE
;
3722 return load_segment_descriptor(ctxt
, sel
, VCPU_SREG_LDTR
);
3725 static int em_str(struct x86_emulate_ctxt
*ctxt
)
3727 return em_store_sreg(ctxt
, VCPU_SREG_TR
);
3730 static int em_ltr(struct x86_emulate_ctxt
*ctxt
)
3732 u16 sel
= ctxt
->src
.val
;
3734 /* Disable writeback. */
3735 ctxt
->dst
.type
= OP_NONE
;
3736 return load_segment_descriptor(ctxt
, sel
, VCPU_SREG_TR
);
3739 static int em_invlpg(struct x86_emulate_ctxt
*ctxt
)
3744 rc
= linearize(ctxt
, ctxt
->src
.addr
.mem
, 1, false, &linear
);
3745 if (rc
== X86EMUL_CONTINUE
)
3746 ctxt
->ops
->invlpg(ctxt
, linear
);
3747 /* Disable writeback. */
3748 ctxt
->dst
.type
= OP_NONE
;
3749 return X86EMUL_CONTINUE
;
3752 static int em_clts(struct x86_emulate_ctxt
*ctxt
)
3756 cr0
= ctxt
->ops
->get_cr(ctxt
, 0);
3758 ctxt
->ops
->set_cr(ctxt
, 0, cr0
);
3759 return X86EMUL_CONTINUE
;
3762 static int em_hypercall(struct x86_emulate_ctxt
*ctxt
)
3764 int rc
= ctxt
->ops
->fix_hypercall(ctxt
);
3766 if (rc
!= X86EMUL_CONTINUE
)
3769 /* Let the processor re-execute the fixed hypercall */
3770 ctxt
->_eip
= ctxt
->eip
;
3771 /* Disable writeback. */
3772 ctxt
->dst
.type
= OP_NONE
;
3773 return X86EMUL_CONTINUE
;
3776 static int emulate_store_desc_ptr(struct x86_emulate_ctxt
*ctxt
,
3777 void (*get
)(struct x86_emulate_ctxt
*ctxt
,
3778 struct desc_ptr
*ptr
))
3780 struct desc_ptr desc_ptr
;
3782 if ((ctxt
->ops
->get_cr(ctxt
, 4) & X86_CR4_UMIP
) &&
3783 ctxt
->ops
->cpl(ctxt
) > 0)
3784 return emulate_gp(ctxt
, 0);
3786 if (ctxt
->mode
== X86EMUL_MODE_PROT64
)
3788 get(ctxt
, &desc_ptr
);
3789 if (ctxt
->op_bytes
== 2) {
3791 desc_ptr
.address
&= 0x00ffffff;
3793 /* Disable writeback. */
3794 ctxt
->dst
.type
= OP_NONE
;
3795 return segmented_write_std(ctxt
, ctxt
->dst
.addr
.mem
,
3796 &desc_ptr
, 2 + ctxt
->op_bytes
);
3799 static int em_sgdt(struct x86_emulate_ctxt
*ctxt
)
3801 return emulate_store_desc_ptr(ctxt
, ctxt
->ops
->get_gdt
);
3804 static int em_sidt(struct x86_emulate_ctxt
*ctxt
)
3806 return emulate_store_desc_ptr(ctxt
, ctxt
->ops
->get_idt
);
3809 static int em_lgdt_lidt(struct x86_emulate_ctxt
*ctxt
, bool lgdt
)
3811 struct desc_ptr desc_ptr
;
3814 if (ctxt
->mode
== X86EMUL_MODE_PROT64
)
3816 rc
= read_descriptor(ctxt
, ctxt
->src
.addr
.mem
,
3817 &desc_ptr
.size
, &desc_ptr
.address
,
3819 if (rc
!= X86EMUL_CONTINUE
)
3821 if (ctxt
->mode
== X86EMUL_MODE_PROT64
&&
3822 emul_is_noncanonical_address(desc_ptr
.address
, ctxt
))
3823 return emulate_gp(ctxt
, 0);
3825 ctxt
->ops
->set_gdt(ctxt
, &desc_ptr
);
3827 ctxt
->ops
->set_idt(ctxt
, &desc_ptr
);
3828 /* Disable writeback. */
3829 ctxt
->dst
.type
= OP_NONE
;
3830 return X86EMUL_CONTINUE
;
3833 static int em_lgdt(struct x86_emulate_ctxt
*ctxt
)
3835 return em_lgdt_lidt(ctxt
, true);
3838 static int em_lidt(struct x86_emulate_ctxt
*ctxt
)
3840 return em_lgdt_lidt(ctxt
, false);
3843 static int em_smsw(struct x86_emulate_ctxt
*ctxt
)
3845 if ((ctxt
->ops
->get_cr(ctxt
, 4) & X86_CR4_UMIP
) &&
3846 ctxt
->ops
->cpl(ctxt
) > 0)
3847 return emulate_gp(ctxt
, 0);
3849 if (ctxt
->dst
.type
== OP_MEM
)
3850 ctxt
->dst
.bytes
= 2;
3851 ctxt
->dst
.val
= ctxt
->ops
->get_cr(ctxt
, 0);
3852 return X86EMUL_CONTINUE
;
3855 static int em_lmsw(struct x86_emulate_ctxt
*ctxt
)
3857 ctxt
->ops
->set_cr(ctxt
, 0, (ctxt
->ops
->get_cr(ctxt
, 0) & ~0x0eul
)
3858 | (ctxt
->src
.val
& 0x0f));
3859 ctxt
->dst
.type
= OP_NONE
;
3860 return X86EMUL_CONTINUE
;
3863 static int em_loop(struct x86_emulate_ctxt
*ctxt
)
3865 int rc
= X86EMUL_CONTINUE
;
3867 register_address_increment(ctxt
, VCPU_REGS_RCX
, -1);
3868 if ((address_mask(ctxt
, reg_read(ctxt
, VCPU_REGS_RCX
)) != 0) &&
3869 (ctxt
->b
== 0xe2 || test_cc(ctxt
->b
^ 0x5, ctxt
->eflags
)))
3870 rc
= jmp_rel(ctxt
, ctxt
->src
.val
);
3875 static int em_jcxz(struct x86_emulate_ctxt
*ctxt
)
3877 int rc
= X86EMUL_CONTINUE
;
3879 if (address_mask(ctxt
, reg_read(ctxt
, VCPU_REGS_RCX
)) == 0)
3880 rc
= jmp_rel(ctxt
, ctxt
->src
.val
);
3885 static int em_in(struct x86_emulate_ctxt
*ctxt
)
3887 if (!pio_in_emulated(ctxt
, ctxt
->dst
.bytes
, ctxt
->src
.val
,
3889 return X86EMUL_IO_NEEDED
;
3891 return X86EMUL_CONTINUE
;
3894 static int em_out(struct x86_emulate_ctxt
*ctxt
)
3896 ctxt
->ops
->pio_out_emulated(ctxt
, ctxt
->src
.bytes
, ctxt
->dst
.val
,
3898 /* Disable writeback. */
3899 ctxt
->dst
.type
= OP_NONE
;
3900 return X86EMUL_CONTINUE
;
/*
 * Emulate CLI.  Faults with #GP(0) if the current IOPL does not permit
 * changing the interrupt flag; otherwise clears EFLAGS.IF.
 */
static int em_cli(struct x86_emulate_ctxt *ctxt)
{
	if (emulator_bad_iopl(ctxt))
		return emulate_gp(ctxt, 0);

	ctxt->eflags &= ~X86_EFLAGS_IF;
	return X86EMUL_CONTINUE;
}
/*
 * Emulate STI.  Faults with #GP(0) if the current IOPL does not permit
 * it; otherwise sets EFLAGS.IF and records an STI interrupt shadow so
 * the instruction immediately after STI is not interrupted.
 */
static int em_sti(struct x86_emulate_ctxt *ctxt)
{
	if (emulator_bad_iopl(ctxt))
		return emulate_gp(ctxt, 0);

	ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
	ctxt->eflags |= X86_EFLAGS_IF;
	return X86EMUL_CONTINUE;
}
3922 static int em_cpuid(struct x86_emulate_ctxt
*ctxt
)
3924 u32 eax
, ebx
, ecx
, edx
;
3927 ctxt
->ops
->get_msr(ctxt
, MSR_MISC_FEATURES_ENABLES
, &msr
);
3928 if (msr
& MSR_MISC_FEATURES_ENABLES_CPUID_FAULT
&&
3929 ctxt
->ops
->cpl(ctxt
)) {
3930 return emulate_gp(ctxt
, 0);
3933 eax
= reg_read(ctxt
, VCPU_REGS_RAX
);
3934 ecx
= reg_read(ctxt
, VCPU_REGS_RCX
);
3935 ctxt
->ops
->get_cpuid(ctxt
, &eax
, &ebx
, &ecx
, &edx
, false);
3936 *reg_write(ctxt
, VCPU_REGS_RAX
) = eax
;
3937 *reg_write(ctxt
, VCPU_REGS_RBX
) = ebx
;
3938 *reg_write(ctxt
, VCPU_REGS_RCX
) = ecx
;
3939 *reg_write(ctxt
, VCPU_REGS_RDX
) = edx
;
3940 return X86EMUL_CONTINUE
;
3943 static int em_sahf(struct x86_emulate_ctxt
*ctxt
)
3947 flags
= X86_EFLAGS_CF
| X86_EFLAGS_PF
| X86_EFLAGS_AF
| X86_EFLAGS_ZF
|
3949 flags
&= *reg_rmw(ctxt
, VCPU_REGS_RAX
) >> 8;
3951 ctxt
->eflags
&= ~0xffUL
;
3952 ctxt
->eflags
|= flags
| X86_EFLAGS_FIXED
;
3953 return X86EMUL_CONTINUE
;
/*
 * Emulate LAHF: copy the low byte of EFLAGS (SF/ZF/AF/PF/CF plus the
 * fixed bits) into AH, leaving the rest of RAX untouched.
 */
static int em_lahf(struct x86_emulate_ctxt *ctxt)
{
	*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
	*reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
	return X86EMUL_CONTINUE;
}
3963 static int em_bswap(struct x86_emulate_ctxt
*ctxt
)
3965 switch (ctxt
->op_bytes
) {
3966 #ifdef CONFIG_X86_64
3968 asm("bswap %0" : "+r"(ctxt
->dst
.val
));
3972 asm("bswap %0" : "+r"(*(u32
*)&ctxt
->dst
.val
));
3975 return X86EMUL_CONTINUE
;
/*
 * CLFLUSH has no guest-visible effect that needs emulating; treat it as
 * a nop even if the guest CPUID does not advertise it.
 */
static int em_clflush(struct x86_emulate_ctxt *ctxt)
{
	/* emulating clflush regardless of cpuid */
	return X86EMUL_CONTINUE;
}
/* Same as em_clflush(): CLFLUSHOPT is emulated as a nop. */
static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
{
	/* emulating clflushopt regardless of cpuid */
	return X86EMUL_CONTINUE;
}
/*
 * Emulate MOVSXD: sign-extend the 32-bit source operand into the
 * destination register.
 */
static int em_movsxd(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.val = (s32) ctxt->src.val;
	return X86EMUL_CONTINUE;
}
/*
 * Common pre-checks for FXSAVE/FXRSTOR emulation: #UD if the guest lacks
 * FXSR, #NM if CR0.TS or CR0.EM is set.
 */
static int check_fxsr(struct x86_emulate_ctxt *ctxt)
{
	if (!ctxt->ops->guest_has_fxsr(ctxt))
		return emulate_ud(ctxt);

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	/*
	 * Don't emulate a case that should never be hit, instead of working
	 * around a lack of fxsave64/fxrstor64 on old compilers.
	 */
	if (ctxt->mode >= X86EMUL_MODE_PROT64)
		return X86EMUL_UNHANDLEABLE;

	return X86EMUL_CONTINUE;
}
4015 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
4016 * and restore MXCSR.
/*
 * Size of the FXSAVE area covering the legacy x87/MXCSR header plus the
 * first @nregs XMM registers (16 bytes each).
 */
static size_t __fxstate_size(int nregs)
{
	return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
}
4023 static inline size_t fxstate_size(struct x86_emulate_ctxt
*ctxt
)
4026 if (ctxt
->mode
== X86EMUL_MODE_PROT64
)
4027 return __fxstate_size(16);
4029 cr4_osfxsr
= ctxt
->ops
->get_cr(ctxt
, 4) & X86_CR4_OSFXSR
;
4030 return __fxstate_size(cr4_osfxsr
? 8 : 0);
4034 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
4037 * - like (1), but FIP and FDP (foo) are only 16 bit. At least Intel CPUs
4038 * preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
4040 * 3) 64-bit mode with REX.W prefix
4041 * - like (2), but XMM 8-15 are being saved and restored
4042 * 4) 64-bit mode without REX.W prefix
4043 * - like (3), but FIP and FDP are 64 bit
4045 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
4046 * desired result. (4) is not emulated.
4048 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
4049 * and FPU DS) should match.
4051 static int em_fxsave(struct x86_emulate_ctxt
*ctxt
)
4053 struct fxregs_state fx_state
;
4056 rc
= check_fxsr(ctxt
);
4057 if (rc
!= X86EMUL_CONTINUE
)
4062 rc
= asm_safe("fxsave %[fx]", , [fx
] "+m"(fx_state
));
4066 if (rc
!= X86EMUL_CONTINUE
)
4069 return segmented_write_std(ctxt
, ctxt
->memop
.addr
.mem
, &fx_state
,
4070 fxstate_size(ctxt
));
4074 * FXRSTOR might restore XMM registers not provided by the guest. Fill
4075 * in the host registers (via FXSAVE) instead, so they won't be modified.
4076 * (preemption has to stay disabled until FXRSTOR).
4078 * Use noinline to keep the stack for other functions called by callers small.
4080 static noinline
int fxregs_fixup(struct fxregs_state
*fx_state
,
4081 const size_t used_size
)
4083 struct fxregs_state fx_tmp
;
4086 rc
= asm_safe("fxsave %[fx]", , [fx
] "+m"(fx_tmp
));
4087 memcpy((void *)fx_state
+ used_size
, (void *)&fx_tmp
+ used_size
,
4088 __fxstate_size(16) - used_size
);
4093 static int em_fxrstor(struct x86_emulate_ctxt
*ctxt
)
4095 struct fxregs_state fx_state
;
4099 rc
= check_fxsr(ctxt
);
4100 if (rc
!= X86EMUL_CONTINUE
)
4103 size
= fxstate_size(ctxt
);
4104 rc
= segmented_read_std(ctxt
, ctxt
->memop
.addr
.mem
, &fx_state
, size
);
4105 if (rc
!= X86EMUL_CONTINUE
)
4110 if (size
< __fxstate_size(16)) {
4111 rc
= fxregs_fixup(&fx_state
, size
);
4112 if (rc
!= X86EMUL_CONTINUE
)
4116 if (fx_state
.mxcsr
>> 16) {
4117 rc
= emulate_gp(ctxt
, 0);
4121 if (rc
== X86EMUL_CONTINUE
)
4122 rc
= asm_safe("fxrstor %[fx]", : [fx
] "m"(fx_state
));
4130 static int em_xsetbv(struct x86_emulate_ctxt
*ctxt
)
4134 eax
= reg_read(ctxt
, VCPU_REGS_RAX
);
4135 edx
= reg_read(ctxt
, VCPU_REGS_RDX
);
4136 ecx
= reg_read(ctxt
, VCPU_REGS_RCX
);
4138 if (ctxt
->ops
->set_xcr(ctxt
, ecx
, ((u64
)edx
<< 32) | eax
))
4139 return emulate_gp(ctxt
, 0);
4141 return X86EMUL_CONTINUE
;
4144 static bool valid_cr(int nr
)
/*
 * Intercept check for MOV to/from CRn: #UD when the modrm reg field names
 * a control register that does not exist.
 */
static int check_cr_access(struct x86_emulate_ctxt *ctxt)
{
	if (!valid_cr(ctxt->modrm_reg))
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}
4164 static int check_dr7_gd(struct x86_emulate_ctxt
*ctxt
)
4168 ctxt
->ops
->get_dr(ctxt
, 7, &dr7
);
4170 /* Check if DR7.Global_Enable is set */
4171 return dr7
& (1 << 13);
4174 static int check_dr_read(struct x86_emulate_ctxt
*ctxt
)
4176 int dr
= ctxt
->modrm_reg
;
4180 return emulate_ud(ctxt
);
4182 cr4
= ctxt
->ops
->get_cr(ctxt
, 4);
4183 if ((cr4
& X86_CR4_DE
) && (dr
== 4 || dr
== 5))
4184 return emulate_ud(ctxt
);
4186 if (check_dr7_gd(ctxt
)) {
4189 ctxt
->ops
->get_dr(ctxt
, 6, &dr6
);
4190 dr6
&= ~DR_TRAP_BITS
;
4191 dr6
|= DR6_BD
| DR6_ACTIVE_LOW
;
4192 ctxt
->ops
->set_dr(ctxt
, 6, dr6
);
4193 return emulate_db(ctxt
);
4196 return X86EMUL_CONTINUE
;
/*
 * Pre-check for writes to debug registers: writing a value with any of
 * the upper 32 bits set to DR6/DR7 raises #GP(0); the remaining checks
 * are shared with the read path.
 */
static int check_dr_write(struct x86_emulate_ctxt *ctxt)
{
	u64 new_val = ctxt->src.val64;
	int dr = ctxt->modrm_reg;

	if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
		return emulate_gp(ctxt, 0);

	return check_dr_read(ctxt);
}
4210 static int check_svme(struct x86_emulate_ctxt
*ctxt
)
4214 ctxt
->ops
->get_msr(ctxt
, MSR_EFER
, &efer
);
4216 if (!(efer
& EFER_SVME
))
4217 return emulate_ud(ctxt
);
4219 return X86EMUL_CONTINUE
;
/*
 * Pre-check for SVM instructions that take a physical address in RAX
 * (VMRUN/VMLOAD/VMSAVE): reject addresses with any of the top 16 bits
 * set with #GP(0) before applying the common SVME check.
 */
static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
{
	u64 rax = reg_read(ctxt, VCPU_REGS_RAX);

	/* Valid physical address? */
	if (rax & 0xffff000000000000ULL)
		return emulate_gp(ctxt, 0);

	return check_svme(ctxt);
}
/*
 * RDTSC pre-check: with CR4.TSD set, RDTSC is privileged and raises
 * #GP(0) at CPL > 0.
 */
static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
{
	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);

	if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}
/*
 * RDPMC pre-check: #GP(0) unless the counter selected by ECX may be read
 * at the current privilege level.
 */
static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
{
	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);

	/*
	 * VMware allows access to these Pseudo-PMCs even when read via RDPMC
	 * in Ring3 when CR4.PCE=0.
	 */
	if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
		return X86EMUL_CONTINUE;

	/*
	 * If CR4.PCE is set, the SDM requires CPL=0 or CR0.PE=0. The CR0.PE
	 * check however is unnecessary because CPL is always 0 outside
	 * protected mode.
	 */
	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
	    ctxt->ops->check_pmc(ctxt, rcx))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}
/*
 * Permission check for IN/INS: cap the access width at 4 bytes, then
 * consult the TSS I/O permission bitmap for the port in src.val; #GP(0)
 * if access is denied.
 */
static int check_perm_in(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
	if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}
/*
 * Permission check for OUT/OUTS: mirror image of check_perm_in() — the
 * port number is in dst.val and the width comes from the source operand.
 */
static int check_perm_out(struct x86_emulate_ctxt *ctxt)
{
	ctxt->src.bytes = min(ctxt->src.bytes, 4u);
	if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}
4285 #define D(_y) { .flags = (_y) }
4286 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
4287 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
4288 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4289 #define N D(NotImpl)
4290 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
4291 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
4292 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
4293 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
4294 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
4295 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4296 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4297 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4298 #define II(_f, _e, _i) \
4299 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4300 #define IIP(_f, _e, _i, _p) \
4301 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4302 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4303 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4305 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4306 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4307 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4308 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4309 #define I2bvIP(_f, _e, _i, _p) \
4310 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4312 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4313 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4314 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
4316 static const struct opcode group7_rm0
[] = {
4318 I(SrcNone
| Priv
| EmulateOnUD
, em_hypercall
),
4322 static const struct opcode group7_rm1
[] = {
4323 DI(SrcNone
| Priv
, monitor
),
4324 DI(SrcNone
| Priv
, mwait
),
4328 static const struct opcode group7_rm2
[] = {
4330 II(ImplicitOps
| Priv
, em_xsetbv
, xsetbv
),
4334 static const struct opcode group7_rm3
[] = {
4335 DIP(SrcNone
| Prot
| Priv
, vmrun
, check_svme_pa
),
4336 II(SrcNone
| Prot
| EmulateOnUD
, em_hypercall
, vmmcall
),
4337 DIP(SrcNone
| Prot
| Priv
, vmload
, check_svme_pa
),
4338 DIP(SrcNone
| Prot
| Priv
, vmsave
, check_svme_pa
),
4339 DIP(SrcNone
| Prot
| Priv
, stgi
, check_svme
),
4340 DIP(SrcNone
| Prot
| Priv
, clgi
, check_svme
),
4341 DIP(SrcNone
| Prot
| Priv
, skinit
, check_svme
),
4342 DIP(SrcNone
| Prot
| Priv
, invlpga
, check_svme
),
4345 static const struct opcode group7_rm7
[] = {
4347 DIP(SrcNone
, rdtscp
, check_rdtsc
),
4351 static const struct opcode group1
[] = {
4353 F(Lock
| PageTable
, em_or
),
4356 F(Lock
| PageTable
, em_and
),
4362 static const struct opcode group1A
[] = {
4363 I(DstMem
| SrcNone
| Mov
| Stack
| IncSP
| TwoMemOp
, em_pop
), N
, N
, N
, N
, N
, N
, N
,
4366 static const struct opcode group2
[] = {
4367 F(DstMem
| ModRM
, em_rol
),
4368 F(DstMem
| ModRM
, em_ror
),
4369 F(DstMem
| ModRM
, em_rcl
),
4370 F(DstMem
| ModRM
, em_rcr
),
4371 F(DstMem
| ModRM
, em_shl
),
4372 F(DstMem
| ModRM
, em_shr
),
4373 F(DstMem
| ModRM
, em_shl
),
4374 F(DstMem
| ModRM
, em_sar
),
4377 static const struct opcode group3
[] = {
4378 F(DstMem
| SrcImm
| NoWrite
, em_test
),
4379 F(DstMem
| SrcImm
| NoWrite
, em_test
),
4380 F(DstMem
| SrcNone
| Lock
, em_not
),
4381 F(DstMem
| SrcNone
| Lock
, em_neg
),
4382 F(DstXacc
| Src2Mem
, em_mul_ex
),
4383 F(DstXacc
| Src2Mem
, em_imul_ex
),
4384 F(DstXacc
| Src2Mem
, em_div_ex
),
4385 F(DstXacc
| Src2Mem
, em_idiv_ex
),
4388 static const struct opcode group4
[] = {
4389 F(ByteOp
| DstMem
| SrcNone
| Lock
, em_inc
),
4390 F(ByteOp
| DstMem
| SrcNone
| Lock
, em_dec
),
4394 static const struct opcode group5
[] = {
4395 F(DstMem
| SrcNone
| Lock
, em_inc
),
4396 F(DstMem
| SrcNone
| Lock
, em_dec
),
4397 I(SrcMem
| NearBranch
| IsBranch
, em_call_near_abs
),
4398 I(SrcMemFAddr
| ImplicitOps
| IsBranch
, em_call_far
),
4399 I(SrcMem
| NearBranch
| IsBranch
, em_jmp_abs
),
4400 I(SrcMemFAddr
| ImplicitOps
| IsBranch
, em_jmp_far
),
4401 I(SrcMem
| Stack
| TwoMemOp
, em_push
), D(Undefined
),
4404 static const struct opcode group6
[] = {
4405 II(Prot
| DstMem
, em_sldt
, sldt
),
4406 II(Prot
| DstMem
, em_str
, str
),
4407 II(Prot
| Priv
| SrcMem16
, em_lldt
, lldt
),
4408 II(Prot
| Priv
| SrcMem16
, em_ltr
, ltr
),
4412 static const struct group_dual group7
= { {
4413 II(Mov
| DstMem
, em_sgdt
, sgdt
),
4414 II(Mov
| DstMem
, em_sidt
, sidt
),
4415 II(SrcMem
| Priv
, em_lgdt
, lgdt
),
4416 II(SrcMem
| Priv
, em_lidt
, lidt
),
4417 II(SrcNone
| DstMem
| Mov
, em_smsw
, smsw
), N
,
4418 II(SrcMem16
| Mov
| Priv
, em_lmsw
, lmsw
),
4419 II(SrcMem
| ByteOp
| Priv
| NoAccess
, em_invlpg
, invlpg
),
4425 II(SrcNone
| DstMem
| Mov
, em_smsw
, smsw
), N
,
4426 II(SrcMem16
| Mov
| Priv
, em_lmsw
, lmsw
),
4430 static const struct opcode group8
[] = {
4432 F(DstMem
| SrcImmByte
| NoWrite
, em_bt
),
4433 F(DstMem
| SrcImmByte
| Lock
| PageTable
, em_bts
),
4434 F(DstMem
| SrcImmByte
| Lock
, em_btr
),
4435 F(DstMem
| SrcImmByte
| Lock
| PageTable
, em_btc
),
4439 * The "memory" destination is actually always a register, since we come
4440 * from the register case of group9.
4442 static const struct gprefix pfx_0f_c7_7
= {
4443 N
, N
, N
, II(DstMem
| ModRM
| Op3264
| EmulateOnUD
, em_rdpid
, rdpid
),
4447 static const struct group_dual group9
= { {
4448 N
, I(DstMem64
| Lock
| PageTable
, em_cmpxchg8b
), N
, N
, N
, N
, N
, N
,
4450 N
, N
, N
, N
, N
, N
, N
,
4451 GP(0, &pfx_0f_c7_7
),
4454 static const struct opcode group11
[] = {
4455 I(DstMem
| SrcImm
| Mov
| PageTable
, em_mov
),
4459 static const struct gprefix pfx_0f_ae_7
= {
4460 I(SrcMem
| ByteOp
, em_clflush
), I(SrcMem
| ByteOp
, em_clflushopt
), N
, N
,
4463 static const struct group_dual group15
= { {
4464 I(ModRM
| Aligned16
, em_fxsave
),
4465 I(ModRM
| Aligned16
, em_fxrstor
),
4466 N
, N
, N
, N
, N
, GP(0, &pfx_0f_ae_7
),
4468 N
, N
, N
, N
, N
, N
, N
, N
,
4471 static const struct gprefix pfx_0f_6f_0f_7f
= {
4472 I(Mmx
, em_mov
), I(Sse
| Aligned
, em_mov
), N
, I(Sse
| Unaligned
, em_mov
),
4475 static const struct instr_dual instr_dual_0f_2b
= {
4479 static const struct gprefix pfx_0f_2b
= {
4480 ID(0, &instr_dual_0f_2b
), ID(0, &instr_dual_0f_2b
), N
, N
,
4483 static const struct gprefix pfx_0f_10_0f_11
= {
4484 I(Unaligned
, em_mov
), I(Unaligned
, em_mov
), N
, N
,
4487 static const struct gprefix pfx_0f_28_0f_29
= {
4488 I(Aligned
, em_mov
), I(Aligned
, em_mov
), N
, N
,
4491 static const struct gprefix pfx_0f_e7
= {
4492 N
, I(Sse
, em_mov
), N
, N
,
4495 static const struct escape escape_d9
= { {
4496 N
, N
, N
, N
, N
, N
, N
, I(DstMem16
| Mov
, em_fnstcw
),
4499 N
, N
, N
, N
, N
, N
, N
, N
,
4501 N
, N
, N
, N
, N
, N
, N
, N
,
4503 N
, N
, N
, N
, N
, N
, N
, N
,
4505 N
, N
, N
, N
, N
, N
, N
, N
,
4507 N
, N
, N
, N
, N
, N
, N
, N
,
4509 N
, N
, N
, N
, N
, N
, N
, N
,
4511 N
, N
, N
, N
, N
, N
, N
, N
,
4513 N
, N
, N
, N
, N
, N
, N
, N
,
4516 static const struct escape escape_db
= { {
4517 N
, N
, N
, N
, N
, N
, N
, N
,
4520 N
, N
, N
, N
, N
, N
, N
, N
,
4522 N
, N
, N
, N
, N
, N
, N
, N
,
4524 N
, N
, N
, N
, N
, N
, N
, N
,
4526 N
, N
, N
, N
, N
, N
, N
, N
,
4528 N
, N
, N
, I(ImplicitOps
, em_fninit
), N
, N
, N
, N
,
4530 N
, N
, N
, N
, N
, N
, N
, N
,
4532 N
, N
, N
, N
, N
, N
, N
, N
,
4534 N
, N
, N
, N
, N
, N
, N
, N
,
4537 static const struct escape escape_dd
= { {
4538 N
, N
, N
, N
, N
, N
, N
, I(DstMem16
| Mov
, em_fnstsw
),
4541 N
, N
, N
, N
, N
, N
, N
, N
,
4543 N
, N
, N
, N
, N
, N
, N
, N
,
4545 N
, N
, N
, N
, N
, N
, N
, N
,
4547 N
, N
, N
, N
, N
, N
, N
, N
,
4549 N
, N
, N
, N
, N
, N
, N
, N
,
4551 N
, N
, N
, N
, N
, N
, N
, N
,
4553 N
, N
, N
, N
, N
, N
, N
, N
,
4555 N
, N
, N
, N
, N
, N
, N
, N
,
4558 static const struct instr_dual instr_dual_0f_c3
= {
4559 I(DstMem
| SrcReg
| ModRM
| No16
| Mov
, em_mov
), N
4562 static const struct mode_dual mode_dual_63
= {
4563 N
, I(DstReg
| SrcMem32
| ModRM
| Mov
, em_movsxd
)
4566 static const struct opcode opcode_table
[256] = {
4568 F6ALU(Lock
, em_add
),
4569 I(ImplicitOps
| Stack
| No64
| Src2ES
, em_push_sreg
),
4570 I(ImplicitOps
| Stack
| No64
| Src2ES
, em_pop_sreg
),
4572 F6ALU(Lock
| PageTable
, em_or
),
4573 I(ImplicitOps
| Stack
| No64
| Src2CS
, em_push_sreg
),
4576 F6ALU(Lock
, em_adc
),
4577 I(ImplicitOps
| Stack
| No64
| Src2SS
, em_push_sreg
),
4578 I(ImplicitOps
| Stack
| No64
| Src2SS
, em_pop_sreg
),
4580 F6ALU(Lock
, em_sbb
),
4581 I(ImplicitOps
| Stack
| No64
| Src2DS
, em_push_sreg
),
4582 I(ImplicitOps
| Stack
| No64
| Src2DS
, em_pop_sreg
),
4584 F6ALU(Lock
| PageTable
, em_and
), N
, N
,
4586 F6ALU(Lock
, em_sub
), N
, I(ByteOp
| DstAcc
| No64
, em_das
),
4588 F6ALU(Lock
, em_xor
), N
, N
,
4590 F6ALU(NoWrite
, em_cmp
), N
, N
,
4592 X8(F(DstReg
, em_inc
)), X8(F(DstReg
, em_dec
)),
4594 X8(I(SrcReg
| Stack
, em_push
)),
4596 X8(I(DstReg
| Stack
, em_pop
)),
4598 I(ImplicitOps
| Stack
| No64
, em_pusha
),
4599 I(ImplicitOps
| Stack
| No64
, em_popa
),
4600 N
, MD(ModRM
, &mode_dual_63
),
4603 I(SrcImm
| Mov
| Stack
, em_push
),
4604 I(DstReg
| SrcMem
| ModRM
| Src2Imm
, em_imul_3op
),
4605 I(SrcImmByte
| Mov
| Stack
, em_push
),
4606 I(DstReg
| SrcMem
| ModRM
| Src2ImmByte
, em_imul_3op
),
4607 I2bvIP(DstDI
| SrcDX
| Mov
| String
| Unaligned
, em_in
, ins
, check_perm_in
), /* insb, insw/insd */
4608 I2bvIP(SrcSI
| DstDX
| String
, em_out
, outs
, check_perm_out
), /* outsb, outsw/outsd */
4610 X16(D(SrcImmByte
| NearBranch
| IsBranch
)),
4612 G(ByteOp
| DstMem
| SrcImm
, group1
),
4613 G(DstMem
| SrcImm
, group1
),
4614 G(ByteOp
| DstMem
| SrcImm
| No64
, group1
),
4615 G(DstMem
| SrcImmByte
, group1
),
4616 F2bv(DstMem
| SrcReg
| ModRM
| NoWrite
, em_test
),
4617 I2bv(DstMem
| SrcReg
| ModRM
| Lock
| PageTable
, em_xchg
),
4619 I2bv(DstMem
| SrcReg
| ModRM
| Mov
| PageTable
, em_mov
),
4620 I2bv(DstReg
| SrcMem
| ModRM
| Mov
, em_mov
),
4621 I(DstMem
| SrcNone
| ModRM
| Mov
| PageTable
, em_mov_rm_sreg
),
4622 D(ModRM
| SrcMem
| NoAccess
| DstReg
),
4623 I(ImplicitOps
| SrcMem16
| ModRM
, em_mov_sreg_rm
),
4626 DI(SrcAcc
| DstReg
, pause
), X7(D(SrcAcc
| DstReg
)),
4628 D(DstAcc
| SrcNone
), I(ImplicitOps
| SrcAcc
, em_cwd
),
4629 I(SrcImmFAddr
| No64
| IsBranch
, em_call_far
), N
,
4630 II(ImplicitOps
| Stack
, em_pushf
, pushf
),
4631 II(ImplicitOps
| Stack
, em_popf
, popf
),
4632 I(ImplicitOps
, em_sahf
), I(ImplicitOps
, em_lahf
),
4634 I2bv(DstAcc
| SrcMem
| Mov
| MemAbs
, em_mov
),
4635 I2bv(DstMem
| SrcAcc
| Mov
| MemAbs
| PageTable
, em_mov
),
4636 I2bv(SrcSI
| DstDI
| Mov
| String
| TwoMemOp
, em_mov
),
4637 F2bv(SrcSI
| DstDI
| String
| NoWrite
| TwoMemOp
, em_cmp_r
),
4639 F2bv(DstAcc
| SrcImm
| NoWrite
, em_test
),
4640 I2bv(SrcAcc
| DstDI
| Mov
| String
, em_mov
),
4641 I2bv(SrcSI
| DstAcc
| Mov
| String
, em_mov
),
4642 F2bv(SrcAcc
| DstDI
| String
| NoWrite
, em_cmp_r
),
4644 X8(I(ByteOp
| DstReg
| SrcImm
| Mov
, em_mov
)),
4646 X8(I(DstReg
| SrcImm64
| Mov
, em_mov
)),
4648 G(ByteOp
| Src2ImmByte
, group2
), G(Src2ImmByte
, group2
),
4649 I(ImplicitOps
| NearBranch
| SrcImmU16
| IsBranch
, em_ret_near_imm
),
4650 I(ImplicitOps
| NearBranch
| IsBranch
, em_ret
),
4651 I(DstReg
| SrcMemFAddr
| ModRM
| No64
| Src2ES
, em_lseg
),
4652 I(DstReg
| SrcMemFAddr
| ModRM
| No64
| Src2DS
, em_lseg
),
4653 G(ByteOp
, group11
), G(0, group11
),
4655 I(Stack
| SrcImmU16
| Src2ImmByte
| IsBranch
, em_enter
),
4656 I(Stack
| IsBranch
, em_leave
),
4657 I(ImplicitOps
| SrcImmU16
| IsBranch
, em_ret_far_imm
),
4658 I(ImplicitOps
| IsBranch
, em_ret_far
),
4659 D(ImplicitOps
| IsBranch
), DI(SrcImmByte
| IsBranch
, intn
),
4660 D(ImplicitOps
| No64
| IsBranch
),
4661 II(ImplicitOps
| IsBranch
, em_iret
, iret
),
4663 G(Src2One
| ByteOp
, group2
), G(Src2One
, group2
),
4664 G(Src2CL
| ByteOp
, group2
), G(Src2CL
, group2
),
4665 I(DstAcc
| SrcImmUByte
| No64
, em_aam
),
4666 I(DstAcc
| SrcImmUByte
| No64
, em_aad
),
4667 F(DstAcc
| ByteOp
| No64
, em_salc
),
4668 I(DstAcc
| SrcXLat
| ByteOp
, em_mov
),
4670 N
, E(0, &escape_d9
), N
, E(0, &escape_db
), N
, E(0, &escape_dd
), N
, N
,
4672 X3(I(SrcImmByte
| NearBranch
| IsBranch
, em_loop
)),
4673 I(SrcImmByte
| NearBranch
| IsBranch
, em_jcxz
),
4674 I2bvIP(SrcImmUByte
| DstAcc
, em_in
, in
, check_perm_in
),
4675 I2bvIP(SrcAcc
| DstImmUByte
, em_out
, out
, check_perm_out
),
4677 I(SrcImm
| NearBranch
| IsBranch
, em_call
),
4678 D(SrcImm
| ImplicitOps
| NearBranch
| IsBranch
),
4679 I(SrcImmFAddr
| No64
| IsBranch
, em_jmp_far
),
4680 D(SrcImmByte
| ImplicitOps
| NearBranch
| IsBranch
),
4681 I2bvIP(SrcDX
| DstAcc
, em_in
, in
, check_perm_in
),
4682 I2bvIP(SrcAcc
| DstDX
, em_out
, out
, check_perm_out
),
4684 N
, DI(ImplicitOps
, icebp
), N
, N
,
4685 DI(ImplicitOps
| Priv
, hlt
), D(ImplicitOps
),
4686 G(ByteOp
, group3
), G(0, group3
),
4688 D(ImplicitOps
), D(ImplicitOps
),
4689 I(ImplicitOps
, em_cli
), I(ImplicitOps
, em_sti
),
4690 D(ImplicitOps
), D(ImplicitOps
), G(0, group4
), G(0, group5
),
4693 static const struct opcode twobyte_table
[256] = {
4695 G(0, group6
), GD(0, &group7
), N
, N
,
4696 N
, I(ImplicitOps
| EmulateOnUD
| IsBranch
, em_syscall
),
4697 II(ImplicitOps
| Priv
, em_clts
, clts
), N
,
4698 DI(ImplicitOps
| Priv
, invd
), DI(ImplicitOps
| Priv
, wbinvd
), N
, N
,
4699 N
, D(ImplicitOps
| ModRM
| SrcMem
| NoAccess
), N
, N
,
4701 GP(ModRM
| DstReg
| SrcMem
| Mov
| Sse
, &pfx_0f_10_0f_11
),
4702 GP(ModRM
| DstMem
| SrcReg
| Mov
| Sse
, &pfx_0f_10_0f_11
),
4704 D(ImplicitOps
| ModRM
| SrcMem
| NoAccess
), /* 4 * prefetch + 4 * reserved NOP */
4705 D(ImplicitOps
| ModRM
| SrcMem
| NoAccess
), N
, N
,
4706 D(ImplicitOps
| ModRM
| SrcMem
| NoAccess
), /* 8 * reserved NOP */
4707 D(ImplicitOps
| ModRM
| SrcMem
| NoAccess
), /* 8 * reserved NOP */
4708 D(ImplicitOps
| ModRM
| SrcMem
| NoAccess
), /* 8 * reserved NOP */
4709 D(ImplicitOps
| ModRM
| SrcMem
| NoAccess
), /* NOP + 7 * reserved NOP */
4711 DIP(ModRM
| DstMem
| Priv
| Op3264
| NoMod
, cr_read
, check_cr_access
),
4712 DIP(ModRM
| DstMem
| Priv
| Op3264
| NoMod
, dr_read
, check_dr_read
),
4713 IIP(ModRM
| SrcMem
| Priv
| Op3264
| NoMod
, em_cr_write
, cr_write
,
4715 IIP(ModRM
| SrcMem
| Priv
| Op3264
| NoMod
, em_dr_write
, dr_write
,
4718 GP(ModRM
| DstReg
| SrcMem
| Mov
| Sse
, &pfx_0f_28_0f_29
),
4719 GP(ModRM
| DstMem
| SrcReg
| Mov
| Sse
, &pfx_0f_28_0f_29
),
4720 N
, GP(ModRM
| DstMem
| SrcReg
| Mov
| Sse
, &pfx_0f_2b
),
4723 II(ImplicitOps
| Priv
, em_wrmsr
, wrmsr
),
4724 IIP(ImplicitOps
, em_rdtsc
, rdtsc
, check_rdtsc
),
4725 II(ImplicitOps
| Priv
, em_rdmsr
, rdmsr
),
4726 IIP(ImplicitOps
, em_rdpmc
, rdpmc
, check_rdpmc
),
4727 I(ImplicitOps
| EmulateOnUD
| IsBranch
, em_sysenter
),
4728 I(ImplicitOps
| Priv
| EmulateOnUD
| IsBranch
, em_sysexit
),
4730 N
, N
, N
, N
, N
, N
, N
, N
,
4732 X16(D(DstReg
| SrcMem
| ModRM
)),
4734 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
4739 N
, N
, N
, GP(SrcMem
| DstReg
| ModRM
| Mov
, &pfx_0f_6f_0f_7f
),
4744 N
, N
, N
, GP(SrcReg
| DstMem
| ModRM
| Mov
, &pfx_0f_6f_0f_7f
),
4746 X16(D(SrcImm
| NearBranch
| IsBranch
)),
4748 X16(D(ByteOp
| DstMem
| SrcNone
| ModRM
| Mov
)),
4750 I(Stack
| Src2FS
, em_push_sreg
), I(Stack
| Src2FS
, em_pop_sreg
),
4751 II(ImplicitOps
, em_cpuid
, cpuid
),
4752 F(DstMem
| SrcReg
| ModRM
| BitOp
| NoWrite
, em_bt
),
4753 F(DstMem
| SrcReg
| Src2ImmByte
| ModRM
, em_shld
),
4754 F(DstMem
| SrcReg
| Src2CL
| ModRM
, em_shld
), N
, N
,
4756 I(Stack
| Src2GS
, em_push_sreg
), I(Stack
| Src2GS
, em_pop_sreg
),
4757 II(EmulateOnUD
| ImplicitOps
, em_rsm
, rsm
),
4758 F(DstMem
| SrcReg
| ModRM
| BitOp
| Lock
| PageTable
, em_bts
),
4759 F(DstMem
| SrcReg
| Src2ImmByte
| ModRM
, em_shrd
),
4760 F(DstMem
| SrcReg
| Src2CL
| ModRM
, em_shrd
),
4761 GD(0, &group15
), F(DstReg
| SrcMem
| ModRM
, em_imul
),
4763 I2bv(DstMem
| SrcReg
| ModRM
| Lock
| PageTable
| SrcWrite
, em_cmpxchg
),
4764 I(DstReg
| SrcMemFAddr
| ModRM
| Src2SS
, em_lseg
),
4765 F(DstMem
| SrcReg
| ModRM
| BitOp
| Lock
, em_btr
),
4766 I(DstReg
| SrcMemFAddr
| ModRM
| Src2FS
, em_lseg
),
4767 I(DstReg
| SrcMemFAddr
| ModRM
| Src2GS
, em_lseg
),
4768 D(DstReg
| SrcMem8
| ModRM
| Mov
), D(DstReg
| SrcMem16
| ModRM
| Mov
),
4772 F(DstMem
| SrcReg
| ModRM
| BitOp
| Lock
| PageTable
, em_btc
),
4773 I(DstReg
| SrcMem
| ModRM
, em_bsf_c
),
4774 I(DstReg
| SrcMem
| ModRM
, em_bsr_c
),
4775 D(DstReg
| SrcMem8
| ModRM
| Mov
), D(DstReg
| SrcMem16
| ModRM
| Mov
),
4777 F2bv(DstMem
| SrcReg
| ModRM
| SrcWrite
| Lock
, em_xadd
),
4778 N
, ID(0, &instr_dual_0f_c3
),
4779 N
, N
, N
, GD(0, &group9
),
4781 X8(I(DstReg
, em_bswap
)),
4783 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
,
4785 N
, N
, N
, N
, N
, N
, N
, GP(SrcReg
| DstMem
| ModRM
| Mov
, &pfx_0f_e7
),
4786 N
, N
, N
, N
, N
, N
, N
, N
,
4788 N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
, N
4791 static const struct instr_dual instr_dual_0f_38_f0
= {
4792 I(DstReg
| SrcMem
| Mov
, em_movbe
), N
4795 static const struct instr_dual instr_dual_0f_38_f1
= {
4796 I(DstMem
| SrcReg
| Mov
, em_movbe
), N
4799 static const struct gprefix three_byte_0f_38_f0
= {
4800 ID(0, &instr_dual_0f_38_f0
), N
, N
, N
4803 static const struct gprefix three_byte_0f_38_f1
= {
4804 ID(0, &instr_dual_0f_38_f1
), N
, N
, N
4808 * Insns below are selected by the prefix which indexed by the third opcode
4811 static const struct opcode opcode_map_0f_38
[256] = {
4813 X16(N
), X16(N
), X16(N
), X16(N
), X16(N
), X16(N
), X16(N
), X16(N
),
4815 X16(N
), X16(N
), X16(N
), X16(N
), X16(N
), X16(N
), X16(N
),
4817 GP(EmulateOnUD
| ModRM
, &three_byte_0f_38_f0
),
4818 GP(EmulateOnUD
| ModRM
, &three_byte_0f_38_f1
),
4839 static unsigned imm_size(struct x86_emulate_ctxt
*ctxt
)
4843 size
= (ctxt
->d
& ByteOp
) ? 1 : ctxt
->op_bytes
;
4849 static int decode_imm(struct x86_emulate_ctxt
*ctxt
, struct operand
*op
,
4850 unsigned size
, bool sign_extension
)
4852 int rc
= X86EMUL_CONTINUE
;
4856 op
->addr
.mem
.ea
= ctxt
->_eip
;
4857 /* NB. Immediates are sign-extended as necessary. */
4858 switch (op
->bytes
) {
4860 op
->val
= insn_fetch(s8
, ctxt
);
4863 op
->val
= insn_fetch(s16
, ctxt
);
4866 op
->val
= insn_fetch(s32
, ctxt
);
4869 op
->val
= insn_fetch(s64
, ctxt
);
4872 if (!sign_extension
) {
4873 switch (op
->bytes
) {
4881 op
->val
&= 0xffffffff;
4889 static int decode_operand(struct x86_emulate_ctxt
*ctxt
, struct operand
*op
,
4892 int rc
= X86EMUL_CONTINUE
;
4896 decode_register_operand(ctxt
, op
);
4899 rc
= decode_imm(ctxt
, op
, 1, false);
4902 ctxt
->memop
.bytes
= (ctxt
->d
& ByteOp
) ? 1 : ctxt
->op_bytes
;
4906 if (ctxt
->d
& BitOp
)
4907 fetch_bit_operand(ctxt
);
4908 op
->orig_val
= op
->val
;
4911 ctxt
->memop
.bytes
= (ctxt
->op_bytes
== 8) ? 16 : 8;
4915 op
->bytes
= (ctxt
->d
& ByteOp
) ? 1 : ctxt
->op_bytes
;
4916 op
->addr
.reg
= reg_rmw(ctxt
, VCPU_REGS_RAX
);
4917 fetch_register_operand(op
);
4918 op
->orig_val
= op
->val
;
4922 op
->bytes
= (ctxt
->d
& ByteOp
) ? 2 : ctxt
->op_bytes
;
4923 op
->addr
.reg
= reg_rmw(ctxt
, VCPU_REGS_RAX
);
4924 fetch_register_operand(op
);
4925 op
->orig_val
= op
->val
;
4928 if (ctxt
->d
& ByteOp
) {
4933 op
->bytes
= ctxt
->op_bytes
;
4934 op
->addr
.reg
= reg_rmw(ctxt
, VCPU_REGS_RDX
);
4935 fetch_register_operand(op
);
4936 op
->orig_val
= op
->val
;
4940 op
->bytes
= (ctxt
->d
& ByteOp
) ? 1 : ctxt
->op_bytes
;
4942 register_address(ctxt
, VCPU_REGS_RDI
);
4943 op
->addr
.mem
.seg
= VCPU_SREG_ES
;
4950 op
->addr
.reg
= reg_rmw(ctxt
, VCPU_REGS_RDX
);
4951 fetch_register_operand(op
);
4956 op
->val
= reg_read(ctxt
, VCPU_REGS_RCX
) & 0xff;
4959 rc
= decode_imm(ctxt
, op
, 1, true);
4967 rc
= decode_imm(ctxt
, op
, imm_size(ctxt
), true);
4970 rc
= decode_imm(ctxt
, op
, ctxt
->op_bytes
, true);
4973 ctxt
->memop
.bytes
= 1;
4974 if (ctxt
->memop
.type
== OP_REG
) {
4975 ctxt
->memop
.addr
.reg
= decode_register(ctxt
,
4976 ctxt
->modrm_rm
, true);
4977 fetch_register_operand(&ctxt
->memop
);
4981 ctxt
->memop
.bytes
= 2;
4984 ctxt
->memop
.bytes
= 4;
4987 rc
= decode_imm(ctxt
, op
, 2, false);
4990 rc
= decode_imm(ctxt
, op
, imm_size(ctxt
), false);
4994 op
->bytes
= (ctxt
->d
& ByteOp
) ? 1 : ctxt
->op_bytes
;
4996 register_address(ctxt
, VCPU_REGS_RSI
);
4997 op
->addr
.mem
.seg
= ctxt
->seg_override
;
5003 op
->bytes
= (ctxt
->d
& ByteOp
) ? 1 : ctxt
->op_bytes
;
5006 reg_read(ctxt
, VCPU_REGS_RBX
) +
5007 (reg_read(ctxt
, VCPU_REGS_RAX
) & 0xff));
5008 op
->addr
.mem
.seg
= ctxt
->seg_override
;
5013 op
->addr
.mem
.ea
= ctxt
->_eip
;
5014 op
->bytes
= ctxt
->op_bytes
+ 2;
5015 insn_fetch_arr(op
->valptr
, op
->bytes
, ctxt
);
5018 ctxt
->memop
.bytes
= ctxt
->op_bytes
+ 2;
5022 op
->val
= VCPU_SREG_ES
;
5026 op
->val
= VCPU_SREG_CS
;
5030 op
->val
= VCPU_SREG_SS
;
5034 op
->val
= VCPU_SREG_DS
;
5038 op
->val
= VCPU_SREG_FS
;
5042 op
->val
= VCPU_SREG_GS
;
5045 /* Special instructions do their own operand decoding. */
5047 op
->type
= OP_NONE
; /* Disable writeback. */
5055 int x86_decode_insn(struct x86_emulate_ctxt
*ctxt
, void *insn
, int insn_len
, int emulation_type
)
5057 int rc
= X86EMUL_CONTINUE
;
5058 int mode
= ctxt
->mode
;
5059 int def_op_bytes
, def_ad_bytes
, goffset
, simd_prefix
;
5060 bool op_prefix
= false;
5061 bool has_seg_override
= false;
5062 struct opcode opcode
;
5064 struct desc_struct desc
;
5066 ctxt
->memop
.type
= OP_NONE
;
5067 ctxt
->memopp
= NULL
;
5068 ctxt
->_eip
= ctxt
->eip
;
5069 ctxt
->fetch
.ptr
= ctxt
->fetch
.data
;
5070 ctxt
->fetch
.end
= ctxt
->fetch
.data
+ insn_len
;
5071 ctxt
->opcode_len
= 1;
5072 ctxt
->intercept
= x86_intercept_none
;
5074 memcpy(ctxt
->fetch
.data
, insn
, insn_len
);
5076 rc
= __do_insn_fetch_bytes(ctxt
, 1);
5077 if (rc
!= X86EMUL_CONTINUE
)
5082 case X86EMUL_MODE_REAL
:
5083 case X86EMUL_MODE_VM86
:
5084 def_op_bytes
= def_ad_bytes
= 2;
5085 ctxt
->ops
->get_segment(ctxt
, &dummy
, &desc
, NULL
, VCPU_SREG_CS
);
5087 def_op_bytes
= def_ad_bytes
= 4;
5089 case X86EMUL_MODE_PROT16
:
5090 def_op_bytes
= def_ad_bytes
= 2;
5092 case X86EMUL_MODE_PROT32
:
5093 def_op_bytes
= def_ad_bytes
= 4;
5095 #ifdef CONFIG_X86_64
5096 case X86EMUL_MODE_PROT64
:
5102 return EMULATION_FAILED
;
5105 ctxt
->op_bytes
= def_op_bytes
;
5106 ctxt
->ad_bytes
= def_ad_bytes
;
5108 /* Legacy prefixes. */
5110 switch (ctxt
->b
= insn_fetch(u8
, ctxt
)) {
5111 case 0x66: /* operand-size override */
5113 /* switch between 2/4 bytes */
5114 ctxt
->op_bytes
= def_op_bytes
^ 6;
5116 case 0x67: /* address-size override */
5117 if (mode
== X86EMUL_MODE_PROT64
)
5118 /* switch between 4/8 bytes */
5119 ctxt
->ad_bytes
= def_ad_bytes
^ 12;
5121 /* switch between 2/4 bytes */
5122 ctxt
->ad_bytes
= def_ad_bytes
^ 6;
5124 case 0x26: /* ES override */
5125 has_seg_override
= true;
5126 ctxt
->seg_override
= VCPU_SREG_ES
;
5128 case 0x2e: /* CS override */
5129 has_seg_override
= true;
5130 ctxt
->seg_override
= VCPU_SREG_CS
;
5132 case 0x36: /* SS override */
5133 has_seg_override
= true;
5134 ctxt
->seg_override
= VCPU_SREG_SS
;
5136 case 0x3e: /* DS override */
5137 has_seg_override
= true;
5138 ctxt
->seg_override
= VCPU_SREG_DS
;
5140 case 0x64: /* FS override */
5141 has_seg_override
= true;
5142 ctxt
->seg_override
= VCPU_SREG_FS
;
5144 case 0x65: /* GS override */
5145 has_seg_override
= true;
5146 ctxt
->seg_override
= VCPU_SREG_GS
;
5148 case 0x40 ... 0x4f: /* REX */
5149 if (mode
!= X86EMUL_MODE_PROT64
)
5151 ctxt
->rex_prefix
= ctxt
->b
;
5153 case 0xf0: /* LOCK */
5154 ctxt
->lock_prefix
= 1;
5156 case 0xf2: /* REPNE/REPNZ */
5157 case 0xf3: /* REP/REPE/REPZ */
5158 ctxt
->rep_prefix
= ctxt
->b
;
5164 /* Any legacy prefix after a REX prefix nullifies its effect. */
5166 ctxt
->rex_prefix
= 0;
5172 if (ctxt
->rex_prefix
& 8)
5173 ctxt
->op_bytes
= 8; /* REX.W */
5175 /* Opcode byte(s). */
5176 opcode
= opcode_table
[ctxt
->b
];
5177 /* Two-byte opcode? */
5178 if (ctxt
->b
== 0x0f) {
5179 ctxt
->opcode_len
= 2;
5180 ctxt
->b
= insn_fetch(u8
, ctxt
);
5181 opcode
= twobyte_table
[ctxt
->b
];
5183 /* 0F_38 opcode map */
5184 if (ctxt
->b
== 0x38) {
5185 ctxt
->opcode_len
= 3;
5186 ctxt
->b
= insn_fetch(u8
, ctxt
);
5187 opcode
= opcode_map_0f_38
[ctxt
->b
];
5190 ctxt
->d
= opcode
.flags
;
5192 if (ctxt
->d
& ModRM
)
5193 ctxt
->modrm
= insn_fetch(u8
, ctxt
);
5195 /* vex-prefix instructions are not implemented */
5196 if (ctxt
->opcode_len
== 1 && (ctxt
->b
== 0xc5 || ctxt
->b
== 0xc4) &&
5197 (mode
== X86EMUL_MODE_PROT64
|| (ctxt
->modrm
& 0xc0) == 0xc0)) {
5201 while (ctxt
->d
& GroupMask
) {
5202 switch (ctxt
->d
& GroupMask
) {
5204 goffset
= (ctxt
->modrm
>> 3) & 7;
5205 opcode
= opcode
.u
.group
[goffset
];
5208 goffset
= (ctxt
->modrm
>> 3) & 7;
5209 if ((ctxt
->modrm
>> 6) == 3)
5210 opcode
= opcode
.u
.gdual
->mod3
[goffset
];
5212 opcode
= opcode
.u
.gdual
->mod012
[goffset
];
5215 goffset
= ctxt
->modrm
& 7;
5216 opcode
= opcode
.u
.group
[goffset
];
5219 if (ctxt
->rep_prefix
&& op_prefix
)
5220 return EMULATION_FAILED
;
5221 simd_prefix
= op_prefix
? 0x66 : ctxt
->rep_prefix
;
5222 switch (simd_prefix
) {
5223 case 0x00: opcode
= opcode
.u
.gprefix
->pfx_no
; break;
5224 case 0x66: opcode
= opcode
.u
.gprefix
->pfx_66
; break;
5225 case 0xf2: opcode
= opcode
.u
.gprefix
->pfx_f2
; break;
5226 case 0xf3: opcode
= opcode
.u
.gprefix
->pfx_f3
; break;
5230 if (ctxt
->modrm
> 0xbf) {
5231 size_t size
= ARRAY_SIZE(opcode
.u
.esc
->high
);
5232 u32 index
= array_index_nospec(
5233 ctxt
->modrm
- 0xc0, size
);
5235 opcode
= opcode
.u
.esc
->high
[index
];
5237 opcode
= opcode
.u
.esc
->op
[(ctxt
->modrm
>> 3) & 7];
5241 if ((ctxt
->modrm
>> 6) == 3)
5242 opcode
= opcode
.u
.idual
->mod3
;
5244 opcode
= opcode
.u
.idual
->mod012
;
5247 if (ctxt
->mode
== X86EMUL_MODE_PROT64
)
5248 opcode
= opcode
.u
.mdual
->mode64
;
5250 opcode
= opcode
.u
.mdual
->mode32
;
5253 return EMULATION_FAILED
;
5256 ctxt
->d
&= ~(u64
)GroupMask
;
5257 ctxt
->d
|= opcode
.flags
;
5260 ctxt
->is_branch
= opcode
.flags
& IsBranch
;
5264 return EMULATION_FAILED
;
5266 ctxt
->execute
= opcode
.u
.execute
;
5268 if (unlikely(emulation_type
& EMULTYPE_TRAP_UD
) &&
5269 likely(!(ctxt
->d
& EmulateOnUD
)))
5270 return EMULATION_FAILED
;
5272 if (unlikely(ctxt
->d
&
5273 (NotImpl
|Stack
|Op3264
|Sse
|Mmx
|Intercept
|CheckPerm
|NearBranch
|
5276 * These are copied unconditionally here, and checked unconditionally
5277 * in x86_emulate_insn.
5279 ctxt
->check_perm
= opcode
.check_perm
;
5280 ctxt
->intercept
= opcode
.intercept
;
5282 if (ctxt
->d
& NotImpl
)
5283 return EMULATION_FAILED
;
5285 if (mode
== X86EMUL_MODE_PROT64
) {
5286 if (ctxt
->op_bytes
== 4 && (ctxt
->d
& Stack
))
5288 else if (ctxt
->d
& NearBranch
)
5292 if (ctxt
->d
& Op3264
) {
5293 if (mode
== X86EMUL_MODE_PROT64
)
5299 if ((ctxt
->d
& No16
) && ctxt
->op_bytes
== 2)
5303 ctxt
->op_bytes
= 16;
5304 else if (ctxt
->d
& Mmx
)
5308 /* ModRM and SIB bytes. */
5309 if (ctxt
->d
& ModRM
) {
5310 rc
= decode_modrm(ctxt
, &ctxt
->memop
);
5311 if (!has_seg_override
) {
5312 has_seg_override
= true;
5313 ctxt
->seg_override
= ctxt
->modrm_seg
;
5315 } else if (ctxt
->d
& MemAbs
)
5316 rc
= decode_abs(ctxt
, &ctxt
->memop
);
5317 if (rc
!= X86EMUL_CONTINUE
)
5320 if (!has_seg_override
)
5321 ctxt
->seg_override
= VCPU_SREG_DS
;
5323 ctxt
->memop
.addr
.mem
.seg
= ctxt
->seg_override
;
5326 * Decode and fetch the source operand: register, memory
5329 rc
= decode_operand(ctxt
, &ctxt
->src
, (ctxt
->d
>> SrcShift
) & OpMask
);
5330 if (rc
!= X86EMUL_CONTINUE
)
5334 * Decode and fetch the second source operand: register, memory
5337 rc
= decode_operand(ctxt
, &ctxt
->src2
, (ctxt
->d
>> Src2Shift
) & OpMask
);
5338 if (rc
!= X86EMUL_CONTINUE
)
5341 /* Decode and fetch the destination operand: register or memory. */
5342 rc
= decode_operand(ctxt
, &ctxt
->dst
, (ctxt
->d
>> DstShift
) & OpMask
);
5344 if (ctxt
->rip_relative
&& likely(ctxt
->memopp
))
5345 ctxt
->memopp
->addr
.mem
.ea
= address_mask(ctxt
,
5346 ctxt
->memopp
->addr
.mem
.ea
+ ctxt
->_eip
);
5349 if (rc
== X86EMUL_PROPAGATE_FAULT
)
5350 ctxt
->have_exception
= true;
5351 return (rc
!= X86EMUL_CONTINUE
) ? EMULATION_FAILED
: EMULATION_OK
;
5354 bool x86_page_table_writing_insn(struct x86_emulate_ctxt
*ctxt
)
5356 return ctxt
->d
& PageTable
;
5359 static bool string_insn_completed(struct x86_emulate_ctxt
*ctxt
)
5361 /* The second termination condition only applies for REPE
5362 * and REPNE. Test if the repeat string operation prefix is
5363 * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
5364 * corresponding termination condition according to:
5365 * - if REPE/REPZ and ZF = 0 then done
5366 * - if REPNE/REPNZ and ZF = 1 then done
5368 if (((ctxt
->b
== 0xa6) || (ctxt
->b
== 0xa7) ||
5369 (ctxt
->b
== 0xae) || (ctxt
->b
== 0xaf))
5370 && (((ctxt
->rep_prefix
== REPE_PREFIX
) &&
5371 ((ctxt
->eflags
& X86_EFLAGS_ZF
) == 0))
5372 || ((ctxt
->rep_prefix
== REPNE_PREFIX
) &&
5373 ((ctxt
->eflags
& X86_EFLAGS_ZF
) == X86_EFLAGS_ZF
))))
static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
	/*
	 * Execute fwait to surface any pending x87 fault before the
	 * emulator touches FPU/MMX state; report it to the guest as #MF.
	 */
	int rc;

	/*
	 * NOTE(review): the surrounding lines appear truncated in this
	 * extraction; upstream brackets the fwait with FPU get/put
	 * accessors — confirm against the full file.
	 */
	rc = asm_safe("fwait");

	/* A faulting fwait means an unmasked x87 exception was pending. */
	if (unlikely(rc != X86EMUL_CONTINUE))
		return emulate_exception(ctxt, MF_VECTOR, 0, false);

	return X86EMUL_CONTINUE;
}
5393 static void fetch_possible_mmx_operand(struct operand
*op
)
5395 if (op
->type
== OP_MM
)
5396 kvm_read_mmx_reg(op
->addr
.mm
, &op
->mm_val
);
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
{
	/*
	 * Dispatch to a "fastop" stub: load the guest's arithmetic flags,
	 * call the stub with operands in registers, then capture the flags
	 * the stub produced.  IF is forced on so popf cannot disable
	 * interrupts on the host.
	 */
	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;

	/*
	 * Non-byte ops select a size-specific entry: stubs for each
	 * operand size are laid out FASTOP_SIZE bytes apart, indexed by
	 * log2 of the destination size.
	 */
	if (!(ctxt->d & ByteOp))
		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;

	/*
	 * dst in rAX, src in rDX, src2 in rCX; flags round-trip through
	 * push/popf.  CALL_NOSPEC/ASM_CALL_CONSTRAINT keep the indirect
	 * call retpoline- and frame-pointer-safe.
	 */
	asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
	      [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
	    : "c"(ctxt->src2.val));

	/* Merge the arithmetic flags produced by the stub back into eflags. */
	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
	if (!fop) /* exception is returned in fop variable */
		return emulate_de(ctxt);
	return X86EMUL_CONTINUE;
}
5417 void init_decode_cache(struct x86_emulate_ctxt
*ctxt
)
5419 /* Clear fields that are set conditionally but read without a guard. */
5420 ctxt
->rip_relative
= false;
5421 ctxt
->rex_prefix
= 0;
5422 ctxt
->lock_prefix
= 0;
5423 ctxt
->rep_prefix
= 0;
5424 ctxt
->regs_valid
= 0;
5425 ctxt
->regs_dirty
= 0;
5427 ctxt
->io_read
.pos
= 0;
5428 ctxt
->io_read
.end
= 0;
5429 ctxt
->mem_read
.end
= 0;
/*
 * Execute one previously-decoded instruction: run pre-execution
 * permission/intercept checks, fetch memory operands, dispatch to the
 * opcode handler, and write results back.
 *
 * Returns EMULATION_OK, EMULATION_RESTART (string op must be re-entered),
 * EMULATION_INTERCEPTED, or EMULATION_FAILED.
 *
 * NOTE(review): this extraction dropped structural lines (braces, labels,
 * break/goto statements); they have been restored from the visible goto
 * targets — confirm against the full file.
 */
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc = X86EMUL_CONTINUE;
	/* dst.type is clobbered by writeback; saved for string-op restart. */
	int saved_dst_type = ctxt->dst.type;
	unsigned emul_flags;

	ctxt->mem_read.pos = 0;

	/* LOCK prefix is allowed only with some instructions */
	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	/* Far-address sources must come from memory, never a register. */
	if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	emul_flags = ctxt->ops->get_hflags(ctxt);
	/* Slow path: only taken when the opcode needs extra validation. */
	if (unlikely(ctxt->d &
		     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
		if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
				(ctxt->d & Undefined)) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/* SSE/MMX need CR0.EM clear; SSE also needs CR4.OSFXSR set. */
		if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
		    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/* CR0.TS set: FPU/MMX access raises #NM for lazy FPU switching. */
		if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
			rc = emulate_nm(ctxt);
			goto done;
		}

		if (ctxt->d & Mmx) {
			rc = flush_pending_x87_faults(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			/*
			 * Now that we know the fpu is exception safe, we can fetch
			 * operands from it.
			 */
			fetch_possible_mmx_operand(&ctxt->src);
			fetch_possible_mmx_operand(&ctxt->src2);
			if (!(ctxt->d & Mov))
				fetch_possible_mmx_operand(&ctxt->dst);
		}

		if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_PRE_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		/* Instruction can only be executed in protected mode */
		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/* Privileged instruction can be executed only in CPL=0 */
		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
			if (ctxt->d & PrivUD)
				rc = emulate_ud(ctxt);
			else
				rc = emulate_gp(ctxt, 0);
			goto done;
		}

		/* Do instruction specific permission checks */
		if (ctxt->d & CheckPerm) {
			rc = ctxt->check_perm(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_POST_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (ctxt->rep_prefix && (ctxt->d & String)) {
			/* All REP prefixes have the same first termination condition */
			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
				string_registers_quirk(ctxt);
				ctxt->eip = ctxt->_eip;
				ctxt->eflags &= ~X86_EFLAGS_RF;
				goto done;
			}
		}
	}

	/* Fetch the source operand from guest memory (unless NoAccess). */
	if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
		rc = segmented_read(ctxt, ctxt->src.addr.mem,
				    ctxt->src.valptr, ctxt->src.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		ctxt->src.orig_val64 = ctxt->src.val64;
	}

	if (ctxt->src2.type == OP_MEM) {
		rc = segmented_read(ctxt, ctxt->src2.addr.mem,
				    &ctxt->src2.val, ctxt->src2.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((ctxt->d & DstMask) == ImplicitOps)
		goto special_insn;

	if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
				   &ctxt->dst.val, ctxt->dst.bytes);
		if (rc != X86EMUL_CONTINUE) {
			/*
			 * A read-for-write fault on a writable destination is
			 * reported to the guest as a write fault.
			 */
			if (!(ctxt->d & NoWrite) &&
			    rc == X86EMUL_PROPAGATE_FAULT &&
			    ctxt->exception.vector == PF_VECTOR)
				ctxt->exception.error_code |= PFERR_WRITE_MASK;
			goto done;
		}
	}
	/* Copy full 64-bit value for CMPXCHG8B. */
	ctxt->dst.orig_val64 = ctxt->dst.val64;

special_insn:

	if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_POST_MEMACCESS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/* RF is set while a REP string instruction is in progress. */
	if (ctxt->rep_prefix && (ctxt->d & String))
		ctxt->eflags |= X86_EFLAGS_RF;
	else
		ctxt->eflags &= ~X86_EFLAGS_RF;

	/* Table-driven dispatch takes precedence over the switch below. */
	if (ctxt->execute) {
		if (ctxt->d & Fastop)
			rc = fastop(ctxt, ctxt->fop);
		else
			rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}

	if (ctxt->opcode_len == 2)
		goto twobyte_insn;
	else if (ctxt->opcode_len == 3)
		goto threebyte_insn;

	/* One-byte opcodes without an execute hook. */
	switch (ctxt->b) {
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x8d: /* lea r16/r32, m */
		ctxt->dst.val = ctxt->src.addr.mem.ea;
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		/* xchg rAX,rAX is the canonical NOP - skip writeback. */
		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
			ctxt->dst.type = OP_NONE;
		else
			rc = em_xchg(ctxt);
		break;
	case 0x98: /* cbw/cwde/cdqe */
		switch (ctxt->op_bytes) {
		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
	case 0xcc: /* int3 */
		rc = emulate_int(ctxt, 3);
		break;
	case 0xcd: /* int n */
		rc = emulate_int(ctxt, ctxt->src.val);
		break;
	case 0xce: /* into */
		if (ctxt->eflags & X86_EFLAGS_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		rc = jmp_rel(ctxt, ctxt->src.val);
		ctxt->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf4: /* hlt */
		ctxt->ops->halt(ctxt);
		break;
	case 0xf5: /* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= X86_EFLAGS_CF;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~X86_EFLAGS_CF;
		break;
	case 0xf9: /* stc */
		ctxt->eflags |= X86_EFLAGS_CF;
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~X86_EFLAGS_DF;
		break;
	case 0xfd: /* std */
		ctxt->eflags |= X86_EFLAGS_DF;
		break;
	default:
		goto cannot_emulate;
	}

	if (rc != X86EMUL_CONTINUE)
		goto done;

writeback:
	if (ctxt->d & SrcWrite) {
		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
		rc = writeback(ctxt, &ctxt->src);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	if (!(ctxt->d & NoWrite)) {
		rc = writeback(ctxt, &ctxt->dst);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/*
	 * restore dst type in case the decoding will be reused
	 * (happens for string instruction )
	 */
	ctxt->dst.type = saved_dst_type;

	if ((ctxt->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

	if ((ctxt->d & DstMask) == DstDI)
		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);

	if (ctxt->rep_prefix && (ctxt->d & String)) {
		unsigned int count;
		struct read_cache *r = &ctxt->io_read;
		if ((ctxt->d & SrcMask) == SrcSI)
			count = ctxt->src.count;
		else
			count = ctxt->dst.count;
		register_address_increment(ctxt, VCPU_REGS_RCX, -count);

		if (!string_insn_completed(ctxt)) {
			/*
			 * Re-enter guest when pio read ahead buffer is empty
			 * or, if it is not used, after each 1024 iteration.
			 */
			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
			    (r->end == 0 || r->end != r->pos)) {
				/*
				 * Reset read cache. Usually happens before
				 * decode, but since instruction is restarted
				 * we have to do it here.
				 */
				ctxt->mem_read.end = 0;
				writeback_registers(ctxt);
				return EMULATION_RESTART;
			}
			goto done; /* skip rip writeback */
		}
		ctxt->eflags &= ~X86_EFLAGS_RF;
	}

	/* Commit the advanced instruction pointer (truncated outside 64-bit). */
	ctxt->eip = ctxt->_eip;
	if (ctxt->mode != X86EMUL_MODE_PROT64)
		ctxt->eip = (u32)ctxt->_eip;

done:
	if (rc == X86EMUL_PROPAGATE_FAULT) {
		WARN_ON(ctxt->exception.vector > 0x1f);
		ctxt->have_exception = true;
	}
	if (rc == X86EMUL_INTERCEPTED)
		return EMULATION_INTERCEPTED;

	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;

twobyte_insn:
	switch (ctxt->b) {
	case 0x09: /* wbinvd */
		(ctxt->ops->wbinvd)(ctxt);
		break;
	case 0x08: /* invd */
	case 0x0d: /* GrpP (prefetch) */
	case 0x18: /* Grp16 (prefetch/nop) */
	case 0x1f: /* nop */
		break;
	case 0x20: /* mov cr, reg */
		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
		break;
	case 0x21: /* mov from dr to reg */
		ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
		break;
	case 0x40 ... 0x4f: /* cmov */
		if (test_cc(ctxt->b, ctxt->eflags))
			ctxt->dst.val = ctxt->src.val;
		/* 32-bit cmov writes back even when the condition fails. */
		else if (ctxt->op_bytes != 4)
			ctxt->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc*/
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x90 ... 0x9f: /* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7: /* movzx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
						       : (u16) ctxt->src.val;
		break;
	case 0xbe ... 0xbf: /* movsx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
							(s16) ctxt->src.val;
		break;
	default:
		goto cannot_emulate;
	}

threebyte_insn:

	if (rc != X86EMUL_CONTINUE)
		goto done;

	goto writeback;

cannot_emulate:
	return EMULATION_FAILED;
}
/* Thin wrapper exposing invalidate_registers() to callers outside this file. */
void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
	invalidate_registers(ctxt);
}
/* Thin wrapper exposing writeback_registers() to callers outside this file. */
void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
	writeback_registers(ctxt);
}
5794 bool emulator_can_use_gpa(struct x86_emulate_ctxt
*ctxt
)
5796 if (ctxt
->rep_prefix
&& (ctxt
->d
& String
))
5799 if (ctxt
->d
& TwoMemOp
)