// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
 * emulate.c
 *
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */

#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include <linux/stringify.h>
#include <asm/debugreg.h>
#include <asm/nospec-branch.h>
/*
 * Operand types
 */
#define OpNone             0ull
#define OpImplicit         1ull  /* No generic decode */
#define OpReg              2ull  /* Register */
#define OpMem              3ull  /* Memory */
#define OpAcc              4ull  /* Accumulator: AL/AX/EAX/RAX */
#define OpDI               5ull  /* ES:DI/EDI/RDI */
#define OpMem64            6ull  /* Memory, 64-bit */
#define OpImmUByte         7ull  /* Zero-extended 8-bit immediate */
#define OpDX               8ull  /* DX register */
#define OpCL               9ull  /* CL register (for shifts) */
#define OpImmByte         10ull  /* 8-bit sign extended immediate */
#define OpOne             11ull  /* Implied 1 */
#define OpImm             12ull  /* Sign extended up to 32-bit immediate */
#define OpMem16           13ull  /* Memory operand (16-bit). */
#define OpMem32           14ull  /* Memory operand (32-bit). */
#define OpImmU            15ull  /* Immediate operand, zero extended */
#define OpSI              16ull  /* SI/ESI/RSI */
#define OpImmFAddr        17ull  /* Immediate far address */
#define OpMemFAddr        18ull  /* Far address in memory */
#define OpImmU16          19ull  /* Immediate operand, 16 bits, zero extended */
#define OpES              20ull  /* ES */
#define OpCS              21ull  /* CS */
#define OpSS              22ull  /* SS */
#define OpDS              23ull  /* DS */
#define OpFS              24ull  /* FS */
#define OpGS              25ull  /* GS */
#define OpMem8            26ull  /* 8-bit zero extended memory operand */
#define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */
#define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */
#define OpAccLo           29ull  /* Low part of extended acc (AX/AX/EAX/RAX) */
#define OpAccHi           30ull  /* High part of extended acc (-/DX/EDX/RDX) */

#define OpBits             5  /* Width of operand field */
#define OpMask             ((1ull << OpBits) - 1)
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */
/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)      /* 8-bit operands. */
/* Destination operand type. */
#define DstShift    1
#define ImplicitOps (OpImplicit << DstShift)
#define DstReg      (OpReg << DstShift)
#define DstMem      (OpMem << DstShift)
#define DstAcc      (OpAcc << DstShift)
#define DstDI       (OpDI << DstShift)
#define DstMem64    (OpMem64 << DstShift)
#define DstMem16    (OpMem16 << DstShift)
#define DstImmUByte (OpImmUByte << DstShift)
#define DstDX       (OpDX << DstShift)
#define DstAccLo    (OpAccLo << DstShift)
#define DstMask     (OpMask << DstShift)
/* Source operand type. */
#define SrcShift    6
#define SrcNone     (OpNone << SrcShift)
#define SrcReg      (OpReg << SrcShift)
#define SrcMem      (OpMem << SrcShift)
#define SrcMem16    (OpMem16 << SrcShift)
#define SrcMem32    (OpMem32 << SrcShift)
#define SrcImm      (OpImm << SrcShift)
#define SrcImmByte  (OpImmByte << SrcShift)
#define SrcOne      (OpOne << SrcShift)
#define SrcImmUByte (OpImmUByte << SrcShift)
#define SrcImmU     (OpImmU << SrcShift)
#define SrcSI       (OpSI << SrcShift)
#define SrcXLat     (OpXLat << SrcShift)
#define SrcImmFAddr (OpImmFAddr << SrcShift)
#define SrcMemFAddr (OpMemFAddr << SrcShift)
#define SrcAcc      (OpAcc << SrcShift)
#define SrcImmU16   (OpImmU16 << SrcShift)
#define SrcImm64    (OpImm64 << SrcShift)
#define SrcDX       (OpDX << SrcShift)
#define SrcMem8     (OpMem8 << SrcShift)
#define SrcAccHi    (OpAccHi << SrcShift)
#define SrcMask     (OpMask << SrcShift)
#define BitOp       (1<<11)
#define MemAbs      (1<<12)     /* Memory operand is absolute displacement */
#define String      (1<<13)     /* String instruction (rep capable) */
#define Stack       (1<<14)     /* Stack instruction (push/pop) */
#define GroupMask   (7<<15)     /* Opcode uses one of the group mechanisms */
#define Group       (1<<15)     /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (2<<15)     /* Alternate decoding of mod == 3 */
#define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
#define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
#define Escape      (5<<15)     /* Escape to coprocessor instruction */
#define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
#define ModeDual    (7<<15)     /* Different instruction for 32/64 bit */
#define Sse         (1<<18)     /* SSE Vector instruction */
/* Generic ModRM decode. */
#define ModRM       (1<<19)
/* Destination is only written; never read. */
#define Mov         (1<<20)
#define Prot        (1<<21) /* instruction generates #UD if not in prot-mode */
#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
#define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
#define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
#define Undefined   (1<<25) /* No Such Instruction */
#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64        (1<<28)
#define PageTable   (1 << 29)   /* instruction used to write page table */
#define NotImpl     (1 << 30)   /* instruction is not implemented */
/* Source 2 operand type */
#define Src2Shift   (31)
#define Src2None    (OpNone << Src2Shift)
#define Src2Mem     (OpMem << Src2Shift)
#define Src2CL      (OpCL << Src2Shift)
#define Src2ImmByte (OpImmByte << Src2Shift)
#define Src2One     (OpOne << Src2Shift)
#define Src2Imm     (OpImm << Src2Shift)
#define Src2ES      (OpES << Src2Shift)
#define Src2CS      (OpCS << Src2Shift)
#define Src2SS      (OpSS << Src2Shift)
#define Src2DS      (OpDS << Src2Shift)
#define Src2FS      (OpFS << Src2Shift)
#define Src2GS      (OpGS << Src2Shift)
#define Src2Mask    (OpMask << Src2Shift)
#define Mmx         ((u64)1 << 40)  /* MMX Vector instruction */
#define AlignMask   ((u64)7 << 41)
#define Aligned     ((u64)1 << 41)  /* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned   ((u64)2 << 41)  /* Explicitly unaligned (e.g. MOVDQU) */
#define Avx         ((u64)3 << 41)  /* Advanced Vector Extensions */
#define Aligned16   ((u64)4 << 41)  /* Aligned to 16 byte boundary (e.g. FXSAVE) */
#define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */
#define NoWrite     ((u64)1 << 45)  /* No writeback */
#define SrcWrite    ((u64)1 << 46)  /* Write back src operand */
#define NoMod       ((u64)1 << 47)  /* Mod field is ignored */
#define Intercept   ((u64)1 << 48)  /* Has valid intercept field */
#define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
#define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
#define NearBranch  ((u64)1 << 52)  /* Near branches */
#define No16        ((u64)1 << 53)  /* No 16 bit operand */
#define IncSP       ((u64)1 << 54)  /* SP is incremented before ModRM calc */
#define TwoMemOp    ((u64)1 << 55)  /* Instruction has two memory operands */
#define IsBranch    ((u64)1 << 56)  /* Instruction is considered a branch. */

#define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
#define X2(x...) x, x
#define X3(x...) X2(x), x
#define X4(x...) X2(x), X2(x)
#define X5(x...) X4(x), x
#define X6(x...) X4(x), X2(x)
#define X7(x...) X4(x), X3(x)
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)

#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8
struct opcode {
        u64 flags;
        u8 intercept;
        u8 pad[7];
        union {
                int (*execute)(struct x86_emulate_ctxt *ctxt);
                const struct opcode *group;
                const struct group_dual *gdual;
                const struct gprefix *gprefix;
                const struct escape *esc;
                const struct instr_dual *idual;
                const struct mode_dual *mdual;
                void (*fastop)(struct fastop *fake);
        } u;
        int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};
struct group_dual {
        struct opcode mod012[8];
        struct opcode mod3[8];
};

struct gprefix {
        struct opcode pfx_no;
        struct opcode pfx_66;
        struct opcode pfx_f2;
        struct opcode pfx_f3;
};

struct escape {
        struct opcode op[8];
        struct opcode high[64];
};

struct instr_dual {
        struct opcode mod012;
        struct opcode mod3;
};

struct mode_dual {
        struct opcode mode32;
        struct opcode mode64;
};
#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a

enum x86_transfer_type {
        X86_TRANSFER_NONE,
        X86_TRANSFER_CALL_JMP,
        X86_TRANSFER_RET,
        X86_TRANSFER_TASK_SWITCH,
};
static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
        if (!(ctxt->regs_valid & (1 << nr))) {
                ctxt->regs_valid |= 1 << nr;
                ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
        }
        return ctxt->_regs[nr];
}

static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
        ctxt->regs_valid |= 1 << nr;
        ctxt->regs_dirty |= 1 << nr;
        return &ctxt->_regs[nr];
}

static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
        reg_read(ctxt, nr);
        return reg_write(ctxt, nr);
}

static void writeback_registers(struct x86_emulate_ctxt *ctxt)
{
        unsigned reg;

        for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
                ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
}

static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
{
        ctxt->regs_dirty = 0;
        ctxt->regs_valid = 0;
}
/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
                     X86_EFLAGS_PF|X86_EFLAGS_CF)

#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif

/*
 * fastop functions have a special calling convention:
 *
 * dst:    rax        (in/out)
 * src:    rdx        (in/out)
 * src2:   rcx        (in)
 * flags:  rflags     (in/out)
 * ex:     rsi        (in:fastop pointer, out:zero if exception)
 *
 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
 * different operand sizes can be reached by calculation, rather than a jump
 * table (which would be bigger than the code).
 */
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
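
/*
 * In effect, the fastop dispatcher selects the operand-size variant by
 * address arithmetic instead of a jump table.  A sketch of the idea (the
 * local names here are illustrative, not a verbatim copy of the caller):
 *
 *      void (*fop)(struct fastop *) = em_add;   // the byte variant
 *      if (!(ctxt->d & ByteOp))
 *              fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
 *
 * i.e. the 1/2/4/8-byte handlers sit FASTOP_SIZE bytes apart, so
 * log2(operand bytes) picks among them.
 */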
#define __FOP_FUNC(name) \
        ".align " __stringify(FASTOP_SIZE) " \n\t" \
        ".type " name ", @function \n\t" \
        name ":\n\t"

#define FOP_FUNC(name) \
        __FOP_FUNC(#name)

#define __FOP_RET(name) \
        "11: " ASM_RET \
        ".size " name ", .-" name "\n\t"

#define FOP_RET(name) \
        __FOP_RET(#name)

#define FOP_START(op) \
        extern void em_##op(struct fastop *fake); \
        asm(".pushsection .text, \"ax\" \n\t" \
            ".global em_" #op " \n\t" \
            ".align " __stringify(FASTOP_SIZE) " \n\t" \
            "em_" #op ":\n\t"

#define FOP_END \
        ".popsection")

#define __FOPNOP(name) \
        __FOP_FUNC(name) \
        __FOP_RET(name)

#define FOPNOP() \
        __FOPNOP(__stringify(__UNIQUE_ID(nop)))
#define FOP1E(op,  dst) \
        __FOP_FUNC(#op "_" #dst) \
        "10: " #op " %" #dst " \n\t" \
        __FOP_RET(#op "_" #dst)

#define FOP1EEX(op,  dst) \
        FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi)

#define FASTOP1(op) \
        FOP_START(op) \
        FOP1E(op##b, al) \
        FOP1E(op##w, ax) \
        FOP1E(op##l, eax) \
        ON64(FOP1E(op##q, rax)) \
        FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m) */
#define FASTOP1SRC2(op, name) \
        FOP_START(name) \
        FOP1E(op, cl) \
        FOP1E(op, cx) \
        FOP1E(op, ecx) \
        ON64(FOP1E(op, rcx)) \
        FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
#define FASTOP1SRC2EX(op, name) \
        FOP_START(name) \
        FOP1EEX(op, cl) \
        FOP1EEX(op, cx) \
        FOP1EEX(op, ecx) \
        ON64(FOP1EEX(op, rcx)) \
        FOP_END

#define FOP2E(op,  dst, src)       \
        __FOP_FUNC(#op "_" #dst "_" #src) \
        #op " %" #src ", %" #dst " \n\t" \
        __FOP_RET(#op "_" #dst "_" #src)

#define FASTOP2(op) \
        FOP_START(op) \
        FOP2E(op##b, al, dl) \
        FOP2E(op##w, ax, dx) \
        FOP2E(op##l, eax, edx) \
        ON64(FOP2E(op##q, rax, rdx)) \
        FOP_END

/* 2 operand, word only */
#define FASTOP2W(op) \
        FOP_START(op) \
        FOPNOP() \
        FOP2E(op##w, ax, dx) \
        FOP2E(op##l, eax, edx) \
        ON64(FOP2E(op##q, rax, rdx)) \
        FOP_END

/* 2 operand, src is CL */
#define FASTOP2CL(op) \
        FOP_START(op) \
        FOP2E(op##b, al, cl) \
        FOP2E(op##w, ax, cl) \
        FOP2E(op##l, eax, cl) \
        ON64(FOP2E(op##q, rax, cl)) \
        FOP_END

/* 2 operand, src and dest are reversed */
#define FASTOP2R(op, name) \
        FOP_START(name) \
        FOP2E(op##b, dl, al) \
        FOP2E(op##w, dx, ax) \
        FOP2E(op##l, edx, eax) \
        ON64(FOP2E(op##q, rdx, rax)) \
        FOP_END

#define FOP3E(op,  dst, src, src2) \
        __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
        #op " %" #src2 ", %" #src ", %" #dst " \n\t"\
        __FOP_RET(#op "_" #dst "_" #src "_" #src2)

/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
        FOP_START(op) \
        FOPNOP() \
        FOP3E(op##w, ax, dx, cl) \
        FOP3E(op##l, eax, edx, cl) \
        ON64(FOP3E(op##q, rax, rdx, cl)) \
        FOP_END
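
/*
 * For illustration, FASTOP2(add) emits four tiny functions laid out
 * FASTOP_SIZE bytes apart inside em_add:
 *
 *      em_add:
 *        add_al_dl:    addb %dl,  %al   ; ret   (+0)
 *        add_ax_dx:    addw %dx,  %ax   ; ret   (+FASTOP_SIZE)
 *        add_eax_edx:  addl %edx, %eax  ; ret   (+2*FASTOP_SIZE)
 *        add_rax_rdx:  addq %rdx, %rax  ; ret   (+3*FASTOP_SIZE, 64-bit only)
 */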
/* Special case for SETcc - 1 instruction per cc */

/*
 * Depending on .config the SETcc functions look like:
 *
 * SETcc %al                    [3 bytes]
 * RET                          [1 byte]
 * INT3                         [1 byte; CONFIG_SLS]
 *
 * Which gives possible sizes 4 or 5.  When rounded up to the
 * next power-of-two alignment they become 4 or 8.
 */
#define SETCC_LENGTH    (4 + IS_ENABLED(CONFIG_SLS))
#define SETCC_ALIGN     (4 << IS_ENABLED(CONFIG_SLS))
static_assert(SETCC_LENGTH <= SETCC_ALIGN);

#define FOP_SETCC(op) \
        ".align " __stringify(SETCC_ALIGN) " \n\t" \
        ".type " #op ", @function \n\t" \
        #op ": \n\t" \
        #op " %al \n\t" \
        __FOP_RET(#op)

FOP_START(setcc)
FOP_SETCC(seto)
FOP_SETCC(setno)
FOP_SETCC(setc)
FOP_SETCC(setnc)
FOP_SETCC(setz)
FOP_SETCC(setnz)
FOP_SETCC(setbe)
FOP_SETCC(setnbe)
FOP_SETCC(sets)
FOP_SETCC(setns)
FOP_SETCC(setp)
FOP_SETCC(setnp)
FOP_SETCC(setl)
FOP_SETCC(setnl)
FOP_SETCC(setle)
FOP_SETCC(setnle)
FOP_END;

FOP_START(salc)
FOP_FUNC(salc)
"pushf; sbb %al, %al; popf \n\t"
FOP_RET(salc)
FOP_END;
/*
 * XXX: inoutclob user must know where the argument is being expanded.
 *      Relying on CONFIG_CC_HAS_ASM_GOTO would allow us to remove _fault.
 */
#define asm_safe(insn, inoutclob...) \
({ \
        int _fault = 0; \
 \
        asm volatile("1:" insn "\n" \
                     "2:\n" \
                     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[_fault]) \
                     : [_fault] "+r"(_fault) inoutclob ); \
 \
        _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
})
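
/*
 * Typical use wraps a single may-fault instruction, e.g. the FXSAVE
 * helper later in this file:
 *
 *      rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
 *
 * If the instruction faults, the extable entry sets _fault and the macro
 * evaluates to X86EMUL_UNHANDLEABLE instead of oopsing the host.
 */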
static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
                                    enum x86_intercept intercept,
                                    enum x86_intercept_stage stage)
{
        struct x86_instruction_info info = {
                .intercept  = intercept,
                .rep_prefix = ctxt->rep_prefix,
                .modrm_mod  = ctxt->modrm_mod,
                .modrm_reg  = ctxt->modrm_reg,
                .modrm_rm   = ctxt->modrm_rm,
                .src_val    = ctxt->src.val64,
                .dst_val    = ctxt->dst.val64,
                .src_bytes  = ctxt->src.bytes,
                .dst_bytes  = ctxt->dst.bytes,
                .ad_bytes   = ctxt->ad_bytes,
                .next_rip   = ctxt->eip,
        };

        return ctxt->ops->intercept(ctxt, &info, stage);
}

static void assign_masked(ulong *dest, ulong src, ulong mask)
{
        *dest = (*dest & ~mask) | (src & mask);
}
static void assign_register(unsigned long *reg, u64 val, int bytes)
{
        /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
        switch (bytes) {
        case 1:
                *(u8 *)reg = (u8)val;
                break;
        case 2:
                *(u16 *)reg = (u16)val;
                break;
        case 4:
                *reg = (u32)val;
                break;  /* 64b: zero-extend */
        case 8:
                *reg = val;
                break;
        }
}

static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
{
        return (1UL << (ctxt->ad_bytes << 3)) - 1;
}
static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
{
        u16 sel;
        struct desc_struct ss;

        if (ctxt->mode == X86EMUL_MODE_PROT64)
                return ~0UL;
        ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
        return ~0U >> ((ss.d ^ 1) * 16);  /* d=0: 0xffff; d=1: 0xffffffff */
}

static int stack_size(struct x86_emulate_ctxt *ctxt)
{
        return (__fls(stack_mask(ctxt)) + 1) >> 3;
}
/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
{
        if (ctxt->ad_bytes == sizeof(unsigned long))
                return reg;
        else
                return reg & ad_mask(ctxt);
}

static inline unsigned long
register_address(struct x86_emulate_ctxt *ctxt, int reg)
{
        return address_mask(ctxt, reg_read(ctxt, reg));
}

static void masked_increment(ulong *reg, ulong mask, int inc)
{
        assign_masked(reg, *reg + inc, mask);
}

static inline void
register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
{
        ulong *preg = reg_rmw(ctxt, reg);

        assign_register(preg, *preg + inc, ctxt->ad_bytes);
}

static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
{
        masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}
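
/*
 * Example: with a 16-bit stack segment (SS.d == 0), stack_mask() is
 * 0xffff, so a push from SP == 0x0000 wraps SP to 0xfffe while leaving
 * the upper bits of RSP untouched; masked_increment() only rewrites the
 * bits selected by the mask.
 */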
static u32 desc_limit_scaled(struct desc_struct *desc)
{
        u32 limit = get_desc_limit(desc);

        return desc->g ? (limit << 12) | 0xfff : limit;
}

static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
        if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
                return 0;

        return ctxt->ops->get_cached_segment_base(ctxt, seg);
}
static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
                             u32 error, bool valid)
{
        WARN_ON(vec > 0x1f);
        ctxt->exception.vector = vec;
        ctxt->exception.error_code = error;
        ctxt->exception.error_code_valid = valid;
        return X86EMUL_PROPAGATE_FAULT;
}

static int emulate_db(struct x86_emulate_ctxt *ctxt)
{
        return emulate_exception(ctxt, DB_VECTOR, 0, false);
}

static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
{
        return emulate_exception(ctxt, GP_VECTOR, err, true);
}

static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
{
        return emulate_exception(ctxt, SS_VECTOR, err, true);
}

static int emulate_ud(struct x86_emulate_ctxt *ctxt)
{
        return emulate_exception(ctxt, UD_VECTOR, 0, false);
}

static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
{
        return emulate_exception(ctxt, TS_VECTOR, err, true);
}

static int emulate_de(struct x86_emulate_ctxt *ctxt)
{
        return emulate_exception(ctxt, DE_VECTOR, 0, false);
}

static int emulate_nm(struct x86_emulate_ctxt *ctxt)
{
        return emulate_exception(ctxt, NM_VECTOR, 0, false);
}
static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
        u16 selector;
        struct desc_struct desc;

        ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
        return selector;
}

static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
                                 unsigned seg)
{
        u16 dummy;
        u32 base3;
        struct desc_struct desc;

        ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
        ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}

static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
{
        return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
}

static inline bool emul_is_noncanonical_address(u64 la,
                                                struct x86_emulate_ctxt *ctxt)
{
        return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt));
}
/*
 * x86 defines three classes of vector instructions: explicitly
 * aligned, explicitly unaligned, and the rest, which change behaviour
 * depending on whether they're AVX encoded or not.
 *
 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
 * subject to the same check.  FXSAVE and FXRSTOR are checked here too as their
 * 512 bytes of data must be aligned to a 16 byte boundary.
 */
static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
{
        u64 alignment = ctxt->d & AlignMask;

        if (likely(size < 16))
                return 1;

        switch (alignment) {
        case Unaligned:
        case Avx:
                return 1;
        case Aligned16:
                return 16;
        case Aligned:
        default:
                return size;
        }
}
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
                                       struct segmented_address addr,
                                       unsigned *max_size, unsigned size,
                                       bool write, bool fetch,
                                       enum x86emul_mode mode, ulong *linear)
{
        struct desc_struct desc;
        bool usable;
        ulong la;
        u32 lim;
        u16 sel;
        u8  va_bits;

        la = seg_base(ctxt, addr.seg) + addr.ea;
        *max_size = 0;
        switch (mode) {
        case X86EMUL_MODE_PROT64:
                *linear = la;
                va_bits = ctxt_virt_addr_bits(ctxt);
                if (!__is_canonical_address(la, va_bits))
                        goto bad;

                *max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
                if (size > *max_size)
                        goto bad;
                break;
        default:
                *linear = la = (u32)la;
                usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
                                                addr.seg);
                if (!usable)
                        goto bad;
                /* code segment in protected mode or read-only data segment */
                if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
                                        || !(desc.type & 2)) && write)
                        goto bad;
                /* unreadable code segment */
                if (!fetch && (desc.type & 8) && !(desc.type & 2))
                        goto bad;
                lim = desc_limit_scaled(&desc);
                if (!(desc.type & 8) && (desc.type & 4)) {
                        /* expand-down segment */
                        if (addr.ea <= lim)
                                goto bad;
                        lim = desc.d ? 0xffffffff : 0xffff;
                }
                if (addr.ea > lim)
                        goto bad;
                if (lim == 0xffffffff)
                        *max_size = ~0u;
                else {
                        *max_size = (u64)lim + 1 - addr.ea;
                        if (size > *max_size)
                                goto bad;
                }
                break;
        }
        if (la & (insn_alignment(ctxt, size) - 1))
                return emulate_gp(ctxt, 0);
        return X86EMUL_CONTINUE;
bad:
        if (addr.seg == VCPU_SREG_SS)
                return emulate_ss(ctxt, 0);
        else
                return emulate_gp(ctxt, 0);
}
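
/*
 * Example of the expand-down case above: for a data segment with the
 * expand-down type bit set, limit == 0x0fff and desc.d == 1, offsets
 * 0x0000..0x0fff fault while 0x1000..0xffffffff are valid.  That is why
 * the code rejects addr.ea <= lim and then raises lim to 0xffffffff.
 */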
static int linearize(struct x86_emulate_ctxt *ctxt,
                     struct segmented_address addr,
                     unsigned size, bool write,
                     ulong *linear)
{
        unsigned max_size;
        return __linearize(ctxt, addr, &max_size, size, write, false,
                           ctxt->mode, linear);
}
static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
                             enum x86emul_mode mode)
{
        ulong linear;
        int rc;
        unsigned max_size;
        struct segmented_address addr = { .seg = VCPU_SREG_CS,
                                           .ea = dst };

        if (ctxt->op_bytes != sizeof(unsigned long))
                addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
        rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
        if (rc == X86EMUL_CONTINUE)
                ctxt->_eip = addr.ea;
        return rc;
}

static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
        return assign_eip(ctxt, dst, ctxt->mode);
}
static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
                          const struct desc_struct *cs_desc)
{
        enum x86emul_mode mode = ctxt->mode;
        int rc;

#ifdef CONFIG_X86_64
        if (ctxt->mode >= X86EMUL_MODE_PROT16) {
                if (cs_desc->l) {
                        u64 efer = 0;

                        ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
                        if (efer & EFER_LMA)
                                mode = X86EMUL_MODE_PROT64;
                } else
                        mode = X86EMUL_MODE_PROT32; /* temporary value */
        }
#endif
        if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
                mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
        rc = assign_eip(ctxt, dst, mode);
        if (rc == X86EMUL_CONTINUE)
                ctxt->mode = mode;
        return rc;
}

static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
{
        return assign_eip_near(ctxt, ctxt->_eip + rel);
}
static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
                              void *data, unsigned size)
{
        return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
}

static int linear_write_system(struct x86_emulate_ctxt *ctxt,
                               ulong linear, void *data,
                               unsigned int size)
{
        return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
}

static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
                              struct segmented_address addr,
                              void *data,
                              unsigned size)
{
        int rc;
        ulong linear;

        rc = linearize(ctxt, addr, size, false, &linear);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
}

static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
                               struct segmented_address addr,
                               void *data,
                               unsigned int size)
{
        int rc;
        ulong linear;

        rc = linearize(ctxt, addr, size, true, &linear);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
}
/*
 * Prefetch the remaining bytes of the instruction without crossing page
 * boundary if they are not in fetch_cache yet.
 */
static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
{
        int rc;
        unsigned size, max_size;
        unsigned long linear;
        int cur_size = ctxt->fetch.end - ctxt->fetch.data;
        struct segmented_address addr = { .seg = VCPU_SREG_CS,
                                           .ea = ctxt->eip + cur_size };

        /*
         * We do not know exactly how many bytes will be needed, and
         * __linearize is expensive, so fetch as much as possible.  We
         * just have to avoid going beyond the 15 byte limit, the end
         * of the segment, or the end of the page.
         *
         * __linearize is called with size 0 so that it does not do any
         * boundary check itself.  Instead, we use max_size to check
         * against op_size.
         */
        rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
                         &linear);
        if (unlikely(rc != X86EMUL_CONTINUE))
                return rc;

        size = min_t(unsigned, 15UL ^ cur_size, max_size);
        size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));

        /*
         * One instruction can only straddle two pages,
         * and one has been loaded at the beginning of
         * x86_decode_insn.  So, if not enough bytes
         * still, we must have hit the 15-byte boundary.
         */
        if (unlikely(size < op_size))
                return emulate_gp(ctxt, 0);

        rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
                              size, &ctxt->exception);
        if (unlikely(rc != X86EMUL_CONTINUE))
                return rc;
        ctxt->fetch.end += size;
        return X86EMUL_CONTINUE;
}
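
/*
 * Note on "15UL ^ cur_size" above: cur_size never exceeds 15 (the
 * architectural instruction-length limit), and for 0 <= cur_size <= 15
 * the XOR is equivalent to 15 - cur_size, i.e. the bytes still allowed.
 */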
static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
                                               unsigned size)
{
        unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;

        if (unlikely(done_size < size))
                return __do_insn_fetch_bytes(ctxt, size - done_size);
        else
                return X86EMUL_CONTINUE;
}
/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _ctxt)                                        \
({      _type _x;                                                       \
                                                                        \
        rc = do_insn_fetch_bytes(_ctxt, sizeof(_type));                 \
        if (rc != X86EMUL_CONTINUE)                                     \
                goto done;                                              \
        ctxt->_eip += sizeof(_type);                                    \
        memcpy(&_x, ctxt->fetch.ptr, sizeof(_type));                    \
        ctxt->fetch.ptr += sizeof(_type);                               \
        _x;                                                             \
})

#define insn_fetch_arr(_arr, _size, _ctxt)                              \
({                                                                      \
        rc = do_insn_fetch_bytes(_ctxt, _size);                         \
        if (rc != X86EMUL_CONTINUE)                                     \
                goto done;                                              \
        ctxt->_eip += (_size);                                          \
        memcpy(_arr, ctxt->fetch.ptr, _size);                           \
        ctxt->fetch.ptr += (_size);                                     \
})
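
/*
 * Typical decode-time use (as in decode_modrm() below):
 *
 *      sib = insn_fetch(u8, ctxt);             // SIB byte
 *      modrm_ea += insn_fetch(s32, ctxt);      // 32-bit displacement
 *
 * Both macros assume the caller declares a local "int rc" and provides
 * a "done" label for the error path.
 */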
/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
                             int byteop)
{
        void *p;
        int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;

        if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
                p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
        else
                p = reg_rmw(ctxt, modrm_reg);
        return p;
}

static int read_descriptor(struct x86_emulate_ctxt *ctxt,
                           struct segmented_address addr,
                           u16 *size, unsigned long *address, int op_bytes)
{
        int rc;

        if (op_bytes == 2)
                op_bytes = 3;
        *address = 0;
        rc = segmented_read_std(ctxt, addr, size, 2);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        addr.ea += 2;
        rc = segmented_read_std(ctxt, addr, address, op_bytes);
        return rc;
}

FASTOP2(add);
FASTOP2(or);
FASTOP2(adc);
FASTOP2(sbb);
FASTOP2(and);
FASTOP2(sub);
FASTOP2(xor);
FASTOP2(cmp);
FASTOP2(test);

FASTOP1SRC2(mul, mul_ex);
FASTOP1SRC2(imul, imul_ex);
FASTOP1SRC2EX(div, div_ex);
FASTOP1SRC2EX(idiv, idiv_ex);

FASTOP3WCL(shld);
FASTOP3WCL(shrd);

FASTOP2W(imul);

FASTOP1(not);
FASTOP1(neg);
FASTOP1(inc);
FASTOP1(dec);

FASTOP2CL(rol);
FASTOP2CL(ror);
FASTOP2CL(rcl);
FASTOP2CL(rcr);
FASTOP2CL(shl);
FASTOP2CL(shr);
FASTOP2CL(sar);

FASTOP2W(bsf);
FASTOP2W(bsr);
FASTOP2W(bt);
FASTOP2W(bts);
FASTOP2W(btr);
FASTOP2W(btc);

FASTOP2(xadd);

FASTOP2R(cmp, cmp_r);
static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
{
        /* If src is zero, do not writeback, but update flags */
        if (ctxt->src.val == 0)
                ctxt->dst.type = OP_NONE;
        return fastop(ctxt, em_bsf);
}

static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
{
        /* If src is zero, do not writeback, but update flags */
        if (ctxt->src.val == 0)
                ctxt->dst.type = OP_NONE;
        return fastop(ctxt, em_bsr);
}
static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
{
        u8 rc;
        void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf);

        flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
        asm("push %[flags]; popf; " CALL_NOSPEC
            : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
        return rc;
}
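
/*
 * The thunk table built by FOP_SETCC above is indexed by the low nibble
 * of the opcode: e.g. condition 0x4 (SETE/JE) lands on setz and 0x5 on
 * setnz, each SETCC_ALIGN bytes apart, so no branch table is needed.
 */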
static void fetch_register_operand(struct operand *op)
{
        switch (op->bytes) {
        case 1:
                op->val = *(u8 *)op->addr.reg;
                break;
        case 2:
                op->val = *(u16 *)op->addr.reg;
                break;
        case 4:
                op->val = *(u32 *)op->addr.reg;
                break;
        case 8:
                op->val = *(u64 *)op->addr.reg;
                break;
        }
}
static int em_fninit(struct x86_emulate_ctxt *ctxt)
{
        if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
                return emulate_nm(ctxt);

        kvm_fpu_get();
        asm volatile("fninit");
        kvm_fpu_put();
        return X86EMUL_CONTINUE;
}

static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
{
        u16 fcw;

        if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
                return emulate_nm(ctxt);

        kvm_fpu_get();
        asm volatile("fnstcw %0": "+m"(fcw));
        kvm_fpu_put();

        ctxt->dst.val = fcw;

        return X86EMUL_CONTINUE;
}

static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
{
        u16 fsw;

        if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
                return emulate_nm(ctxt);

        kvm_fpu_get();
        asm volatile("fnstsw %0": "+m"(fsw));
        kvm_fpu_put();

        ctxt->dst.val = fsw;

        return X86EMUL_CONTINUE;
}
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
                                    struct operand *op)
{
        unsigned reg = ctxt->modrm_reg;

        if (!(ctxt->d & ModRM))
                reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);

        if (ctxt->d & Sse) {
                op->type = OP_XMM;
                op->bytes = 16;
                op->addr.xmm = reg;
                kvm_read_sse_reg(reg, &op->vec_val);
                return;
        }
        if (ctxt->d & Mmx) {
                reg &= 7;
                op->type = OP_MM;
                op->bytes = 8;
                op->addr.mm = reg;
                return;
        }

        op->type = OP_REG;
        op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
        op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);

        fetch_register_operand(op);
        op->orig_val = op->val;
}
static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
{
        if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
                ctxt->modrm_seg = VCPU_SREG_SS;
}
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
                        struct operand *op)
{
        u8 sib;
        int index_reg, base_reg, scale;
        int rc = X86EMUL_CONTINUE;
        ulong modrm_ea = 0;

        ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
        index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
        base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */

        ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
        ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
        ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
        ctxt->modrm_seg = VCPU_SREG_DS;

        if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
                op->type = OP_REG;
                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
                op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
                                ctxt->d & ByteOp);
                if (ctxt->d & Sse) {
                        op->type = OP_XMM;
                        op->bytes = 16;
                        op->addr.xmm = ctxt->modrm_rm;
                        kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val);
                        return rc;
                }
                if (ctxt->d & Mmx) {
                        op->type = OP_MM;
                        op->bytes = 8;
                        op->addr.mm = ctxt->modrm_rm & 7;
                        return rc;
                }
                fetch_register_operand(op);
                return rc;
        }

        op->type = OP_MEM;

        if (ctxt->ad_bytes == 2) {
                unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
                unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
                unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
                unsigned di = reg_read(ctxt, VCPU_REGS_RDI);

                /* 16-bit ModR/M decode. */
                switch (ctxt->modrm_mod) {
                case 0:
                        if (ctxt->modrm_rm == 6)
                                modrm_ea += insn_fetch(u16, ctxt);
                        break;
                case 1:
                        modrm_ea += insn_fetch(s8, ctxt);
                        break;
                case 2:
                        modrm_ea += insn_fetch(u16, ctxt);
                        break;
                }
                switch (ctxt->modrm_rm) {
                case 0:
                        modrm_ea += bx + si;
                        break;
                case 1:
                        modrm_ea += bx + di;
                        break;
                case 2:
                        modrm_ea += bp + si;
                        break;
                case 3:
                        modrm_ea += bp + di;
                        break;
                case 4:
                        modrm_ea += si;
                        break;
                case 5:
                        modrm_ea += di;
                        break;
                case 6:
                        if (ctxt->modrm_mod != 0)
                                modrm_ea += bp;
                        break;
                case 7:
                        modrm_ea += bx;
                        break;
                }
                if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
                    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
                        ctxt->modrm_seg = VCPU_SREG_SS;
                modrm_ea = (u16)modrm_ea;
        } else {
                /* 32/64-bit ModR/M decode. */
                if ((ctxt->modrm_rm & 7) == 4) {
                        sib = insn_fetch(u8, ctxt);
                        index_reg |= (sib >> 3) & 7;
                        base_reg |= sib & 7;
                        scale = sib >> 6;

                        if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
                                modrm_ea += insn_fetch(s32, ctxt);
                        else {
                                modrm_ea += reg_read(ctxt, base_reg);
                                adjust_modrm_seg(ctxt, base_reg);
                                /* Increment ESP on POP [ESP] */
                                if ((ctxt->d & IncSP) &&
                                    base_reg == VCPU_REGS_RSP)
                                        modrm_ea += ctxt->op_bytes;
                        }
                        if (index_reg != 4)
                                modrm_ea += reg_read(ctxt, index_reg) << scale;
                } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
                        modrm_ea += insn_fetch(s32, ctxt);
                        if (ctxt->mode == X86EMUL_MODE_PROT64)
                                ctxt->rip_relative = 1;
                } else {
                        base_reg = ctxt->modrm_rm;
                        modrm_ea += reg_read(ctxt, base_reg);
                        adjust_modrm_seg(ctxt, base_reg);
                }
                switch (ctxt->modrm_mod) {
                case 1:
                        modrm_ea += insn_fetch(s8, ctxt);
                        break;
                case 2:
                        modrm_ea += insn_fetch(s32, ctxt);
                        break;
                }
        }
        op->addr.mem.ea = modrm_ea;
        if (ctxt->ad_bytes != 8)
                ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;

done:
        return rc;
}
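
/*
 * Worked 16-bit example: modrm == 0x46 means mod == 1, rm == 6, so the
 * effective address is BP + disp8 and, because BP is the base register,
 * the default segment switches from DS to SS (see the modrm_seg update
 * above).
 */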
static int decode_abs(struct x86_emulate_ctxt *ctxt,
                      struct operand *op)
{
        int rc = X86EMUL_CONTINUE;

        op->type = OP_MEM;
        switch (ctxt->ad_bytes) {
        case 2:
                op->addr.mem.ea = insn_fetch(u16, ctxt);
                break;
        case 4:
                op->addr.mem.ea = insn_fetch(u32, ctxt);
                break;
        case 8:
                op->addr.mem.ea = insn_fetch(u64, ctxt);
                break;
        }
done:
        return rc;
}
*ctxt
)
1333 if (ctxt
->dst
.type
== OP_MEM
&& ctxt
->src
.type
== OP_REG
) {
1334 mask
= ~((long)ctxt
->dst
.bytes
* 8 - 1);
1336 if (ctxt
->src
.bytes
== 2)
1337 sv
= (s16
)ctxt
->src
.val
& (s16
)mask
;
1338 else if (ctxt
->src
.bytes
== 4)
1339 sv
= (s32
)ctxt
->src
.val
& (s32
)mask
;
1341 sv
= (s64
)ctxt
->src
.val
& (s64
)mask
;
1343 ctxt
->dst
.addr
.mem
.ea
= address_mask(ctxt
,
1344 ctxt
->dst
.addr
.mem
.ea
+ (sv
>> 3));
1347 /* only subword offset */
1348 ctxt
->src
.val
&= (ctxt
->dst
.bytes
<< 3) - 1;
1351 static int read_emulated(struct x86_emulate_ctxt
*ctxt
,
1352 unsigned long addr
, void *dest
, unsigned size
)
1355 struct read_cache
*mc
= &ctxt
->mem_read
;
1357 if (mc
->pos
< mc
->end
)
1360 WARN_ON((mc
->end
+ size
) >= sizeof(mc
->data
));
1362 rc
= ctxt
->ops
->read_emulated(ctxt
, addr
, mc
->data
+ mc
->end
, size
,
1364 if (rc
!= X86EMUL_CONTINUE
)
1370 memcpy(dest
, mc
->data
+ mc
->pos
, size
);
1372 return X86EMUL_CONTINUE
;
static int segmented_read(struct x86_emulate_ctxt *ctxt,
                          struct segmented_address addr,
                          void *data,
                          unsigned size)
{
        int rc;
        ulong linear;

        rc = linearize(ctxt, addr, size, false, &linear);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        return read_emulated(ctxt, linear, data, size);
}

static int segmented_write(struct x86_emulate_ctxt *ctxt,
                           struct segmented_address addr,
                           const void *data,
                           unsigned size)
{
        int rc;
        ulong linear;

        rc = linearize(ctxt, addr, size, true, &linear);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        return ctxt->ops->write_emulated(ctxt, linear, data, size,
                                         &ctxt->exception);
}

static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
                             struct segmented_address addr,
                             const void *orig_data, const void *data,
                             unsigned size)
{
        int rc;
        ulong linear;

        rc = linearize(ctxt, addr, size, true, &linear);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
                                           size, &ctxt->exception);
}
static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
                           unsigned int size, unsigned short port,
                           void *dest)
{
        struct read_cache *rc = &ctxt->io_read;

        if (rc->pos == rc->end) { /* refill pio read ahead */
                unsigned int in_page, n;
                unsigned int count = ctxt->rep_prefix ?
                        address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
                in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
                        offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
                        PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
                n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
                if (n == 0)
                        n = 1;
                rc->pos = rc->end = 0;
                if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
                        return 0;
                rc->end = n * size;
        }

        if (ctxt->rep_prefix && (ctxt->d & String) &&
            !(ctxt->eflags & X86_EFLAGS_DF)) {
                ctxt->dst.data = rc->data + rc->pos;
                ctxt->dst.type = OP_MEM_STR;
                ctxt->dst.count = (rc->end - rc->pos) / size;
                rc->pos = rc->end;
        } else {
                memcpy(dest, rc->data + rc->pos, size);
                rc->pos += size;
        }
        return 1;
}
static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
                                     u16 index, struct desc_struct *desc)
{
        struct desc_ptr dt;
        ulong addr;

        ctxt->ops->get_idt(ctxt, &dt);

        if (dt.size < index * 8 + 7)
                return emulate_gp(ctxt, index << 3 | 0x2);

        addr = dt.address + index * 8;
        return linear_read_system(ctxt, addr, desc, sizeof(*desc));
}
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
                                     u16 selector, struct desc_ptr *dt)
{
        const struct x86_emulate_ops *ops = ctxt->ops;
        u32 base3 = 0;

        if (selector & 1 << 2) {
                struct desc_struct desc;
                u16 sel;

                memset(dt, 0, sizeof(*dt));
                if (!ops->get_segment(ctxt, &sel, &desc, &base3,
                                      VCPU_SREG_LDTR))
                        return;

                dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
                dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
        } else
                ops->get_gdt(ctxt, dt);
}
static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
                              u16 selector, ulong *desc_addr_p)
{
        struct desc_ptr dt;
        u16 index = selector >> 3;
        ulong addr;

        get_descriptor_table_ptr(ctxt, selector, &dt);

        if (dt.size < index * 8 + 7)
                return emulate_gp(ctxt, selector & 0xfffc);

        addr = dt.address + index * 8;

#ifdef CONFIG_X86_64
        if (addr >> 32 != 0) {
                u64 efer = 0;

                ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
                if (!(efer & EFER_LMA))
                        addr &= (u32)-1;
        }
#endif

        *desc_addr_p = addr;
        return X86EMUL_CONTINUE;
}
/* allowed just for 8 bytes segments */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                   u16 selector, struct desc_struct *desc,
                                   ulong *desc_addr_p)
{
        int rc;

        rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
}

/* allowed just for 8 bytes segments */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                    u16 selector, struct desc_struct *desc)
{
        int rc;
        ulong addr;

        rc = get_descriptor_ptr(ctxt, selector, &addr);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        return linear_write_system(ctxt, addr, desc, sizeof(*desc));
}
static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                     u16 selector, int seg, u8 cpl,
                                     enum x86_transfer_type transfer,
                                     struct desc_struct *desc)
{
        struct desc_struct seg_desc, old_desc;
        u8 dpl, rpl;
        unsigned err_vec = GP_VECTOR;
        u32 err_code = 0;
        bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
        ulong desc_addr;
        int ret;
        u16 dummy;
        u32 base3 = 0;

        memset(&seg_desc, 0, sizeof(seg_desc));

        if (ctxt->mode == X86EMUL_MODE_REAL) {
                /* set real mode segment descriptor (keep limit etc. for
                 * unreal mode) */
                ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
                set_desc_base(&seg_desc, selector << 4);
                goto load;
        } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
                /* VM86 needs a clean new segment descriptor */
                set_desc_base(&seg_desc, selector << 4);
                set_desc_limit(&seg_desc, 0xffff);
                seg_desc.type = 3;
                seg_desc.p = 1;
                seg_desc.s = 1;
                seg_desc.dpl = 3;
                goto load;
        }

        rpl = selector & 3;

        /* TR should be in GDT only */
        if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
                goto exception;

        /* NULL selector is not valid for TR, CS and (except for long mode) SS */
        if (null_selector) {
                if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
                        goto exception;

                if (seg == VCPU_SREG_SS) {
                        if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
                                goto exception;

                        /*
                         * ctxt->ops->set_segment expects the CPL to be in
                         * SS.DPL, so fake an expand-up 32-bit data segment.
                         */
                        seg_desc.type = 3;
                        seg_desc.p = 1;
                        seg_desc.s = 1;
                        seg_desc.dpl = cpl;
                        seg_desc.d = 1;
                        seg_desc.g = 1;
                }

                /* Skip all following checks */
                goto load;
        }

        ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
        if (ret != X86EMUL_CONTINUE)
                return ret;

        err_code = selector & 0xfffc;
        err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
                                                           GP_VECTOR;

        /* can't load system descriptor into segment selector */
        if (seg <= VCPU_SREG_GS && !seg_desc.s) {
                if (transfer == X86_TRANSFER_CALL_JMP)
                        return X86EMUL_UNHANDLEABLE;
                goto exception;
        }

        dpl = seg_desc.dpl;

        switch (seg) {
        case VCPU_SREG_SS:
                /*
                 * segment is not a writable data segment, or the segment
                 * selector's RPL != CPL, or the descriptor's DPL != CPL
                 */
                if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
                        goto exception;
                break;
        case VCPU_SREG_CS:
                if (!(seg_desc.type & 8))
                        goto exception;

                if (transfer == X86_TRANSFER_RET) {
                        /* RET can never return to an inner privilege level. */
                        if (rpl < cpl)
                                goto exception;
                        /* Outer-privilege level return is not implemented */
                        if (rpl > cpl)
                                return X86EMUL_UNHANDLEABLE;
                }
                if (transfer == X86_TRANSFER_RET || transfer == X86_TRANSFER_TASK_SWITCH) {
                        if (seg_desc.type & 4) {
                                /* conforming */
                                if (dpl > rpl)
                                        goto exception;
                        } else {
                                /* nonconforming */
                                if (rpl > cpl || dpl != rpl)
                                        goto exception;
                        }
                } else { /* X86_TRANSFER_CALL_JMP */
                        if (seg_desc.type & 4) {
                                /* conforming */
                                if (dpl > cpl)
                                        goto exception;
                        } else {
                                /* nonconforming */
                                if (rpl > cpl || dpl != cpl)
                                        goto exception;
                        }
                }
                /* in long-mode d/b must be clear if l is set */
                if (seg_desc.d && seg_desc.l) {
                        u64 efer = 0;

                        ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
                        if (efer & EFER_LMA)
                                goto exception;
                }

                /* CS(RPL) <- CPL */
                selector = (selector & 0xfffc) | cpl;
                break;
        case VCPU_SREG_TR:
                if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
                        goto exception;
                if (!seg_desc.p) {
                        err_vec = NP_VECTOR;
                        goto exception;
                }
                old_desc = seg_desc;
                seg_desc.type |= 2; /* busy */
                ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
                                                  sizeof(seg_desc), &ctxt->exception);
                if (ret != X86EMUL_CONTINUE)
                        return ret;
                break;
        case VCPU_SREG_LDTR:
                if (seg_desc.s || seg_desc.type != 2)
                        goto exception;
                break;
        default: /* DS, ES, FS, or GS */
                /*
                 * segment is not a data or readable code segment or
                 * ((segment is a data or nonconforming code segment)
                 * and (both RPL and CPL > DPL))
                 */
                if ((seg_desc.type & 0xa) == 0x8 ||
                    (((seg_desc.type & 0xc) != 0xc) &&
                     (rpl > dpl && cpl > dpl)))
                        goto exception;
                break;
        }

        if (!seg_desc.p) {
                err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
                goto exception;
        }

        if (seg_desc.s) {
                /* mark segment as accessed */
                if (!(seg_desc.type & 1)) {
                        seg_desc.type |= 1;
                        ret = write_segment_descriptor(ctxt, selector,
                                                       &seg_desc);
                        if (ret != X86EMUL_CONTINUE)
                                return ret;
                }
        } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
                ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
                if (ret != X86EMUL_CONTINUE)
                        return ret;
                if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
                                                 ((u64)base3 << 32), ctxt))
                        return emulate_gp(ctxt, 0);
        }
load:
        ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
        if (desc)
                *desc = seg_desc;
        return X86EMUL_CONTINUE;
exception:
        return emulate_exception(ctxt, err_vec, err_code, true);
}
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                   u16 selector, int seg)
{
        u8 cpl = ctxt->ops->cpl(ctxt);

        /*
         * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
         * they can load it at CPL<3 (Intel's manual says only LSS can,
         * but it's obviously wrong).
         *
         * However, the Intel manual says that putting IST=1/DPL=3 in
         * an interrupt gate will result in SS=3 (the AMD manual instead
         * says it doesn't), so allow SS=3 in __load_segment_descriptor
         * and only forbid it here.
         */
        if (seg == VCPU_SREG_SS && selector == 3 &&
            ctxt->mode == X86EMUL_MODE_PROT64)
                return emulate_exception(ctxt, GP_VECTOR, 0, true);

        return __load_segment_descriptor(ctxt, selector, seg, cpl,
                                         X86_TRANSFER_NONE, NULL);
}
static void write_register_operand(struct operand *op)
{
        return assign_register(op->addr.reg, op->val, op->bytes);
}

static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
{
        switch (op->type) {
        case OP_REG:
                write_register_operand(op);
                break;
        case OP_MEM:
                if (ctxt->lock_prefix)
                        return segmented_cmpxchg(ctxt,
                                                 op->addr.mem,
                                                 &op->orig_val,
                                                 &op->val,
                                                 op->bytes);
                else
                        return segmented_write(ctxt,
                                               op->addr.mem,
                                               &op->val,
                                               op->bytes);
        case OP_MEM_STR:
                return segmented_write(ctxt,
                                       op->addr.mem,
                                       op->data,
                                       op->bytes * op->count);
        case OP_XMM:
                kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
                break;
        case OP_MM:
                kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
                break;
        case OP_NONE:
                /* no writeback */
                break;
        default:
                break;
        }
        return X86EMUL_CONTINUE;
}
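
/*
 * Note that a locked instruction (e.g. "lock add %reg, (mem)") writes
 * back through segmented_cmpxchg() with the operand's original value,
 * so the memory update only succeeds if the location still holds what
 * the emulator read, preserving atomicity for the guest.
 */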
static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
{
        struct segmented_address addr;

        rsp_increment(ctxt, -bytes);
        addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
        addr.seg = VCPU_SREG_SS;

        return segmented_write(ctxt, addr, data, bytes);
}

static int em_push(struct x86_emulate_ctxt *ctxt)
{
        /* Disable writeback. */
        ctxt->dst.type = OP_NONE;
        return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
}
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
                       void *dest, int len)
{
        int rc;
        struct segmented_address addr;

        addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
        addr.seg = VCPU_SREG_SS;
        rc = segmented_read(ctxt, addr, dest, len);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rsp_increment(ctxt, len);
        return rc;
}

static int em_pop(struct x86_emulate_ctxt *ctxt)
{
        return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
                        void *dest, int len)
{
        int rc;
        unsigned long val, change_mask;
        int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
        int cpl = ctxt->ops->cpl(ctxt);

        rc = emulate_pop(ctxt, &val, len);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
                      X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
                      X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
                      X86_EFLAGS_AC | X86_EFLAGS_ID;

        switch(ctxt->mode) {
        case X86EMUL_MODE_PROT64:
        case X86EMUL_MODE_PROT32:
        case X86EMUL_MODE_PROT16:
                if (cpl == 0)
                        change_mask |= X86_EFLAGS_IOPL;
                if (cpl <= iopl)
                        change_mask |= X86_EFLAGS_IF;
                break;
        case X86EMUL_MODE_VM86:
                if (iopl < 3)
                        return emulate_gp(ctxt, 0);
                change_mask |= X86_EFLAGS_IF;
                break;
        default: /* real mode */
                change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
                break;
        }

        *(unsigned long *)dest =
                (ctxt->eflags & ~change_mask) | (val & change_mask);

        return rc;
}

static int em_popf(struct x86_emulate_ctxt *ctxt)
{
        ctxt->dst.type = OP_REG;
        ctxt->dst.addr.reg = &ctxt->eflags;
        ctxt->dst.bytes = ctxt->op_bytes;
        return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}
static int em_enter(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        unsigned frame_size = ctxt->src.val;
        unsigned nesting_level = ctxt->src2.val & 31;
        ulong rbp;

        if (nesting_level)
                return X86EMUL_UNHANDLEABLE;

        rbp = reg_read(ctxt, VCPU_REGS_RBP);
        rc = push(ctxt, &rbp, stack_size(ctxt));
        if (rc != X86EMUL_CONTINUE)
                return rc;
        assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
                      stack_mask(ctxt));
        assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
                      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
                      stack_mask(ctxt));
        return X86EMUL_CONTINUE;
}

static int em_leave(struct x86_emulate_ctxt *ctxt)
{
        assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
                      stack_mask(ctxt));
        return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
}
static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
{
        int seg = ctxt->src2.val;

        ctxt->src.val = get_segment_selector(ctxt, seg);
        if (ctxt->op_bytes == 4) {
                rsp_increment(ctxt, -2);
                ctxt->op_bytes = 2;
        }

        return em_push(ctxt);
}

static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
{
        int seg = ctxt->src2.val;
        unsigned long selector;
        int rc;

        rc = emulate_pop(ctxt, &selector, 2);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        if (ctxt->modrm_reg == VCPU_SREG_SS)
                ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
        if (ctxt->op_bytes > 2)
                rsp_increment(ctxt, ctxt->op_bytes - 2);

        rc = load_segment_descriptor(ctxt, (u16)selector, seg);
        return rc;
}
static int em_pusha(struct x86_emulate_ctxt *ctxt)
{
        unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
        int rc = X86EMUL_CONTINUE;
        int reg = VCPU_REGS_RAX;

        while (reg <= VCPU_REGS_RDI) {
                (reg == VCPU_REGS_RSP) ?
                (ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));

                rc = em_push(ctxt);
                if (rc != X86EMUL_CONTINUE)
                        return rc;

                ++reg;
        }

        return rc;
}
*ctxt
)
1984 ctxt
->src
.val
= (unsigned long)ctxt
->eflags
& ~X86_EFLAGS_VM
;
1985 return em_push(ctxt
);
static int em_popa(struct x86_emulate_ctxt *ctxt)
{
        int rc = X86EMUL_CONTINUE;
        int reg = VCPU_REGS_RDI;
        u32 val;

        while (reg >= VCPU_REGS_RAX) {
                if (reg == VCPU_REGS_RSP) {
                        rsp_increment(ctxt, ctxt->op_bytes);
                        --reg;
                }

                rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
                if (rc != X86EMUL_CONTINUE)
                        break;
                assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
                --reg;
        }
        return rc;
}
static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
        const struct x86_emulate_ops *ops = ctxt->ops;
        int rc;
        struct desc_ptr dt;
        gva_t cs_addr;
        gva_t eip_addr;
        u16 cs, eip;

        /* TODO: Add limit checks */
        ctxt->src.val = ctxt->eflags;
        rc = em_push(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);

        ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
        rc = em_push(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->src.val = ctxt->_eip;
        rc = em_push(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ops->get_idt(ctxt, &dt);

        eip_addr = dt.address + (irq << 2);
        cs_addr = dt.address + (irq << 2) + 2;

        rc = linear_read_system(ctxt, cs_addr, &cs, 2);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = linear_read_system(ctxt, eip_addr, &eip, 2);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->_eip = eip;

        return rc;
}
int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
        int rc;

        invalidate_registers(ctxt);
        rc = __emulate_int_real(ctxt, irq);
        if (rc == X86EMUL_CONTINUE)
                writeback_registers(ctxt);

        return rc;
}

static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
{
        switch(ctxt->mode) {
        case X86EMUL_MODE_REAL:
                return __emulate_int_real(ctxt, irq);
        case X86EMUL_MODE_VM86:
        case X86EMUL_MODE_PROT16:
        case X86EMUL_MODE_PROT32:
        case X86EMUL_MODE_PROT64:
        default:
                /* Protected mode interrupts unimplemented yet */
                return X86EMUL_UNHANDLEABLE;
        }
}
static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
{
        int rc = X86EMUL_CONTINUE;
        unsigned long temp_eip = 0;
        unsigned long temp_eflags = 0;
        unsigned long cs = 0;
        unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
                             X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
                             X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
                             X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
                             X86_EFLAGS_AC | X86_EFLAGS_ID |
                             X86_EFLAGS_FIXED;
        unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
                                  X86_EFLAGS_VIP;

        /* TODO: Add stack limit check */

        rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);

        if (rc != X86EMUL_CONTINUE)
                return rc;

        if (temp_eip & ~0xffff)
                return emulate_gp(ctxt, 0);

        rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);

        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);

        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);

        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->_eip = temp_eip;

        if (ctxt->op_bytes == 4)
                ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
        else if (ctxt->op_bytes == 2) {
                ctxt->eflags &= ~0xffff;
                ctxt->eflags |= temp_eflags;
        }

        ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
        ctxt->eflags |= X86_EFLAGS_FIXED;
        ctxt->ops->set_nmi_mask(ctxt, false);

        return rc;
}
static int em_iret(struct x86_emulate_ctxt *ctxt)
{
        switch(ctxt->mode) {
        case X86EMUL_MODE_REAL:
                return emulate_iret_real(ctxt);
        case X86EMUL_MODE_VM86:
        case X86EMUL_MODE_PROT16:
        case X86EMUL_MODE_PROT32:
        case X86EMUL_MODE_PROT64:
        default:
                /* iret from protected mode unimplemented yet */
                return X86EMUL_UNHANDLEABLE;
        }
}
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        unsigned short sel;
        struct desc_struct new_desc;
        u8 cpl = ctxt->ops->cpl(ctxt);

        memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

        rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
                                       X86_TRANSFER_CALL_JMP,
                                       &new_desc);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
        /* Error handling is not implemented. */
        if (rc != X86EMUL_CONTINUE)
                return X86EMUL_UNHANDLEABLE;

        return rc;
}

static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
{
        return assign_eip_near(ctxt, ctxt->src.val);
}
static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        long int old_eip;

        old_eip = ctxt->_eip;
        rc = assign_eip_near(ctxt, ctxt->src.val);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        ctxt->src.val = old_eip;
        rc = em_push(ctxt);
        return rc;
}
static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
{
        u64 old = ctxt->dst.orig_val64;

        if (ctxt->dst.bytes == 16)
                return X86EMUL_UNHANDLEABLE;

        if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
            ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
                *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
                *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
                ctxt->eflags &= ~X86_EFLAGS_ZF;
        } else {
                ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
                        (u32) reg_read(ctxt, VCPU_REGS_RBX);

                ctxt->eflags |= X86_EFLAGS_ZF;
        }
        return X86EMUL_CONTINUE;
}
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        unsigned long eip;

        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        return assign_eip_near(ctxt, eip);
}
static int em_ret_far(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        unsigned long eip, cs;
        int cpl = ctxt->ops->cpl(ctxt);
        struct desc_struct new_desc;

        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
                                       X86_TRANSFER_RET,
                                       &new_desc);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        rc = assign_eip_far(ctxt, eip, &new_desc);
        /* Error handling is not implemented. */
        if (rc != X86EMUL_CONTINUE)
                return X86EMUL_UNHANDLEABLE;

        return rc;
}

static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
{
        int rc;

        rc = em_ret_far(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        rsp_increment(ctxt, ctxt->src.val);
        return X86EMUL_CONTINUE;
}
static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
{
        /* Save real source value, then compare EAX against destination. */
        ctxt->dst.orig_val = ctxt->dst.val;
        ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
        ctxt->src.orig_val = ctxt->src.val;
        ctxt->src.val = ctxt->dst.orig_val;
        fastop(ctxt, em_cmp);

        if (ctxt->eflags & X86_EFLAGS_ZF) {
                /* Success: write back to memory; no update of EAX */
                ctxt->src.type = OP_NONE;
                ctxt->dst.val = ctxt->src.orig_val;
        } else {
                /* Failure: write the value we saw to EAX. */
                ctxt->src.type = OP_REG;
                ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
                ctxt->src.val = ctxt->dst.orig_val;
                /* Create write-cycle to dest by writing the same value */
                ctxt->dst.val = ctxt->dst.orig_val;
        }
        return X86EMUL_CONTINUE;
}
static int em_lseg(struct x86_emulate_ctxt *ctxt)
{
        int seg = ctxt->src2.val;
        unsigned short sel;
        int rc;

        memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

        rc = load_segment_descriptor(ctxt, sel, seg);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->dst.val = ctxt->src.val;
        return rc;
}
static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
{
#ifdef CONFIG_X86_64
        return ctxt->ops->guest_has_long_mode(ctxt);
#else
        return false;
#endif
}
static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
{
        desc->g    = (flags >> 23) & 1;
        desc->d    = (flags >> 22) & 1;
        desc->l    = (flags >> 21) & 1;
        desc->avl  = (flags >> 20) & 1;
        desc->p    = (flags >> 15) & 1;
        desc->dpl  = (flags >> 13) & 3;
        desc->s    = (flags >> 12) & 1;
        desc->type = (flags >>  8) & 15;
}
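
/*
 * The "flags" word decoded above uses the layout of the upper half of a
 * GDT descriptor (type at bits 8..11, S at 12, DPL at 13..14, P at 15,
 * AVL/L/D/G at 20..23), which is how segment attributes are stored in
 * the SMM state-save area read by the rsm_load_* helpers below.
 */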
static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
                           int n)
{
        struct desc_struct desc;
        int offset;
        u16 selector;

        selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);

        if (n < 3)
                offset = 0x7f84 + n * 12;
        else
                offset = 0x7f2c + (n - 3) * 12;

        set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
        set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
        rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
        ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
        return X86EMUL_CONTINUE;
}
#ifdef CONFIG_X86_64
static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
                           int n)
{
        struct desc_struct desc;
        int offset;
        u16 selector;
        u32 base3;

        offset = 0x7e00 + n * 16;

        selector =                GET_SMSTATE(u16, smstate, offset);
        rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
        set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
        set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
        base3 =                   GET_SMSTATE(u32, smstate, offset + 12);

        ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
        return X86EMUL_CONTINUE;
}
#endif
*ctxt
,
2372 u64 cr0
, u64 cr3
, u64 cr4
)
2377 /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
2379 if (cr4
& X86_CR4_PCIDE
) {
2384 bad
= ctxt
->ops
->set_cr(ctxt
, 3, cr3
);
2386 return X86EMUL_UNHANDLEABLE
;
2389 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
2390 * Then enable protected mode. However, PCID cannot be enabled
2391 * if EFER.LMA=0, so set it separately.
2393 bad
= ctxt
->ops
->set_cr(ctxt
, 4, cr4
& ~X86_CR4_PCIDE
);
2395 return X86EMUL_UNHANDLEABLE
;
2397 bad
= ctxt
->ops
->set_cr(ctxt
, 0, cr0
);
2399 return X86EMUL_UNHANDLEABLE
;
2401 if (cr4
& X86_CR4_PCIDE
) {
2402 bad
= ctxt
->ops
->set_cr(ctxt
, 4, cr4
);
2404 return X86EMUL_UNHANDLEABLE
;
2406 bad
= ctxt
->ops
->set_cr(ctxt
, 3, cr3
| pcid
);
2408 return X86EMUL_UNHANDLEABLE
;
2413 return X86EMUL_CONTINUE
;
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
			     const char *smstate)
{
	struct desc_struct desc;
	struct desc_ptr dt;
	u16 selector;
	u32 val, cr0, cr3, cr4;
	int i;

	cr0 =          GET_SMSTATE(u32, smstate, 0x7ffc);
	cr3 =          GET_SMSTATE(u32, smstate, 0x7ff8);
	ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
	ctxt->_eip =   GET_SMSTATE(u32, smstate, 0x7ff0);

	for (i = 0; i < 8; i++)
		*reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);

	val = GET_SMSTATE(u32, smstate, 0x7fcc);

	if (ctxt->ops->set_dr(ctxt, 6, val))
		return X86EMUL_UNHANDLEABLE;

	val = GET_SMSTATE(u32, smstate, 0x7fc8);

	if (ctxt->ops->set_dr(ctxt, 7, val))
		return X86EMUL_UNHANDLEABLE;

	selector =                 GET_SMSTATE(u32, smstate, 0x7fc4);
	set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f64));
	set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f60));
	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f5c));
	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);

	selector =                 GET_SMSTATE(u32, smstate, 0x7fc0);
	set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f80));
	set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f7c));
	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f78));
	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);

	dt.address =               GET_SMSTATE(u32, smstate, 0x7f74);
	dt.size =                  GET_SMSTATE(u32, smstate, 0x7f70);
	ctxt->ops->set_gdt(ctxt, &dt);

	dt.address =               GET_SMSTATE(u32, smstate, 0x7f58);
	dt.size =                  GET_SMSTATE(u32, smstate, 0x7f54);
	ctxt->ops->set_idt(ctxt, &dt);

	for (i = 0; i < 6; i++) {
		int r = rsm_load_seg_32(ctxt, smstate, i);
		if (r != X86EMUL_CONTINUE)
			return r;
	}

	cr4 = GET_SMSTATE(u32, smstate, 0x7f14);

	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));

	return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
}
#ifdef CONFIG_X86_64
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
			     const char *smstate)
{
	struct desc_struct desc;
	struct desc_ptr dt;
	u64 val, cr0, cr3, cr4;
	u32 base3;
	u16 selector;
	int i, r;

	for (i = 0; i < 16; i++)
		*reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);

	ctxt->_eip   = GET_SMSTATE(u64, smstate, 0x7f78);
	ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;

	val = GET_SMSTATE(u64, smstate, 0x7f68);

	if (ctxt->ops->set_dr(ctxt, 6, val))
		return X86EMUL_UNHANDLEABLE;

	val = GET_SMSTATE(u64, smstate, 0x7f60);

	if (ctxt->ops->set_dr(ctxt, 7, val))
		return X86EMUL_UNHANDLEABLE;

	cr0 =                       GET_SMSTATE(u64, smstate, 0x7f58);
	cr3 =                       GET_SMSTATE(u64, smstate, 0x7f50);
	cr4 =                       GET_SMSTATE(u64, smstate, 0x7f48);
	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
	val =                       GET_SMSTATE(u64, smstate, 0x7ed0);

	if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
		return X86EMUL_UNHANDLEABLE;

	selector =                  GET_SMSTATE(u32, smstate, 0x7e90);
	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e92) << 8);
	set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e94));
	set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e98));
	base3 =                     GET_SMSTATE(u32, smstate, 0x7e9c);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);

	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e84);
	dt.address =                GET_SMSTATE(u64, smstate, 0x7e88);
	ctxt->ops->set_idt(ctxt, &dt);

	selector =                  GET_SMSTATE(u32, smstate, 0x7e70);
	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e72) << 8);
	set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e74));
	set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e78));
	base3 =                     GET_SMSTATE(u32, smstate, 0x7e7c);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);

	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e64);
	dt.address =                GET_SMSTATE(u64, smstate, 0x7e68);
	ctxt->ops->set_gdt(ctxt, &dt);

	r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
	if (r != X86EMUL_CONTINUE)
		return r;

	for (i = 0; i < 6; i++) {
		r = rsm_load_seg_64(ctxt, smstate, i);
		if (r != X86EMUL_CONTINUE)
			return r;
	}

	return X86EMUL_CONTINUE;
}
#endif
static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
	unsigned long cr0, cr4, efer;
	char buf[512];
	u64 smbase;
	int ret;

	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
		return emulate_ud(ctxt);

	smbase = ctxt->ops->get_smbase(ctxt);

	ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
	if (ret != X86EMUL_CONTINUE)
		return X86EMUL_UNHANDLEABLE;

	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
		ctxt->ops->set_nmi_mask(ctxt, false);

	ctxt->ops->exiting_smm(ctxt);

	/*
	 * Get back to real mode, to prepare a safe state in which to load
	 * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
	 * supports long mode.
	 */
	if (emulator_has_longmode(ctxt)) {
		struct desc_struct cs_desc;

		/* Zero CR4.PCIDE before CR0.PG.  */
		cr4 = ctxt->ops->get_cr(ctxt, 4);
		if (cr4 & X86_CR4_PCIDE)
			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);

		/* A 32-bit code segment is required to clear EFER.LMA.  */
		memset(&cs_desc, 0, sizeof(cs_desc));
		cs_desc.type = 0xb;
		cs_desc.s = cs_desc.g = cs_desc.p = 1;
		ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
	}

	/* For the 64-bit case, this will clear EFER.LMA.  */
	cr0 = ctxt->ops->get_cr(ctxt, 0);
	if (cr0 & X86_CR0_PE)
		ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

	if (emulator_has_longmode(ctxt)) {
		/* Clear CR4.PAE before clearing EFER.LME. */
		cr4 = ctxt->ops->get_cr(ctxt, 4);
		if (cr4 & X86_CR4_PAE)
			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);

		/* And finally go back to 32-bit mode. */
		efer = 0;
		ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
	}

	/*
	 * Give leave_smm() a chance to make ISA-specific changes to the vCPU
	 * state (e.g. enter guest mode) before loading state from the SMM
	 * state-save area.
	 */
	if (ctxt->ops->leave_smm(ctxt, buf))
		goto emulate_shutdown;

#ifdef CONFIG_X86_64
	if (emulator_has_longmode(ctxt))
		ret = rsm_load_state_64(ctxt, buf);
	else
#endif
		ret = rsm_load_state_32(ctxt, buf);

	if (ret != X86EMUL_CONTINUE)
		goto emulate_shutdown;

	/*
	 * Note, the ctxt->ops callbacks are responsible for handling side
	 * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID
	 * runtime updates, etc...  If that changes, e.g. this flow is moved
	 * out of the emulator to make it look more like enter_smm(), then
	 * those side effects need to be explicitly handled for both success
	 * and shutdown.
	 */
	return X86EMUL_CONTINUE;

emulate_shutdown:
	ctxt->ops->triple_fault(ctxt);
	return X86EMUL_CONTINUE;
}
static void
setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss)
{
	cs->l = 0;		/* will be adjusted later */
	set_desc_base(cs, 0);	/* flat segment */
	cs->g = 1;		/* 4kb granularity */
	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
	cs->type = 0x0b;	/* Read, Execute, Accessed */
	cs->s = 1;
	cs->dpl = 0;		/* will be adjusted later */
	cs->p = 1;
	cs->d = 1;
	cs->avl = 0;

	set_desc_base(ss, 0);	/* flat segment */
	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
	ss->g = 1;		/* 4kb granularity */
	ss->s = 1;
	ss->type = 0x03;	/* Read/Write, Accessed */
	ss->d = 1;		/* 32bit stack segment */
	ss->dpl = 0;
	ss->p = 1;
	ss->l = 0;
	ss->avl = 0;
}
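/*
 * Both descriptors built above describe flat 4GB segments: base 0 and
 * limit 0xfffff with 4k granularity scales to (0xfffff << 12) | 0xfff,
 * i.e. a limit of 4GB - 1.
 */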
static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ebx, ecx, edx;

	eax = ecx = 0;
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
	return is_guest_vendor_intel(ebx, ecx, edx);
}
static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	u32 eax, ebx, ecx, edx;

	/*
	 * syscall should always be enabled in longmode - so only become
	 * vendor specific (cpuid) if other modes are active...
	 */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return true;

	eax = 0x00000000;
	ecx = 0x00000000;
	ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
	/*
	 * remark: Intel CPUs only support "syscall" in 64bit longmode. Also a
	 * 64bit guest with a 32bit compat-app running will #UD !! While this
	 * behaviour can be fixed (by emulating) into AMD response - CPUs of
	 * AMD can't behave like Intel.
	 */
	if (is_guest_vendor_intel(ebx, ecx, edx))
		return false;

	if (is_guest_vendor_amd(ebx, ecx, edx) ||
	    is_guest_vendor_hygon(ebx, ecx, edx))
		return true;

	/*
	 * default: (not Intel, not AMD, not Hygon), apply Intel's
	 * stricter rules...
	 */
	return false;
}
static int em_syscall(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;
	u64 efer = 0;

	/* syscall is not available in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_ud(ctxt);

	if (!(em_syscall_is_enabled(ctxt)))
		return emulate_ud(ctxt);

	ops->get_msr(ctxt, MSR_EFER, &efer);
	if (!(efer & EFER_SCE))
		return emulate_ud(ctxt);

	setup_syscalls_segments(&cs, &ss);
	ops->get_msr(ctxt, MSR_STAR, &msr_data);
	msr_data >>= 32;
	cs_sel = (u16)(msr_data & 0xfffc);
	ss_sel = (u16)(msr_data + 8);

	if (efer & EFER_LMA) {
		cs.d = 0;
		cs.l = 1;
	}
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
	if (efer & EFER_LMA) {
#ifdef CONFIG_X86_64
		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;

		ops->get_msr(ctxt,
			     ctxt->mode == X86EMUL_MODE_PROT64 ?
			     MSR_LSTAR : MSR_CSTAR, &msr_data);
		ctxt->_eip = msr_data;

		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
		ctxt->eflags &= ~msr_data;
		ctxt->eflags |= X86_EFLAGS_FIXED;
#endif
	} else {
		/* legacy mode */
		ops->get_msr(ctxt, MSR_STAR, &msr_data);
		ctxt->_eip = (u32)msr_data;

		ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
	}

	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
	return X86EMUL_CONTINUE;
}
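/*
 * SYSCALL derives both selectors from STAR[47:32] (hence the ">>= 32"
 * above): CS is that value with the RPL bits masked off, SS is that
 * value + 8.  E.g. STAR[47:32] == 0x0010 yields CS=0x0010, SS=0x0018.
 */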
static int em_sysenter(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;
	u64 efer = 0;

	ops->get_msr(ctxt, MSR_EFER, &efer);
	/* inject #GP if in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return emulate_gp(ctxt, 0);

	/*
	 * Not recognized on AMD in compat mode (but is recognized in legacy
	 * mode).
	 */
	if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
	    && !vendor_intel(ctxt))
		return emulate_ud(ctxt);

	/* sysenter/sysexit have not been tested in 64bit mode. */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return X86EMUL_UNHANDLEABLE;

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
	if ((msr_data & 0xfffc) == 0x0)
		return emulate_gp(ctxt, 0);

	setup_syscalls_segments(&cs, &ss);
	ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
	cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
	ss_sel = cs_sel + 8;
	if (efer & EFER_LMA) {
		cs.d = 0;
		cs.l = 1;
	}

	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
	ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
	*reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
							      (u32)msr_data;
	if (efer & EFER_LMA)
		ctxt->mode = X86EMUL_MODE_PROT64;

	return X86EMUL_CONTINUE;
}
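/*
 * SYSENTER likewise derives both selectors from one MSR: CS is
 * IA32_SYSENTER_CS with the RPL bits cleared and SS is simply CS + 8,
 * i.e. the stack descriptor must directly follow the code descriptor
 * in the GDT.
 */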
static int em_sysexit(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data, rcx, rdx;
	int usermode;
	u16 cs_sel = 0, ss_sel = 0;

	/* inject #GP if in real mode or Virtual 8086 mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_gp(ctxt, 0);

	setup_syscalls_segments(&cs, &ss);

	if ((ctxt->rex_prefix & 0x8) != 0x0)	/* REX.W */
		usermode = X86EMUL_MODE_PROT64;
	else
		usermode = X86EMUL_MODE_PROT32;

	rcx = reg_read(ctxt, VCPU_REGS_RCX);
	rdx = reg_read(ctxt, VCPU_REGS_RDX);

	cs.dpl = 3;
	ss.dpl = 3;
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (usermode) {
	case X86EMUL_MODE_PROT32:
		cs_sel = (u16)(msr_data + 16);
		if ((msr_data & 0xfffc) == 0x0)
			return emulate_gp(ctxt, 0);
		ss_sel = (u16)(msr_data + 24);
		rcx = (u32)rcx;
		rdx = (u32)rdx;
		break;
	case X86EMUL_MODE_PROT64:
		cs_sel = (u16)(msr_data + 32);
		if (msr_data == 0x0)
			return emulate_gp(ctxt, 0);
		ss_sel = cs_sel + 8;
		cs.d = 0;
		cs.l = 1;
		if (emul_is_noncanonical_address(rcx, ctxt) ||
		    emul_is_noncanonical_address(rdx, ctxt))
			return emulate_gp(ctxt, 0);
		break;
	}
	cs_sel |= SEGMENT_RPL_MASK;
	ss_sel |= SEGMENT_RPL_MASK;

	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	ctxt->_eip = rdx;
	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;

	return X86EMUL_CONTINUE;
}
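/*
 * The SYSEXIT selectors above also come from IA32_SYSENTER_CS: +16/+24
 * for a 32-bit return and +32/+40 (ss_sel = cs_sel + 8) for a 64-bit
 * return, with RPL forced to 3 in both cases.
 */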
static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
{
	int iopl;
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return false;
	if (ctxt->mode == X86EMUL_MODE_VM86)
		return true;
	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
	return ctxt->ops->cpl(ctxt) > iopl;
}
#define VMWARE_PORT_VMPORT	(0x5658)
#define VMWARE_PORT_VMRPC	(0x5659)

static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
					    u16 port, u16 len)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct tr_seg;
	u32 base3;
	int r;
	u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
	unsigned mask = (1 << len) - 1;
	unsigned long base;

	/*
	 * VMware allows access to these ports even if denied
	 * by TSS I/O permission bitmap. Mimic behavior.
	 */
	if (enable_vmware_backdoor &&
	    ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
		return true;

	ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
	if (!tr_seg.p)
		return false;
	if (desc_limit_scaled(&tr_seg) < 103)
		return false;
	base = get_desc_base(&tr_seg);
#ifdef CONFIG_X86_64
	base |= ((u64)base3) << 32;
#endif
	r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
	if (r != X86EMUL_CONTINUE)
		return false;
	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
		return false;
	r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
	if (r != X86EMUL_CONTINUE)
		return false;
	if ((perm >> bit_idx) & mask)
		return false;
	return true;
}
static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
				 u16 port, u16 len)
{
	if (ctxt->perm_ok)
		return true;

	if (emulator_bad_iopl(ctxt))
		if (!emulator_io_port_access_allowed(ctxt, port, len))
			return false;

	ctxt->perm_ok = true;

	return true;
}
static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
{
	/*
	 * Intel CPUs mask the counter and pointers in quite strange
	 * manner when ECX is zero due to REP-string optimizations.
	 */
#ifdef CONFIG_X86_64
	if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
		return;

	*reg_write(ctxt, VCPU_REGS_RCX) = 0;

	switch (ctxt->b) {
	case 0xa4:	/* movsb */
	case 0xa5:	/* movsd/w */
		*reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
		fallthrough;
	case 0xaa:	/* stosb */
	case 0xab:	/* stosd/w */
		*reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
	}
#endif
}
static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
				struct tss_segment_16 *tss)
{
	tss->ip = ctxt->_eip;
	tss->flag = ctxt->eflags;
	tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
	tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
	tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
	tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
	tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
	tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
	tss->si = reg_read(ctxt, VCPU_REGS_RSI);
	tss->di = reg_read(ctxt, VCPU_REGS_RDI);

	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
	tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
}
static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
				 struct tss_segment_16 *tss)
{
	int ret;
	u8 cpl;

	ctxt->_eip = tss->ip;
	ctxt->eflags = tss->flag | 2;
	*reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
	*reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
	*reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
	*reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
	*reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
	*reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
	*reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
	*reg_write(ctxt, VCPU_REGS_RDI) = tss->di;

	/*
	 * SDM says that segment selectors are loaded before segment
	 * descriptors
	 */
	set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);

	cpl = tss->cs & 3;

	/*
	 * Now load segment descriptors. If fault happens at this stage
	 * it is handled in a context of new task
	 */
	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
					X86_TRANSFER_TASK_SWITCH, NULL);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
					X86_TRANSFER_TASK_SWITCH, NULL);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
					X86_TRANSFER_TASK_SWITCH, NULL);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
					X86_TRANSFER_TASK_SWITCH, NULL);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
					X86_TRANSFER_TASK_SWITCH, NULL);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	return X86EMUL_CONTINUE;
}
static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
	struct tss_segment_16 tss_seg;
	int ret;
	u32 new_tss_base = get_desc_base(new_desc);

	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
	if (ret != X86EMUL_CONTINUE)
		return ret;

	save_state_to_tss16(ctxt, &tss_seg);

	ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
	if (ret != X86EMUL_CONTINUE)
		return ret;

	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
	if (ret != X86EMUL_CONTINUE)
		return ret;

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

		ret = linear_write_system(ctxt, new_tss_base,
					  &tss_seg.prev_task_link,
					  sizeof(tss_seg.prev_task_link));
		if (ret != X86EMUL_CONTINUE)
			return ret;
	}

	return load_state_from_tss16(ctxt, &tss_seg);
}
static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
				struct tss_segment_32 *tss)
{
	/* CR3 and ldt selector are not saved intentionally */
	tss->eip = ctxt->_eip;
	tss->eflags = ctxt->eflags;
	tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
	tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
	tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
	tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
	tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
	tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
	tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
	tss->edi = reg_read(ctxt, VCPU_REGS_RDI);

	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
	tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
	tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
}
static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
				 struct tss_segment_32 *tss)
{
	int ret;
	u8 cpl;

	if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
		return emulate_gp(ctxt, 0);
	ctxt->_eip = tss->eip;
	ctxt->eflags = tss->eflags | 2;

	/* General purpose registers */
	*reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
	*reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
	*reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
	*reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
	*reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
	*reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
	*reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
	*reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;

	/*
	 * SDM says that segment selectors are loaded before segment
	 * descriptors.  This is important because CPL checks will
	 * use CS.RPL.
	 */
	set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
	set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
	set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);

	/*
	 * If we're switching between Protected Mode and VM86, we need to make
	 * sure to update the mode before loading the segment descriptors so
	 * that the selectors are interpreted correctly.
	 */
	if (ctxt->eflags & X86_EFLAGS_VM) {
		ctxt->mode = X86EMUL_MODE_VM86;
		cpl = 3;
	} else {
		ctxt->mode = X86EMUL_MODE_PROT32;
		cpl = tss->cs & 3;
	}

	/*
	 * Now load segment descriptors. If fault happens at this stage
	 * it is handled in a context of new task
	 */
	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
					cpl, X86_TRANSFER_TASK_SWITCH, NULL);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
					X86_TRANSFER_TASK_SWITCH, NULL);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
					X86_TRANSFER_TASK_SWITCH, NULL);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
					X86_TRANSFER_TASK_SWITCH, NULL);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
					X86_TRANSFER_TASK_SWITCH, NULL);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
					X86_TRANSFER_TASK_SWITCH, NULL);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
					X86_TRANSFER_TASK_SWITCH, NULL);

	return ret;
}
static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
	struct tss_segment_32 tss_seg;
	int ret;
	u32 new_tss_base = get_desc_base(new_desc);
	u32 eip_offset = offsetof(struct tss_segment_32, eip);
	u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);

	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
	if (ret != X86EMUL_CONTINUE)
		return ret;

	save_state_to_tss32(ctxt, &tss_seg);

	/* Only GP registers and segment selectors are saved */
	ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
				  ldt_sel_offset - eip_offset);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
	if (ret != X86EMUL_CONTINUE)
		return ret;

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

		ret = linear_write_system(ctxt, new_tss_base,
					  &tss_seg.prev_task_link,
					  sizeof(tss_seg.prev_task_link));
		if (ret != X86EMUL_CONTINUE)
			return ret;
	}

	return load_state_from_tss32(ctxt, &tss_seg);
}
static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
				   u16 tss_selector, int idt_index, int reason,
				   bool has_error_code, u32 error_code)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct curr_tss_desc, next_tss_desc;
	int ret;
	u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
	ulong old_tss_base =
		ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
	u32 desc_limit;
	ulong desc_addr, dr7;

	/* FIXME: old_tss_base == ~0 ? */

	ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	/* FIXME: check that next_tss_desc is tss */

	/*
	 * Check privileges. The three cases are task switch caused by...
	 *
	 * 1. jmp/call/int to task gate: Check against DPL of the task gate
	 * 2. Exception/IRQ/iret: No check is performed
	 * 3. jmp/call to TSS/task-gate: No check is performed since the
	 *    hardware checks it before exiting.
	 */
	if (reason == TASK_SWITCH_GATE) {
		if (idt_index != -1) {
			/* Software interrupts */
			struct desc_struct task_gate_desc;
			int dpl;

			ret = read_interrupt_descriptor(ctxt, idt_index,
							&task_gate_desc);
			if (ret != X86EMUL_CONTINUE)
				return ret;

			dpl = task_gate_desc.dpl;
			if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
				return emulate_gp(ctxt, (idt_index << 3) | 0x2);
		}
	}

	desc_limit = desc_limit_scaled(&next_tss_desc);
	if (!next_tss_desc.p ||
	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
	     desc_limit < 0x2b)) {
		return emulate_ts(ctxt, tss_selector & 0xfffc);
	}

	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
		write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
	}

	if (reason == TASK_SWITCH_IRET)
		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;

	/* set back link to prev task only if NT bit is set in eflags
	   note that old_tss_sel is not used after this point */
	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
		old_tss_sel = 0xffff;

	if (next_tss_desc.type & 8)
		ret = task_switch_32(ctxt, old_tss_sel, old_tss_base, &next_tss_desc);
	else
		ret = task_switch_16(ctxt, old_tss_sel,
				     old_tss_base, &next_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;

	if (reason != TASK_SWITCH_IRET) {
		next_tss_desc.type |= (1 << 1); /* set busy flag */
		write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
	}

	ops->set_cr(ctxt, 0,  ops->get_cr(ctxt, 0) | X86_CR0_TS);
	ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);

	if (has_error_code) {
		ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
		ctxt->lock_prefix = 0;
		ctxt->src.val = (unsigned long) error_code;
		ret = em_push(ctxt);
	}

	ops->get_dr(ctxt, 7, &dr7);
	ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));

	return ret;
}
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
			 u16 tss_selector, int idt_index, int reason,
			 bool has_error_code, u32 error_code)
{
	int rc;

	invalidate_registers(ctxt);
	ctxt->_eip = ctxt->eip;
	ctxt->dst.type = OP_NONE;

	rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
				     has_error_code, error_code);

	if (rc == X86EMUL_CONTINUE) {
		ctxt->eip = ctxt->_eip;
		writeback_registers(ctxt);
	}

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
}
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
			    struct operand *op)
{
	int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;

	register_address_increment(ctxt, reg, df * op->bytes);
	op->addr.mem.ea = register_address(ctxt, reg);
}
static int em_das(struct x86_emulate_ctxt *ctxt)
{
	u8 al, old_al;
	bool af, cf, old_cf;

	cf = ctxt->eflags & X86_EFLAGS_CF;
	al = ctxt->dst.val;

	old_al = al;
	old_cf = cf;
	cf = false;
	af = ctxt->eflags & X86_EFLAGS_AF;
	if ((al & 0x0f) > 9 || af) {
		al -= 6;
		cf = old_cf | (al >= 250);
		af = true;
	} else {
		af = false;
	}
	if (old_al > 0x99 || old_cf) {
		al -= 0x60;
		cf = true;
	}

	ctxt->dst.val = al;
	/* Set PF, ZF, SF */
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
	fastop(ctxt, em_or);
	ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
	if (cf)
		ctxt->eflags |= X86_EFLAGS_CF;
	if (af)
		ctxt->eflags |= X86_EFLAGS_AF;
	return X86EMUL_CONTINUE;
}
static int em_aam(struct x86_emulate_ctxt *ctxt)
{
	u8 al, ah;

	if (ctxt->src.val == 0)
		return emulate_de(ctxt);

	al = ctxt->dst.val & 0xff;
	ah = al / ctxt->src.val;
	al %= ctxt->src.val;

	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);

	/* Set PF, ZF, SF */
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
	fastop(ctxt, em_or);

	return X86EMUL_CONTINUE;
}
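/*
 * e.g. AAM with the default immediate of 10 and AL == 123 yields
 * AH = 123 / 10 = 12 and AL = 123 % 10 = 3.
 */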
static int em_aad(struct x86_emulate_ctxt *ctxt)
{
	u8 al = ctxt->dst.val & 0xff;
	u8 ah = (ctxt->dst.val >> 8) & 0xff;

	al = (al + (ah * ctxt->src.val)) & 0xff;

	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;

	/* Set PF, ZF, SF */
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
	fastop(ctxt, em_or);

	return X86EMUL_CONTINUE;
}
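/*
 * e.g. AAD with the default immediate of 10 and AH:AL == 12:3 yields
 * AL = (3 + 12 * 10) & 0xff = 123, with AH cleared.
 */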
static int em_call(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	long rel = ctxt->src.val;

	ctxt->src.val = (unsigned long)ctxt->_eip;
	rc = jmp_rel(ctxt, rel);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return em_push(ctxt);
}
static int em_call_far(struct x86_emulate_ctxt *ctxt)
{
	u16 sel, old_cs;
	ulong old_eip;
	int rc;
	struct desc_struct old_desc, new_desc;
	const struct x86_emulate_ops *ops = ctxt->ops;
	int cpl = ctxt->ops->cpl(ctxt);
	enum x86emul_mode prev_mode = ctxt->mode;

	old_eip = ctxt->_eip;
	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);

	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
				       X86_TRANSFER_CALL_JMP, &new_desc);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
	if (rc != X86EMUL_CONTINUE)
		goto fail;

	ctxt->src.val = old_cs;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		goto fail;

	ctxt->src.val = old_eip;
	rc = em_push(ctxt);
	/* If we failed, we tainted the memory, but the very least we should
	   restore cs */
	if (rc != X86EMUL_CONTINUE) {
		pr_warn_once("faulting far call emulation tainted memory\n");
		goto fail;
	}
	return rc;
fail:
	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
	ctxt->mode = prev_mode;
	return rc;
}
static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned long eip;

	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = assign_eip_near(ctxt, eip);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rsp_increment(ctxt, ctxt->src.val);
	return X86EMUL_CONTINUE;
}
static int em_xchg(struct x86_emulate_ctxt *ctxt)
{
	/* Write back the register source. */
	ctxt->src.val = ctxt->dst.val;
	write_register_operand(&ctxt->src);

	/* Write back the memory destination with implicit LOCK prefix. */
	ctxt->dst.val = ctxt->src.orig_val;
	ctxt->lock_prefix = 1;
	return X86EMUL_CONTINUE;
}
static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.val = ctxt->src2.val;
	return fastop(ctxt, em_imul);
}
static int em_cwd(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.type = OP_REG;
	ctxt->dst.bytes = ctxt->src.bytes;
	ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
	ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);

	return X86EMUL_CONTINUE;
}
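/*
 * The expression above broadcasts the sign bit of the source: for an
 * n-byte operand, src >> (n * 8 - 1) is 0 or 1, so ~((src >> (n*8-1)) - 1)
 * is either 0 or ~0UL, i.e. rDX becomes the sign extension of rAX.
 */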
static int em_rdpid(struct x86_emulate_ctxt *ctxt)
{
	u64 tsc_aux = 0;

	if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
		return emulate_ud(ctxt);
	ctxt->dst.val = tsc_aux;
	return X86EMUL_CONTINUE;
}
static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
{
	u64 tsc = 0;

	ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
	*reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
	return X86EMUL_CONTINUE;
}
static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
{
	u64 pmc;

	if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
		return emulate_gp(ctxt, 0);
	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
	*reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
	return X86EMUL_CONTINUE;
}
static int em_mov(struct x86_emulate_ctxt *ctxt)
{
	memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
	return X86EMUL_CONTINUE;
}
static int em_movbe(struct x86_emulate_ctxt *ctxt)
{
	u16 tmp;

	if (!ctxt->ops->guest_has_movbe(ctxt))
		return emulate_ud(ctxt);

	switch (ctxt->op_bytes) {
	case 2:
		/*
		 * From MOVBE definition: "...When the operand size is 16 bits,
		 * the upper word of the destination register remains unchanged
		 * ..."
		 *
		 * Both casting ->valptr and ->val to u16 breaks strict aliasing
		 * rules so we have to do the operation almost per hand.
		 */
		tmp = (u16)ctxt->src.val;
		ctxt->dst.val &= ~0xffffUL;
		ctxt->dst.val |= (unsigned long)swab16(tmp);
		break;
	case 4:
		ctxt->dst.val = swab32((u32)ctxt->src.val);
		break;
	case 8:
		ctxt->dst.val = swab64(ctxt->src.val);
		break;
	default:
		BUG();
	}
	return X86EMUL_CONTINUE;
}
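/*
 * e.g. a 4-byte MOVBE of 0x11223344 stores 0x44332211; in the 2-byte case
 * above only the low word is byte-swapped and the upper word of the
 * destination register is preserved.
 */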
static int em_cr_write(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
		return emulate_gp(ctxt, 0);

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}
static int em_dr_write(struct x86_emulate_ctxt *ctxt)
{
	unsigned long val;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		val = ctxt->src.val & ~0ULL;
	else
		val = ctxt->src.val & ~0U;

	/* #UD condition is already handled. */
	if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
		return emulate_gp(ctxt, 0);

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}
static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
{
	u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
	u64 msr_data;
	int r;

	msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
		| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
	r = ctxt->ops->set_msr(ctxt, msr_index, msr_data);

	if (r == X86EMUL_IO_NEEDED)
		return r;

	if (r > 0)
		return emulate_gp(ctxt, 0);

	return r < 0 ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
}
static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
{
	u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
	u64 msr_data;
	int r;

	r = ctxt->ops->get_msr(ctxt, msr_index, &msr_data);

	if (r == X86EMUL_IO_NEEDED)
		return r;

	if (r)
		return emulate_gp(ctxt, 0);

	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
	*reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
	return X86EMUL_CONTINUE;
}
static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
{
	if (segment > VCPU_SREG_GS &&
	    (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
	    ctxt->ops->cpl(ctxt) > 0)
		return emulate_gp(ctxt, 0);

	ctxt->dst.val = get_segment_selector(ctxt, segment);
	if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
		ctxt->dst.bytes = 2;
	return X86EMUL_CONTINUE;
}
static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->modrm_reg > VCPU_SREG_GS)
		return emulate_ud(ctxt);

	return em_store_sreg(ctxt, ctxt->modrm_reg);
}
static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
{
	u16 sel = ctxt->src.val;

	if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
		return emulate_ud(ctxt);

	if (ctxt->modrm_reg == VCPU_SREG_SS)
		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
}
static int em_sldt(struct x86_emulate_ctxt *ctxt)
{
	return em_store_sreg(ctxt, VCPU_SREG_LDTR);
}

static int em_lldt(struct x86_emulate_ctxt *ctxt)
{
	u16 sel = ctxt->src.val;

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
}

static int em_str(struct x86_emulate_ctxt *ctxt)
{
	return em_store_sreg(ctxt, VCPU_SREG_TR);
}

static int em_ltr(struct x86_emulate_ctxt *ctxt)
{
	u16 sel = ctxt->src.val;

	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
}
static int em_invlpg(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
	if (rc == X86EMUL_CONTINUE)
		ctxt->ops->invlpg(ctxt, linear);
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}
static int em_clts(struct x86_emulate_ctxt *ctxt)
{
	ulong cr0;

	cr0 = ctxt->ops->get_cr(ctxt, 0);
	cr0 &= ~X86_CR0_TS;
	ctxt->ops->set_cr(ctxt, 0, cr0);
	return X86EMUL_CONTINUE;
}
static int em_hypercall(struct x86_emulate_ctxt *ctxt)
{
	int rc = ctxt->ops->fix_hypercall(ctxt);

	if (rc != X86EMUL_CONTINUE)
		return rc;

	/* Let the processor re-execute the fixed hypercall */
	ctxt->_eip = ctxt->eip;
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}
static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
				  void (*get)(struct x86_emulate_ctxt *ctxt,
					      struct desc_ptr *ptr))
{
	struct desc_ptr desc_ptr;

	if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
	    ctxt->ops->cpl(ctxt) > 0)
		return emulate_gp(ctxt, 0);

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		ctxt->op_bytes = 8;
	get(ctxt, &desc_ptr);
	if (ctxt->op_bytes == 2) {
		ctxt->op_bytes = 4;
		desc_ptr.address &= 0x00ffffff;
	}
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return segmented_write_std(ctxt, ctxt->dst.addr.mem,
				   &desc_ptr, 2 + ctxt->op_bytes);
}
static int em_sgdt(struct x86_emulate_ctxt *ctxt)
{
	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
}

static int em_sidt(struct x86_emulate_ctxt *ctxt)
{
	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
}
static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
{
	struct desc_ptr desc_ptr;
	int rc;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		ctxt->op_bytes = 8;
	rc = read_descriptor(ctxt, ctxt->src.addr.mem,
			     &desc_ptr.size, &desc_ptr.address,
			     ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	if (ctxt->mode == X86EMUL_MODE_PROT64 &&
	    emul_is_noncanonical_address(desc_ptr.address, ctxt))
		return emulate_gp(ctxt, 0);
	if (lgdt)
		ctxt->ops->set_gdt(ctxt, &desc_ptr);
	else
		ctxt->ops->set_idt(ctxt, &desc_ptr);
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_lgdt(struct x86_emulate_ctxt *ctxt)
{
	return em_lgdt_lidt(ctxt, true);
}

static int em_lidt(struct x86_emulate_ctxt *ctxt)
{
	return em_lgdt_lidt(ctxt, false);
}
static int em_smsw(struct x86_emulate_ctxt *ctxt)
{
	if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
	    ctxt->ops->cpl(ctxt) > 0)
		return emulate_gp(ctxt, 0);

	if (ctxt->dst.type == OP_MEM)
		ctxt->dst.bytes = 2;
	ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
	return X86EMUL_CONTINUE;
}

static int em_lmsw(struct x86_emulate_ctxt *ctxt)
{
	ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
			  | (ctxt->src.val & 0x0f));
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}
static int em_loop(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;

	register_address_increment(ctxt, VCPU_REGS_RCX, -1);
	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
		rc = jmp_rel(ctxt, ctxt->src.val);

	return rc;
}

static int em_jcxz(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;

	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
		rc = jmp_rel(ctxt, ctxt->src.val);

	return rc;
}
static int em_in(struct x86_emulate_ctxt *ctxt)
{
	if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
			     &ctxt->dst.val))
		return X86EMUL_IO_NEEDED;

	return X86EMUL_CONTINUE;
}

static int em_out(struct x86_emulate_ctxt *ctxt)
{
	ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
				    &ctxt->src.val, 1);
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}
static int em_cli(struct x86_emulate_ctxt *ctxt)
{
	if (emulator_bad_iopl(ctxt))
		return emulate_gp(ctxt, 0);

	ctxt->eflags &= ~X86_EFLAGS_IF;
	return X86EMUL_CONTINUE;
}

static int em_sti(struct x86_emulate_ctxt *ctxt)
{
	if (emulator_bad_iopl(ctxt))
		return emulate_gp(ctxt, 0);

	ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
	ctxt->eflags |= X86_EFLAGS_IF;
	return X86EMUL_CONTINUE;
}
static int em_cpuid(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ebx, ecx, edx;
	u64 msr = 0;

	ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
	if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
	    ctxt->ops->cpl(ctxt)) {
		return emulate_gp(ctxt, 0);
	}

	eax = reg_read(ctxt, VCPU_REGS_RAX);
	ecx = reg_read(ctxt, VCPU_REGS_RCX);
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
	*reg_write(ctxt, VCPU_REGS_RAX) = eax;
	*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
	*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
	*reg_write(ctxt, VCPU_REGS_RDX) = edx;
	return X86EMUL_CONTINUE;
}
static int em_sahf(struct x86_emulate_ctxt *ctxt)
{
	u32 flags;

	flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
		X86_EFLAGS_SF;
	flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;

	ctxt->eflags &= ~0xffUL;
	ctxt->eflags |= flags | X86_EFLAGS_FIXED;
	return X86EMUL_CONTINUE;
}

static int em_lahf(struct x86_emulate_ctxt *ctxt)
{
	*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
	*reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
	return X86EMUL_CONTINUE;
}
static int em_bswap(struct x86_emulate_ctxt *ctxt)
{
	switch (ctxt->op_bytes) {
#ifdef CONFIG_X86_64
	case 8:
		asm("bswap %0" : "+r"(ctxt->dst.val));
		break;
#endif
	default:
		asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
		break;
	}
	return X86EMUL_CONTINUE;
}
static int em_clflush(struct x86_emulate_ctxt *ctxt)
{
	/* emulating clflush regardless of cpuid */
	return X86EMUL_CONTINUE;
}

static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
{
	/* emulating clflushopt regardless of cpuid */
	return X86EMUL_CONTINUE;
}

static int em_movsxd(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.val = (s32) ctxt->src.val;
	return X86EMUL_CONTINUE;
}
static int check_fxsr(struct x86_emulate_ctxt *ctxt)
{
	if (!ctxt->ops->guest_has_fxsr(ctxt))
		return emulate_ud(ctxt);

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	/*
	 * Don't emulate a case that should never be hit, instead of working
	 * around a lack of fxsave64/fxrstor64 on old compilers.
	 */
	if (ctxt->mode >= X86EMUL_MODE_PROT64)
		return X86EMUL_UNHANDLEABLE;

	return X86EMUL_CONTINUE;
}
/*
 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
 * and restore MXCSR.
 */
static size_t __fxstate_size(int nregs)
{
	return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
}

static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
{
	bool cr4_osfxsr;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return __fxstate_size(16);

	cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
	return __fxstate_size(cr4_osfxsr ? 8 : 0);
}
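/*
 * i.e. the save area covers the header plus the XMM space for the given
 * register count: 16 registers in long mode, 8 when legacy mode has
 * CR4.OSFXSR set, and none (FPU/MMX state only) otherwise.
 */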
/*
 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
 *  1) 16 bit mode
 *  2) 32 bit mode
 *     - like (1), but FIP and FDP (foo) are only 16 bit.  At least Intel CPUs
 *       preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
 *       save and restore
 *  3) 64-bit mode with REX.W prefix
 *     - like (2), but XMM 8-15 are being saved and restored
 *  4) 64-bit mode without REX.W prefix
 *     - like (3), but FIP and FDP are 64 bit
 *
 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
 * desired result.  (4) is not emulated.
 *
 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
 * and FPU DS) should match.
 */
static int em_fxsave(struct x86_emulate_ctxt *ctxt)
{
	struct fxregs_state fx_state;
	int rc;

	rc = check_fxsr(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	kvm_fpu_get();

	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));

	kvm_fpu_put();

	if (rc != X86EMUL_CONTINUE)
		return rc;

	return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
		                   fxstate_size(ctxt));
}
/*
 * FXRSTOR might restore XMM registers not provided by the guest. Fill
 * in the host registers (via FXSAVE) instead, so they won't be modified.
 * (preemption has to stay disabled until FXRSTOR).
 *
 * Use noinline to keep the stack for other functions called by callers small.
 */
static noinline int fxregs_fixup(struct fxregs_state *fx_state,
				 const size_t used_size)
{
	struct fxregs_state fx_tmp;
	int rc;

	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
	memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
	       __fxstate_size(16) - used_size);

	return rc;
}
static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
{
	struct fxregs_state fx_state;
	int rc;
	size_t size;

	rc = check_fxsr(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	size = fxstate_size(ctxt);
	rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	kvm_fpu_get();

	if (size < __fxstate_size(16)) {
		rc = fxregs_fixup(&fx_state, size);
		if (rc != X86EMUL_CONTINUE)
			goto out;
	}

	if (fx_state.mxcsr >> 16) {
		rc = emulate_gp(ctxt, 0);
		goto out;
	}

	if (rc == X86EMUL_CONTINUE)
		rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));

out:
	kvm_fpu_put();

	return rc;
}
static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ecx, edx;

	eax = reg_read(ctxt, VCPU_REGS_RAX);
	edx = reg_read(ctxt, VCPU_REGS_RDX);
	ecx = reg_read(ctxt, VCPU_REGS_RCX);

	if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}
static bool valid_cr(int nr)
{
	switch (nr) {
	case 0:
	case 2 ... 4:
	case 8:
		return true;
	default:
		return false;
	}
}

static int check_cr_access(struct x86_emulate_ctxt *ctxt)
{
	if (!valid_cr(ctxt->modrm_reg))
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}
static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
{
	ulong dr7;

	ctxt->ops->get_dr(ctxt, 7, &dr7);

	/* Check if DR7.Global_Enable is set */
	return dr7 & (1 << 13);
}
static int check_dr_read(struct x86_emulate_ctxt *ctxt)
{
	int dr = ctxt->modrm_reg;
	u64 cr4;

	if (dr > 7)
		return emulate_ud(ctxt);

	cr4 = ctxt->ops->get_cr(ctxt, 4);
	if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
		return emulate_ud(ctxt);

	if (check_dr7_gd(ctxt)) {
		ulong dr6;

		ctxt->ops->get_dr(ctxt, 6, &dr6);
		dr6 &= ~DR_TRAP_BITS;
		dr6 |= DR6_BD | DR6_ACTIVE_LOW;
		ctxt->ops->set_dr(ctxt, 6, dr6);
		return emulate_db(ctxt);
	}

	return X86EMUL_CONTINUE;
}
static int check_dr_write(struct x86_emulate_ctxt *ctxt)
{
	u64 new_val = ctxt->src.val64;
	int dr = ctxt->modrm_reg;

	if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
		return emulate_gp(ctxt, 0);

	return check_dr_read(ctxt);
}
static int check_svme(struct x86_emulate_ctxt *ctxt)
{
	u64 efer = 0;

	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);

	if (!(efer & EFER_SVME))
		return emulate_ud(ctxt);

	return X86EMUL_CONTINUE;
}

static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
{
	u64 rax = reg_read(ctxt, VCPU_REGS_RAX);

	/* Valid physical address? */
	if (rax & 0xffff000000000000ULL)
		return emulate_gp(ctxt, 0);

	return check_svme(ctxt);
}
static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
{
	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);

	if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
{
	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);

	/*
	 * VMware allows access to these Pseduo-PMCs even when read via RDPMC
	 * in Ring3 when CR4.PCE=0.
	 */
	if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
		return X86EMUL_CONTINUE;

	/*
	 * If CR4.PCE is set, the SDM requires CPL=0 or CR0.PE=0.  The CR0.PE
	 * check however is unnecessary because CPL is always 0 outside
	 * protected mode.
	 */
	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
	    ctxt->ops->check_pmc(ctxt, rcx))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}
static int check_perm_in(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
	if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}

static int check_perm_out(struct x86_emulate_ctxt *ctxt)
{
	ctxt->src.bytes = min(ctxt->src.bytes, 4u);
	if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}
#define D(_y) { .flags = (_y) }
#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
		      .intercept = x86_intercept_##_i, .check_perm = (_p) }
#define N    D(NotImpl)
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
#define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
#define II(_f, _e, _i) \
	{ .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
#define IIP(_f, _e, _i, _p) \
	{ .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
	  .intercept = x86_intercept_##_i, .check_perm = (_p) }
#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }

#define D2bv(_f)      D((_f) | ByteOp), D(_f)
#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
#define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e)
#define F2bv(_f, _e)  F((_f) | ByteOp, _e), F(_f, _e)
#define I2bvIP(_f, _e, _i, _p) \
	IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)

#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
		F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
		F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
static const struct opcode group7_rm0[] = {
	N,
	I(SrcNone | Priv | EmulateOnUD,	em_hypercall),
	N, N, N, N, N, N,
};

static const struct opcode group7_rm1[] = {
	DI(SrcNone | Priv, monitor),
	DI(SrcNone | Priv, mwait),
	N, N, N, N, N, N,
};

static const struct opcode group7_rm2[] = {
	N,
	II(ImplicitOps | Priv, em_xsetbv, xsetbv),
	N, N, N, N, N, N,
};

static const struct opcode group7_rm3[] = {
	DIP(SrcNone | Prot | Priv,		vmrun,		check_svme_pa),
	II(SrcNone  | Prot | EmulateOnUD,	em_hypercall,	vmmcall),
	DIP(SrcNone | Prot | Priv,		vmload,		check_svme_pa),
	DIP(SrcNone | Prot | Priv,		vmsave,		check_svme_pa),
	DIP(SrcNone | Prot | Priv,		stgi,		check_svme),
	DIP(SrcNone | Prot | Priv,		clgi,		check_svme),
	DIP(SrcNone | Prot | Priv,		skinit,		check_svme),
	DIP(SrcNone | Prot | Priv,		invlpga,	check_svme),
};

static const struct opcode group7_rm7[] = {
	N,
	DIP(SrcNone, rdtscp, check_rdtsc),
	N, N, N, N, N, N,
};
static const struct opcode group1[] = {
	F(Lock, em_add),
	F(Lock | PageTable, em_or),
	F(Lock, em_adc),
	F(Lock, em_sbb),
	F(Lock | PageTable, em_and),
	F(Lock, em_sub),
	F(Lock, em_xor),
	F(NoWrite, em_cmp),
};

static const struct opcode group1A[] = {
	I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
};
static const struct opcode group2[] = {
	F(DstMem | ModRM, em_rol),
	F(DstMem | ModRM, em_ror),
	F(DstMem | ModRM, em_rcl),
	F(DstMem | ModRM, em_rcr),
	F(DstMem | ModRM, em_shl),
	F(DstMem | ModRM, em_shr),
	F(DstMem | ModRM, em_shl),
	F(DstMem | ModRM, em_sar),
};
static const struct opcode group3[] = {
	F(DstMem | SrcImm | NoWrite, em_test),
	F(DstMem | SrcImm | NoWrite, em_test),
	F(DstMem | SrcNone | Lock, em_not),
	F(DstMem | SrcNone | Lock, em_neg),
	F(DstXacc | Src2Mem, em_mul_ex),
	F(DstXacc | Src2Mem, em_imul_ex),
	F(DstXacc | Src2Mem, em_div_ex),
	F(DstXacc | Src2Mem, em_idiv_ex),
};
static const struct opcode group4[] = {
	F(ByteOp | DstMem | SrcNone | Lock, em_inc),
	F(ByteOp | DstMem | SrcNone | Lock, em_dec),
	N, N, N, N, N, N,
};
static const struct opcode group5[] = {
	F(DstMem | SrcNone | Lock,		em_inc),
	F(DstMem | SrcNone | Lock,		em_dec),
	I(SrcMem | NearBranch | IsBranch,	em_call_near_abs),
	I(SrcMemFAddr | ImplicitOps | IsBranch,	em_call_far),
	I(SrcMem | NearBranch | IsBranch,	em_jmp_abs),
	I(SrcMemFAddr | ImplicitOps | IsBranch,	em_jmp_far),
	I(SrcMem | Stack | TwoMemOp,		em_push), D(Undefined),
};
static const struct opcode group6[] = {
	II(Prot | DstMem,	   em_sldt, sldt),
	II(Prot | DstMem,	   em_str, str),
	II(Prot | Priv | SrcMem16, em_lldt, lldt),
	II(Prot | Priv | SrcMem16, em_ltr, ltr),
	N, N, N, N,
};
static const struct group_dual group7 = { {
	II(Mov | DstMem,			em_sgdt, sgdt),
	II(Mov | DstMem,			em_sidt, sidt),
	II(SrcMem | Priv,			em_lgdt, lgdt),
	II(SrcMem | Priv,			em_lidt, lidt),
	II(SrcNone | DstMem | Mov,		em_smsw, smsw), N,
	II(SrcMem16 | Mov | Priv,		em_lmsw, lmsw),
	II(SrcMem | ByteOp | Priv | NoAccess,	em_invlpg, invlpg),
}, {
	EXT(0, group7_rm0),
	EXT(0, group7_rm1),
	EXT(0, group7_rm2),
	EXT(0, group7_rm3),
	II(SrcNone | DstMem | Mov,		em_smsw, smsw), N,
	II(SrcMem16 | Mov | Priv,		em_lmsw, lmsw),
	EXT(0, group7_rm7),
} };
static const struct opcode group8[] = {
	N, N, N, N,
	F(DstMem | SrcImmByte | NoWrite,		em_bt),
	F(DstMem | SrcImmByte | Lock | PageTable,	em_bts),
	F(DstMem | SrcImmByte | Lock,			em_btr),
	F(DstMem | SrcImmByte | Lock | PageTable,	em_btc),
};
4435 * The "memory" destination is actually always a register, since we come
4436 * from the register case of group9.
4438 static const struct gprefix pfx_0f_c7_7
= {
4439 N
, N
, N
, II(DstMem
| ModRM
| Op3264
| EmulateOnUD
, em_rdpid
, rdpid
),
4443 static const struct group_dual group9
= { {
4444 N
, I(DstMem64
| Lock
| PageTable
, em_cmpxchg8b
), N
, N
, N
, N
, N
, N
,
4446 N
, N
, N
, N
, N
, N
, N
,
4447 GP(0, &pfx_0f_c7_7
),
static const struct opcode group11[] = {
	I(DstMem | SrcImm | Mov | PageTable, em_mov),
	X7(D(Undefined)),
};

static const struct gprefix pfx_0f_ae_7 = {
	I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
};

static const struct group_dual group15 = { {
	I(ModRM | Aligned16, em_fxsave),
	I(ModRM | Aligned16, em_fxrstor),
	N, N, N, N, N, GP(0, &pfx_0f_ae_7),
}, {
	N, N, N, N, N, N, N, N,
} };
static const struct gprefix pfx_0f_6f_0f_7f = {
	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
};

static const struct instr_dual instr_dual_0f_2b = {
	I(0, em_mov), N
};

static const struct gprefix pfx_0f_2b = {
	ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
};

static const struct gprefix pfx_0f_10_0f_11 = {
	I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
};

static const struct gprefix pfx_0f_28_0f_29 = {
	I(Aligned, em_mov), I(Aligned, em_mov), N, N,
};

static const struct gprefix pfx_0f_e7 = {
	N, I(Sse, em_mov), N, N,
};
static const struct escape escape_d9 = { {
	N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, N, N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };
static const struct escape escape_db = { {
	N, N, N, N, N, N, N, N,
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };
static const struct escape escape_dd = { {
	N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, N, N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };
static const struct instr_dual instr_dual_0f_c3 = {
	I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
};

static const struct mode_dual mode_dual_63 = {
	N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
};
static const struct opcode opcode_table[256] = {
        /* 0x00 - 0x07 */
        F6ALU(Lock, em_add),
        I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
        I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
        /* 0x08 - 0x0F */
        F6ALU(Lock | PageTable, em_or),
        I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
        N,
        /* 0x10 - 0x17 */
        F6ALU(Lock, em_adc),
        I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
        I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
        /* 0x18 - 0x1F */
        F6ALU(Lock, em_sbb),
        I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
        I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
        /* 0x20 - 0x27 */
        F6ALU(Lock | PageTable, em_and), N, N,
        /* 0x28 - 0x2F */
        F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
        /* 0x30 - 0x37 */
        F6ALU(Lock, em_xor), N, N,
        /* 0x38 - 0x3F */
        F6ALU(NoWrite, em_cmp), N, N,
        /* 0x40 - 0x4F */
        X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
        /* 0x50 - 0x57 */
        X8(I(SrcReg | Stack, em_push)),
        /* 0x58 - 0x5F */
        X8(I(DstReg | Stack, em_pop)),
        /* 0x60 - 0x67 */
        I(ImplicitOps | Stack | No64, em_pusha),
        I(ImplicitOps | Stack | No64, em_popa),
        N, MD(ModRM, &mode_dual_63),
        N, N, N, N,
        /* 0x68 - 0x6F */
        I(SrcImm | Mov | Stack, em_push),
        I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
        I(SrcImmByte | Mov | Stack, em_push),
        I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
        I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
        I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
        /* 0x70 - 0x7F */
        X16(D(SrcImmByte | NearBranch | IsBranch)),
        /* 0x80 - 0x87 */
        G(ByteOp | DstMem | SrcImm, group1),
        G(DstMem | SrcImm, group1),
        G(ByteOp | DstMem | SrcImm | No64, group1),
        G(DstMem | SrcImmByte, group1),
        F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
        I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
        /* 0x88 - 0x8F */
        I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
        I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
        I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
        D(ModRM | SrcMem | NoAccess | DstReg),
        I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
        G(0, group1A),
        /* 0x90 - 0x97 */
        DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
        /* 0x98 - 0x9F */
        D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
        I(SrcImmFAddr | No64 | IsBranch, em_call_far), N,
        II(ImplicitOps | Stack, em_pushf, pushf),
        II(ImplicitOps | Stack, em_popf, popf),
        I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
        /* 0xA0 - 0xA7 */
        I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
        I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
        I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
        F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
        /* 0xA8 - 0xAF */
        F2bv(DstAcc | SrcImm | NoWrite, em_test),
        I2bv(SrcAcc | DstDI | Mov | String, em_mov),
        I2bv(SrcSI | DstAcc | Mov | String, em_mov),
        F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
        /* 0xB0 - 0xB7 */
        X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
        /* 0xB8 - 0xBF */
        X8(I(DstReg | SrcImm64 | Mov, em_mov)),
        /* 0xC0 - 0xC7 */
        G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
        I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm),
        I(ImplicitOps | NearBranch | IsBranch, em_ret),
        I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
        I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
        G(ByteOp, group11), G(0, group11),
        /* 0xC8 - 0xCF */
        I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
        I(Stack | IsBranch, em_leave),
        I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
        I(ImplicitOps | IsBranch, em_ret_far),
        D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
        D(ImplicitOps | No64 | IsBranch),
        II(ImplicitOps | IsBranch, em_iret, iret),
        /* 0xD0 - 0xD7 */
        G(Src2One | ByteOp, group2), G(Src2One, group2),
        G(Src2CL | ByteOp, group2), G(Src2CL, group2),
        I(DstAcc | SrcImmUByte | No64, em_aam),
        I(DstAcc | SrcImmUByte | No64, em_aad),
        F(DstAcc | ByteOp | No64, em_salc),
        I(DstAcc | SrcXLat | ByteOp, em_mov),
        /* 0xD8 - 0xDF */
        N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
        /* 0xE0 - 0xE7 */
        X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)),
        I(SrcImmByte | NearBranch | IsBranch, em_jcxz),
        I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
        I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
        /* 0xE8 - 0xEF */
        I(SrcImm | NearBranch | IsBranch, em_call),
        D(SrcImm | ImplicitOps | NearBranch | IsBranch),
        I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
        D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
        I2bvIP(SrcDX | DstAcc, em_in,  in,  check_perm_in),
        I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
        /* 0xF0 - 0xF7 */
        N, DI(ImplicitOps, icebp), N, N,
        DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
        G(ByteOp, group3), G(0, group3),
        /* 0xF8 - 0xFF */
        D(ImplicitOps), D(ImplicitOps),
        I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
        D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
};

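/*
 * Worked example (editor's sketch): for the one-byte instruction 88 /r
 * (mov r/m8, r8), opcode_table[0x88] is
 * I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov). The decoder
 * therefore fetches a ModRM byte, resolves the destination as memory or
 * register and the source as a GPR, and dispatches to em_mov; PageTable
 * additionally marks the insn as one that may write a guest page table.
 */
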
static const struct opcode twobyte_table[256] = {
        /* 0x00 - 0x0F */
        G(0, group6), GD(0, &group7), N, N,
        N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall),
        II(ImplicitOps | Priv, em_clts, clts), N,
        DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
        N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
        /* 0x10 - 0x1F */
        GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
        GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
        N, N, N, N, N, N,
        D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
        D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
        D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
        D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
        D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
        D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
        /* 0x20 - 0x2F */
        DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
        DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
        IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
                                                    check_cr_access),
        IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
                                                    check_dr_write),
        N, N, N, N,
        GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
        GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
        N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
        N, N, N, N,
        /* 0x30 - 0x3F */
        II(ImplicitOps | Priv, em_wrmsr, wrmsr),
        IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
        II(ImplicitOps | Priv, em_rdmsr, rdmsr),
        IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
        I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter),
        I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit),
        N, N,
        N, N, N, N, N, N, N, N,
        /* 0x40 - 0x4F */
        X16(D(DstReg | SrcMem | ModRM)),
        /* 0x50 - 0x5F */
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
        /* 0x60 - 0x6F */
        N, N, N, N,
        N, N, N, N,
        N, N, N, N,
        N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
        /* 0x70 - 0x7F */
        N, N, N, N,
        N, N, N, N,
        N, N, N, N,
        N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
        /* 0x80 - 0x8F */
        X16(D(SrcImm | NearBranch | IsBranch)),
        /* 0x90 - 0x9F */
        X16(D(ByteOp | DstMem | SrcNone | ModRM | Mov)),
        /* 0xA0 - 0xA7 */
        I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
        II(ImplicitOps, em_cpuid, cpuid),
        F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
        F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
        F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
        /* 0xA8 - 0xAF */
        I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
        II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
        F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
        F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
        F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
        GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
        /* 0xB0 - 0xB7 */
        I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
        I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
        F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
        I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
        I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
        D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
        /* 0xB8 - 0xBF */
        N, N,
        G(BitOp, group8),
        F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
        I(DstReg | SrcMem | ModRM, em_bsf_c),
        I(DstReg | SrcMem | ModRM, em_bsr_c),
        D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
        /* 0xC0 - 0xC7 */
        F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
        N, ID(0, &instr_dual_0f_c3),
        N, N, N, GD(0, &group9),
        /* 0xC8 - 0xCF */
        X8(I(DstReg, em_bswap)),
        /* 0xD0 - 0xDF */
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
        /* 0xE0 - 0xEF */
        N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
        N, N, N, N, N, N, N, N,
        /* 0xF0 - 0xFF */
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
};

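/*
 * Worked example (editor's sketch): for 0F AE, twobyte_table[0xae] is
 * GD(0, &group15), so the GroupDual handling in x86_decode_insn() below
 * selects an entry by (modrm >> 3) & 7 and by whether mod == 3. With a
 * memory operand and /7, that lands on GP(0, &pfx_0f_ae_7), where the
 * mandatory prefix then distinguishes clflush (no prefix) from
 * clflushopt (0x66).
 */
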
static const struct instr_dual instr_dual_0f_38_f0 = {
        I(DstReg | SrcMem | Mov, em_movbe), N
};

static const struct instr_dual instr_dual_0f_38_f1 = {
        I(DstMem | SrcReg | Mov, em_movbe), N
};

static const struct gprefix three_byte_0f_38_f0 = {
        ID(0, &instr_dual_0f_38_f0), N, N, N
};

static const struct gprefix three_byte_0f_38_f1 = {
        ID(0, &instr_dual_0f_38_f1), N, N, N
};

/*
 * Insns below are selected by the prefix, which is indexed by the third
 * opcode byte.
 */
static const struct opcode opcode_map_0f_38[256] = {
        /* 0x00 - 0x7f */
        X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
        /* 0x80 - 0xef */
        X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
        /* 0xf0 - 0xf1 */
        GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
        GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
        /* 0xf2 - 0xff */
        N, N, X4(N), X8(N)
};

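/*
 * Worked example (editor's sketch): for 0F 38 F0 /r with a memory source,
 * opcode_map_0f_38[0xf0] is GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0);
 * with no mandatory prefix that selects ID(0, &instr_dual_0f_38_f0), and
 * mod != 3 picks I(DstReg | SrcMem | Mov, em_movbe), i.e. MOVBE r, m.
 * The mod == 3 slot is N, since MOVBE has no register-register form.
 */
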
static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
{
        unsigned size;

        size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
        if (size == 8)
                size = 4;
        return size;
}

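/*
 * Editor's note: the clamp above reflects the x86 rule that, outside of
 * MOV r64, imm64 (handled via OpImm64), there are no 64-bit immediates;
 * e.g. with REX.W "add rax, imm" op_bytes is 8 but the immediate is still
 * 4 bytes, sign-extended by decode_imm() below.
 */
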
static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
                      unsigned size, bool sign_extension)
{
        int rc = X86EMUL_CONTINUE;

        op->type = OP_IMM;
        op->bytes = size;
        op->addr.mem.ea = ctxt->_eip;
        /* NB. Immediates are sign-extended as necessary. */
        switch (op->bytes) {
        case 1:
                op->val = insn_fetch(s8, ctxt);
                break;
        case 2:
                op->val = insn_fetch(s16, ctxt);
                break;
        case 4:
                op->val = insn_fetch(s32, ctxt);
                break;
        case 8:
                op->val = insn_fetch(s64, ctxt);
                break;
        }
        if (!sign_extension) {
                switch (op->bytes) {
                case 1:
                        op->val &= 0xff;
                        break;
                case 2:
                        op->val &= 0xffff;
                        break;
                case 4:
                        op->val &= 0xffffffff;
                        break;
                }
        }
done:
        return rc;
}

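/*
 * Worked example (editor's sketch): decode_imm(ctxt, op, 1, true) on the
 * byte 0xff stores op->val = -1 (all ones after sign extension), while
 * decode_imm(ctxt, op, 1, false) masks the result back to 0xff -- the
 * difference between OpImmByte and OpImmUByte in the tables above.
 */
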
static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
                          unsigned d)
{
        int rc = X86EMUL_CONTINUE;

        switch (d) {
        case OpReg:
                decode_register_operand(ctxt, op);
                break;
        case OpImmUByte:
                rc = decode_imm(ctxt, op, 1, false);
                break;
        case OpMem:
                ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
        mem_common:
                *op = ctxt->memop;
                ctxt->memopp = op;
                if (ctxt->d & BitOp)
                        fetch_bit_operand(ctxt);
                op->orig_val = op->val;
                break;
        case OpMem64:
                ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
                goto mem_common;
        case OpAcc:
                op->type = OP_REG;
                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
                op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
                fetch_register_operand(op);
                op->orig_val = op->val;
                break;
        case OpAccLo:
                op->type = OP_REG;
                op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
                op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
                fetch_register_operand(op);
                op->orig_val = op->val;
                break;
        case OpAccHi:
                if (ctxt->d & ByteOp) {
                        op->type = OP_NONE;
                        break;
                }
                op->type = OP_REG;
                op->bytes = ctxt->op_bytes;
                op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
                fetch_register_operand(op);
                op->orig_val = op->val;
                break;
        case OpDI:
                op->type = OP_MEM;
                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
                op->addr.mem.ea =
                        register_address(ctxt, VCPU_REGS_RDI);
                op->addr.mem.seg = VCPU_SREG_ES;
                op->val = 0;
                op->count = 1;
                break;
        case OpDX:
                op->type = OP_REG;
                op->bytes = 2;
                op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
                fetch_register_operand(op);
                break;
        case OpCL:
                op->type = OP_IMM;
                op->bytes = 1;
                op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
                break;
        case OpImmByte:
                rc = decode_imm(ctxt, op, 1, true);
                break;
        case OpOne:
                op->type = OP_IMM;
                op->bytes = 1;
                op->val = 1;
                break;
        case OpImm:
                rc = decode_imm(ctxt, op, imm_size(ctxt), true);
                break;
        case OpImm64:
                rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
                break;
        case OpMem8:
                ctxt->memop.bytes = 1;
                if (ctxt->memop.type == OP_REG) {
                        ctxt->memop.addr.reg = decode_register(ctxt,
                                        ctxt->modrm_rm, true);
                        fetch_register_operand(&ctxt->memop);
                }
                goto mem_common;
        case OpMem16:
                ctxt->memop.bytes = 2;
                goto mem_common;
        case OpMem32:
                ctxt->memop.bytes = 4;
                goto mem_common;
        case OpImmU16:
                rc = decode_imm(ctxt, op, 2, false);
                break;
        case OpImmU:
                rc = decode_imm(ctxt, op, imm_size(ctxt), false);
                break;
        case OpSI:
                op->type = OP_MEM;
                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
                op->addr.mem.ea =
                        register_address(ctxt, VCPU_REGS_RSI);
                op->addr.mem.seg = ctxt->seg_override;
                op->val = 0;
                op->count = 1;
                break;
        case OpXLat:
                op->type = OP_MEM;
                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
                op->addr.mem.ea =
                        address_mask(ctxt,
                                reg_read(ctxt, VCPU_REGS_RBX) +
                                (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
                op->addr.mem.seg = ctxt->seg_override;
                op->val = 0;
                break;
        case OpImmFAddr:
                op->type = OP_IMM;
                op->addr.mem.ea = ctxt->_eip;
                op->bytes = ctxt->op_bytes + 2;
                insn_fetch_arr(op->valptr, op->bytes, ctxt);
                break;
        case OpMemFAddr:
                ctxt->memop.bytes = ctxt->op_bytes + 2;
                goto mem_common;
        case OpES:
                op->type = OP_IMM;
                op->val = VCPU_SREG_ES;
                break;
        case OpCS:
                op->type = OP_IMM;
                op->val = VCPU_SREG_CS;
                break;
        case OpSS:
                op->type = OP_IMM;
                op->val = VCPU_SREG_SS;
                break;
        case OpDS:
                op->type = OP_IMM;
                op->val = VCPU_SREG_DS;
                break;
        case OpFS:
                op->type = OP_IMM;
                op->val = VCPU_SREG_FS;
                break;
        case OpGS:
                op->type = OP_IMM;
                op->val = VCPU_SREG_GS;
                break;
        case OpImplicit:
                /* Special instructions do their own operand decoding. */
        default:
                op->type = OP_NONE; /* Disable writeback. */
                break;
        }

done:
        return rc;
}

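/*
 * Editor's note: the 'd' argument is one of the Op* selectors extracted
 * from ctxt->d with the shift/mask scheme defined at the top of this file.
 * For example, for an entry built as DstMem | SrcImm,
 * (ctxt->d >> SrcShift) & OpMask yields OpImm and
 * (ctxt->d >> DstShift) & OpMask yields OpMem, so x86_decode_insn() below
 * calls decode_operand() once per operand with the right selector.
 */
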
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
{
        int rc = X86EMUL_CONTINUE;
        int mode = ctxt->mode;
        int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
        bool op_prefix = false;
        bool has_seg_override = false;
        struct opcode opcode;
        u16 dummy;
        struct desc_struct desc;

        ctxt->memop.type = OP_NONE;
        ctxt->memopp = NULL;
        ctxt->_eip = ctxt->eip;
        ctxt->fetch.ptr = ctxt->fetch.data;
        ctxt->fetch.end = ctxt->fetch.data + insn_len;
        ctxt->opcode_len = 1;
        ctxt->intercept = x86_intercept_none;
        if (insn_len > 0)
                memcpy(ctxt->fetch.data, insn, insn_len);
        else {
                rc = __do_insn_fetch_bytes(ctxt, 1);
                if (rc != X86EMUL_CONTINUE)
                        goto done;
        }

        switch (mode) {
        case X86EMUL_MODE_REAL:
        case X86EMUL_MODE_VM86:
                def_op_bytes = def_ad_bytes = 2;
                ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
                if (desc.d)
                        def_op_bytes = def_ad_bytes = 4;
                break;
        case X86EMUL_MODE_PROT16:
                def_op_bytes = def_ad_bytes = 2;
                break;
        case X86EMUL_MODE_PROT32:
                def_op_bytes = def_ad_bytes = 4;
                break;
#ifdef CONFIG_X86_64
        case X86EMUL_MODE_PROT64:
                def_op_bytes = 4;
                def_ad_bytes = 8;
                break;
#endif
        default:
                return EMULATION_FAILED;
        }

        ctxt->op_bytes = def_op_bytes;
        ctxt->ad_bytes = def_ad_bytes;

        /* Legacy prefixes. */
        for (;;) {
                switch (ctxt->b = insn_fetch(u8, ctxt)) {
                case 0x66:      /* operand-size override */
                        op_prefix = true;
                        /* switch between 2/4 bytes */
                        ctxt->op_bytes = def_op_bytes ^ 6;
                        break;
                case 0x67:      /* address-size override */
                        if (mode == X86EMUL_MODE_PROT64)
                                /* switch between 4/8 bytes */
                                ctxt->ad_bytes = def_ad_bytes ^ 12;
                        else
                                /* switch between 2/4 bytes */
                                ctxt->ad_bytes = def_ad_bytes ^ 6;
                        break;
                case 0x26:      /* ES override */
                        has_seg_override = true;
                        ctxt->seg_override = VCPU_SREG_ES;
                        break;
                case 0x2e:      /* CS override */
                        has_seg_override = true;
                        ctxt->seg_override = VCPU_SREG_CS;
                        break;
                case 0x36:      /* SS override */
                        has_seg_override = true;
                        ctxt->seg_override = VCPU_SREG_SS;
                        break;
                case 0x3e:      /* DS override */
                        has_seg_override = true;
                        ctxt->seg_override = VCPU_SREG_DS;
                        break;
                case 0x64:      /* FS override */
                        has_seg_override = true;
                        ctxt->seg_override = VCPU_SREG_FS;
                        break;
                case 0x65:      /* GS override */
                        has_seg_override = true;
                        ctxt->seg_override = VCPU_SREG_GS;
                        break;
                case 0x40 ... 0x4f: /* REX */
                        if (mode != X86EMUL_MODE_PROT64)
                                goto done_prefixes;
                        ctxt->rex_prefix = ctxt->b;
                        continue;
                case 0xf0:      /* LOCK */
                        ctxt->lock_prefix = 1;
                        break;
                case 0xf2:      /* REPNE/REPNZ */
                case 0xf3:      /* REP/REPE/REPZ */
                        ctxt->rep_prefix = ctxt->b;
                        break;
                default:
                        goto done_prefixes;
                }

                /* Any legacy prefix after a REX prefix nullifies its effect. */

                ctxt->rex_prefix = 0;
        }

done_prefixes:

        /* REX prefix. */
        if (ctxt->rex_prefix & 8)
                ctxt->op_bytes = 8;     /* REX.W */

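        /*
         * Editor's note on the XOR trick above: with def_op_bytes of 2 or 4,
         * 'def_op_bytes ^ 6' toggles 2 <-> 4, and with def_ad_bytes of 4 or
         * 8, 'def_ad_bytes ^ 12' toggles 4 <-> 8, so repeated 0x66/0x67
         * prefixes are idempotent rather than cumulative. REX.W then forces
         * the operand size to 8 regardless of any 0x66 prefix.
         */
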
        /* Opcode byte(s). */
        opcode = opcode_table[ctxt->b];
        /* Two-byte opcode? */
        if (ctxt->b == 0x0f) {
                ctxt->opcode_len = 2;
                ctxt->b = insn_fetch(u8, ctxt);
                opcode = twobyte_table[ctxt->b];

                /* 0F_38 opcode map */
                if (ctxt->b == 0x38) {
                        ctxt->opcode_len = 3;
                        ctxt->b = insn_fetch(u8, ctxt);
                        opcode = opcode_map_0f_38[ctxt->b];
                }
        }
        ctxt->d = opcode.flags;

        if (ctxt->d & ModRM)
                ctxt->modrm = insn_fetch(u8, ctxt);

        /* vex-prefix instructions are not implemented */
        if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
            (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
                ctxt->d = NotImpl;
        }

        while (ctxt->d & GroupMask) {
                switch (ctxt->d & GroupMask) {
                case Group:
                        goffset = (ctxt->modrm >> 3) & 7;
                        opcode = opcode.u.group[goffset];
                        break;
                case GroupDual:
                        goffset = (ctxt->modrm >> 3) & 7;
                        if ((ctxt->modrm >> 6) == 3)
                                opcode = opcode.u.gdual->mod3[goffset];
                        else
                                opcode = opcode.u.gdual->mod012[goffset];
                        break;
                case RMExt:
                        goffset = ctxt->modrm & 7;
                        opcode = opcode.u.group[goffset];
                        break;
                case Prefix:
                        if (ctxt->rep_prefix && op_prefix)
                                return EMULATION_FAILED;
                        simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
                        switch (simd_prefix) {
                        case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
                        case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
                        case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
                        case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
                        }
                        break;
                case Escape:
                        if (ctxt->modrm > 0xbf) {
                                size_t size = ARRAY_SIZE(opcode.u.esc->high);
                                u32 index = array_index_nospec(
                                        ctxt->modrm - 0xc0, size);

                                opcode = opcode.u.esc->high[index];
                        } else {
                                opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
                        }
                        break;
                case InstrDual:
                        if ((ctxt->modrm >> 6) == 3)
                                opcode = opcode.u.idual->mod3;
                        else
                                opcode = opcode.u.idual->mod012;
                        break;
                case ModeDual:
                        if (ctxt->mode == X86EMUL_MODE_PROT64)
                                opcode = opcode.u.mdual->mode64;
                        else
                                opcode = opcode.u.mdual->mode32;
                        break;
                default:
                        return EMULATION_FAILED;
                }

                ctxt->d &= ~(u64)GroupMask;
                ctxt->d |= opcode.flags;
        }

        ctxt->is_branch = opcode.flags & IsBranch;

        /* Unrecognised? */
        if (ctxt->d == 0)
                return EMULATION_FAILED;

        ctxt->execute = opcode.u.execute;

        if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
            likely(!(ctxt->d & EmulateOnUD)))
                return EMULATION_FAILED;

        if (unlikely(ctxt->d &
            (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
             No16))) {
                /*
                 * These are copied unconditionally here, and checked unconditionally
                 * in x86_emulate_insn.
                 */
                ctxt->check_perm = opcode.check_perm;
                ctxt->intercept = opcode.intercept;

                if (ctxt->d & NotImpl)
                        return EMULATION_FAILED;

                if (mode == X86EMUL_MODE_PROT64) {
                        if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
                                ctxt->op_bytes = 8;
                        else if (ctxt->d & NearBranch)
                                ctxt->op_bytes = 8;
                }

                if (ctxt->d & Op3264) {
                        if (mode == X86EMUL_MODE_PROT64)
                                ctxt->op_bytes = 8;
                        else
                                ctxt->op_bytes = 4;
                }

                if ((ctxt->d & No16) && ctxt->op_bytes == 2)
                        ctxt->op_bytes = 4;

                if (ctxt->d & Sse)
                        ctxt->op_bytes = 16;
                else if (ctxt->d & Mmx)
                        ctxt->op_bytes = 8;
        }

        /* ModRM and SIB bytes. */
        if (ctxt->d & ModRM) {
                rc = decode_modrm(ctxt, &ctxt->memop);
                if (!has_seg_override) {
                        has_seg_override = true;
                        ctxt->seg_override = ctxt->modrm_seg;
                }
        } else if (ctxt->d & MemAbs)
                rc = decode_abs(ctxt, &ctxt->memop);
        if (rc != X86EMUL_CONTINUE)
                goto done;

        if (!has_seg_override)
                ctxt->seg_override = VCPU_SREG_DS;

        ctxt->memop.addr.mem.seg = ctxt->seg_override;

        /*
         * Decode and fetch the source operand: register, memory
         * or immediate.
         */
        rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
        if (rc != X86EMUL_CONTINUE)
                goto done;

        /*
         * Decode and fetch the second source operand: register, memory
         * or immediate.
         */
        rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
        if (rc != X86EMUL_CONTINUE)
                goto done;

        /* Decode and fetch the destination operand: register or memory. */
        rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);

        if (ctxt->rip_relative && likely(ctxt->memopp))
                ctxt->memopp->addr.mem.ea = address_mask(ctxt,
                                ctxt->memopp->addr.mem.ea + ctxt->_eip);

done:
        if (rc == X86EMUL_PROPAGATE_FAULT)
                ctxt->have_exception = true;
        return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
}

bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
{
        return ctxt->d & PageTable;
}

static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
{
        /*
         * The second termination condition only applies for REPE and REPNE.
         * Test if the repeat string operation prefix is REPE/REPZ or
         * REPNE/REPNZ and, if so, check the corresponding termination
         * condition:
         *      - if REPE/REPZ and ZF = 0 then done
         *      - if REPNE/REPNZ and ZF = 1 then done
         */
        if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
             (ctxt->b == 0xae) || (ctxt->b == 0xaf))
            && (((ctxt->rep_prefix == REPE_PREFIX) &&
                 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
                || ((ctxt->rep_prefix == REPNE_PREFIX) &&
                    ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
                return true;

        return false;
}

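/*
 * Worked example (editor's sketch): for REPE CMPSB (F3 A6), a byte
 * mismatch clears ZF, so the check above reports the string insn as
 * completed even though RCX may still be non-zero. For plain REP MOVS
 * (opcodes 0xa4/0xa5) none of the opcodes above match, and only the
 * RCX == 0 test in x86_emulate_insn() terminates the loop.
 */
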
static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
        int rc;

        kvm_fpu_get();
        rc = asm_safe("fwait");
        kvm_fpu_put();

        if (unlikely(rc != X86EMUL_CONTINUE))
                return emulate_exception(ctxt, MF_VECTOR, 0, false);

        return X86EMUL_CONTINUE;
}

static void fetch_possible_mmx_operand(struct operand *op)
{
        if (op->type == OP_MM)
                kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
}

static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
{
        ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;

        if (!(ctxt->d & ByteOp))
                fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;

        asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
            : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
              [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
            : "c"(ctxt->src2.val));

        ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
        if (!fop) /* exception is returned in fop variable */
                return emulate_de(ctxt);
        return X86EMUL_CONTINUE;
}

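/*
 * Editor's note: fastop tables place the 1/2/4/8-byte variants of an
 * operation FASTOP_SIZE bytes apart, so '__ffs(ctxt->dst.bytes) *
 * FASTOP_SIZE' selects the right stub (e.g. dst.bytes == 4 gives
 * __ffs(4) == 2, i.e. the third slot). ByteOp entries skip the adjustment
 * because the byte variant sits at offset 0.
 */
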
void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
        /* Clear fields that are set conditionally but read without a guard. */
        ctxt->rip_relative = false;
        ctxt->rex_prefix = 0;
        ctxt->lock_prefix = 0;
        ctxt->rep_prefix = 0;
        ctxt->regs_valid = 0;
        ctxt->regs_dirty = 0;

        ctxt->io_read.pos = 0;
        ctxt->io_read.end = 0;
        ctxt->mem_read.end = 0;
}

int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
        const struct x86_emulate_ops *ops = ctxt->ops;
        int rc = X86EMUL_CONTINUE;
        int saved_dst_type = ctxt->dst.type;
        unsigned emul_flags;

        ctxt->mem_read.pos = 0;

        /* LOCK prefix is allowed only with some instructions */
        if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
                rc = emulate_ud(ctxt);
                goto done;
        }

        if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
                rc = emulate_ud(ctxt);
                goto done;
        }

        emul_flags = ctxt->ops->get_hflags(ctxt);
        if (unlikely(ctxt->d &
                     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
                if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
                                (ctxt->d & Undefined)) {
                        rc = emulate_ud(ctxt);
                        goto done;
                }

                if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
                    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
                        rc = emulate_ud(ctxt);
                        goto done;
                }

                if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
                        rc = emulate_nm(ctxt);
                        goto done;
                }

                if (ctxt->d & Mmx) {
                        rc = flush_pending_x87_faults(ctxt);
                        if (rc != X86EMUL_CONTINUE)
                                goto done;
                        /*
                         * Now that we know the fpu is exception safe, we can fetch
                         * operands from it.
                         */
                        fetch_possible_mmx_operand(&ctxt->src);
                        fetch_possible_mmx_operand(&ctxt->src2);
                        if (!(ctxt->d & Mov))
                                fetch_possible_mmx_operand(&ctxt->dst);
                }

                if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
                        rc = emulator_check_intercept(ctxt, ctxt->intercept,
                                                      X86_ICPT_PRE_EXCEPT);
                        if (rc != X86EMUL_CONTINUE)
                                goto done;
                }

                /* Instruction can only be executed in protected mode */
                if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
                        rc = emulate_ud(ctxt);
                        goto done;
                }

                /* Privileged instruction can be executed only in CPL=0 */
                if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
                        if (ctxt->d & PrivUD)
                                rc = emulate_ud(ctxt);
                        else
                                rc = emulate_gp(ctxt, 0);
                        goto done;
                }

                /* Do instruction specific permission checks */
                if (ctxt->d & CheckPerm) {
                        rc = ctxt->check_perm(ctxt);
                        if (rc != X86EMUL_CONTINUE)
                                goto done;
                }

                if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
                        rc = emulator_check_intercept(ctxt, ctxt->intercept,
                                                      X86_ICPT_POST_EXCEPT);
                        if (rc != X86EMUL_CONTINUE)
                                goto done;
                }

                if (ctxt->rep_prefix && (ctxt->d & String)) {
                        /* All REP prefixes have the same first termination condition */
                        if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
                                string_registers_quirk(ctxt);
                                ctxt->eip = ctxt->_eip;
                                ctxt->eflags &= ~X86_EFLAGS_RF;
                                goto done;
                        }
                }
        }

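        /*
         * Editor's note: the REP/RCX == 0 early exit just above implements
         * the architectural behaviour that a REP-prefixed string insn with
         * a (masked) zero count is a no-op -- RIP is advanced past the
         * instruction and RF is cleared, but no memory access or register
         * update takes place.
         */
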
        if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
                rc = segmented_read(ctxt, ctxt->src.addr.mem,
                                    ctxt->src.valptr, ctxt->src.bytes);
                if (rc != X86EMUL_CONTINUE)
                        goto done;
                ctxt->src.orig_val64 = ctxt->src.val64;
        }

        if (ctxt->src2.type == OP_MEM) {
                rc = segmented_read(ctxt, ctxt->src2.addr.mem,
                                    &ctxt->src2.val, ctxt->src2.bytes);
                if (rc != X86EMUL_CONTINUE)
                        goto done;
        }

        if ((ctxt->d & DstMask) == ImplicitOps)
                goto special_insn;

        if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
                /* optimisation - avoid slow emulated read if Mov */
                rc = segmented_read(ctxt, ctxt->dst.addr.mem,
                                    &ctxt->dst.val, ctxt->dst.bytes);
                if (rc != X86EMUL_CONTINUE) {
                        if (!(ctxt->d & NoWrite) &&
                            rc == X86EMUL_PROPAGATE_FAULT &&
                            ctxt->exception.vector == PF_VECTOR)
                                ctxt->exception.error_code |= PFERR_WRITE_MASK;
                        goto done;
                }
        }
        /* Copy full 64-bit value for CMPXCHG8B. */
        ctxt->dst.orig_val64 = ctxt->dst.val64;

special_insn:

        if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
                rc = emulator_check_intercept(ctxt, ctxt->intercept,
                                              X86_ICPT_POST_MEMACCESS);
                if (rc != X86EMUL_CONTINUE)
                        goto done;
        }

        if (ctxt->rep_prefix && (ctxt->d & String))
                ctxt->eflags |= X86_EFLAGS_RF;
        else
                ctxt->eflags &= ~X86_EFLAGS_RF;

        if (ctxt->execute) {
                if (ctxt->d & Fastop)
                        rc = fastop(ctxt, ctxt->fop);
                else
                        rc = ctxt->execute(ctxt);
                if (rc != X86EMUL_CONTINUE)
                        goto done;
                goto writeback;
        }

        if (ctxt->opcode_len == 2)
                goto twobyte_insn;
        else if (ctxt->opcode_len == 3)
                goto threebyte_insn;

        switch (ctxt->b) {
        case 0x70 ... 0x7f: /* jcc (short) */
                if (test_cc(ctxt->b, ctxt->eflags))
                        rc = jmp_rel(ctxt, ctxt->src.val);
                break;
        case 0x8d: /* lea r16/r32, m */
                ctxt->dst.val = ctxt->src.addr.mem.ea;
                break;
        case 0x90 ... 0x97: /* nop / xchg reg, rax */
                if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
                        ctxt->dst.type = OP_NONE;  /* nop */
                else
                        rc = em_xchg(ctxt);
                break;
        case 0x98: /* cbw/cwde/cdqe */
                switch (ctxt->op_bytes) {
                case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
                case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
                case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
                }
                break;
        case 0xcc: /* int3 */
                rc = emulate_int(ctxt, 3);
                break;
        case 0xcd: /* int n */
                rc = emulate_int(ctxt, ctxt->src.val);
                break;
        case 0xce: /* into */
                if (ctxt->eflags & X86_EFLAGS_OF)
                        rc = emulate_int(ctxt, 4);
                break;
        case 0xe9: /* jmp rel */
        case 0xeb: /* jmp rel short */
                rc = jmp_rel(ctxt, ctxt->src.val);
                ctxt->dst.type = OP_NONE; /* Disable writeback. */
                break;
        case 0xf4: /* hlt */
                ctxt->ops->halt(ctxt);
                break;
        case 0xf5: /* cmc */
                /* complement carry flag from eflags reg */
                ctxt->eflags ^= X86_EFLAGS_CF;
                break;
        case 0xf8: /* clc */
                ctxt->eflags &= ~X86_EFLAGS_CF;
                break;
        case 0xf9: /* stc */
                ctxt->eflags |= X86_EFLAGS_CF;
                break;
        case 0xfc: /* cld */
                ctxt->eflags &= ~X86_EFLAGS_DF;
                break;
        case 0xfd: /* std */
                ctxt->eflags |= X86_EFLAGS_DF;
                break;
        default:
                goto cannot_emulate;
        }

        if (rc != X86EMUL_CONTINUE)
                goto done;

writeback:
        if (ctxt->d & SrcWrite) {
                BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
                rc = writeback(ctxt, &ctxt->src);
                if (rc != X86EMUL_CONTINUE)
                        goto done;
        }
        if (!(ctxt->d & NoWrite)) {
                rc = writeback(ctxt, &ctxt->dst);
                if (rc != X86EMUL_CONTINUE)
                        goto done;
        }

        /*
         * restore dst type in case the decoding will be reused
         * (happens for string instructions)
         */
        ctxt->dst.type = saved_dst_type;

        if ((ctxt->d & SrcMask) == SrcSI)
                string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

        if ((ctxt->d & DstMask) == DstDI)
                string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);

        if (ctxt->rep_prefix && (ctxt->d & String)) {
                unsigned int count;
                struct read_cache *r = &ctxt->io_read;
                if ((ctxt->d & SrcMask) == SrcSI)
                        count = ctxt->src.count;
                else
                        count = ctxt->dst.count;
                register_address_increment(ctxt, VCPU_REGS_RCX, -count);

                if (!string_insn_completed(ctxt)) {
                        /*
                         * Re-enter guest when pio read ahead buffer is empty
                         * or, if it is not used, after each 1024 iteration.
                         */
                        if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
                            (r->end == 0 || r->end != r->pos)) {
                                /*
                                 * Reset read cache. Usually happens before
                                 * decode, but since instruction is restarted
                                 * we have to do it here.
                                 */
                                ctxt->mem_read.end = 0;
                                writeback_registers(ctxt);
                                return EMULATION_RESTART;
                        }
                        goto done; /* skip rip writeback */
                }
                ctxt->eflags &= ~X86_EFLAGS_RF;
        }

        ctxt->eip = ctxt->_eip;
        if (ctxt->mode != X86EMUL_MODE_PROT64)
                ctxt->eip = (u32)ctxt->_eip;

done:
        if (rc == X86EMUL_PROPAGATE_FAULT) {
                WARN_ON(ctxt->exception.vector > 0x1f);
                ctxt->have_exception = true;
        }
        if (rc == X86EMUL_INTERCEPTED)
                return EMULATION_INTERCEPTED;

        if (rc == X86EMUL_CONTINUE)
                writeback_registers(ctxt);

        return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;

twobyte_insn:
        switch (ctxt->b) {
        case 0x09: /* wbinvd */
                (ctxt->ops->wbinvd)(ctxt);
                break;
        case 0x08: /* invd */
        case 0x0d: /* GrpP (prefetch) */
        case 0x18: /* Grp16 (prefetch/nop) */
        case 0x1f: /* nop */
                break;
        case 0x20: /* mov cr, reg */
                ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
                break;
        case 0x21: /* mov from dr to reg */
                ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
                break;
        case 0x40 ... 0x4f: /* cmov */
                if (test_cc(ctxt->b, ctxt->eflags))
                        ctxt->dst.val = ctxt->src.val;
                else if (ctxt->op_bytes != 4)
                        ctxt->dst.type = OP_NONE; /* no writeback */
                break;
        case 0x80 ... 0x8f: /* jnz rel, etc*/
                if (test_cc(ctxt->b, ctxt->eflags))
                        rc = jmp_rel(ctxt, ctxt->src.val);
                break;
        case 0x90 ... 0x9f: /* setcc r/m8 */
                ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
                break;
        case 0xb6 ... 0xb7: /* movzx */
                ctxt->dst.bytes = ctxt->op_bytes;
                ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
                                                       : (u16) ctxt->src.val;
                break;
        case 0xbe ... 0xbf: /* movsx */
                ctxt->dst.bytes = ctxt->op_bytes;
                ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
                                                         (s16) ctxt->src.val;
                break;
        default:
                goto cannot_emulate;
        }

threebyte_insn:

        if (rc != X86EMUL_CONTINUE)
                goto done;

        goto writeback;

cannot_emulate:
        return EMULATION_FAILED;
}

void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
        invalidate_registers(ctxt);
}

void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
        writeback_registers(ctxt);
}

bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
{
        if (ctxt->rep_prefix && (ctxt->d & String))
                return false;

        if (ctxt->d & TwoMemOp)
                return false;

        return true;
}