target/i386/translate.c
1 /*
2 * i386 translation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20
21 #include "qemu/host-utils.h"
22 #include "cpu.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
25 #include "tcg-op.h"
26 #include "exec/cpu_ldst.h"
27
28 #include "exec/helper-proto.h"
29 #include "exec/helper-gen.h"
30
31 #include "trace-tcg.h"
32 #include "exec/log.h"
33
34
35 #define PREFIX_REPZ 0x01
36 #define PREFIX_REPNZ 0x02
37 #define PREFIX_LOCK 0x04
38 #define PREFIX_DATA 0x08
39 #define PREFIX_ADR 0x10
40 #define PREFIX_VEX 0x20
41
42 #ifdef TARGET_X86_64
43 #define CODE64(s) ((s)->code64)
44 #define REX_X(s) ((s)->rex_x)
45 #define REX_B(s) ((s)->rex_b)
46 #else
47 #define CODE64(s) 0
48 #define REX_X(s) 0
49 #define REX_B(s) 0
50 #endif
51
52 #ifdef TARGET_X86_64
53 # define ctztl ctz64
54 # define clztl clz64
55 #else
56 # define ctztl ctz32
57 # define clztl clz32
58 #endif
59
60 /* For a switch indexed by MODRM, match all memory operands for a given OP. */
61 #define CASE_MODRM_MEM_OP(OP) \
62 case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
63 case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
64 case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7
65
66 #define CASE_MODRM_OP(OP) \
67 case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
68 case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
69 case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
70 case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
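/* The ModR/M byte decomposed by these macros is laid out as
   mod[7:6] | reg-or-op[5:3] | rm[2:0].  mod values 0..2 select a
   memory operand form, while mod == 3 selects a register operand;
   hence CASE_MODRM_MEM_OP covers only the first three mod values
   and CASE_MODRM_OP covers all four. */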
71
72 //#define MACRO_TEST 1
73
74 /* global register indexes */
75 static TCGv_env cpu_env;
76 static TCGv cpu_A0;
77 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
78 static TCGv_i32 cpu_cc_op;
79 static TCGv cpu_regs[CPU_NB_REGS];
80 static TCGv cpu_seg_base[6];
81 static TCGv_i64 cpu_bndl[4];
82 static TCGv_i64 cpu_bndu[4];
83 /* local temps */
84 static TCGv cpu_T0, cpu_T1;
85 /* local register indexes (only used inside old micro ops) */
86 static TCGv cpu_tmp0, cpu_tmp4;
87 static TCGv_ptr cpu_ptr0, cpu_ptr1;
88 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
89 static TCGv_i64 cpu_tmp1_i64;
90
91 #include "exec/gen-icount.h"
92
93 #ifdef TARGET_X86_64
94 static int x86_64_hregs;
95 #endif
96
97 typedef struct DisasContext {
98 /* current insn context */
99 int override; /* -1 if no override */
100 int prefix;
101 TCGMemOp aflag;
102 TCGMemOp dflag;
103 target_ulong pc_start;
104 target_ulong pc; /* pc = eip + cs_base */
105     int is_jmp; /* 1 means jump (stop translation), 2 means CPU
106 static state change (stop translation) */
107 /* current block context */
108 target_ulong cs_base; /* base of CS segment */
109 int pe; /* protected mode */
110 int code32; /* 32 bit code segment */
111 #ifdef TARGET_X86_64
112 int lma; /* long mode active */
113 int code64; /* 64 bit code segment */
114 int rex_x, rex_b;
115 #endif
116 int vex_l; /* vex vector length */
117     int vex_v;  /* vex vvvv register, without 1's complement.  */
118 int ss32; /* 32 bit stack segment */
119 CCOp cc_op; /* current CC operation */
120 bool cc_op_dirty;
121     int addseg; /* non-zero if any of DS/ES/SS has a non-zero base */
122 int f_st; /* currently unused */
123 int vm86; /* vm86 mode */
124 int cpl;
125 int iopl;
126 int tf; /* TF cpu flag */
127 int singlestep_enabled; /* "hardware" single step enabled */
128 int jmp_opt; /* use direct block chaining for direct jumps */
129 int repz_opt; /* optimize jumps within repz instructions */
130 int mem_index; /* select memory access functions */
131 uint64_t flags; /* all execution flags */
132 struct TranslationBlock *tb;
133 int popl_esp_hack; /* for correct popl with esp base handling */
134 int rip_offset; /* only used in x86_64, but left for simplicity */
135 int cpuid_features;
136 int cpuid_ext_features;
137 int cpuid_ext2_features;
138 int cpuid_ext3_features;
139 int cpuid_7_0_ebx_features;
140 int cpuid_xsave_features;
141 } DisasContext;
142
143 static void gen_eob(DisasContext *s);
144 static void gen_jmp(DisasContext *s, target_ulong eip);
145 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
146 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);
147
148 /* i386 arith/logic operations */
149 enum {
150 OP_ADDL,
151 OP_ORL,
152 OP_ADCL,
153 OP_SBBL,
154 OP_ANDL,
155 OP_SUBL,
156 OP_XORL,
157 OP_CMPL,
158 };
159
160 /* i386 shift ops */
161 enum {
162 OP_ROL,
163 OP_ROR,
164 OP_RCL,
165 OP_RCR,
166 OP_SHL,
167 OP_SHR,
168 OP_SHL1, /* undocumented */
169 OP_SAR = 7,
170 };
171
172 enum {
173 JCC_O,
174 JCC_B,
175 JCC_Z,
176 JCC_BE,
177 JCC_S,
178 JCC_P,
179 JCC_L,
180 JCC_LE,
181 };
182
183 enum {
184 /* I386 int registers */
185 OR_EAX, /* MUST be even numbered */
186 OR_ECX,
187 OR_EDX,
188 OR_EBX,
189 OR_ESP,
190 OR_EBP,
191 OR_ESI,
192 OR_EDI,
193
194 OR_TMP0 = 16, /* temporary operand register */
195 OR_TMP1,
196 OR_A0, /* temporary register used when doing address evaluation */
197 };
198
199 enum {
200 USES_CC_DST = 1,
201 USES_CC_SRC = 2,
202 USES_CC_SRC2 = 4,
203 USES_CC_SRCT = 8,
204 };
205
206 /* Bit set if the global variable is live after setting CC_OP to X. */
207 static const uint8_t cc_op_live[CC_OP_NB] = {
208 [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
209 [CC_OP_EFLAGS] = USES_CC_SRC,
210 [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
211 [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
212 [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
213 [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
214 [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
215 [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
216 [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
217 [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
218 [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
219 [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
220 [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
221 [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
222 [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
223 [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
224 [CC_OP_CLR] = 0,
225 [CC_OP_POPCNT] = USES_CC_SRC,
226 };
227
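/* Worked example of the table above: moving from CC_OP_SUBB (cc_dst,
   cc_src and cc_srcT live) to CC_OP_LOGICB (only cc_dst live) leaves
   cc_src and cc_srcT dead, so set_cc_op() below can discard those
   TCG globals rather than keep them alive across the transition. */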
228 static void set_cc_op(DisasContext *s, CCOp op)
229 {
230 int dead;
231
232 if (s->cc_op == op) {
233 return;
234 }
235
236 /* Discard CC computation that will no longer be used. */
237 dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
238 if (dead & USES_CC_DST) {
239 tcg_gen_discard_tl(cpu_cc_dst);
240 }
241 if (dead & USES_CC_SRC) {
242 tcg_gen_discard_tl(cpu_cc_src);
243 }
244 if (dead & USES_CC_SRC2) {
245 tcg_gen_discard_tl(cpu_cc_src2);
246 }
247 if (dead & USES_CC_SRCT) {
248 tcg_gen_discard_tl(cpu_cc_srcT);
249 }
250
251 if (op == CC_OP_DYNAMIC) {
252 /* The DYNAMIC setting is translator only, and should never be
253 stored. Thus we always consider it clean. */
254 s->cc_op_dirty = false;
255 } else {
256 /* Discard any computed CC_OP value (see shifts). */
257 if (s->cc_op == CC_OP_DYNAMIC) {
258 tcg_gen_discard_i32(cpu_cc_op);
259 }
260 s->cc_op_dirty = true;
261 }
262 s->cc_op = op;
263 }
264
265 static void gen_update_cc_op(DisasContext *s)
266 {
267 if (s->cc_op_dirty) {
268 tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
269 s->cc_op_dirty = false;
270 }
271 }
272
273 #ifdef TARGET_X86_64
274
275 #define NB_OP_SIZES 4
276
277 #else /* !TARGET_X86_64 */
278
279 #define NB_OP_SIZES 3
280
281 #endif /* !TARGET_X86_64 */
282
283 #if defined(HOST_WORDS_BIGENDIAN)
284 #define REG_B_OFFSET (sizeof(target_ulong) - 1)
285 #define REG_H_OFFSET (sizeof(target_ulong) - 2)
286 #define REG_W_OFFSET (sizeof(target_ulong) - 2)
287 #define REG_L_OFFSET (sizeof(target_ulong) - 4)
288 #define REG_LH_OFFSET (sizeof(target_ulong) - 8)
289 #else
290 #define REG_B_OFFSET 0
291 #define REG_H_OFFSET 1
292 #define REG_W_OFFSET 0
293 #define REG_L_OFFSET 0
294 #define REG_LH_OFFSET 4
295 #endif
296
297 /* In instruction encodings for byte register accesses the
298 * register number usually indicates "low 8 bits of register N";
299 * however there are some special cases where N 4..7 indicates
300  * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
301 * true for this special case, false otherwise.
302 */
303 static inline bool byte_reg_is_xH(int reg)
304 {
305 if (reg < 4) {
306 return false;
307 }
308 #ifdef TARGET_X86_64
309 if (reg >= 8 || x86_64_hregs) {
310 return false;
311 }
312 #endif
313 return true;
314 }
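/* For example, with no REX prefix a byte-sized operand with register
   number 4..7 names AH, CH, DH or BH.  Once a REX prefix has been
   seen (x86_64_hregs set), the same numbers name SPL, BPL, SIL and
   DIL and numbers 8..15 become reachable, so the xH special case no
   longer applies. */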
315
316 /* Select the size of a push/pop operation. */
317 static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
318 {
319 if (CODE64(s)) {
320 return ot == MO_16 ? MO_16 : MO_64;
321 } else {
322 return ot;
323 }
324 }
325
326 /* Select the size of the stack pointer. */
327 static inline TCGMemOp mo_stacksize(DisasContext *s)
328 {
329 return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
330 }
331
332 /* Select only size 64 else 32. Used for SSE operand sizes. */
333 static inline TCGMemOp mo_64_32(TCGMemOp ot)
334 {
335 #ifdef TARGET_X86_64
336 return ot == MO_64 ? MO_64 : MO_32;
337 #else
338 return MO_32;
339 #endif
340 }
341
342 /* Select size 8 if lsb of B is clear, else OT. Used for decoding
343 byte vs word opcodes. */
344 static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
345 {
346 return b & 1 ? ot : MO_8;
347 }
348
349 /* Select size 8 if lsb of B is clear, else OT capped at 32.
350 Used for decoding operand size of port opcodes. */
351 static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
352 {
353 return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
354 }
355
356 static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
357 {
358 switch(ot) {
359 case MO_8:
360 if (!byte_reg_is_xH(reg)) {
361 tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
362 } else {
363 tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
364 }
365 break;
366 case MO_16:
367 tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
368 break;
369 case MO_32:
370 /* For x86_64, this sets the higher half of register to zero.
371 For i386, this is equivalent to a mov. */
372 tcg_gen_ext32u_tl(cpu_regs[reg], t0);
373 break;
374 #ifdef TARGET_X86_64
375 case MO_64:
376 tcg_gen_mov_tl(cpu_regs[reg], t0);
377 break;
378 #endif
379 default:
380 tcg_abort();
381 }
382 }
383
384 static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
385 {
386 if (ot == MO_8 && byte_reg_is_xH(reg)) {
387 tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
388 } else {
389 tcg_gen_mov_tl(t0, cpu_regs[reg]);
390 }
391 }
392
393 static void gen_add_A0_im(DisasContext *s, int val)
394 {
395 tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
396 if (!CODE64(s)) {
397 tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
398 }
399 }
400
401 static inline void gen_op_jmp_v(TCGv dest)
402 {
403 tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
404 }
405
406 static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
407 {
408 tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
409 gen_op_mov_reg_v(size, reg, cpu_tmp0);
410 }
411
412 static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
413 {
414 tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
415 gen_op_mov_reg_v(size, reg, cpu_tmp0);
416 }
417
418 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
419 {
420 tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
421 }
422
423 static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
424 {
425 tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
426 }
427
428 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
429 {
430 if (d == OR_TMP0) {
431 gen_op_st_v(s, idx, cpu_T0, cpu_A0);
432 } else {
433 gen_op_mov_reg_v(idx, d, cpu_T0);
434 }
435 }
436
437 static inline void gen_jmp_im(target_ulong pc)
438 {
439 tcg_gen_movi_tl(cpu_tmp0, pc);
440 gen_op_jmp_v(cpu_tmp0);
441 }
442
443 /* Compute SEG:REG into A0. SEG is selected from the override segment
444 (OVR_SEG) and the default segment (DEF_SEG). OVR_SEG may be -1 to
445 indicate no override. */
446 static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
447 int def_seg, int ovr_seg)
448 {
449 switch (aflag) {
450 #ifdef TARGET_X86_64
451 case MO_64:
452 if (ovr_seg < 0) {
453 tcg_gen_mov_tl(cpu_A0, a0);
454 return;
455 }
456 break;
457 #endif
458 case MO_32:
459 /* 32 bit address */
460 if (ovr_seg < 0 && s->addseg) {
461 ovr_seg = def_seg;
462 }
463 if (ovr_seg < 0) {
464 tcg_gen_ext32u_tl(cpu_A0, a0);
465 return;
466 }
467 break;
468 case MO_16:
469 /* 16 bit address */
470 tcg_gen_ext16u_tl(cpu_A0, a0);
471 a0 = cpu_A0;
472 if (ovr_seg < 0) {
473 if (s->addseg) {
474 ovr_seg = def_seg;
475 } else {
476 return;
477 }
478 }
479 break;
480 default:
481 tcg_abort();
482 }
483
484 if (ovr_seg >= 0) {
485 TCGv seg = cpu_seg_base[ovr_seg];
486
487 if (aflag == MO_64) {
488 tcg_gen_add_tl(cpu_A0, a0, seg);
489 } else if (CODE64(s)) {
490 tcg_gen_ext32u_tl(cpu_A0, a0);
491 tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
492 } else {
493 tcg_gen_add_tl(cpu_A0, a0, seg);
494 tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
495 }
496 }
497 }
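/* Illustration of the 16-bit path above: for an access such as
   "mov ax, [bx+si]" the computed BX+SI is first masked down to
   16 bits in A0; if a segment base must be applied, the DS base is
   then added and the sum truncated to 32 bits by the common code at
   the end. */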
498
499 static inline void gen_string_movl_A0_ESI(DisasContext *s)
500 {
501 gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
502 }
503
504 static inline void gen_string_movl_A0_EDI(DisasContext *s)
505 {
506 gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
507 }
508
509 static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
510 {
511 tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
512 tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
513 };
514
515 static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
516 {
517 switch (size) {
518 case MO_8:
519 if (sign) {
520 tcg_gen_ext8s_tl(dst, src);
521 } else {
522 tcg_gen_ext8u_tl(dst, src);
523 }
524 return dst;
525 case MO_16:
526 if (sign) {
527 tcg_gen_ext16s_tl(dst, src);
528 } else {
529 tcg_gen_ext16u_tl(dst, src);
530 }
531 return dst;
532 #ifdef TARGET_X86_64
533 case MO_32:
534 if (sign) {
535 tcg_gen_ext32s_tl(dst, src);
536 } else {
537 tcg_gen_ext32u_tl(dst, src);
538 }
539 return dst;
540 #endif
541 default:
542 return src;
543 }
544 }
545
546 static void gen_extu(TCGMemOp ot, TCGv reg)
547 {
548 gen_ext_tl(reg, reg, ot, false);
549 }
550
551 static void gen_exts(TCGMemOp ot, TCGv reg)
552 {
553 gen_ext_tl(reg, reg, ot, true);
554 }
555
556 static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1)
557 {
558 tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
559 gen_extu(size, cpu_tmp0);
560 tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
561 }
562
563 static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1)
564 {
565 tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
566 gen_extu(size, cpu_tmp0);
567 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
568 }
569
570 static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
571 {
572 switch (ot) {
573 case MO_8:
574 gen_helper_inb(v, cpu_env, n);
575 break;
576 case MO_16:
577 gen_helper_inw(v, cpu_env, n);
578 break;
579 case MO_32:
580 gen_helper_inl(v, cpu_env, n);
581 break;
582 default:
583 tcg_abort();
584 }
585 }
586
587 static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
588 {
589 switch (ot) {
590 case MO_8:
591 gen_helper_outb(cpu_env, v, n);
592 break;
593 case MO_16:
594 gen_helper_outw(cpu_env, v, n);
595 break;
596 case MO_32:
597 gen_helper_outl(cpu_env, v, n);
598 break;
599 default:
600 tcg_abort();
601 }
602 }
603
604 static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
605 uint32_t svm_flags)
606 {
607 target_ulong next_eip;
608
609 if (s->pe && (s->cpl > s->iopl || s->vm86)) {
610 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
611 switch (ot) {
612 case MO_8:
613 gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
614 break;
615 case MO_16:
616 gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
617 break;
618 case MO_32:
619 gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
620 break;
621 default:
622 tcg_abort();
623 }
624 }
625 if(s->flags & HF_SVMI_MASK) {
626 gen_update_cc_op(s);
627 gen_jmp_im(cur_eip);
628 svm_flags |= (1 << (4 + ot));
629 next_eip = s->pc - s->cs_base;
630 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
631 gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
632 tcg_const_i32(svm_flags),
633 tcg_const_i32(next_eip - cur_eip));
634 }
635 }
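/* The two checks above mirror the hardware rules for IN/OUT/INS/OUTS:
   in protected mode with CPL > IOPL, or in vm86 mode, the port must
   be permitted by the TSS I/O bitmap (the check_io helpers raise #GP
   if it is not), and when SVM intercepts are active the access is
   also reported to the IOIO intercept check with its size encoded
   into svm_flags. */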
636
637 static inline void gen_movs(DisasContext *s, TCGMemOp ot)
638 {
639 gen_string_movl_A0_ESI(s);
640 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
641 gen_string_movl_A0_EDI(s);
642 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
643 gen_op_movl_T0_Dshift(ot);
644 gen_op_add_reg_T0(s->aflag, R_ESI);
645 gen_op_add_reg_T0(s->aflag, R_EDI);
646 }
647
648 static void gen_op_update1_cc(void)
649 {
650 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
651 }
652
653 static void gen_op_update2_cc(void)
654 {
655 tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
656 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
657 }
658
659 static void gen_op_update3_cc(TCGv reg)
660 {
661 tcg_gen_mov_tl(cpu_cc_src2, reg);
662 tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
663 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
664 }
665
666 static inline void gen_op_testl_T0_T1_cc(void)
667 {
668 tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
669 }
670
671 static void gen_op_update_neg_cc(void)
672 {
673 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
674 tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
675 tcg_gen_movi_tl(cpu_cc_srcT, 0);
676 }
677
678 /* compute all eflags to cc_src */
679 static void gen_compute_eflags(DisasContext *s)
680 {
681 TCGv zero, dst, src1, src2;
682 int live, dead;
683
684 if (s->cc_op == CC_OP_EFLAGS) {
685 return;
686 }
687 if (s->cc_op == CC_OP_CLR) {
688 tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
689 set_cc_op(s, CC_OP_EFLAGS);
690 return;
691 }
692
693 TCGV_UNUSED(zero);
694 dst = cpu_cc_dst;
695 src1 = cpu_cc_src;
696 src2 = cpu_cc_src2;
697
698 /* Take care to not read values that are not live. */
699 live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
700 dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
701 if (dead) {
702 zero = tcg_const_tl(0);
703 if (dead & USES_CC_DST) {
704 dst = zero;
705 }
706 if (dead & USES_CC_SRC) {
707 src1 = zero;
708 }
709 if (dead & USES_CC_SRC2) {
710 src2 = zero;
711 }
712 }
713
714 gen_update_cc_op(s);
715 gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
716 set_cc_op(s, CC_OP_EFLAGS);
717
718 if (dead) {
719 tcg_temp_free(zero);
720 }
721 }
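/* After gen_compute_eflags() the lazy flag state is fully
   materialised: cpu_cc_src holds the EFLAGS condition bits and
   CC_OP_EFLAGS tells later code to read them directly instead of
   recomputing them from the last arithmetic result. */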
722
723 typedef struct CCPrepare {
724 TCGCond cond;
725 TCGv reg;
726 TCGv reg2;
727 target_ulong imm;
728 target_ulong mask;
729 bool use_reg2;
730 bool no_setcond;
731 } CCPrepare;
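/* A CCPrepare describes a condition to be tested later: consumers
   either test the bit(s) selected by a power-of-two mask, or compare
   reg against reg2 (when use_reg2 is set) or against imm.  A mask of
   -1 means no masking is needed, and no_setcond means reg already
   holds the boolean result and only needs to be copied (or inverted). */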
732
733 /* compute eflags.C to reg */
734 static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
735 {
736 TCGv t0, t1;
737 int size, shift;
738
739 switch (s->cc_op) {
740 case CC_OP_SUBB ... CC_OP_SUBQ:
741 /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
742 size = s->cc_op - CC_OP_SUBB;
743 t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
744 /* If no temporary was used, be careful not to alias t1 and t0. */
745 t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
746 tcg_gen_mov_tl(t0, cpu_cc_srcT);
747 gen_extu(size, t0);
748 goto add_sub;
749
750 case CC_OP_ADDB ... CC_OP_ADDQ:
751 /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
752 size = s->cc_op - CC_OP_ADDB;
753 t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
754 t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
755 add_sub:
756 return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
757 .reg2 = t1, .mask = -1, .use_reg2 = true };
758
759 case CC_OP_LOGICB ... CC_OP_LOGICQ:
760 case CC_OP_CLR:
761 case CC_OP_POPCNT:
762 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
763
764 case CC_OP_INCB ... CC_OP_INCQ:
765 case CC_OP_DECB ... CC_OP_DECQ:
766 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
767 .mask = -1, .no_setcond = true };
768
769 case CC_OP_SHLB ... CC_OP_SHLQ:
770 /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
771 size = s->cc_op - CC_OP_SHLB;
772 shift = (8 << size) - 1;
773 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
774 .mask = (target_ulong)1 << shift };
775
776 case CC_OP_MULB ... CC_OP_MULQ:
777 return (CCPrepare) { .cond = TCG_COND_NE,
778 .reg = cpu_cc_src, .mask = -1 };
779
780 case CC_OP_BMILGB ... CC_OP_BMILGQ:
781 size = s->cc_op - CC_OP_BMILGB;
782 t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
783 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
784
785 case CC_OP_ADCX:
786 case CC_OP_ADCOX:
787 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
788 .mask = -1, .no_setcond = true };
789
790 case CC_OP_EFLAGS:
791 case CC_OP_SARB ... CC_OP_SARQ:
792 /* CC_SRC & 1 */
793 return (CCPrepare) { .cond = TCG_COND_NE,
794 .reg = cpu_cc_src, .mask = CC_C };
795
796 default:
797 /* The need to compute only C from CC_OP_DYNAMIC is important
798 in efficiently implementing e.g. INC at the start of a TB. */
799 gen_update_cc_op(s);
800 gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
801 cpu_cc_src2, cpu_cc_op);
802 return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
803 .mask = -1, .no_setcond = true };
804 }
805 }
806
807 /* compute eflags.P to reg */
808 static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
809 {
810 gen_compute_eflags(s);
811 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
812 .mask = CC_P };
813 }
814
815 /* compute eflags.S to reg */
816 static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
817 {
818 switch (s->cc_op) {
819 case CC_OP_DYNAMIC:
820 gen_compute_eflags(s);
821 /* FALLTHRU */
822 case CC_OP_EFLAGS:
823 case CC_OP_ADCX:
824 case CC_OP_ADOX:
825 case CC_OP_ADCOX:
826 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
827 .mask = CC_S };
828 case CC_OP_CLR:
829 case CC_OP_POPCNT:
830 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
831 default:
832 {
833 TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
834 TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
835 return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
836 }
837 }
838 }
839
840 /* compute eflags.O to reg */
841 static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
842 {
843 switch (s->cc_op) {
844 case CC_OP_ADOX:
845 case CC_OP_ADCOX:
846 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
847 .mask = -1, .no_setcond = true };
848 case CC_OP_CLR:
849 case CC_OP_POPCNT:
850 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
851 default:
852 gen_compute_eflags(s);
853 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
854 .mask = CC_O };
855 }
856 }
857
858 /* compute eflags.Z to reg */
859 static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
860 {
861 switch (s->cc_op) {
862 case CC_OP_DYNAMIC:
863 gen_compute_eflags(s);
864 /* FALLTHRU */
865 case CC_OP_EFLAGS:
866 case CC_OP_ADCX:
867 case CC_OP_ADOX:
868 case CC_OP_ADCOX:
869 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
870 .mask = CC_Z };
871 case CC_OP_CLR:
872 return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
873 case CC_OP_POPCNT:
874 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
875 .mask = -1 };
876 default:
877 {
878 TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
879 TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
880 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
881 }
882 }
883 }
884
885 /* perform a conditional store into register 'reg' according to jump opcode
886    value 'b'. In the fast case, T0 is guaranteed not to be used. */
887 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
888 {
889 int inv, jcc_op, cond;
890 TCGMemOp size;
891 CCPrepare cc;
892 TCGv t0;
893
894 inv = b & 1;
895 jcc_op = (b >> 1) & 7;
896
897 switch (s->cc_op) {
898 case CC_OP_SUBB ... CC_OP_SUBQ:
899 /* We optimize relational operators for the cmp/jcc case. */
900 size = s->cc_op - CC_OP_SUBB;
901 switch (jcc_op) {
902 case JCC_BE:
903 tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
904 gen_extu(size, cpu_tmp4);
905 t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
906 cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
907 .reg2 = t0, .mask = -1, .use_reg2 = true };
908 break;
909
910 case JCC_L:
911 cond = TCG_COND_LT;
912 goto fast_jcc_l;
913 case JCC_LE:
914 cond = TCG_COND_LE;
915 fast_jcc_l:
916 tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
917 gen_exts(size, cpu_tmp4);
918 t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
919 cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
920 .reg2 = t0, .mask = -1, .use_reg2 = true };
921 break;
922
923 default:
924 goto slow_jcc;
925 }
926 break;
927
928 default:
929 slow_jcc:
930 /* This actually generates good code for JC, JZ and JS. */
931 switch (jcc_op) {
932 case JCC_O:
933 cc = gen_prepare_eflags_o(s, reg);
934 break;
935 case JCC_B:
936 cc = gen_prepare_eflags_c(s, reg);
937 break;
938 case JCC_Z:
939 cc = gen_prepare_eflags_z(s, reg);
940 break;
941 case JCC_BE:
942 gen_compute_eflags(s);
943 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
944 .mask = CC_Z | CC_C };
945 break;
946 case JCC_S:
947 cc = gen_prepare_eflags_s(s, reg);
948 break;
949 case JCC_P:
950 cc = gen_prepare_eflags_p(s, reg);
951 break;
952 case JCC_L:
953 gen_compute_eflags(s);
954 if (TCGV_EQUAL(reg, cpu_cc_src)) {
955 reg = cpu_tmp0;
956 }
957 tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
958 tcg_gen_xor_tl(reg, reg, cpu_cc_src);
959 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
960 .mask = CC_S };
961 break;
962 default:
963 case JCC_LE:
964 gen_compute_eflags(s);
965 if (TCGV_EQUAL(reg, cpu_cc_src)) {
966 reg = cpu_tmp0;
967 }
968 tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
969 tcg_gen_xor_tl(reg, reg, cpu_cc_src);
970 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
971 .mask = CC_S | CC_Z };
972 break;
973 }
974 break;
975 }
976
977 if (inv) {
978 cc.cond = tcg_invert_cond(cc.cond);
979 }
980 return cc;
981 }
982
983 static void gen_setcc1(DisasContext *s, int b, TCGv reg)
984 {
985 CCPrepare cc = gen_prepare_cc(s, b, reg);
986
987 if (cc.no_setcond) {
988 if (cc.cond == TCG_COND_EQ) {
989 tcg_gen_xori_tl(reg, cc.reg, 1);
990 } else {
991 tcg_gen_mov_tl(reg, cc.reg);
992 }
993 return;
994 }
995
996 if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
997 cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
998 tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
999 tcg_gen_andi_tl(reg, reg, 1);
1000 return;
1001 }
1002 if (cc.mask != -1) {
1003 tcg_gen_andi_tl(reg, cc.reg, cc.mask);
1004 cc.reg = reg;
1005 }
1006 if (cc.use_reg2) {
1007 tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
1008 } else {
1009 tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
1010 }
1011 }
1012
1013 static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
1014 {
1015 gen_setcc1(s, JCC_B << 1, reg);
1016 }
1017
1018 /* generate a conditional jump to label 'l1' according to jump opcode
1019    value 'b'. In the fast case, T0 is guaranteed not to be used. */
1020 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
1021 {
1022 CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
1023
1024 if (cc.mask != -1) {
1025 tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
1026 cc.reg = cpu_T0;
1027 }
1028 if (cc.use_reg2) {
1029 tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1030 } else {
1031 tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1032 }
1033 }
1034
1035 /* Generate a conditional jump to label 'l1' according to jump opcode
1036    value 'b'. In the fast case, T0 is guaranteed not to be used.
1037 A translation block must end soon. */
1038 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
1039 {
1040 CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
1041
1042 gen_update_cc_op(s);
1043 if (cc.mask != -1) {
1044 tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
1045 cc.reg = cpu_T0;
1046 }
1047 set_cc_op(s, CC_OP_DYNAMIC);
1048 if (cc.use_reg2) {
1049 tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1050 } else {
1051 tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1052 }
1053 }
1054
1055 /* XXX: does not work with gdbstub "ice" single step - not a
1056 serious problem */
1057 static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
1058 {
1059 TCGLabel *l1 = gen_new_label();
1060 TCGLabel *l2 = gen_new_label();
1061 gen_op_jnz_ecx(s->aflag, l1);
1062 gen_set_label(l2);
1063 gen_jmp_tb(s, next_eip, 1);
1064 gen_set_label(l1);
1065 return l2;
1066 }
1067
1068 static inline void gen_stos(DisasContext *s, TCGMemOp ot)
1069 {
1070 gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
1071 gen_string_movl_A0_EDI(s);
1072 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1073 gen_op_movl_T0_Dshift(ot);
1074 gen_op_add_reg_T0(s->aflag, R_EDI);
1075 }
1076
1077 static inline void gen_lods(DisasContext *s, TCGMemOp ot)
1078 {
1079 gen_string_movl_A0_ESI(s);
1080 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1081 gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
1082 gen_op_movl_T0_Dshift(ot);
1083 gen_op_add_reg_T0(s->aflag, R_ESI);
1084 }
1085
1086 static inline void gen_scas(DisasContext *s, TCGMemOp ot)
1087 {
1088 gen_string_movl_A0_EDI(s);
1089 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
1090 gen_op(s, OP_CMPL, ot, R_EAX);
1091 gen_op_movl_T0_Dshift(ot);
1092 gen_op_add_reg_T0(s->aflag, R_EDI);
1093 }
1094
1095 static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
1096 {
1097 gen_string_movl_A0_EDI(s);
1098 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
1099 gen_string_movl_A0_ESI(s);
1100 gen_op(s, OP_CMPL, ot, OR_TMP0);
1101 gen_op_movl_T0_Dshift(ot);
1102 gen_op_add_reg_T0(s->aflag, R_ESI);
1103 gen_op_add_reg_T0(s->aflag, R_EDI);
1104 }
1105
1106 static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
1107 {
1108 if (s->flags & HF_IOBPT_MASK) {
1109 TCGv_i32 t_size = tcg_const_i32(1 << ot);
1110 TCGv t_next = tcg_const_tl(s->pc - s->cs_base);
1111
1112 gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
1113 tcg_temp_free_i32(t_size);
1114 tcg_temp_free(t_next);
1115 }
1116 }
1117
1118
1119 static inline void gen_ins(DisasContext *s, TCGMemOp ot)
1120 {
1121 if (s->tb->cflags & CF_USE_ICOUNT) {
1122 gen_io_start();
1123 }
1124 gen_string_movl_A0_EDI(s);
1125 /* Note: we must do this dummy write first to be restartable in
1126 case of page fault. */
1127 tcg_gen_movi_tl(cpu_T0, 0);
1128 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1129 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
1130 tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
1131 gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
1132 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1133 gen_op_movl_T0_Dshift(ot);
1134 gen_op_add_reg_T0(s->aflag, R_EDI);
1135 gen_bpt_io(s, cpu_tmp2_i32, ot);
1136 if (s->tb->cflags & CF_USE_ICOUNT) {
1137 gen_io_end();
1138 }
1139 }
1140
1141 static inline void gen_outs(DisasContext *s, TCGMemOp ot)
1142 {
1143 if (s->tb->cflags & CF_USE_ICOUNT) {
1144 gen_io_start();
1145 }
1146 gen_string_movl_A0_ESI(s);
1147 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1148
1149 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
1150 tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
1151 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
1152 gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
1153 gen_op_movl_T0_Dshift(ot);
1154 gen_op_add_reg_T0(s->aflag, R_ESI);
1155 gen_bpt_io(s, cpu_tmp2_i32, ot);
1156 if (s->tb->cflags & CF_USE_ICOUNT) {
1157 gen_io_end();
1158 }
1159 }
1160
1161 /* same method as Valgrind: we generate jumps to current or next
1162 instruction */
1163 #define GEN_REPZ(op) \
1164 static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \
1165 target_ulong cur_eip, target_ulong next_eip) \
1166 { \
1167 TCGLabel *l2; \
1168 gen_update_cc_op(s); \
1169 l2 = gen_jz_ecx_string(s, next_eip); \
1170 gen_ ## op(s, ot); \
1171 gen_op_add_reg_im(s->aflag, R_ECX, -1); \
1172 /* a loop would cause two single step exceptions if ECX = 1 \
1173 before rep string_insn */ \
1174 if (s->repz_opt) \
1175 gen_op_jz_ecx(s->aflag, l2); \
1176 gen_jmp(s, cur_eip); \
1177 }
1178
1179 #define GEN_REPZ2(op) \
1180 static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \
1181 target_ulong cur_eip, \
1182 target_ulong next_eip, \
1183 int nz) \
1184 { \
1185 TCGLabel *l2; \
1186 gen_update_cc_op(s); \
1187 l2 = gen_jz_ecx_string(s, next_eip); \
1188 gen_ ## op(s, ot); \
1189 gen_op_add_reg_im(s->aflag, R_ECX, -1); \
1190 gen_update_cc_op(s); \
1191 gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \
1192 if (s->repz_opt) \
1193 gen_op_jz_ecx(s->aflag, l2); \
1194 gen_jmp(s, cur_eip); \
1195 }
1196
1197 GEN_REPZ(movs)
1198 GEN_REPZ(stos)
1199 GEN_REPZ(lods)
1200 GEN_REPZ(ins)
1201 GEN_REPZ(outs)
1202 GEN_REPZ2(scas)
1203 GEN_REPZ2(cmps)
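/* Each expansion above turns "rep <op>" into a block that performs a
   single iteration, decrements ECX, and jumps back to the current
   EIP so the next iteration runs as a fresh block (the Valgrind-like
   scheme mentioned above); gen_jz_ecx_string provides the early exit
   to the following instruction when ECX is already zero. */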
1204
1205 static void gen_helper_fp_arith_ST0_FT0(int op)
1206 {
1207 switch (op) {
1208 case 0:
1209 gen_helper_fadd_ST0_FT0(cpu_env);
1210 break;
1211 case 1:
1212 gen_helper_fmul_ST0_FT0(cpu_env);
1213 break;
1214 case 2:
1215 gen_helper_fcom_ST0_FT0(cpu_env);
1216 break;
1217 case 3:
1218 gen_helper_fcom_ST0_FT0(cpu_env);
1219 break;
1220 case 4:
1221 gen_helper_fsub_ST0_FT0(cpu_env);
1222 break;
1223 case 5:
1224 gen_helper_fsubr_ST0_FT0(cpu_env);
1225 break;
1226 case 6:
1227 gen_helper_fdiv_ST0_FT0(cpu_env);
1228 break;
1229 case 7:
1230 gen_helper_fdivr_ST0_FT0(cpu_env);
1231 break;
1232 }
1233 }
1234
1235 /* NOTE the exception in "r" op ordering */
1236 static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1237 {
1238 TCGv_i32 tmp = tcg_const_i32(opreg);
1239 switch (op) {
1240 case 0:
1241 gen_helper_fadd_STN_ST0(cpu_env, tmp);
1242 break;
1243 case 1:
1244 gen_helper_fmul_STN_ST0(cpu_env, tmp);
1245 break;
1246 case 4:
1247 gen_helper_fsubr_STN_ST0(cpu_env, tmp);
1248 break;
1249 case 5:
1250 gen_helper_fsub_STN_ST0(cpu_env, tmp);
1251 break;
1252 case 6:
1253 gen_helper_fdivr_STN_ST0(cpu_env, tmp);
1254 break;
1255 case 7:
1256 gen_helper_fdiv_STN_ST0(cpu_env, tmp);
1257 break;
1258 }
1259 }
1260
1261 /* if d == OR_TMP0, it means memory operand (address in A0) */
1262 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
1263 {
1264 if (d != OR_TMP0) {
1265 gen_op_mov_v_reg(ot, cpu_T0, d);
1266 } else if (!(s1->prefix & PREFIX_LOCK)) {
1267 gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
1268 }
1269 switch(op) {
1270 case OP_ADCL:
1271 gen_compute_eflags_c(s1, cpu_tmp4);
1272 if (s1->prefix & PREFIX_LOCK) {
1273 tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
1274 tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
1275 s1->mem_index, ot | MO_LE);
1276 } else {
1277 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
1278 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
1279 gen_op_st_rm_T0_A0(s1, ot, d);
1280 }
1281 gen_op_update3_cc(cpu_tmp4);
1282 set_cc_op(s1, CC_OP_ADCB + ot);
1283 break;
1284 case OP_SBBL:
1285 gen_compute_eflags_c(s1, cpu_tmp4);
1286 if (s1->prefix & PREFIX_LOCK) {
1287 tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
1288 tcg_gen_neg_tl(cpu_T0, cpu_T0);
1289 tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
1290 s1->mem_index, ot | MO_LE);
1291 } else {
1292 tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
1293 tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
1294 gen_op_st_rm_T0_A0(s1, ot, d);
1295 }
1296 gen_op_update3_cc(cpu_tmp4);
1297 set_cc_op(s1, CC_OP_SBBB + ot);
1298 break;
1299 case OP_ADDL:
1300 if (s1->prefix & PREFIX_LOCK) {
1301 tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
1302 s1->mem_index, ot | MO_LE);
1303 } else {
1304 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
1305 gen_op_st_rm_T0_A0(s1, ot, d);
1306 }
1307 gen_op_update2_cc();
1308 set_cc_op(s1, CC_OP_ADDB + ot);
1309 break;
1310 case OP_SUBL:
1311 if (s1->prefix & PREFIX_LOCK) {
1312 tcg_gen_neg_tl(cpu_T0, cpu_T1);
1313 tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0,
1314 s1->mem_index, ot | MO_LE);
1315 tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1);
1316 } else {
1317 tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
1318 tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
1319 gen_op_st_rm_T0_A0(s1, ot, d);
1320 }
1321 gen_op_update2_cc();
1322 set_cc_op(s1, CC_OP_SUBB + ot);
1323 break;
1324 default:
1325 case OP_ANDL:
1326 if (s1->prefix & PREFIX_LOCK) {
1327 tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
1328 s1->mem_index, ot | MO_LE);
1329 } else {
1330 tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
1331 gen_op_st_rm_T0_A0(s1, ot, d);
1332 }
1333 gen_op_update1_cc();
1334 set_cc_op(s1, CC_OP_LOGICB + ot);
1335 break;
1336 case OP_ORL:
1337 if (s1->prefix & PREFIX_LOCK) {
1338 tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
1339 s1->mem_index, ot | MO_LE);
1340 } else {
1341 tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
1342 gen_op_st_rm_T0_A0(s1, ot, d);
1343 }
1344 gen_op_update1_cc();
1345 set_cc_op(s1, CC_OP_LOGICB + ot);
1346 break;
1347 case OP_XORL:
1348 if (s1->prefix & PREFIX_LOCK) {
1349 tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
1350 s1->mem_index, ot | MO_LE);
1351 } else {
1352 tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
1353 gen_op_st_rm_T0_A0(s1, ot, d);
1354 }
1355 gen_op_update1_cc();
1356 set_cc_op(s1, CC_OP_LOGICB + ot);
1357 break;
1358 case OP_CMPL:
1359 tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
1360 tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
1361 tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
1362 set_cc_op(s1, CC_OP_SUBB + ot);
1363 break;
1364 }
1365 }
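/* Note how the LOCKed forms above avoid a separate load/store pair:
   the whole read-modify-write is folded into one tcg_gen_atomic_*
   operation on the address in A0, and for ADC/SBB/SUB the second
   operand (plus carry, or negated) is prepared first so that a plain
   atomic add is the only primitive needed. */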
1366
1367 /* if d == OR_TMP0, it means memory operand (address in A0) */
1368 static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
1369 {
1370 if (s1->prefix & PREFIX_LOCK) {
1371 tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
1372 tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
1373 s1->mem_index, ot | MO_LE);
1374 } else {
1375 if (d != OR_TMP0) {
1376 gen_op_mov_v_reg(ot, cpu_T0, d);
1377 } else {
1378 gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
1379 }
1380 tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
1381 gen_op_st_rm_T0_A0(s1, ot, d);
1382 }
1383
1384 gen_compute_eflags_c(s1, cpu_cc_src);
1385 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
1386 set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
1387 }
1388
1389 static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
1390 TCGv shm1, TCGv count, bool is_right)
1391 {
1392 TCGv_i32 z32, s32, oldop;
1393 TCGv z_tl;
1394
1395 /* Store the results into the CC variables. If we know that the
1396    variable must be dead, store unconditionally.  Otherwise we
1397    must take care not to disrupt the current contents.  */
1398 z_tl = tcg_const_tl(0);
1399 if (cc_op_live[s->cc_op] & USES_CC_DST) {
1400 tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
1401 result, cpu_cc_dst);
1402 } else {
1403 tcg_gen_mov_tl(cpu_cc_dst, result);
1404 }
1405 if (cc_op_live[s->cc_op] & USES_CC_SRC) {
1406 tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
1407 shm1, cpu_cc_src);
1408 } else {
1409 tcg_gen_mov_tl(cpu_cc_src, shm1);
1410 }
1411 tcg_temp_free(z_tl);
1412
1413 /* Get the two potential CC_OP values into temporaries. */
1414 tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1415 if (s->cc_op == CC_OP_DYNAMIC) {
1416 oldop = cpu_cc_op;
1417 } else {
1418 tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
1419 oldop = cpu_tmp3_i32;
1420 }
1421
1422 /* Conditionally store the CC_OP value. */
1423 z32 = tcg_const_i32(0);
1424 s32 = tcg_temp_new_i32();
1425 tcg_gen_trunc_tl_i32(s32, count);
1426 tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
1427 tcg_temp_free_i32(z32);
1428 tcg_temp_free_i32(s32);
1429
1430 /* The CC_OP value is no longer predictable. */
1431 set_cc_op(s, CC_OP_DYNAMIC);
1432 }
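/* The conditional stores above exist because a shift by zero must
   leave EFLAGS untouched on x86: e.g. "shl %cl, %eax" with CL == 0
   keeps the previous flags, so the CC data and the CC_OP value are
   only replaced when the masked count is non-zero. */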
1433
1434 static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1435 int is_right, int is_arith)
1436 {
1437 target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1438
1439 /* load */
1440 if (op1 == OR_TMP0) {
1441 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1442 } else {
1443 gen_op_mov_v_reg(ot, cpu_T0, op1);
1444 }
1445
1446 tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
1447 tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);
1448
1449 if (is_right) {
1450 if (is_arith) {
1451 gen_exts(ot, cpu_T0);
1452 tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1453 tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
1454 } else {
1455 gen_extu(ot, cpu_T0);
1456 tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1457 tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
1458 }
1459 } else {
1460 tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1461 tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
1462 }
1463
1464 /* store */
1465 gen_op_st_rm_T0_A0(s, ot, op1);
1466
1467 gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
1468 }
1469
1470 static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1471 int is_right, int is_arith)
1472 {
1473 int mask = (ot == MO_64 ? 0x3f : 0x1f);
1474
1475 /* load */
1476 if (op1 == OR_TMP0)
1477 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1478 else
1479 gen_op_mov_v_reg(ot, cpu_T0, op1);
1480
1481 op2 &= mask;
1482 if (op2 != 0) {
1483 if (is_right) {
1484 if (is_arith) {
1485 gen_exts(ot, cpu_T0);
1486 tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
1487 tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
1488 } else {
1489 gen_extu(ot, cpu_T0);
1490 tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
1491 tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
1492 }
1493 } else {
1494 tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
1495 tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
1496 }
1497 }
1498
1499 /* store */
1500 gen_op_st_rm_T0_A0(s, ot, op1);
1501
1502 /* update eflags if non zero shift */
1503 if (op2 != 0) {
1504 tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
1505 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
1506 set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1507 }
1508 }
1509
1510 static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
1511 {
1512 target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1513 TCGv_i32 t0, t1;
1514
1515 /* load */
1516 if (op1 == OR_TMP0) {
1517 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1518 } else {
1519 gen_op_mov_v_reg(ot, cpu_T0, op1);
1520 }
1521
1522 tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
1523
1524 switch (ot) {
1525 case MO_8:
1526 /* Replicate the 8-bit input so that a 32-bit rotate works. */
1527 tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
1528 tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
1529 goto do_long;
1530 case MO_16:
1531 /* Replicate the 16-bit input so that a 32-bit rotate works. */
1532 tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
1533 goto do_long;
1534 do_long:
1535 #ifdef TARGET_X86_64
1536 case MO_32:
1537 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
1538 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
1539 if (is_right) {
1540 tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
1541 } else {
1542 tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
1543 }
1544 tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
1545 break;
1546 #endif
1547 default:
1548 if (is_right) {
1549 tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
1550 } else {
1551 tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
1552 }
1553 break;
1554 }
1555
1556 /* store */
1557 gen_op_st_rm_T0_A0(s, ot, op1);
1558
1559 /* We'll need the flags computed into CC_SRC. */
1560 gen_compute_eflags(s);
1561
1562 /* The value that was "rotated out" is now present at the other end
1563 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1564 since we've computed the flags into CC_SRC, these variables are
1565 currently dead. */
1566 if (is_right) {
1567 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
1568 tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
1569 tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1570 } else {
1571 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
1572 tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
1573 }
1574 tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1575 tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1576
1577 /* Now conditionally store the new CC_OP value. If the shift count
1578 is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1579    Otherwise reuse CC_OP_ADCOX which has the C and O flags split out
1580 exactly as we computed above. */
1581 t0 = tcg_const_i32(0);
1582 t1 = tcg_temp_new_i32();
1583 tcg_gen_trunc_tl_i32(t1, cpu_T1);
1584 tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
1585 tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
1586 tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1587 cpu_tmp2_i32, cpu_tmp3_i32);
1588 tcg_temp_free_i32(t0);
1589 tcg_temp_free_i32(t1);
1590
1591 /* The CC_OP value is no longer predictable. */
1592 set_cc_op(s, CC_OP_DYNAMIC);
1593 }
1594
1595 static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1596 int is_right)
1597 {
1598 int mask = (ot == MO_64 ? 0x3f : 0x1f);
1599 int shift;
1600
1601 /* load */
1602 if (op1 == OR_TMP0) {
1603 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1604 } else {
1605 gen_op_mov_v_reg(ot, cpu_T0, op1);
1606 }
1607
1608 op2 &= mask;
1609 if (op2 != 0) {
1610 switch (ot) {
1611 #ifdef TARGET_X86_64
1612 case MO_32:
1613 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
1614 if (is_right) {
1615 tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1616 } else {
1617 tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1618 }
1619 tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
1620 break;
1621 #endif
1622 default:
1623 if (is_right) {
1624 tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
1625 } else {
1626 tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
1627 }
1628 break;
1629 case MO_8:
1630 mask = 7;
1631 goto do_shifts;
1632 case MO_16:
1633 mask = 15;
1634 do_shifts:
1635 shift = op2 & mask;
1636 if (is_right) {
1637 shift = mask + 1 - shift;
1638 }
1639 gen_extu(ot, cpu_T0);
1640 tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
1641 tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
1642 tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
1643 break;
1644 }
1645 }
1646
1647 /* store */
1648 gen_op_st_rm_T0_A0(s, ot, op1);
1649
1650 if (op2 != 0) {
1651 /* Compute the flags into CC_SRC. */
1652 gen_compute_eflags(s);
1653
1654 /* The value that was "rotated out" is now present at the other end
1655 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1656 since we've computed the flags into CC_SRC, these variables are
1657 currently dead. */
1658 if (is_right) {
1659 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
1660 tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
1661 tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1662 } else {
1663 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
1664 tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
1665 }
1666 tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1667 tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1668 set_cc_op(s, CC_OP_ADCOX);
1669 }
1670 }
1671
1672 /* XXX: add faster immediate = 1 case */
1673 static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1674 int is_right)
1675 {
1676 gen_compute_eflags(s);
1677 assert(s->cc_op == CC_OP_EFLAGS);
1678
1679 /* load */
1680 if (op1 == OR_TMP0)
1681 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1682 else
1683 gen_op_mov_v_reg(ot, cpu_T0, op1);
1684
1685 if (is_right) {
1686 switch (ot) {
1687 case MO_8:
1688 gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1689 break;
1690 case MO_16:
1691 gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1692 break;
1693 case MO_32:
1694 gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1695 break;
1696 #ifdef TARGET_X86_64
1697 case MO_64:
1698 gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1699 break;
1700 #endif
1701 default:
1702 tcg_abort();
1703 }
1704 } else {
1705 switch (ot) {
1706 case MO_8:
1707 gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1708 break;
1709 case MO_16:
1710 gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1711 break;
1712 case MO_32:
1713 gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1714 break;
1715 #ifdef TARGET_X86_64
1716 case MO_64:
1717 gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1718 break;
1719 #endif
1720 default:
1721 tcg_abort();
1722 }
1723 }
1724 /* store */
1725 gen_op_st_rm_T0_A0(s, ot, op1);
1726 }
1727
1728 /* XXX: add faster immediate case */
1729 static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1730 bool is_right, TCGv count_in)
1731 {
1732 target_ulong mask = (ot == MO_64 ? 63 : 31);
1733 TCGv count;
1734
1735 /* load */
1736 if (op1 == OR_TMP0) {
1737 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1738 } else {
1739 gen_op_mov_v_reg(ot, cpu_T0, op1);
1740 }
1741
1742 count = tcg_temp_new();
1743 tcg_gen_andi_tl(count, count_in, mask);
1744
1745 switch (ot) {
1746 case MO_16:
1747 /* Note: we implement the Intel behaviour for shift count > 16.
1748 This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A
1749 portion by constructing it as a 32-bit value. */
1750 if (is_right) {
1751 tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
1752 tcg_gen_mov_tl(cpu_T1, cpu_T0);
1753 tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
1754 } else {
1755 tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
1756 }
1757 /* FALLTHRU */
1758 #ifdef TARGET_X86_64
1759 case MO_32:
1760 /* Concatenate the two 32-bit values and use a 64-bit shift. */
1761 tcg_gen_subi_tl(cpu_tmp0, count, 1);
1762 if (is_right) {
1763 tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
1764 tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1765 tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
1766 } else {
1767 tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
1768 tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1769 tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
1770 tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
1771 tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
1772 }
1773 break;
1774 #endif
1775 default:
1776 tcg_gen_subi_tl(cpu_tmp0, count, 1);
1777 if (is_right) {
1778 tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1779
1780 tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1781 tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
1782 tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
1783 } else {
1784 tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1785 if (ot == MO_16) {
1786 /* Only needed if count > 16, for Intel behaviour. */
1787 tcg_gen_subfi_tl(cpu_tmp4, 33, count);
1788 tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
1789 tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
1790 }
1791
1792 tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1793 tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
1794 tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
1795 }
1796 tcg_gen_movi_tl(cpu_tmp4, 0);
1797 tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
1798 cpu_tmp4, cpu_T1);
1799 tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
1800 break;
1801 }
1802
1803 /* store */
1804 gen_op_st_rm_T0_A0(s, ot, op1);
1805
1806 gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
1807 tcg_temp_free(count);
1808 }
1809
1810 static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1811 {
1812 if (s != OR_TMP1)
1813 gen_op_mov_v_reg(ot, cpu_T1, s);
1814 switch(op) {
1815 case OP_ROL:
1816 gen_rot_rm_T1(s1, ot, d, 0);
1817 break;
1818 case OP_ROR:
1819 gen_rot_rm_T1(s1, ot, d, 1);
1820 break;
1821 case OP_SHL:
1822 case OP_SHL1:
1823 gen_shift_rm_T1(s1, ot, d, 0, 0);
1824 break;
1825 case OP_SHR:
1826 gen_shift_rm_T1(s1, ot, d, 1, 0);
1827 break;
1828 case OP_SAR:
1829 gen_shift_rm_T1(s1, ot, d, 1, 1);
1830 break;
1831 case OP_RCL:
1832 gen_rotc_rm_T1(s1, ot, d, 0);
1833 break;
1834 case OP_RCR:
1835 gen_rotc_rm_T1(s1, ot, d, 1);
1836 break;
1837 }
1838 }
1839
1840 static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
1841 {
1842 switch(op) {
1843 case OP_ROL:
1844 gen_rot_rm_im(s1, ot, d, c, 0);
1845 break;
1846 case OP_ROR:
1847 gen_rot_rm_im(s1, ot, d, c, 1);
1848 break;
1849 case OP_SHL:
1850 case OP_SHL1:
1851 gen_shift_rm_im(s1, ot, d, c, 0, 0);
1852 break;
1853 case OP_SHR:
1854 gen_shift_rm_im(s1, ot, d, c, 1, 0);
1855 break;
1856 case OP_SAR:
1857 gen_shift_rm_im(s1, ot, d, c, 1, 1);
1858 break;
1859 default:
1860 /* currently not optimized */
1861 tcg_gen_movi_tl(cpu_T1, c);
1862 gen_shift(s1, op, ot, d, OR_TMP1);
1863 break;
1864 }
1865 }
1866
1867 /* Decompose an address. */
1868
1869 typedef struct AddressParts {
1870 int def_seg;
1871 int base;
1872 int index;
1873 int scale;
1874 target_long disp;
1875 } AddressParts;
1876
1877 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1878 int modrm)
1879 {
1880 int def_seg, base, index, scale, mod, rm;
1881 target_long disp;
1882 bool havesib;
1883
1884 def_seg = R_DS;
1885 index = -1;
1886 scale = 0;
1887 disp = 0;
1888
1889 mod = (modrm >> 6) & 3;
1890 rm = modrm & 7;
1891 base = rm | REX_B(s);
1892
1893 if (mod == 3) {
1894 /* Normally filtered out earlier, but including this path
1895 simplifies multi-byte nop, as well as bndcl, bndcu, bndcn. */
1896 goto done;
1897 }
1898
1899 switch (s->aflag) {
1900 case MO_64:
1901 case MO_32:
1902 havesib = 0;
1903 if (rm == 4) {
1904 int code = cpu_ldub_code(env, s->pc++);
1905 scale = (code >> 6) & 3;
1906 index = ((code >> 3) & 7) | REX_X(s);
1907 if (index == 4) {
1908 index = -1; /* no index */
1909 }
1910 base = (code & 7) | REX_B(s);
1911 havesib = 1;
1912 }
1913
1914 switch (mod) {
1915 case 0:
1916 if ((base & 7) == 5) {
1917 base = -1;
1918 disp = (int32_t)cpu_ldl_code(env, s->pc);
1919 s->pc += 4;
1920 if (CODE64(s) && !havesib) {
1921 base = -2;
1922 disp += s->pc + s->rip_offset;
1923 }
1924 }
1925 break;
1926 case 1:
1927 disp = (int8_t)cpu_ldub_code(env, s->pc++);
1928 break;
1929 default:
1930 case 2:
1931 disp = (int32_t)cpu_ldl_code(env, s->pc);
1932 s->pc += 4;
1933 break;
1934 }
1935
1936 /* For correct popl handling with esp. */
1937 if (base == R_ESP && s->popl_esp_hack) {
1938 disp += s->popl_esp_hack;
1939 }
1940 if (base == R_EBP || base == R_ESP) {
1941 def_seg = R_SS;
1942 }
1943 break;
1944
1945 case MO_16:
1946 if (mod == 0) {
1947 if (rm == 6) {
1948 base = -1;
1949 disp = cpu_lduw_code(env, s->pc);
1950 s->pc += 2;
1951 break;
1952 }
1953 } else if (mod == 1) {
1954 disp = (int8_t)cpu_ldub_code(env, s->pc++);
1955 } else {
1956 disp = (int16_t)cpu_lduw_code(env, s->pc);
1957 s->pc += 2;
1958 }
1959
1960 switch (rm) {
1961 case 0:
1962 base = R_EBX;
1963 index = R_ESI;
1964 break;
1965 case 1:
1966 base = R_EBX;
1967 index = R_EDI;
1968 break;
1969 case 2:
1970 base = R_EBP;
1971 index = R_ESI;
1972 def_seg = R_SS;
1973 break;
1974 case 3:
1975 base = R_EBP;
1976 index = R_EDI;
1977 def_seg = R_SS;
1978 break;
1979 case 4:
1980 base = R_ESI;
1981 break;
1982 case 5:
1983 base = R_EDI;
1984 break;
1985 case 6:
1986 base = R_EBP;
1987 def_seg = R_SS;
1988 break;
1989 default:
1990 case 7:
1991 base = R_EBX;
1992 break;
1993 }
1994 break;
1995
1996 default:
1997 tcg_abort();
1998 }
1999
2000 done:
2001 return (AddressParts){ def_seg, base, index, scale, disp };
2002 }
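/* Decoding example (hypothetical operand): a 32-bit access through
   0x10(%ebx,%esi,2) yields { .def_seg = R_DS, .base = R_EBX,
   .index = R_ESI, .scale = 1, .disp = 0x10 }; the raw 2-bit SIB
   scale field is kept as-is and used as a shift count by
   gen_lea_modrm_1() below. */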
2003
2004 /* Compute the address, with a minimum number of TCG ops. */
2005 static TCGv gen_lea_modrm_1(AddressParts a)
2006 {
2007 TCGv ea;
2008
2009 TCGV_UNUSED(ea);
2010 if (a.index >= 0) {
2011 if (a.scale == 0) {
2012 ea = cpu_regs[a.index];
2013 } else {
2014 tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
2015 ea = cpu_A0;
2016 }
2017 if (a.base >= 0) {
2018 tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
2019 ea = cpu_A0;
2020 }
2021 } else if (a.base >= 0) {
2022 ea = cpu_regs[a.base];
2023 }
2024 if (TCGV_IS_UNUSED(ea)) {
2025 tcg_gen_movi_tl(cpu_A0, a.disp);
2026 ea = cpu_A0;
2027 } else if (a.disp != 0) {
2028 tcg_gen_addi_tl(cpu_A0, ea, a.disp);
2029 ea = cpu_A0;
2030 }
2031
2032 return ea;
2033 }
2034
2035 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2036 {
2037 AddressParts a = gen_lea_modrm_0(env, s, modrm);
2038 TCGv ea = gen_lea_modrm_1(a);
2039 gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2040 }
2041
2042 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2043 {
2044 (void)gen_lea_modrm_0(env, s, modrm);
2045 }
2046
2047 /* Used for BNDCL, BNDCU, BNDCN. */
2048 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2049 TCGCond cond, TCGv_i64 bndv)
2050 {
2051 TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));
2052
2053 tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
2054 if (!CODE64(s)) {
2055 tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
2056 }
2057 tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
2058 tcg_gen_extrl_i64_i32(cpu_tmp2_i32, cpu_tmp1_i64);
2059 gen_helper_bndck(cpu_env, cpu_tmp2_i32);
2060 }
2061
2062 /* used for LEA and MOV AX, mem */
2063 static void gen_add_A0_ds_seg(DisasContext *s)
2064 {
2065 gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
2066 }
2067
2068 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2069 OR_TMP0 */
2070 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2071 TCGMemOp ot, int reg, int is_store)
2072 {
2073 int mod, rm;
2074
2075 mod = (modrm >> 6) & 3;
2076 rm = (modrm & 7) | REX_B(s);
2077 if (mod == 3) {
2078 if (is_store) {
2079 if (reg != OR_TMP0)
2080 gen_op_mov_v_reg(ot, cpu_T0, reg);
2081 gen_op_mov_reg_v(ot, rm, cpu_T0);
2082 } else {
2083 gen_op_mov_v_reg(ot, cpu_T0, rm);
2084 if (reg != OR_TMP0)
2085 gen_op_mov_reg_v(ot, reg, cpu_T0);
2086 }
2087 } else {
2088 gen_lea_modrm(env, s, modrm);
2089 if (is_store) {
2090 if (reg != OR_TMP0)
2091 gen_op_mov_v_reg(ot, cpu_T0, reg);
2092 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
2093 } else {
2094 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
2095 if (reg != OR_TMP0)
2096 gen_op_mov_reg_v(ot, reg, cpu_T0);
2097 }
2098 }
2099 }
2100
2101 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2102 {
2103 uint32_t ret;
2104
2105 switch (ot) {
2106 case MO_8:
2107 ret = cpu_ldub_code(env, s->pc);
2108 s->pc++;
2109 break;
2110 case MO_16:
2111 ret = cpu_lduw_code(env, s->pc);
2112 s->pc += 2;
2113 break;
2114 case MO_32:
2115 #ifdef TARGET_X86_64
2116 case MO_64:
2117 #endif
2118 ret = cpu_ldl_code(env, s->pc);
2119 s->pc += 4;
2120 break;
2121 default:
2122 tcg_abort();
2123 }
2124 return ret;
2125 }
2126
2127 static inline int insn_const_size(TCGMemOp ot)
2128 {
2129 if (ot <= MO_32) {
2130 return 1 << ot;
2131 } else {
2132 return 4;
2133 }
2134 }
2135
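/* Direct block chaining (goto_tb) is only used when the destination stays
   on the same guest page as this TB or as the current instruction, so the
   page-level assumptions made at translation time still hold for the jump
   target; cross-page jumps exit the TB instead (see gen_goto_tb below). */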
2136 static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2137 {
2138 #ifndef CONFIG_USER_ONLY
2139 return (pc & TARGET_PAGE_MASK) == (s->tb->pc & TARGET_PAGE_MASK) ||
2140 (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2141 #else
2142 return true;
2143 #endif
2144 }
2145
2146 static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2147 {
2148 target_ulong pc = s->cs_base + eip;
2149
2150 if (use_goto_tb(s, pc)) {
2151 /* jump to same page: we can use a direct jump */
2152 tcg_gen_goto_tb(tb_num);
2153 gen_jmp_im(eip);
2154 tcg_gen_exit_tb((uintptr_t)s->tb + tb_num);
2155 } else {
2156 /* jump to another page: currently not optimized */
2157 gen_jmp_im(eip);
2158 gen_eob(s);
2159 }
2160 }
2161
2162 static inline void gen_jcc(DisasContext *s, int b,
2163 target_ulong val, target_ulong next_eip)
2164 {
2165 TCGLabel *l1, *l2;
2166
2167 if (s->jmp_opt) {
2168 l1 = gen_new_label();
2169 gen_jcc1(s, b, l1);
2170
2171 gen_goto_tb(s, 0, next_eip);
2172
2173 gen_set_label(l1);
2174 gen_goto_tb(s, 1, val);
2175 s->is_jmp = DISAS_TB_JUMP;
2176 } else {
2177 l1 = gen_new_label();
2178 l2 = gen_new_label();
2179 gen_jcc1(s, b, l1);
2180
2181 gen_jmp_im(next_eip);
2182 tcg_gen_br(l2);
2183
2184 gen_set_label(l1);
2185 gen_jmp_im(val);
2186 gen_set_label(l2);
2187 gen_eob(s);
2188 }
2189 }
2190
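/* CMOVcc: the source operand is loaded unconditionally (a memory source is
   read even when the condition is false, matching hardware behaviour) and a
   single movcond then selects between it and the old destination value. */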
2191 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
2192 int modrm, int reg)
2193 {
2194 CCPrepare cc;
2195
2196 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2197
2198 cc = gen_prepare_cc(s, b, cpu_T1);
2199 if (cc.mask != -1) {
2200 TCGv t0 = tcg_temp_new();
2201 tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2202 cc.reg = t0;
2203 }
2204 if (!cc.use_reg2) {
2205 cc.reg2 = tcg_const_tl(cc.imm);
2206 }
2207
2208 tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
2209 cpu_T0, cpu_regs[reg]);
2210 gen_op_mov_reg_v(ot, reg, cpu_T0);
2211
2212 if (cc.mask != -1) {
2213 tcg_temp_free(cc.reg);
2214 }
2215 if (!cc.use_reg2) {
2216 tcg_temp_free(cc.reg2);
2217 }
2218 }
2219
2220 static inline void gen_op_movl_T0_seg(int seg_reg)
2221 {
2222 tcg_gen_ld32u_tl(cpu_T0, cpu_env,
2223 offsetof(CPUX86State,segs[seg_reg].selector));
2224 }
2225
2226 static inline void gen_op_movl_seg_T0_vm(int seg_reg)
2227 {
2228 tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
2229 tcg_gen_st32_tl(cpu_T0, cpu_env,
2230 offsetof(CPUX86State,segs[seg_reg].selector));
2231 tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
2232 }
2233
2234 /* move T0 to seg_reg and compute if the CPU state may change. Never
2235 call this function with seg_reg == R_CS */
2236 static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2237 {
2238 if (s->pe && !s->vm86) {
2239 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
2240 gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
2241 /* abort translation because the addseg value may change or
2242 because ss32 may change. For R_SS, translation must always
2243 stop, since special handling is needed to inhibit hardware
2244 interrupts for the next instruction */
2245 if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
2246 s->is_jmp = DISAS_TB_JUMP;
2247 } else {
2248 gen_op_movl_seg_T0_vm(seg_reg);
2249 if (seg_reg == R_SS)
2250 s->is_jmp = DISAS_TB_JUMP;
2251 }
2252 }
2253
2254 static inline int svm_is_rep(int prefixes)
2255 {
2256 return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2257 }
2258
2259 static inline void
2260 gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2261 uint32_t type, uint64_t param)
2262 {
2263 /* no SVM activated; fast case */
2264 if (likely(!(s->flags & HF_SVMI_MASK)))
2265 return;
2266 gen_update_cc_op(s);
2267 gen_jmp_im(pc_start - s->cs_base);
2268 gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2269 tcg_const_i64(param));
2270 }
2271
2272 static inline void
2273 gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2274 {
2275 gen_svm_check_intercept_param(s, pc_start, type, 0);
2276 }
2277
2278 static inline void gen_stack_update(DisasContext *s, int addend)
2279 {
2280 gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend);
2281 }
2282
2283 /* Generate a push. It depends on ss32, addseg and dflag. */
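/* The store is emitted before ESP is written back, so a faulting push
   leaves ESP unchanged and the exception remains precise. */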
2284 static void gen_push_v(DisasContext *s, TCGv val)
2285 {
2286 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2287 TCGMemOp a_ot = mo_stacksize(s);
2288 int size = 1 << d_ot;
2289 TCGv new_esp = cpu_A0;
2290
2291 tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
2292
2293 if (!CODE64(s)) {
2294 if (s->addseg) {
2295 new_esp = cpu_tmp4;
2296 tcg_gen_mov_tl(new_esp, cpu_A0);
2297 }
2298 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2299 }
2300
2301 gen_op_st_v(s, d_ot, val, cpu_A0);
2302 gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
2303 }
2304
2305 /* Pop in two steps (load first, ESP update afterwards) so that a faulting load leaves ESP unchanged (precise exceptions). */
2306 static TCGMemOp gen_pop_T0(DisasContext *s)
2307 {
2308 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2309
2310 gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2311 gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2312
2313 return d_ot;
2314 }
2315
2316 static inline void gen_pop_update(DisasContext *s, TCGMemOp ot)
2317 {
2318 gen_stack_update(s, 1 << ot);
2319 }
2320
2321 static inline void gen_stack_A0(DisasContext *s)
2322 {
2323 gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2324 }
2325
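/* PUSHA stores EAX..EDI from the highest slot downwards (EAX just below the
   original ESP, EDI lowest); the ESP image that gets pushed is the value ESP
   had before the instruction, since the register itself is only updated
   after the loop. */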
2326 static void gen_pusha(DisasContext *s)
2327 {
2328 TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2329 TCGMemOp d_ot = s->dflag;
2330 int size = 1 << d_ot;
2331 int i;
2332
2333 for (i = 0; i < 8; i++) {
2334 tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
2335 gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2336 gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
2337 }
2338
2339 gen_stack_update(s, -8 * size);
2340 }
2341
2342 static void gen_popa(DisasContext *s)
2343 {
2344 TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2345 TCGMemOp d_ot = s->dflag;
2346 int size = 1 << d_ot;
2347 int i;
2348
2349 for (i = 0; i < 8; i++) {
2350 /* ESP is not reloaded */
2351 if (7 - i == R_ESP) {
2352 continue;
2353 }
2354 tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
2355 gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2356 gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2357 gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
2358 }
2359
2360 gen_stack_update(s, 8 * size);
2361 }
2362
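/* ENTER: push EBP and keep the resulting stack pointer (FrameTemp) in T1.
   For a non-zero nesting level, level-1 frame pointers are copied from the
   old frame and FrameTemp itself is pushed as the last one; finally
   EBP = FrameTemp and ESP = FrameTemp - level * size - esp_addend.
   E.g. ENTER with esp_addend = 0x20 and level = 0 just pushes EBP, points
   EBP at the new top of stack and reserves 0x20 bytes. */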
2363 static void gen_enter(DisasContext *s, int esp_addend, int level)
2364 {
2365 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2366 TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2367 int size = 1 << d_ot;
2368
2369 /* Push BP; compute FrameTemp into T1. */
2370 tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
2371 gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
2372 gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
2373
2374 level &= 31;
2375 if (level != 0) {
2376 int i;
2377
2378 /* Copy level-1 pointers from the previous frame. */
2379 for (i = 1; i < level; ++i) {
2380 tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
2381 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2382 gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
2383
2384 tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
2385 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2386 gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
2387 }
2388
2389 /* Push the current FrameTemp as the last level. */
2390 tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
2391 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2392 gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
2393 }
2394
2395 /* Copy the FrameTemp value to EBP. */
2396 gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
2397
2398 /* Compute the final value of ESP. */
2399 tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
2400 gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2401 }
2402
2403 static void gen_leave(DisasContext *s)
2404 {
2405 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2406 TCGMemOp a_ot = mo_stacksize(s);
2407
2408 gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2409 gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2410
2411 tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
2412
2413 gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
2414 gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2415 }
2416
2417 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
2418 {
2419 gen_update_cc_op(s);
2420 gen_jmp_im(cur_eip);
2421 gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
2422 s->is_jmp = DISAS_TB_JUMP;
2423 }
2424
2425 /* Generate #UD for the current instruction. The assumption here is that
2426 the instruction is known, but it isn't allowed in the current cpu mode. */
2427 static void gen_illegal_opcode(DisasContext *s)
2428 {
2429 gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
2430 }
2431
2432 /* Similarly, except that the assumption here is that we don't decode
2433 the instruction at all -- either a missing opcode, an unimplemented
2434 feature, or just a bogus instruction stream. */
2435 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2436 {
2437 gen_illegal_opcode(s);
2438
2439 if (qemu_loglevel_mask(LOG_UNIMP)) {
2440 target_ulong pc = s->pc_start, end = s->pc;
2441 qemu_log_lock();
2442 qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2443 for (; pc < end; ++pc) {
2444 qemu_log(" %02x", cpu_ldub_code(env, pc));
2445 }
2446 qemu_log("\n");
2447 qemu_log_unlock();
2448 }
2449 }
2450
2451 /* an interrupt is different from an exception because of the
2452 privilege checks */
2453 static void gen_interrupt(DisasContext *s, int intno,
2454 target_ulong cur_eip, target_ulong next_eip)
2455 {
2456 gen_update_cc_op(s);
2457 gen_jmp_im(cur_eip);
2458 gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2459 tcg_const_i32(next_eip - cur_eip));
2460 s->is_jmp = DISAS_TB_JUMP;
2461 }
2462
2463 static void gen_debug(DisasContext *s, target_ulong cur_eip)
2464 {
2465 gen_update_cc_op(s);
2466 gen_jmp_im(cur_eip);
2467 gen_helper_debug(cpu_env);
2468 s->is_jmp = DISAS_TB_JUMP;
2469 }
2470
2471 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2472 {
2473 if ((s->flags & mask) == 0) {
2474 TCGv_i32 t = tcg_temp_new_i32();
2475 tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2476 tcg_gen_ori_i32(t, t, mask);
2477 tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2478 tcg_temp_free_i32(t);
2479 s->flags |= mask;
2480 }
2481 }
2482
2483 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2484 {
2485 if (s->flags & mask) {
2486 TCGv_i32 t = tcg_temp_new_i32();
2487 tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2488 tcg_gen_andi_i32(t, t, ~mask);
2489 tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2490 tcg_temp_free_i32(t);
2491 s->flags &= ~mask;
2492 }
2493 }
2494
2495 /* Clear BND registers during legacy branches. */
2496 static void gen_bnd_jmp(DisasContext *s)
2497 {
2498 /* Clear the registers only if the BND prefix is missing, MPX is
2499 enabled, and the BNDREGs are already known to be in use (non-zero).
2500 The helper itself will check BNDPRESERVE at runtime. */
2501 if ((s->prefix & PREFIX_REPNZ) == 0
2502 && (s->flags & HF_MPX_EN_MASK) != 0
2503 && (s->flags & HF_MPX_IU_MASK) != 0) {
2504 gen_helper_bnd_jmp(cpu_env);
2505 }
2506 }
2507
2508 /* Generate an end of block. Trace exception is also generated if needed.
2509 If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2510 If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2511 S->TF. This is used by the syscall/sysret insns. */
2512 static void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2513 {
2514 gen_update_cc_op(s);
2515
2516 /* If several instructions disable interrupts, only the first does it. */
2517 if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2518 gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2519 } else {
2520 gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2521 }
2522
2523 if (s->tb->flags & HF_RF_MASK) {
2524 gen_helper_reset_rf(cpu_env);
2525 }
2526 if (s->singlestep_enabled) {
2527 gen_helper_debug(cpu_env);
2528 } else if (recheck_tf) {
2529 gen_helper_rechecking_single_step(cpu_env);
2530 tcg_gen_exit_tb(0);
2531 } else if (s->tf) {
2532 gen_helper_single_step(cpu_env);
2533 } else {
2534 tcg_gen_exit_tb(0);
2535 }
2536 s->is_jmp = DISAS_TB_JUMP;
2537 }
2538
2539 /* End of block.
2540 If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. */
2541 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2542 {
2543 gen_eob_worker(s, inhibit, false);
2544 }
2545
2546 /* End of block, resetting the inhibit irq flag. */
2547 static void gen_eob(DisasContext *s)
2548 {
2549 gen_eob_worker(s, false, false);
2550 }
2551
2552 /* generate a jump to eip. No segment change must happen before as a
2553 direct call to the next block may occur */
2554 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2555 {
2556 gen_update_cc_op(s);
2557 set_cc_op(s, CC_OP_DYNAMIC);
2558 if (s->jmp_opt) {
2559 gen_goto_tb(s, tb_num, eip);
2560 s->is_jmp = DISAS_TB_JUMP;
2561 } else {
2562 gen_jmp_im(eip);
2563 gen_eob(s);
2564 }
2565 }
2566
2567 static void gen_jmp(DisasContext *s, target_ulong eip)
2568 {
2569 gen_jmp_tb(s, eip, 0);
2570 }
2571
2572 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2573 {
2574 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2575 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
2576 }
2577
2578 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2579 {
2580 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
2581 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2582 }
2583
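/* 128-bit (oct-word) accesses are emitted as two little-endian 64-bit
   loads/stores covering the low and high quadwords of the ZMMReg. */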
2584 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2585 {
2586 int mem_index = s->mem_index;
2587 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2588 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2589 tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2590 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2591 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2592 }
2593
2594 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2595 {
2596 int mem_index = s->mem_index;
2597 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2598 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2599 tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2600 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2601 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2602 }
2603
2604 static inline void gen_op_movo(int d_offset, int s_offset)
2605 {
2606 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2607 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2608 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2609 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2610 }
2611
2612 static inline void gen_op_movq(int d_offset, int s_offset)
2613 {
2614 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
2615 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2616 }
2617
2618 static inline void gen_op_movl(int d_offset, int s_offset)
2619 {
2620 tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
2621 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
2622 }
2623
2624 static inline void gen_op_movq_env_0(int d_offset)
2625 {
2626 tcg_gen_movi_i64(cpu_tmp1_i64, 0);
2627 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2628 }
2629
2630 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2631 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2632 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2633 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2634 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2635 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2636 TCGv_i32 val);
2637 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2638 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2639 TCGv val);
2640
2641 #define SSE_SPECIAL ((void *)1)
2642 #define SSE_DUMMY ((void *)2)
2643
2644 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2645 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2646 gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2647
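/* sse_op_table1 is indexed by the opcode byte following 0x0f and by the
   mandatory-prefix selector b1 computed in gen_sse(): 0 = no prefix
   (MMX / "ps" forms), 1 = 0x66 ("pd"), 2 = 0xf3 ("ss"), 3 = 0xf2 ("sd").
   SSE_SPECIAL entries are decoded by hand in gen_sse(), and SSE_DUMMY
   marks opcodes such as emms/femms and the 3DNow! escape that get
   dedicated handling. */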
2648 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2649 /* 3DNow! extensions */
2650 [0x0e] = { SSE_DUMMY }, /* femms */
2651 [0x0f] = { SSE_DUMMY }, /* pf... */
2652 /* pure SSE operations */
2653 [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2654 [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2655 [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2656 [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */
2657 [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2658 [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2659 [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */
2660 [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */
2661
2662 [0x28] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
2663 [0x29] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
2664 [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2665 [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2666 [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2667 [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2668 [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2669 [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2670 [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2671 [0x51] = SSE_FOP(sqrt),
2672 [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2673 [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2674 [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2675 [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2676 [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2677 [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2678 [0x58] = SSE_FOP(add),
2679 [0x59] = SSE_FOP(mul),
2680 [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2681 gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2682 [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2683 [0x5c] = SSE_FOP(sub),
2684 [0x5d] = SSE_FOP(min),
2685 [0x5e] = SSE_FOP(div),
2686 [0x5f] = SSE_FOP(max),
2687
2688 [0xc2] = SSE_FOP(cmpeq),
2689 [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2690 (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2691
2692 /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX. */
2693 [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2694 [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2695
2696 /* MMX ops and their SSE extensions */
2697 [0x60] = MMX_OP2(punpcklbw),
2698 [0x61] = MMX_OP2(punpcklwd),
2699 [0x62] = MMX_OP2(punpckldq),
2700 [0x63] = MMX_OP2(packsswb),
2701 [0x64] = MMX_OP2(pcmpgtb),
2702 [0x65] = MMX_OP2(pcmpgtw),
2703 [0x66] = MMX_OP2(pcmpgtl),
2704 [0x67] = MMX_OP2(packuswb),
2705 [0x68] = MMX_OP2(punpckhbw),
2706 [0x69] = MMX_OP2(punpckhwd),
2707 [0x6a] = MMX_OP2(punpckhdq),
2708 [0x6b] = MMX_OP2(packssdw),
2709 [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2710 [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2711 [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2712 [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2713 [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2714 (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2715 (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2716 (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2717 [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2718 [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2719 [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2720 [0x74] = MMX_OP2(pcmpeqb),
2721 [0x75] = MMX_OP2(pcmpeqw),
2722 [0x76] = MMX_OP2(pcmpeql),
2723 [0x77] = { SSE_DUMMY }, /* emms */
2724 [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2725 [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2726 [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2727 [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2728 [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2729 [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2730 [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2731 [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2732 [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2733 [0xd1] = MMX_OP2(psrlw),
2734 [0xd2] = MMX_OP2(psrld),
2735 [0xd3] = MMX_OP2(psrlq),
2736 [0xd4] = MMX_OP2(paddq),
2737 [0xd5] = MMX_OP2(pmullw),
2738 [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2739 [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2740 [0xd8] = MMX_OP2(psubusb),
2741 [0xd9] = MMX_OP2(psubusw),
2742 [0xda] = MMX_OP2(pminub),
2743 [0xdb] = MMX_OP2(pand),
2744 [0xdc] = MMX_OP2(paddusb),
2745 [0xdd] = MMX_OP2(paddusw),
2746 [0xde] = MMX_OP2(pmaxub),
2747 [0xdf] = MMX_OP2(pandn),
2748 [0xe0] = MMX_OP2(pavgb),
2749 [0xe1] = MMX_OP2(psraw),
2750 [0xe2] = MMX_OP2(psrad),
2751 [0xe3] = MMX_OP2(pavgw),
2752 [0xe4] = MMX_OP2(pmulhuw),
2753 [0xe5] = MMX_OP2(pmulhw),
2754 [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2755 [0xe7] = { SSE_SPECIAL, SSE_SPECIAL }, /* movntq, movntdq */
2756 [0xe8] = MMX_OP2(psubsb),
2757 [0xe9] = MMX_OP2(psubsw),
2758 [0xea] = MMX_OP2(pminsw),
2759 [0xeb] = MMX_OP2(por),
2760 [0xec] = MMX_OP2(paddsb),
2761 [0xed] = MMX_OP2(paddsw),
2762 [0xee] = MMX_OP2(pmaxsw),
2763 [0xef] = MMX_OP2(pxor),
2764 [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2765 [0xf1] = MMX_OP2(psllw),
2766 [0xf2] = MMX_OP2(pslld),
2767 [0xf3] = MMX_OP2(psllq),
2768 [0xf4] = MMX_OP2(pmuludq),
2769 [0xf5] = MMX_OP2(pmaddwd),
2770 [0xf6] = MMX_OP2(psadbw),
2771 [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2772 (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2773 [0xf8] = MMX_OP2(psubb),
2774 [0xf9] = MMX_OP2(psubw),
2775 [0xfa] = MMX_OP2(psubl),
2776 [0xfb] = MMX_OP2(psubq),
2777 [0xfc] = MMX_OP2(paddb),
2778 [0xfd] = MMX_OP2(paddw),
2779 [0xfe] = MMX_OP2(paddl),
2780 };
2781
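/* sse_op_table2 covers the shift-by-immediate groups 0x71/0x72/0x73.
   It is indexed by ((opcode - 1) & 3) * 8 + modrm.reg, i.e. rows 0/8/16
   for word/dword/qword shifts, with the reg field selecting srl (2),
   sra (4) or sll (6) and, for XMM only, srldq (3) / slldq (7); the second
   index picks the MMX or XMM helper. */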
2782 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2783 [0 + 2] = MMX_OP2(psrlw),
2784 [0 + 4] = MMX_OP2(psraw),
2785 [0 + 6] = MMX_OP2(psllw),
2786 [8 + 2] = MMX_OP2(psrld),
2787 [8 + 4] = MMX_OP2(psrad),
2788 [8 + 6] = MMX_OP2(pslld),
2789 [16 + 2] = MMX_OP2(psrlq),
2790 [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2791 [16 + 6] = MMX_OP2(psllq),
2792 [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2793 };
2794
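/* sse_op_table3a* convert integers to scalar floats (cvtsi2ss/cvtsi2sd,
   with 32- or 64-bit sources); sse_op_table3b* convert back, ordered so
   that the truncating 0x2c forms come before the rounding 0x2d forms for
   each of the ss and sd variants. */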
2795 static const SSEFunc_0_epi sse_op_table3ai[] = {
2796 gen_helper_cvtsi2ss,
2797 gen_helper_cvtsi2sd
2798 };
2799
2800 #ifdef TARGET_X86_64
2801 static const SSEFunc_0_epl sse_op_table3aq[] = {
2802 gen_helper_cvtsq2ss,
2803 gen_helper_cvtsq2sd
2804 };
2805 #endif
2806
2807 static const SSEFunc_i_ep sse_op_table3bi[] = {
2808 gen_helper_cvttss2si,
2809 gen_helper_cvtss2si,
2810 gen_helper_cvttsd2si,
2811 gen_helper_cvtsd2si
2812 };
2813
2814 #ifdef TARGET_X86_64
2815 static const SSEFunc_l_ep sse_op_table3bq[] = {
2816 gen_helper_cvttss2sq,
2817 gen_helper_cvtss2sq,
2818 gen_helper_cvttsd2sq,
2819 gen_helper_cvtsd2sq
2820 };
2821 #endif
2822
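/* cmpps/cmppd/cmpss/cmpsd: sse_op_table4 is indexed by the imm8
   comparison predicate (0 = eq, 1 = lt, 2 = le, 3 = unord, 4 = neq,
   5 = nlt, 6 = nle, 7 = ord). */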
2823 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2824 SSE_FOP(cmpeq),
2825 SSE_FOP(cmplt),
2826 SSE_FOP(cmple),
2827 SSE_FOP(cmpunord),
2828 SSE_FOP(cmpneq),
2829 SSE_FOP(cmpnlt),
2830 SSE_FOP(cmpnle),
2831 SSE_FOP(cmpord),
2832 };
2833
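/* 3DNow! instructions are encoded as "0f 0f <modrm> <imm8>";
   sse_op_table5 is indexed by that trailing suffix byte. */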
2834 static const SSEFunc_0_epp sse_op_table5[256] = {
2835 [0x0c] = gen_helper_pi2fw,
2836 [0x0d] = gen_helper_pi2fd,
2837 [0x1c] = gen_helper_pf2iw,
2838 [0x1d] = gen_helper_pf2id,
2839 [0x8a] = gen_helper_pfnacc,
2840 [0x8e] = gen_helper_pfpnacc,
2841 [0x90] = gen_helper_pfcmpge,
2842 [0x94] = gen_helper_pfmin,
2843 [0x96] = gen_helper_pfrcp,
2844 [0x97] = gen_helper_pfrsqrt,
2845 [0x9a] = gen_helper_pfsub,
2846 [0x9e] = gen_helper_pfadd,
2847 [0xa0] = gen_helper_pfcmpgt,
2848 [0xa4] = gen_helper_pfmax,
2849 [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2850 [0xa7] = gen_helper_movq, /* pfrsqit1 */
2851 [0xaa] = gen_helper_pfsubr,
2852 [0xae] = gen_helper_pfacc,
2853 [0xb0] = gen_helper_pfcmpeq,
2854 [0xb4] = gen_helper_pfmul,
2855 [0xb6] = gen_helper_movq, /* pfrcpit2 */
2856 [0xb7] = gen_helper_pmulhrw_mmx,
2857 [0xbb] = gen_helper_pswapd,
2858 [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2859 };
2860
2861 struct SSEOpHelper_epp {
2862 SSEFunc_0_epp op[2];
2863 uint32_t ext_mask;
2864 };
2865
2866 struct SSEOpHelper_eppi {
2867 SSEFunc_0_eppi op[2];
2868 uint32_t ext_mask;
2869 };
2870
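/* Each entry of sse_op_table6/sse_op_table7 below pairs the helpers with
   the CPUID feature bit (ext_mask) that must be present; gen_sse() raises
   #UD when the required bit is not set. */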
2871 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2872 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2873 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2874 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2875 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2876 CPUID_EXT_PCLMULQDQ }
2877 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2878
2879 static const struct SSEOpHelper_epp sse_op_table6[256] = {
2880 [0x00] = SSSE3_OP(pshufb),
2881 [0x01] = SSSE3_OP(phaddw),
2882 [0x02] = SSSE3_OP(phaddd),
2883 [0x03] = SSSE3_OP(phaddsw),
2884 [0x04] = SSSE3_OP(pmaddubsw),
2885 [0x05] = SSSE3_OP(phsubw),
2886 [0x06] = SSSE3_OP(phsubd),
2887 [0x07] = SSSE3_OP(phsubsw),
2888 [0x08] = SSSE3_OP(psignb),
2889 [0x09] = SSSE3_OP(psignw),
2890 [0x0a] = SSSE3_OP(psignd),
2891 [0x0b] = SSSE3_OP(pmulhrsw),
2892 [0x10] = SSE41_OP(pblendvb),
2893 [0x14] = SSE41_OP(blendvps),
2894 [0x15] = SSE41_OP(blendvpd),
2895 [0x17] = SSE41_OP(ptest),
2896 [0x1c] = SSSE3_OP(pabsb),
2897 [0x1d] = SSSE3_OP(pabsw),
2898 [0x1e] = SSSE3_OP(pabsd),
2899 [0x20] = SSE41_OP(pmovsxbw),
2900 [0x21] = SSE41_OP(pmovsxbd),
2901 [0x22] = SSE41_OP(pmovsxbq),
2902 [0x23] = SSE41_OP(pmovsxwd),
2903 [0x24] = SSE41_OP(pmovsxwq),
2904 [0x25] = SSE41_OP(pmovsxdq),
2905 [0x28] = SSE41_OP(pmuldq),
2906 [0x29] = SSE41_OP(pcmpeqq),
2907 [0x2a] = SSE41_SPECIAL, /* movntdqa */
2908 [0x2b] = SSE41_OP(packusdw),
2909 [0x30] = SSE41_OP(pmovzxbw),
2910 [0x31] = SSE41_OP(pmovzxbd),
2911 [0x32] = SSE41_OP(pmovzxbq),
2912 [0x33] = SSE41_OP(pmovzxwd),
2913 [0x34] = SSE41_OP(pmovzxwq),
2914 [0x35] = SSE41_OP(pmovzxdq),
2915 [0x37] = SSE42_OP(pcmpgtq),
2916 [0x38] = SSE41_OP(pminsb),
2917 [0x39] = SSE41_OP(pminsd),
2918 [0x3a] = SSE41_OP(pminuw),
2919 [0x3b] = SSE41_OP(pminud),
2920 [0x3c] = SSE41_OP(pmaxsb),
2921 [0x3d] = SSE41_OP(pmaxsd),
2922 [0x3e] = SSE41_OP(pmaxuw),
2923 [0x3f] = SSE41_OP(pmaxud),
2924 [0x40] = SSE41_OP(pmulld),
2925 [0x41] = SSE41_OP(phminposuw),
2926 [0xdb] = AESNI_OP(aesimc),
2927 [0xdc] = AESNI_OP(aesenc),
2928 [0xdd] = AESNI_OP(aesenclast),
2929 [0xde] = AESNI_OP(aesdec),
2930 [0xdf] = AESNI_OP(aesdeclast),
2931 };
2932
2933 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
2934 [0x08] = SSE41_OP(roundps),
2935 [0x09] = SSE41_OP(roundpd),
2936 [0x0a] = SSE41_OP(roundss),
2937 [0x0b] = SSE41_OP(roundsd),
2938 [0x0c] = SSE41_OP(blendps),
2939 [0x0d] = SSE41_OP(blendpd),
2940 [0x0e] = SSE41_OP(pblendw),
2941 [0x0f] = SSSE3_OP(palignr),
2942 [0x14] = SSE41_SPECIAL, /* pextrb */
2943 [0x15] = SSE41_SPECIAL, /* pextrw */
2944 [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
2945 [0x17] = SSE41_SPECIAL, /* extractps */
2946 [0x20] = SSE41_SPECIAL, /* pinsrb */
2947 [0x21] = SSE41_SPECIAL, /* insertps */
2948 [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
2949 [0x40] = SSE41_OP(dpps),
2950 [0x41] = SSE41_OP(dppd),
2951 [0x42] = SSE41_OP(mpsadbw),
2952 [0x44] = PCLMULQDQ_OP(pclmulqdq),
2953 [0x60] = SSE42_OP(pcmpestrm),
2954 [0x61] = SSE42_OP(pcmpestri),
2955 [0x62] = SSE42_OP(pcmpistrm),
2956 [0x63] = SSE42_OP(pcmpistri),
2957 [0xdf] = AESNI_OP(aeskeygenassist),
2958 };
2959
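/* Decode and emit one MMX/SSE/SSSE3/SSE4 instruction.  'b' is the opcode
   byte following the 0x0f escape; the 0x66/0xf3/0xf2 prefixes are folded
   into b1 and, for the hand-decoded SSE_SPECIAL cases, into bits 8-9 of
   'b' (e.g. case 0x210 below is "f3 0f 10", i.e. movss). */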
2960 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
2961 target_ulong pc_start, int rex_r)
2962 {
2963 int b1, op1_offset, op2_offset, is_xmm, val;
2964 int modrm, mod, rm, reg;
2965 SSEFunc_0_epp sse_fn_epp;
2966 SSEFunc_0_eppi sse_fn_eppi;
2967 SSEFunc_0_ppi sse_fn_ppi;
2968 SSEFunc_0_eppt sse_fn_eppt;
2969 TCGMemOp ot;
2970
2971 b &= 0xff;
2972 if (s->prefix & PREFIX_DATA)
2973 b1 = 1;
2974 else if (s->prefix & PREFIX_REPZ)
2975 b1 = 2;
2976 else if (s->prefix & PREFIX_REPNZ)
2977 b1 = 3;
2978 else
2979 b1 = 0;
2980 sse_fn_epp = sse_op_table1[b][b1];
2981 if (!sse_fn_epp) {
2982 goto unknown_op;
2983 }
2984 if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
2985 is_xmm = 1;
2986 } else {
2987 if (b1 == 0) {
2988 /* MMX case */
2989 is_xmm = 0;
2990 } else {
2991 is_xmm = 1;
2992 }
2993 }
2994 /* simple MMX/SSE operation */
2995 if (s->flags & HF_TS_MASK) {
2996 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
2997 return;
2998 }
2999 if (s->flags & HF_EM_MASK) {
3000 illegal_op:
3001 gen_illegal_opcode(s);
3002 return;
3003 }
3004 if (is_xmm
3005 && !(s->flags & HF_OSFXSR_MASK)
3006 && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
3007 goto unknown_op;
3008 }
3009 if (b == 0x0e) {
3010 if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3011 /* If we were fully decoding this we might use illegal_op. */
3012 goto unknown_op;
3013 }
3014 /* femms */
3015 gen_helper_emms(cpu_env);
3016 return;
3017 }
3018 if (b == 0x77) {
3019 /* emms */
3020 gen_helper_emms(cpu_env);
3021 return;
3022 }
3023 /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3024 the static cpu state) */
3025 if (!is_xmm) {
3026 gen_helper_enter_mmx(cpu_env);
3027 }
3028
3029 modrm = cpu_ldub_code(env, s->pc++);
3030 reg = ((modrm >> 3) & 7);
3031 if (is_xmm)
3032 reg |= rex_r;
3033 mod = (modrm >> 6) & 3;
3034 if (sse_fn_epp == SSE_SPECIAL) {
3035 b |= (b1 << 8);
3036 switch(b) {
3037 case 0x0e7: /* movntq */
3038 if (mod == 3) {
3039 goto illegal_op;
3040 }
3041 gen_lea_modrm(env, s, modrm);
3042 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3043 break;
3044 case 0x1e7: /* movntdq */
3045 case 0x02b: /* movntps */
3046 case 0x12b: /* movntpd */
3047 if (mod == 3)
3048 goto illegal_op;
3049 gen_lea_modrm(env, s, modrm);
3050 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3051 break;
3052 case 0x3f0: /* lddqu */
3053 if (mod == 3)
3054 goto illegal_op;
3055 gen_lea_modrm(env, s, modrm);
3056 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3057 break;
3058 case 0x22b: /* movntss */
3059 case 0x32b: /* movntsd */
3060 if (mod == 3)
3061 goto illegal_op;
3062 gen_lea_modrm(env, s, modrm);
3063 if (b1 & 1) {
3064 gen_stq_env_A0(s, offsetof(CPUX86State,
3065 xmm_regs[reg].ZMM_Q(0)));
3066 } else {
3067 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
3068 xmm_regs[reg].ZMM_L(0)));
3069 gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3070 }
3071 break;
3072 case 0x6e: /* movd mm, ea */
3073 #ifdef TARGET_X86_64
3074 if (s->dflag == MO_64) {
3075 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3076 tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3077 } else
3078 #endif
3079 {
3080 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3081 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3082 offsetof(CPUX86State,fpregs[reg].mmx));
3083 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3084 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
3085 }
3086 break;
3087 case 0x16e: /* movd xmm, ea */
3088 #ifdef TARGET_X86_64
3089 if (s->dflag == MO_64) {
3090 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3091 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3092 offsetof(CPUX86State,xmm_regs[reg]));
3093 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
3094 } else
3095 #endif
3096 {
3097 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3098 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3099 offsetof(CPUX86State,xmm_regs[reg]));
3100 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3101 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
3102 }
3103 break;
3104 case 0x6f: /* movq mm, ea */
3105 if (mod != 3) {
3106 gen_lea_modrm(env, s, modrm);
3107 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3108 } else {
3109 rm = (modrm & 7);
3110 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3111 offsetof(CPUX86State,fpregs[rm].mmx));
3112 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3113 offsetof(CPUX86State,fpregs[reg].mmx));
3114 }
3115 break;
3116 case 0x010: /* movups */
3117 case 0x110: /* movupd */
3118 case 0x028: /* movaps */
3119 case 0x128: /* movapd */
3120 case 0x16f: /* movdqa xmm, ea */
3121 case 0x26f: /* movdqu xmm, ea */
3122 if (mod != 3) {
3123 gen_lea_modrm(env, s, modrm);
3124 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3125 } else {
3126 rm = (modrm & 7) | REX_B(s);
3127 gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3128 offsetof(CPUX86State,xmm_regs[rm]));
3129 }
3130 break;
3131 case 0x210: /* movss xmm, ea */
3132 if (mod != 3) {
3133 gen_lea_modrm(env, s, modrm);
3134 gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3135 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3136 tcg_gen_movi_tl(cpu_T0, 0);
3137 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3138 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3139 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3140 } else {
3141 rm = (modrm & 7) | REX_B(s);
3142 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3143 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3144 }
3145 break;
3146 case 0x310: /* movsd xmm, ea */
3147 if (mod != 3) {
3148 gen_lea_modrm(env, s, modrm);
3149 gen_ldq_env_A0(s, offsetof(CPUX86State,
3150 xmm_regs[reg].ZMM_Q(0)));
3151 tcg_gen_movi_tl(cpu_T0, 0);
3152 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3153 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3154 } else {
3155 rm = (modrm & 7) | REX_B(s);
3156 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3157 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3158 }
3159 break;
3160 case 0x012: /* movlps */
3161 case 0x112: /* movlpd */
3162 if (mod != 3) {
3163 gen_lea_modrm(env, s, modrm);
3164 gen_ldq_env_A0(s, offsetof(CPUX86State,
3165 xmm_regs[reg].ZMM_Q(0)));
3166 } else {
3167 /* movhlps */
3168 rm = (modrm & 7) | REX_B(s);
3169 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3170 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3171 }
3172 break;
3173 case 0x212: /* movsldup */
3174 if (mod != 3) {
3175 gen_lea_modrm(env, s, modrm);
3176 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3177 } else {
3178 rm = (modrm & 7) | REX_B(s);
3179 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3180 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3181 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3182 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3183 }
3184 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3185 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3186 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3187 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3188 break;
3189 case 0x312: /* movddup */
3190 if (mod != 3) {
3191 gen_lea_modrm(env, s, modrm);
3192 gen_ldq_env_A0(s, offsetof(CPUX86State,
3193 xmm_regs[reg].ZMM_Q(0)));
3194 } else {
3195 rm = (modrm & 7) | REX_B(s);
3196 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3197 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3198 }
3199 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3200 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3201 break;
3202 case 0x016: /* movhps */
3203 case 0x116: /* movhpd */
3204 if (mod != 3) {
3205 gen_lea_modrm(env, s, modrm);
3206 gen_ldq_env_A0(s, offsetof(CPUX86State,
3207 xmm_regs[reg].ZMM_Q(1)));
3208 } else {
3209 /* movlhps */
3210 rm = (modrm & 7) | REX_B(s);
3211 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3212 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3213 }
3214 break;
3215 case 0x216: /* movshdup */
3216 if (mod != 3) {
3217 gen_lea_modrm(env, s, modrm);
3218 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3219 } else {
3220 rm = (modrm & 7) | REX_B(s);
3221 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3222 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3223 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3224 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3225 }
3226 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3227 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3228 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3229 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3230 break;
3231 case 0x178:
3232 case 0x378:
3233 {
3234 int bit_index, field_length;
3235
3236 if (b1 == 1 && reg != 0)
3237 goto illegal_op;
3238 field_length = cpu_ldub_code(env, s->pc++) & 0x3F;
3239 bit_index = cpu_ldub_code(env, s->pc++) & 0x3F;
3240 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3241 offsetof(CPUX86State,xmm_regs[reg]));
3242 if (b1 == 1)
3243 gen_helper_extrq_i(cpu_env, cpu_ptr0,
3244 tcg_const_i32(bit_index),
3245 tcg_const_i32(field_length));
3246 else
3247 gen_helper_insertq_i(cpu_env, cpu_ptr0,
3248 tcg_const_i32(bit_index),
3249 tcg_const_i32(field_length));
3250 }
3251 break;
3252 case 0x7e: /* movd ea, mm */
3253 #ifdef TARGET_X86_64
3254 if (s->dflag == MO_64) {
3255 tcg_gen_ld_i64(cpu_T0, cpu_env,
3256 offsetof(CPUX86State,fpregs[reg].mmx));
3257 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3258 } else
3259 #endif
3260 {
3261 tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3262 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3263 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3264 }
3265 break;
3266 case 0x17e: /* movd ea, xmm */
3267 #ifdef TARGET_X86_64
3268 if (s->dflag == MO_64) {
3269 tcg_gen_ld_i64(cpu_T0, cpu_env,
3270 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3271 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3272 } else
3273 #endif
3274 {
3275 tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3276 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3277 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3278 }
3279 break;
3280 case 0x27e: /* movq xmm, ea */
3281 if (mod != 3) {
3282 gen_lea_modrm(env, s, modrm);
3283 gen_ldq_env_A0(s, offsetof(CPUX86State,
3284 xmm_regs[reg].ZMM_Q(0)));
3285 } else {
3286 rm = (modrm & 7) | REX_B(s);
3287 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3288 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3289 }
3290 gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3291 break;
3292 case 0x7f: /* movq ea, mm */
3293 if (mod != 3) {
3294 gen_lea_modrm(env, s, modrm);
3295 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3296 } else {
3297 rm = (modrm & 7);
3298 gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
3299 offsetof(CPUX86State,fpregs[reg].mmx));
3300 }
3301 break;
3302 case 0x011: /* movups */
3303 case 0x111: /* movupd */
3304 case 0x029: /* movaps */
3305 case 0x129: /* movapd */
3306 case 0x17f: /* movdqa ea, xmm */
3307 case 0x27f: /* movdqu ea, xmm */
3308 if (mod != 3) {
3309 gen_lea_modrm(env, s, modrm);
3310 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3311 } else {
3312 rm = (modrm & 7) | REX_B(s);
3313 gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
3314 offsetof(CPUX86State,xmm_regs[reg]));
3315 }
3316 break;
3317 case 0x211: /* movss ea, xmm */
3318 if (mod != 3) {
3319 gen_lea_modrm(env, s, modrm);
3320 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3321 gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3322 } else {
3323 rm = (modrm & 7) | REX_B(s);
3324 gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
3325 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3326 }
3327 break;
3328 case 0x311: /* movsd ea, xmm */
3329 if (mod != 3) {
3330 gen_lea_modrm(env, s, modrm);
3331 gen_stq_env_A0(s, offsetof(CPUX86State,
3332 xmm_regs[reg].ZMM_Q(0)));
3333 } else {
3334 rm = (modrm & 7) | REX_B(s);
3335 gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3336 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3337 }
3338 break;
3339 case 0x013: /* movlps */
3340 case 0x113: /* movlpd */
3341 if (mod != 3) {
3342 gen_lea_modrm(env, s, modrm);
3343 gen_stq_env_A0(s, offsetof(CPUX86State,
3344 xmm_regs[reg].ZMM_Q(0)));
3345 } else {
3346 goto illegal_op;
3347 }
3348 break;
3349 case 0x017: /* movhps */
3350 case 0x117: /* movhpd */
3351 if (mod != 3) {
3352 gen_lea_modrm(env, s, modrm);
3353 gen_stq_env_A0(s, offsetof(CPUX86State,
3354 xmm_regs[reg].ZMM_Q(1)));
3355 } else {
3356 goto illegal_op;
3357 }
3358 break;
3359 case 0x71: /* shift mm, im */
3360 case 0x72:
3361 case 0x73:
3362 case 0x171: /* shift xmm, im */
3363 case 0x172:
3364 case 0x173:
3365 if (b1 >= 2) {
3366 goto unknown_op;
3367 }
3368 val = cpu_ldub_code(env, s->pc++);
3369 if (is_xmm) {
3370 tcg_gen_movi_tl(cpu_T0, val);
3371 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3372 tcg_gen_movi_tl(cpu_T0, 0);
3373 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1)));
3374 op1_offset = offsetof(CPUX86State,xmm_t0);
3375 } else {
3376 tcg_gen_movi_tl(cpu_T0, val);
3377 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
3378 tcg_gen_movi_tl(cpu_T0, 0);
3379 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
3380 op1_offset = offsetof(CPUX86State,mmx_t0);
3381 }
3382 sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3383 (((modrm >> 3)) & 7)][b1];
3384 if (!sse_fn_epp) {
3385 goto unknown_op;
3386 }
3387 if (is_xmm) {
3388 rm = (modrm & 7) | REX_B(s);
3389 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3390 } else {
3391 rm = (modrm & 7);
3392 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3393 }
3394 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3395 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
3396 sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3397 break;
3398 case 0x050: /* movmskps */
3399 rm = (modrm & 7) | REX_B(s);
3400 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3401 offsetof(CPUX86State,xmm_regs[rm]));
3402 gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3403 tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3404 break;
3405 case 0x150: /* movmskpd */
3406 rm = (modrm & 7) | REX_B(s);
3407 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3408 offsetof(CPUX86State,xmm_regs[rm]));
3409 gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3410 tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3411 break;
3412 case 0x02a: /* cvtpi2ps */
3413 case 0x12a: /* cvtpi2pd */
3414 gen_helper_enter_mmx(cpu_env);
3415 if (mod != 3) {
3416 gen_lea_modrm(env, s, modrm);
3417 op2_offset = offsetof(CPUX86State,mmx_t0);
3418 gen_ldq_env_A0(s, op2_offset);
3419 } else {
3420 rm = (modrm & 7);
3421 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3422 }
3423 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3424 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3425 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3426 switch(b >> 8) {
3427 case 0x0:
3428 gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
3429 break;
3430 default:
3431 case 0x1:
3432 gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
3433 break;
3434 }
3435 break;
3436 case 0x22a: /* cvtsi2ss */
3437 case 0x32a: /* cvtsi2sd */
3438 ot = mo_64_32(s->dflag);
3439 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3440 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3441 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3442 if (ot == MO_32) {
3443 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3444 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3445 sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
3446 } else {
3447 #ifdef TARGET_X86_64
3448 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3449 sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0);
3450 #else
3451 goto illegal_op;
3452 #endif
3453 }
3454 break;
3455 case 0x02c: /* cvttps2pi */
3456 case 0x12c: /* cvttpd2pi */
3457 case 0x02d: /* cvtps2pi */
3458 case 0x12d: /* cvtpd2pi */
3459 gen_helper_enter_mmx(cpu_env);
3460 if (mod != 3) {
3461 gen_lea_modrm(env, s, modrm);
3462 op2_offset = offsetof(CPUX86State,xmm_t0);
3463 gen_ldo_env_A0(s, op2_offset);
3464 } else {
3465 rm = (modrm & 7) | REX_B(s);
3466 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3467 }
3468 op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3469 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3470 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3471 switch(b) {
3472 case 0x02c:
3473 gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3474 break;
3475 case 0x12c:
3476 gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3477 break;
3478 case 0x02d:
3479 gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3480 break;
3481 case 0x12d:
3482 gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3483 break;
3484 }
3485 break;
3486 case 0x22c: /* cvttss2si */
3487 case 0x32c: /* cvttsd2si */
3488 case 0x22d: /* cvtss2si */
3489 case 0x32d: /* cvtsd2si */
3490 ot = mo_64_32(s->dflag);
3491 if (mod != 3) {
3492 gen_lea_modrm(env, s, modrm);
3493 if ((b >> 8) & 1) {
3494 gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3495 } else {
3496 gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3497 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3498 }
3499 op2_offset = offsetof(CPUX86State,xmm_t0);
3500 } else {
3501 rm = (modrm & 7) | REX_B(s);
3502 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3503 }
3504 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3505 if (ot == MO_32) {
3506 SSEFunc_i_ep sse_fn_i_ep =
3507 sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3508 sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3509 tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
3510 } else {
3511 #ifdef TARGET_X86_64
3512 SSEFunc_l_ep sse_fn_l_ep =
3513 sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3514 sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0);
3515 #else
3516 goto illegal_op;
3517 #endif
3518 }
3519 gen_op_mov_reg_v(ot, reg, cpu_T0);
3520 break;
3521 case 0xc4: /* pinsrw */
3522 case 0x1c4:
3523 s->rip_offset = 1;
3524 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3525 val = cpu_ldub_code(env, s->pc++);
3526 if (b1) {
3527 val &= 7;
3528 tcg_gen_st16_tl(cpu_T0, cpu_env,
3529 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3530 } else {
3531 val &= 3;
3532 tcg_gen_st16_tl(cpu_T0, cpu_env,
3533 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3534 }
3535 break;
3536 case 0xc5: /* pextrw */
3537 case 0x1c5:
3538 if (mod != 3)
3539 goto illegal_op;
3540 ot = mo_64_32(s->dflag);
3541 val = cpu_ldub_code(env, s->pc++);
3542 if (b1) {
3543 val &= 7;
3544 rm = (modrm & 7) | REX_B(s);
3545 tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3546 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3547 } else {
3548 val &= 3;
3549 rm = (modrm & 7);
3550 tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3551 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3552 }
3553 reg = ((modrm >> 3) & 7) | rex_r;
3554 gen_op_mov_reg_v(ot, reg, cpu_T0);
3555 break;
3556 case 0x1d6: /* movq ea, xmm */
3557 if (mod != 3) {
3558 gen_lea_modrm(env, s, modrm);
3559 gen_stq_env_A0(s, offsetof(CPUX86State,
3560 xmm_regs[reg].ZMM_Q(0)));
3561 } else {
3562 rm = (modrm & 7) | REX_B(s);
3563 gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3564 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3565 gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3566 }
3567 break;
3568 case 0x2d6: /* movq2dq */
3569 gen_helper_enter_mmx(cpu_env);
3570 rm = (modrm & 7);
3571 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3572 offsetof(CPUX86State,fpregs[rm].mmx));
3573 gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3574 break;
3575 case 0x3d6: /* movdq2q */
3576 gen_helper_enter_mmx(cpu_env);
3577 rm = (modrm & 7) | REX_B(s);
3578 gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
3579 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3580 break;
3581 case 0xd7: /* pmovmskb */
3582 case 0x1d7:
3583 if (mod != 3)
3584 goto illegal_op;
3585 if (b1) {
3586 rm = (modrm & 7) | REX_B(s);
3587 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
3588 gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3589 } else {
3590 rm = (modrm & 7);
3591 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
3592 gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3593 }
3594 reg = ((modrm >> 3) & 7) | rex_r;
3595 tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3596 break;
3597
3598 case 0x138:
3599 case 0x038:
3600 b = modrm;
3601 if ((b & 0xf0) == 0xf0) {
3602 goto do_0f_38_fx;
3603 }
3604 modrm = cpu_ldub_code(env, s->pc++);
3605 rm = modrm & 7;
3606 reg = ((modrm >> 3) & 7) | rex_r;
3607 mod = (modrm >> 6) & 3;
3608 if (b1 >= 2) {
3609 goto unknown_op;
3610 }
3611
3612 sse_fn_epp = sse_op_table6[b].op[b1];
3613 if (!sse_fn_epp) {
3614 goto unknown_op;
3615 }
3616 if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3617 goto illegal_op;
3618
3619 if (b1) {
3620 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3621 if (mod == 3) {
3622 op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3623 } else {
3624 op2_offset = offsetof(CPUX86State,xmm_t0);
3625 gen_lea_modrm(env, s, modrm);
3626 switch (b) {
3627 case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3628 case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3629 case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3630 gen_ldq_env_A0(s, op2_offset +
3631 offsetof(ZMMReg, ZMM_Q(0)));
3632 break;
3633 case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3634 case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3635 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
3636 s->mem_index, MO_LEUL);
3637 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
3638 offsetof(ZMMReg, ZMM_L(0)));
3639 break;
3640 case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3641 tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
3642 s->mem_index, MO_LEUW);
3643 tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
3644 offsetof(ZMMReg, ZMM_W(0)));
3645 break;
3646 case 0x2a: /* movntdqa */
3647 gen_ldo_env_A0(s, op1_offset);
3648 return;
3649 default:
3650 gen_ldo_env_A0(s, op2_offset);
3651 }
3652 }
3653 } else {
3654 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3655 if (mod == 3) {
3656 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3657 } else {
3658 op2_offset = offsetof(CPUX86State,mmx_t0);
3659 gen_lea_modrm(env, s, modrm);
3660 gen_ldq_env_A0(s, op2_offset);
3661 }
3662 }
3663 if (sse_fn_epp == SSE_SPECIAL) {
3664 goto unknown_op;
3665 }
3666
3667 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3668 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3669 sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3670
3671 if (b == 0x17) {
3672 set_cc_op(s, CC_OP_EFLAGS);
3673 }
3674 break;
3675
3676 case 0x238:
3677 case 0x338:
3678 do_0f_38_fx:
3679 /* Various integer extensions at 0f 38 f[0-f]. */
3680 b = modrm | (b1 << 8);
3681 modrm = cpu_ldub_code(env, s->pc++);
3682 reg = ((modrm >> 3) & 7) | rex_r;
3683
3684 switch (b) {
3685 case 0x3f0: /* crc32 Gd,Eb */
3686 case 0x3f1: /* crc32 Gd,Ey */
3687 do_crc32:
3688 if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3689 goto illegal_op;
3690 }
3691 if ((b & 0xff) == 0xf0) {
3692 ot = MO_8;
3693 } else if (s->dflag != MO_64) {
3694 ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3695 } else {
3696 ot = MO_64;
3697 }
3698
3699 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
3700 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3701 gen_helper_crc32(cpu_T0, cpu_tmp2_i32,
3702 cpu_T0, tcg_const_i32(8 << ot));
3703
3704 ot = mo_64_32(s->dflag);
3705 gen_op_mov_reg_v(ot, reg, cpu_T0);
3706 break;
3707
3708 case 0x1f0: /* crc32 or movbe */
3709 case 0x1f1:
3710 /* For these insns, the f2 (repnz) prefix is supposed to have priority
3711 over the 66 prefix, but that is not how b1 is computed above, so
3712 check for it explicitly here. */
3713 if (s->prefix & PREFIX_REPNZ) {
3714 goto do_crc32;
3715 }
3716 /* FALLTHRU */
3717 case 0x0f0: /* movbe Gy,My */
3718 case 0x0f1: /* movbe My,Gy */
3719 if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3720 goto illegal_op;
3721 }
3722 if (s->dflag != MO_64) {
3723 ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3724 } else {
3725 ot = MO_64;
3726 }
3727
3728 gen_lea_modrm(env, s, modrm);
3729 if ((b & 1) == 0) {
3730 tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
3731 s->mem_index, ot | MO_BE);
3732 gen_op_mov_reg_v(ot, reg, cpu_T0);
3733 } else {
3734 tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
3735 s->mem_index, ot | MO_BE);
3736 }
3737 break;
3738
3739 case 0x0f2: /* andn Gy, By, Ey */
3740 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3741 || !(s->prefix & PREFIX_VEX)
3742 || s->vex_l != 0) {
3743 goto illegal_op;
3744 }
3745 ot = mo_64_32(s->dflag);
3746 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3747 tcg_gen_andc_tl(cpu_T0, cpu_regs[s->vex_v], cpu_T0);
3748 gen_op_mov_reg_v(ot, reg, cpu_T0);
3749 gen_op_update1_cc();
3750 set_cc_op(s, CC_OP_LOGICB + ot);
3751 break;
3752
3753 case 0x0f7: /* bextr Gy, Ey, By */
3754 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3755 || !(s->prefix & PREFIX_VEX)
3756 || s->vex_l != 0) {
3757 goto illegal_op;
3758 }
3759 ot = mo_64_32(s->dflag);
3760 {
3761 TCGv bound, zero;
3762
3763 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3764 /* Extract START, and shift the operand.
3765 Shifts larger than operand size get zeros. */
3766 tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
3767 tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0);
3768
3769 bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3770 zero = tcg_const_tl(0);
3771 tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound,
3772 cpu_T0, zero);
3773 tcg_temp_free(zero);
3774
3775 /* Extract the LEN into a mask. Lengths larger than
3776 operand size get all ones. */
3777 tcg_gen_extract_tl(cpu_A0, cpu_regs[s->vex_v], 8, 8);
3778 tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
3779 cpu_A0, bound);
3780 tcg_temp_free(bound);
3781 tcg_gen_movi_tl(cpu_T1, 1);
3782 tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0);
3783 tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
3784 tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
3785
3786 gen_op_mov_reg_v(ot, reg, cpu_T0);
3787 gen_op_update1_cc();
3788 set_cc_op(s, CC_OP_LOGICB + ot);
3789 }
3790 break;
3791
3792 case 0x0f5: /* bzhi Gy, Ey, By */
3793 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3794 || !(s->prefix & PREFIX_VEX)
3795 || s->vex_l != 0) {
3796 goto illegal_op;
3797 }
3798 ot = mo_64_32(s->dflag);
3799 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3800 tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]);
3801 {
3802 TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3803 /* Note that since we're using BMILG (in order to get O
3804 cleared) we need to store the inverse into C. */
3805 tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3806 cpu_T1, bound);
3807 tcg_gen_movcond_tl(TCG_COND_GT, cpu_T1, cpu_T1,
3808 bound, bound, cpu_T1);
3809 tcg_temp_free(bound);
3810 }
3811 tcg_gen_movi_tl(cpu_A0, -1);
3812 tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1);
3813 tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0);
3814 gen_op_mov_reg_v(ot, reg, cpu_T0);
3815 gen_op_update1_cc();
3816 set_cc_op(s, CC_OP_BMILGB + ot);
3817 break;
3818
3819 case 0x3f6: /* mulx By, Gy, rdx, Ey */
3820 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3821 || !(s->prefix & PREFIX_VEX)
3822 || s->vex_l != 0) {
3823 goto illegal_op;
3824 }
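/* MULX multiplies EDX/RDX by the r/m operand without touching the flags;
   the low half of the product goes to the vvvv register and the high half
   to the ModRM.reg register.  */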
3825 ot = mo_64_32(s->dflag);
3826 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3827 switch (ot) {
3828 default:
3829 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3830 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
3831 tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
3832 cpu_tmp2_i32, cpu_tmp3_i32);
3833 tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
3834 tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
3835 break;
3836 #ifdef TARGET_X86_64
3837 case MO_64:
3838 tcg_gen_mulu2_i64(cpu_T0, cpu_T1,
3839 cpu_T0, cpu_regs[R_EDX]);
3840 tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0);
3841 tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
3842 break;
3843 #endif
3844 }
3845 break;
3846
3847 case 0x3f5: /* pdep Gy, By, Ey */
3848 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3849 || !(s->prefix & PREFIX_VEX)
3850 || s->vex_l != 0) {
3851 goto illegal_op;
3852 }
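/* PDEP scatters low-order bits of one source into the bit positions
   selected by the mask operand; PEXT below is the inverse, gathering the
   selected bits down into the low-order bits of the destination.  Both
   are implemented out of line in helpers.  */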
3853 ot = mo_64_32(s->dflag);
3854 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3855 /* Note that by zero-extending the mask operand, we
3856 automatically handle zero-extending the result. */
3857 if (ot == MO_64) {
3858 tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
3859 } else {
3860 tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
3861 }
3862 gen_helper_pdep(cpu_regs[reg], cpu_T0, cpu_T1);
3863 break;
3864
3865 case 0x2f5: /* pext Gy, By, Ey */
3866 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3867 || !(s->prefix & PREFIX_VEX)
3868 || s->vex_l != 0) {
3869 goto illegal_op;
3870 }
3871 ot = mo_64_32(s->dflag);
3872 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3873 /* Note that by zero-extending the mask operand, we
3874 automatically handle zero-extending the result. */
3875 if (ot == MO_64) {
3876 tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
3877 } else {
3878 tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
3879 }
3880 gen_helper_pext(cpu_regs[reg], cpu_T0, cpu_T1);
3881 break;
3882
3883 case 0x1f6: /* adcx Gy, Ey */
3884 case 0x2f6: /* adox Gy, Ey */
3885 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3886 goto illegal_op;
3887 } else {
3888 TCGv carry_in, carry_out, zero;
3889 int end_op;
3890
3891 ot = mo_64_32(s->dflag);
3892 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3893
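/* ADCX is an add-with-carry that reads and writes only CF, while ADOX
   reads and writes only OF.  Tracking the two carry chains in
   CC_OP_ADCX/ADOX/ADCOX lets back-to-back adcx/adox pairs (typical in
   multi-precision multiply loops) reuse the carry below without
   materializing the whole of EFLAGS each time.  */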
3894 /* Re-use the carry-out from a previous round. */
3895 TCGV_UNUSED(carry_in);
3896 carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
3897 switch (s->cc_op) {
3898 case CC_OP_ADCX:
3899 if (b == 0x1f6) {
3900 carry_in = cpu_cc_dst;
3901 end_op = CC_OP_ADCX;
3902 } else {
3903 end_op = CC_OP_ADCOX;
3904 }
3905 break;
3906 case CC_OP_ADOX:
3907 if (b == 0x1f6) {
3908 end_op = CC_OP_ADCOX;
3909 } else {
3910 carry_in = cpu_cc_src2;
3911 end_op = CC_OP_ADOX;
3912 }
3913 break;
3914 case CC_OP_ADCOX:
3915 end_op = CC_OP_ADCOX;
3916 carry_in = carry_out;
3917 break;
3918 default:
3919 end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
3920 break;
3921 }
3922 /* If we can't reuse carry-out, get it out of EFLAGS. */
3923 if (TCGV_IS_UNUSED(carry_in)) {
3924 if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
3925 gen_compute_eflags(s);
3926 }
3927 carry_in = cpu_tmp0;
3928 tcg_gen_extract_tl(carry_in, cpu_cc_src,
3929 ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
3930 }
3931
3932 switch (ot) {
3933 #ifdef TARGET_X86_64
3934 case MO_32:
3935 /* If we know TL is 64-bit, and we want a 32-bit
3936 result, just do everything in 64-bit arithmetic. */
3937 tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
3938 tcg_gen_ext32u_i64(cpu_T0, cpu_T0);
3939 tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]);
3940 tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in);
3941 tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0);
3942 tcg_gen_shri_i64(carry_out, cpu_T0, 32);
3943 break;
3944 #endif
3945 default:
3946 /* Otherwise compute the carry-out in two steps. */
3947 zero = tcg_const_tl(0);
3948 tcg_gen_add2_tl(cpu_T0, carry_out,
3949 cpu_T0, zero,
3950 carry_in, zero);
3951 tcg_gen_add2_tl(cpu_regs[reg], carry_out,
3952 cpu_regs[reg], carry_out,
3953 cpu_T0, zero);
3954 tcg_temp_free(zero);
3955 break;
3956 }
3957 set_cc_op(s, end_op);
3958 }
3959 break;
3960
3961 case 0x1f7: /* shlx Gy, Ey, By */
3962 case 0x2f7: /* sarx Gy, Ey, By */
3963 case 0x3f7: /* shrx Gy, Ey, By */
3964 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3965 || !(s->prefix & PREFIX_VEX)
3966 || s->vex_l != 0) {
3967 goto illegal_op;
3968 }
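/* SHLX/SARX/SHRX shift Ey by a count taken from the vvvv register,
   masked to 5 bits (6 for 64-bit operands), and unlike the legacy shifts
   they leave the flags untouched, so no cc op is set here.  */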
3969 ot = mo_64_32(s->dflag);
3970 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3971 if (ot == MO_64) {
3972 tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63);
3973 } else {
3974 tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
3975 }
3976 if (b == 0x1f7) {
3977 tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
3978 } else if (b == 0x2f7) {
3979 if (ot != MO_64) {
3980 tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
3981 }
3982 tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
3983 } else {
3984 if (ot != MO_64) {
3985 tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
3986 }
3987 tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
3988 }
3989 gen_op_mov_reg_v(ot, reg, cpu_T0);
3990 break;
3991
3992 case 0x0f3:
3993 case 0x1f3:
3994 case 0x2f3:
3995 case 0x3f3: /* Group 17 */
3996 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3997 || !(s->prefix & PREFIX_VEX)
3998 || s->vex_l != 0) {
3999 goto illegal_op;
4000 }
4001 ot = mo_64_32(s->dflag);
4002 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4003
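/* Group 17 (BMI1): /1 BLSR resets the lowest set bit (x & (x - 1)),
   /2 BLSMSK builds a mask up to and including the lowest set bit
   (x ^ (x - 1)), and /3 BLSI isolates the lowest set bit (x & -x).
   The result is written to the vvvv register and the flags are reported
   through CC_OP_BMILG.  */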
4004 switch (reg & 7) {
4005 case 1: /* blsr By,Ey */
4006 tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
4007 tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
4008 gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
4009 gen_op_update2_cc();
4010 set_cc_op(s, CC_OP_BMILGB + ot);
4011 break;
4012
4013 case 2: /* blsmsk By,Ey */
4014 tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
4015 tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
4016 gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
4017 gen_op_update2_cc();
4018 set_cc_op(s, CC_OP_BMILGB + ot);
4019 break;
4020
4021 case 3: /* blsi By, Ey */
4022 tcg_gen_neg_tl(cpu_T1, cpu_T0);
4023 tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
4024 gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
4025 gen_op_update2_cc();
4026 set_cc_op(s, CC_OP_BMILGB + ot);
4027 break;
4028
4029 default:
4030 goto unknown_op;
4031 }
4032 break;
4033
4034 default:
4035 goto unknown_op;
4036 }
4037 break;
4038
4039 case 0x03a:
4040 case 0x13a:
4041 b = modrm;
4042 modrm = cpu_ldub_code(env, s->pc++);
4043 rm = modrm & 7;
4044 reg = ((modrm >> 3) & 7) | rex_r;
4045 mod = (modrm >> 6) & 3;
4046 if (b1 >= 2) {
4047 goto unknown_op;
4048 }
4049
4050 sse_fn_eppi = sse_op_table7[b].op[b1];
4051 if (!sse_fn_eppi) {
4052 goto unknown_op;
4053 }
4054 if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4055 goto illegal_op;
4056
4057 if (sse_fn_eppi == SSE_SPECIAL) {
4058 ot = mo_64_32(s->dflag);
4059 rm = (modrm & 7) | REX_B(s);
4060 if (mod != 3)
4061 gen_lea_modrm(env, s, modrm);
4062 reg = ((modrm >> 3) & 7) | rex_r;
4063 val = cpu_ldub_code(env, s->pc++);
4064 switch (b) {
4065 case 0x14: /* pextrb */
4066 tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4067 xmm_regs[reg].ZMM_B(val & 15)));
4068 if (mod == 3) {
4069 gen_op_mov_reg_v(ot, rm, cpu_T0);
4070 } else {
4071 tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4072 s->mem_index, MO_UB);
4073 }
4074 break;
4075 case 0x15: /* pextrw */
4076 tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4077 xmm_regs[reg].ZMM_W(val & 7)));
4078 if (mod == 3) {
4079 gen_op_mov_reg_v(ot, rm, cpu_T0);
4080 } else {
4081 tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4082 s->mem_index, MO_LEUW);
4083 }
4084 break;
4085 case 0x16:
4086 if (ot == MO_32) { /* pextrd */
4087 tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4088 offsetof(CPUX86State,
4089 xmm_regs[reg].ZMM_L(val & 3)));
4090 if (mod == 3) {
4091 tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
4092 } else {
4093 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
4094 s->mem_index, MO_LEUL);
4095 }
4096 } else { /* pextrq */
4097 #ifdef TARGET_X86_64
4098 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
4099 offsetof(CPUX86State,
4100 xmm_regs[reg].ZMM_Q(val & 1)));
4101 if (mod == 3) {
4102 tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
4103 } else {
4104 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
4105 s->mem_index, MO_LEQ);
4106 }
4107 #else
4108 goto illegal_op;
4109 #endif
4110 }
4111 break;
4112 case 0x17: /* extractps */
4113 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4114 xmm_regs[reg].ZMM_L(val & 3)));
4115 if (mod == 3) {
4116 gen_op_mov_reg_v(ot, rm, cpu_T0);
4117 } else {
4118 tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4119 s->mem_index, MO_LEUL);
4120 }
4121 break;
4122 case 0x20: /* pinsrb */
4123 if (mod == 3) {
4124 gen_op_mov_v_reg(MO_32, cpu_T0, rm);
4125 } else {
4126 tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
4127 s->mem_index, MO_UB);
4128 }
4129 tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4130 xmm_regs[reg].ZMM_B(val & 15)));
4131 break;
4132 case 0x21: /* insertps */
4133 if (mod == 3) {
4134 tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4135 offsetof(CPUX86State,xmm_regs[rm]
4136 .ZMM_L((val >> 6) & 3)));
4137 } else {
4138 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4139 s->mem_index, MO_LEUL);
4140 }
4141 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4142 offsetof(CPUX86State,xmm_regs[reg]
4143 .ZMM_L((val >> 4) & 3)));
4144 if ((val >> 0) & 1)
4145 tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4146 cpu_env, offsetof(CPUX86State,
4147 xmm_regs[reg].ZMM_L(0)));
4148 if ((val >> 1) & 1)
4149 tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4150 cpu_env, offsetof(CPUX86State,
4151 xmm_regs[reg].ZMM_L(1)));
4152 if ((val >> 2) & 1)
4153 tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4154 cpu_env, offsetof(CPUX86State,
4155 xmm_regs[reg].ZMM_L(2)));
4156 if ((val >> 3) & 1)
4157 tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4158 cpu_env, offsetof(CPUX86State,
4159 xmm_regs[reg].ZMM_L(3)));
4160 break;
4161 case 0x22:
4162 if (ot == MO_32) { /* pinsrd */
4163 if (mod == 3) {
4164 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
4165 } else {
4166 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4167 s->mem_index, MO_LEUL);
4168 }
4169 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4170 offsetof(CPUX86State,
4171 xmm_regs[reg].ZMM_L(val & 3)));
4172 } else { /* pinsrq */
4173 #ifdef TARGET_X86_64
4174 if (mod == 3) {
4175 gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
4176 } else {
4177 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
4178 s->mem_index, MO_LEQ);
4179 }
4180 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
4181 offsetof(CPUX86State,
4182 xmm_regs[reg].ZMM_Q(val & 1)));
4183 #else
4184 goto illegal_op;
4185 #endif
4186 }
4187 break;
4188 }
4189 return;
4190 }
4191
4192 if (b1) {
4193 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4194 if (mod == 3) {
4195 op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4196 } else {
4197 op2_offset = offsetof(CPUX86State,xmm_t0);
4198 gen_lea_modrm(env, s, modrm);
4199 gen_ldo_env_A0(s, op2_offset);
4200 }
4201 } else {
4202 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4203 if (mod == 3) {
4204 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4205 } else {
4206 op2_offset = offsetof(CPUX86State,mmx_t0);
4207 gen_lea_modrm(env, s, modrm);
4208 gen_ldq_env_A0(s, op2_offset);
4209 }
4210 }
4211 val = cpu_ldub_code(env, s->pc++);
4212
4213 if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4214 set_cc_op(s, CC_OP_EFLAGS);
4215
4216 if (s->dflag == MO_64) {
4217 /* The helper must use entire 64-bit gp registers */
4218 val |= 1 << 8;
4219 }
4220 }
4221
4222 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4223 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4224 sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4225 break;
4226
4227 case 0x33a:
4228 /* Various integer extensions at 0f 3a f[0-f]. */
4229 b = modrm | (b1 << 8);
4230 modrm = cpu_ldub_code(env, s->pc++);
4231 reg = ((modrm >> 3) & 7) | rex_r;
4232
4233 switch (b) {
4234 case 0x3f0: /* rorx Gy,Ey, Ib */
4235 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4236 || !(s->prefix & PREFIX_VEX)
4237 || s->vex_l != 0) {
4238 goto illegal_op;
4239 }
4240 ot = mo_64_32(s->dflag);
4241 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4242 b = cpu_ldub_code(env, s->pc++);
4243 if (ot == MO_64) {
4244 tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
4245 } else {
4246 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4247 tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
4248 tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
4249 }
4250 gen_op_mov_reg_v(ot, reg, cpu_T0);
4251 break;
4252
4253 default:
4254 goto unknown_op;
4255 }
4256 break;
4257
4258 default:
4259 unknown_op:
4260 gen_unknown_opcode(env, s);
4261 return;
4262 }
4263 } else {
4264 /* generic MMX or SSE operation */
4265 switch(b) {
4266 case 0x70: /* pshufx insn */
4267 case 0xc6: /* pshufx insn */
4268 case 0xc2: /* compare insns */
4269 s->rip_offset = 1;
4270 break;
4271 default:
4272 break;
4273 }
4274 if (is_xmm) {
4275 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4276 if (mod != 3) {
4277 int sz = 4;
4278
4279 gen_lea_modrm(env, s, modrm);
4280 op2_offset = offsetof(CPUX86State,xmm_t0);
4281
4282 switch (b) {
4283 case 0x50 ... 0x5a:
4284 case 0x5c ... 0x5f:
4285 case 0xc2:
4286 /* Most sse scalar operations. */
4287 if (b1 == 2) {
4288 sz = 2;
4289 } else if (b1 == 3) {
4290 sz = 3;
4291 }
4292 break;
4293
4294 case 0x2e: /* ucomis[sd] */
4295 case 0x2f: /* comis[sd] */
4296 if (b1 == 0) {
4297 sz = 2;
4298 } else {
4299 sz = 3;
4300 }
4301 break;
4302 }
4303
4304 switch (sz) {
4305 case 2:
4306 /* 32 bit access */
4307 gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
4308 tcg_gen_st32_tl(cpu_T0, cpu_env,
4309 offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4310 break;
4311 case 3:
4312 /* 64 bit access */
4313 gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4314 break;
4315 default:
4316 /* 128 bit access */
4317 gen_ldo_env_A0(s, op2_offset);
4318 break;
4319 }
4320 } else {
4321 rm = (modrm & 7) | REX_B(s);
4322 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4323 }
4324 } else {
4325 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4326 if (mod != 3) {
4327 gen_lea_modrm(env, s, modrm);
4328 op2_offset = offsetof(CPUX86State,mmx_t0);
4329 gen_ldq_env_A0(s, op2_offset);
4330 } else {
4331 rm = (modrm & 7);
4332 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4333 }
4334 }
4335 switch(b) {
4336 case 0x0f: /* 3DNow! data insns */
4337 val = cpu_ldub_code(env, s->pc++);
4338 sse_fn_epp = sse_op_table5[val];
4339 if (!sse_fn_epp) {
4340 goto unknown_op;
4341 }
4342 if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4343 goto illegal_op;
4344 }
4345 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4346 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4347 sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4348 break;
4349 case 0x70: /* pshufx insn */
4350 case 0xc6: /* pshufx insn */
4351 val = cpu_ldub_code(env, s->pc++);
4352 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4353 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4354 /* XXX: introduce a new table? */
4355 sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4356 sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4357 break;
4358 case 0xc2:
4359 /* compare insns */
4360 val = cpu_ldub_code(env, s->pc++);
4361 if (val >= 8)
4362 goto unknown_op;
4363 sse_fn_epp = sse_op_table4[val][b1];
4364
4365 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4366 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4367 sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4368 break;
4369 case 0xf7:
4370 /* maskmov : we must prepare A0 */
4371 if (mod != 3)
4372 goto illegal_op;
4373 tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
4374 gen_extu(s->aflag, cpu_A0);
4375 gen_add_A0_ds_seg(s);
4376
4377 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4378 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4379 /* XXX: introduce a new table? */
4380 sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4381 sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
4382 break;
4383 default:
4384 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4385 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4386 sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4387 break;
4388 }
4389 if (b == 0x2e || b == 0x2f) {
4390 set_cc_op(s, CC_OP_EFLAGS);
4391 }
4392 }
4393 }
4394
4395 /* convert one instruction. s->is_jmp is set if the translation must
4396 be stopped. Return the next pc value */
4397 static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
4398 target_ulong pc_start)
4399 {
4400 int b, prefixes;
4401 int shift;
4402 TCGMemOp ot, aflag, dflag;
4403 int modrm, reg, rm, mod, op, opreg, val;
4404 target_ulong next_eip, tval;
4405 int rex_w, rex_r;
4406
4407 s->pc_start = s->pc = pc_start;
4408 prefixes = 0;
4409 s->override = -1;
4410 rex_w = -1;
4411 rex_r = 0;
4412 #ifdef TARGET_X86_64
4413 s->rex_x = 0;
4414 s->rex_b = 0;
4415 x86_64_hregs = 0;
4416 #endif
4417 s->rip_offset = 0; /* for relative ip address */
4418 s->vex_l = 0;
4419 s->vex_v = 0;
4420 next_byte:
4421 /* x86 has an upper limit of 15 bytes for an instruction. Since we
4422 * do not want to decode and generate IR for an illegal
4423 * instruction, the following check limits the instruction size to
4424 * 25 bytes: 14 prefix + 1 opc + 6 (modrm+sib+ofs) + 4 imm */
4425 if (s->pc - pc_start > 14) {
4426 goto illegal_op;
4427 }
4428 b = cpu_ldub_code(env, s->pc);
4429 s->pc++;
4430 /* Collect prefixes. */
4431 switch (b) {
4432 case 0xf3:
4433 prefixes |= PREFIX_REPZ;
4434 goto next_byte;
4435 case 0xf2:
4436 prefixes |= PREFIX_REPNZ;
4437 goto next_byte;
4438 case 0xf0:
4439 prefixes |= PREFIX_LOCK;
4440 goto next_byte;
4441 case 0x2e:
4442 s->override = R_CS;
4443 goto next_byte;
4444 case 0x36:
4445 s->override = R_SS;
4446 goto next_byte;
4447 case 0x3e:
4448 s->override = R_DS;
4449 goto next_byte;
4450 case 0x26:
4451 s->override = R_ES;
4452 goto next_byte;
4453 case 0x64:
4454 s->override = R_FS;
4455 goto next_byte;
4456 case 0x65:
4457 s->override = R_GS;
4458 goto next_byte;
4459 case 0x66:
4460 prefixes |= PREFIX_DATA;
4461 goto next_byte;
4462 case 0x67:
4463 prefixes |= PREFIX_ADR;
4464 goto next_byte;
4465 #ifdef TARGET_X86_64
4466 case 0x40 ... 0x4f:
4467 if (CODE64(s)) {
4468 /* REX prefix */
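/* REX byte layout is 0100WRXB: W selects 64-bit operand size and R/X/B
   provide the fourth bit of ModRM.reg, SIB.index and ModRM.rm/SIB.base.
   The shifts below move each bit into position 3 so it can be ORed
   directly into the 3-bit register numbers.  */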
4469 rex_w = (b >> 3) & 1;
4470 rex_r = (b & 0x4) << 1;
4471 s->rex_x = (b & 0x2) << 2;
4472 REX_B(s) = (b & 0x1) << 3;
4473 x86_64_hregs = 1; /* select uniform byte register addressing */
4474 goto next_byte;
4475 }
4476 break;
4477 #endif
4478 case 0xc5: /* 2-byte VEX */
4479 case 0xc4: /* 3-byte VEX */
4480 /* VEX prefixes cannot be used in real or vm86 mode or with a 16-bit
4481 code segment; otherwise the byte is the LES or LDS opcode. */
4482 if (s->code32 && !s->vm86) {
4483 static const int pp_prefix[4] = {
4484 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4485 };
4486 int vex3, vex2 = cpu_ldub_code(env, s->pc);
4487
4488 if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4489 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4490 otherwise the instruction is LES or LDS. */
4491 break;
4492 }
4493 s->pc++;
4494
4495 /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4496 if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4497 | PREFIX_LOCK | PREFIX_DATA)) {
4498 goto illegal_op;
4499 }
4500 #ifdef TARGET_X86_64
4501 if (x86_64_hregs) {
4502 goto illegal_op;
4503 }
4504 #endif
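/* VEX payload layout: the 2-byte form (c5) carries R.vvvv.L.pp in its
   single payload byte; the 3-byte form (c4) carries R.X.B.mmmmm and then
   W.vvvv.L.pp.  R, X, B and vvvv are stored inverted in the encoding,
   hence the bitwise NOTs when they are extracted below.  */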
4505 rex_r = (~vex2 >> 4) & 8;
4506 if (b == 0xc5) {
4507 vex3 = vex2;
4508 b = cpu_ldub_code(env, s->pc++);
4509 } else {
4510 #ifdef TARGET_X86_64
4511 s->rex_x = (~vex2 >> 3) & 8;
4512 s->rex_b = (~vex2 >> 2) & 8;
4513 #endif
4514 vex3 = cpu_ldub_code(env, s->pc++);
4515 rex_w = (vex3 >> 7) & 1;
4516 switch (vex2 & 0x1f) {
4517 case 0x01: /* Implied 0f leading opcode bytes. */
4518 b = cpu_ldub_code(env, s->pc++) | 0x100;
4519 break;
4520 case 0x02: /* Implied 0f 38 leading opcode bytes. */
4521 b = 0x138;
4522 break;
4523 case 0x03: /* Implied 0f 3a leading opcode bytes. */
4524 b = 0x13a;
4525 break;
4526 default: /* Reserved for future use. */
4527 goto unknown_op;
4528 }
4529 }
4530 s->vex_v = (~vex3 >> 3) & 0xf;
4531 s->vex_l = (vex3 >> 2) & 1;
4532 prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4533 }
4534 break;
4535 }
4536
4537 /* Post-process prefixes. */
4538 if (CODE64(s)) {
4539 /* In 64-bit mode, the default data size is 32-bit. Select 64-bit
4540 data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4541 over 0x66 if both are present. */
4542 dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4543 /* In 64-bit mode, 0x67 selects 32-bit addressing. */
4544 aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4545 } else {
4546 /* In 16/32-bit mode, 0x66 selects the opposite data size. */
4547 if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4548 dflag = MO_32;
4549 } else {
4550 dflag = MO_16;
4551 }
4552 /* In 16/32-bit mode, 0x67 selects the opposite addressing. */
4553 if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4554 aflag = MO_32;
4555 } else {
4556 aflag = MO_16;
4557 }
4558 }
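/* Example: in 64-bit mode the sequence 66 48 89 c3 carries both the 0x66
   prefix and REX.W, so dflag resolves to MO_64 because rex_w wins; in
   32-bit code 66 89 c3 flips dflag to MO_16 instead.  */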
4559
4560 s->prefix = prefixes;
4561 s->aflag = aflag;
4562 s->dflag = dflag;
4563
4564 /* now check op code */
4565 reswitch:
4566 switch(b) {
4567 case 0x0f:
4568 /**************************/
4569 /* extended op code */
4570 b = cpu_ldub_code(env, s->pc++) | 0x100;
4571 goto reswitch;
4572
4573 /**************************/
4574 /* arith & logic */
4575 case 0x00 ... 0x05:
4576 case 0x08 ... 0x0d:
4577 case 0x10 ... 0x15:
4578 case 0x18 ... 0x1d:
4579 case 0x20 ... 0x25:
4580 case 0x28 ... 0x2d:
4581 case 0x30 ... 0x35:
4582 case 0x38 ... 0x3d:
4583 {
4584 int op, f, val;
4585 op = (b >> 3) & 7;
4586 f = (b >> 1) & 3;
4587
4588 ot = mo_b_d(b, dflag);
4589
4590 switch(f) {
4591 case 0: /* OP Ev, Gv */
4592 modrm = cpu_ldub_code(env, s->pc++);
4593 reg = ((modrm >> 3) & 7) | rex_r;
4594 mod = (modrm >> 6) & 3;
4595 rm = (modrm & 7) | REX_B(s);
4596 if (mod != 3) {
4597 gen_lea_modrm(env, s, modrm);
4598 opreg = OR_TMP0;
4599 } else if (op == OP_XORL && rm == reg) {
4600 xor_zero:
4601 /* xor reg, reg optimisation */
4602 set_cc_op(s, CC_OP_CLR);
4603 tcg_gen_movi_tl(cpu_T0, 0);
4604 gen_op_mov_reg_v(ot, reg, cpu_T0);
4605 break;
4606 } else {
4607 opreg = rm;
4608 }
4609 gen_op_mov_v_reg(ot, cpu_T1, reg);
4610 gen_op(s, op, ot, opreg);
4611 break;
4612 case 1: /* OP Gv, Ev */
4613 modrm = cpu_ldub_code(env, s->pc++);
4614 mod = (modrm >> 6) & 3;
4615 reg = ((modrm >> 3) & 7) | rex_r;
4616 rm = (modrm & 7) | REX_B(s);
4617 if (mod != 3) {
4618 gen_lea_modrm(env, s, modrm);
4619 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4620 } else if (op == OP_XORL && rm == reg) {
4621 goto xor_zero;
4622 } else {
4623 gen_op_mov_v_reg(ot, cpu_T1, rm);
4624 }
4625 gen_op(s, op, ot, reg);
4626 break;
4627 case 2: /* OP A, Iv */
4628 val = insn_get(env, s, ot);
4629 tcg_gen_movi_tl(cpu_T1, val);
4630 gen_op(s, op, ot, OR_EAX);
4631 break;
4632 }
4633 }
4634 break;
4635
4636 case 0x82:
4637 if (CODE64(s))
4638 goto illegal_op;
4639 case 0x80: /* GRP1 */
4640 case 0x81:
4641 case 0x83:
4642 {
4643 int val;
4644
4645 ot = mo_b_d(b, dflag);
4646
4647 modrm = cpu_ldub_code(env, s->pc++);
4648 mod = (modrm >> 6) & 3;
4649 rm = (modrm & 7) | REX_B(s);
4650 op = (modrm >> 3) & 7;
4651
4652 if (mod != 3) {
4653 if (b == 0x83)
4654 s->rip_offset = 1;
4655 else
4656 s->rip_offset = insn_const_size(ot);
4657 gen_lea_modrm(env, s, modrm);
4658 opreg = OR_TMP0;
4659 } else {
4660 opreg = rm;
4661 }
4662
4663 switch(b) {
4664 default:
4665 case 0x80:
4666 case 0x81:
4667 case 0x82:
4668 val = insn_get(env, s, ot);
4669 break;
4670 case 0x83:
4671 val = (int8_t)insn_get(env, s, MO_8);
4672 break;
4673 }
4674 tcg_gen_movi_tl(cpu_T1, val);
4675 gen_op(s, op, ot, opreg);
4676 }
4677 break;
4678
4679 /**************************/
4680 /* inc, dec, and other misc arith */
4681 case 0x40 ... 0x47: /* inc Gv */
4682 ot = dflag;
4683 gen_inc(s, ot, OR_EAX + (b & 7), 1);
4684 break;
4685 case 0x48 ... 0x4f: /* dec Gv */
4686 ot = dflag;
4687 gen_inc(s, ot, OR_EAX + (b & 7), -1);
4688 break;
4689 case 0xf6: /* GRP3 */
4690 case 0xf7:
4691 ot = mo_b_d(b, dflag);
4692
4693 modrm = cpu_ldub_code(env, s->pc++);
4694 mod = (modrm >> 6) & 3;
4695 rm = (modrm & 7) | REX_B(s);
4696 op = (modrm >> 3) & 7;
4697 if (mod != 3) {
4698 if (op == 0) {
4699 s->rip_offset = insn_const_size(ot);
4700 }
4701 gen_lea_modrm(env, s, modrm);
4702 /* For those below that handle locked memory, don't load here. */
4703 if (!(s->prefix & PREFIX_LOCK)
4704 || op != 2) {
4705 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
4706 }
4707 } else {
4708 gen_op_mov_v_reg(ot, cpu_T0, rm);
4709 }
4710
4711 switch(op) {
4712 case 0: /* test */
4713 val = insn_get(env, s, ot);
4714 tcg_gen_movi_tl(cpu_T1, val);
4715 gen_op_testl_T0_T1_cc();
4716 set_cc_op(s, CC_OP_LOGICB + ot);
4717 break;
4718 case 2: /* not */
4719 if (s->prefix & PREFIX_LOCK) {
4720 if (mod == 3) {
4721 goto illegal_op;
4722 }
4723 tcg_gen_movi_tl(cpu_T0, ~0);
4724 tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
4725 s->mem_index, ot | MO_LE);
4726 } else {
4727 tcg_gen_not_tl(cpu_T0, cpu_T0);
4728 if (mod != 3) {
4729 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
4730 } else {
4731 gen_op_mov_reg_v(ot, rm, cpu_T0);
4732 }
4733 }
4734 break;
4735 case 3: /* neg */
4736 if (s->prefix & PREFIX_LOCK) {
4737 TCGLabel *label1;
4738 TCGv a0, t0, t1, t2;
4739
4740 if (mod == 3) {
4741 goto illegal_op;
4742 }
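/* There is no atomic-negate primitive, so the locked form is built from a
   compare-and-swap retry loop: snapshot the old value, attempt
   cmpxchg(mem, old, -old), and branch back to label1 if another CPU
   changed the location in the meantime.  */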
4743 a0 = tcg_temp_local_new();
4744 t0 = tcg_temp_local_new();
4745 label1 = gen_new_label();
4746
4747 tcg_gen_mov_tl(a0, cpu_A0);
4748 tcg_gen_mov_tl(t0, cpu_T0);
4749
4750 gen_set_label(label1);
4751 t1 = tcg_temp_new();
4752 t2 = tcg_temp_new();
4753 tcg_gen_mov_tl(t2, t0);
4754 tcg_gen_neg_tl(t1, t0);
4755 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4756 s->mem_index, ot | MO_LE);
4757 tcg_temp_free(t1);
4758 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4759
4760 tcg_temp_free(t2);
4761 tcg_temp_free(a0);
4762 tcg_gen_mov_tl(cpu_T0, t0);
4763 tcg_temp_free(t0);
4764 } else {
4765 tcg_gen_neg_tl(cpu_T0, cpu_T0);
4766 if (mod != 3) {
4767 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
4768 } else {
4769 gen_op_mov_reg_v(ot, rm, cpu_T0);
4770 }
4771 }
4772 gen_op_update_neg_cc();
4773 set_cc_op(s, CC_OP_SUBB + ot);
4774 break;
4775 case 4: /* mul */
4776 switch(ot) {
4777 case MO_8:
4778 gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
4779 tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
4780 tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
4781 /* XXX: use 32 bit mul which could be faster */
4782 tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4783 gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4784 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4785 tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00);
4786 set_cc_op(s, CC_OP_MULB);
4787 break;
4788 case MO_16:
4789 gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
4790 tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4791 tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
4792 /* XXX: use 32 bit mul which could be faster */
4793 tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4794 gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4795 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4796 tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
4797 gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4798 tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
4799 set_cc_op(s, CC_OP_MULW);
4800 break;
4801 default:
4802 case MO_32:
4803 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4804 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4805 tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4806 cpu_tmp2_i32, cpu_tmp3_i32);
4807 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4808 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4809 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4810 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4811 set_cc_op(s, CC_OP_MULL);
4812 break;
4813 #ifdef TARGET_X86_64
4814 case MO_64:
4815 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4816 cpu_T0, cpu_regs[R_EAX]);
4817 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4818 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4819 set_cc_op(s, CC_OP_MULQ);
4820 break;
4821 #endif
4822 }
4823 break;
4824 case 5: /* imul */
4825 switch(ot) {
4826 case MO_8:
4827 gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
4828 tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
4829 tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
4830 /* XXX: use 32 bit mul which could be faster */
4831 tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4832 gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4833 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4834 tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0);
4835 tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
4836 set_cc_op(s, CC_OP_MULB);
4837 break;
4838 case MO_16:
4839 gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
4840 tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
4841 tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
4842 /* XXX: use 32 bit mul which could be faster */
4843 tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4844 gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4845 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4846 tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
4847 tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
4848 tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
4849 gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4850 set_cc_op(s, CC_OP_MULW);
4851 break;
4852 default:
4853 case MO_32:
4854 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4855 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4856 tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4857 cpu_tmp2_i32, cpu_tmp3_i32);
4858 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4859 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4860 tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
4861 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4862 tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
4863 tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
4864 set_cc_op(s, CC_OP_MULL);
4865 break;
4866 #ifdef TARGET_X86_64
4867 case MO_64:
4868 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4869 cpu_T0, cpu_regs[R_EAX]);
4870 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4871 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4872 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4873 set_cc_op(s, CC_OP_MULQ);
4874 break;
4875 #endif
4876 }
4877 break;
4878 case 6: /* div */
4879 switch(ot) {
4880 case MO_8:
4881 gen_helper_divb_AL(cpu_env, cpu_T0);
4882 break;
4883 case MO_16:
4884 gen_helper_divw_AX(cpu_env, cpu_T0);
4885 break;
4886 default:
4887 case MO_32:
4888 gen_helper_divl_EAX(cpu_env, cpu_T0);
4889 break;
4890 #ifdef TARGET_X86_64
4891 case MO_64:
4892 gen_helper_divq_EAX(cpu_env, cpu_T0);
4893 break;
4894 #endif
4895 }
4896 break;
4897 case 7: /* idiv */
4898 switch(ot) {
4899 case MO_8:
4900 gen_helper_idivb_AL(cpu_env, cpu_T0);
4901 break;
4902 case MO_16:
4903 gen_helper_idivw_AX(cpu_env, cpu_T0);
4904 break;
4905 default:
4906 case MO_32:
4907 gen_helper_idivl_EAX(cpu_env, cpu_T0);
4908 break;
4909 #ifdef TARGET_X86_64
4910 case MO_64:
4911 gen_helper_idivq_EAX(cpu_env, cpu_T0);
4912 break;
4913 #endif
4914 }
4915 break;
4916 default:
4917 goto unknown_op;
4918 }
4919 break;
4920
4921 case 0xfe: /* GRP4 */
4922 case 0xff: /* GRP5 */
4923 ot = mo_b_d(b, dflag);
4924
4925 modrm = cpu_ldub_code(env, s->pc++);
4926 mod = (modrm >> 6) & 3;
4927 rm = (modrm & 7) | REX_B(s);
4928 op = (modrm >> 3) & 7;
4929 if (op >= 2 && b == 0xfe) {
4930 goto unknown_op;
4931 }
4932 if (CODE64(s)) {
4933 if (op == 2 || op == 4) {
4934 /* operand size for jumps is 64 bit */
4935 ot = MO_64;
4936 } else if (op == 3 || op == 5) {
4937 ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
4938 } else if (op == 6) {
4939 /* default push size is 64 bit */
4940 ot = mo_pushpop(s, dflag);
4941 }
4942 }
4943 if (mod != 3) {
4944 gen_lea_modrm(env, s, modrm);
4945 if (op >= 2 && op != 3 && op != 5)
4946 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
4947 } else {
4948 gen_op_mov_v_reg(ot, cpu_T0, rm);
4949 }
4950
4951 switch(op) {
4952 case 0: /* inc Ev */
4953 if (mod != 3)
4954 opreg = OR_TMP0;
4955 else
4956 opreg = rm;
4957 gen_inc(s, ot, opreg, 1);
4958 break;
4959 case 1: /* dec Ev */
4960 if (mod != 3)
4961 opreg = OR_TMP0;
4962 else
4963 opreg = rm;
4964 gen_inc(s, ot, opreg, -1);
4965 break;
4966 case 2: /* call Ev */
4967 /* XXX: optimize if memory (no 'and' is necessary) */
4968 if (dflag == MO_16) {
4969 tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4970 }
4971 next_eip = s->pc - s->cs_base;
4972 tcg_gen_movi_tl(cpu_T1, next_eip);
4973 gen_push_v(s, cpu_T1);
4974 gen_op_jmp_v(cpu_T0);
4975 gen_bnd_jmp(s);
4976 gen_eob(s);
4977 break;
4978 case 3: /* lcall Ev */
4979 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4980 gen_add_A0_im(s, 1 << ot);
4981 gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
4982 do_lcall:
4983 if (s->pe && !s->vm86) {
4984 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4985 gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
4986 tcg_const_i32(dflag - 1),
4987 tcg_const_tl(s->pc - s->cs_base));
4988 } else {
4989 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4990 gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
4991 tcg_const_i32(dflag - 1),
4992 tcg_const_i32(s->pc - s->cs_base));
4993 }
4994 gen_eob(s);
4995 break;
4996 case 4: /* jmp Ev */
4997 if (dflag == MO_16) {
4998 tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4999 }
5000 gen_op_jmp_v(cpu_T0);
5001 gen_bnd_jmp(s);
5002 gen_eob(s);
5003 break;
5004 case 5: /* ljmp Ev */
5005 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5006 gen_add_A0_im(s, 1 << ot);
5007 gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
5008 do_ljmp:
5009 if (s->pe && !s->vm86) {
5010 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
5011 gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
5012 tcg_const_tl(s->pc - s->cs_base));
5013 } else {
5014 gen_op_movl_seg_T0_vm(R_CS);
5015 gen_op_jmp_v(cpu_T1);
5016 }
5017 gen_eob(s);
5018 break;
5019 case 6: /* push Ev */
5020 gen_push_v(s, cpu_T0);
5021 break;
5022 default:
5023 goto unknown_op;
5024 }
5025 break;
5026
5027 case 0x84: /* test Ev, Gv */
5028 case 0x85:
5029 ot = mo_b_d(b, dflag);
5030
5031 modrm = cpu_ldub_code(env, s->pc++);
5032 reg = ((modrm >> 3) & 7) | rex_r;
5033
5034 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5035 gen_op_mov_v_reg(ot, cpu_T1, reg);
5036 gen_op_testl_T0_T1_cc();
5037 set_cc_op(s, CC_OP_LOGICB + ot);
5038 break;
5039
5040 case 0xa8: /* test eAX, Iv */
5041 case 0xa9:
5042 ot = mo_b_d(b, dflag);
5043 val = insn_get(env, s, ot);
5044
5045 gen_op_mov_v_reg(ot, cpu_T0, OR_EAX);
5046 tcg_gen_movi_tl(cpu_T1, val);
5047 gen_op_testl_T0_T1_cc();
5048 set_cc_op(s, CC_OP_LOGICB + ot);
5049 break;
5050
5051 case 0x98: /* CWDE/CBW */
5052 switch (dflag) {
5053 #ifdef TARGET_X86_64
5054 case MO_64:
5055 gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
5056 tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
5057 gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0);
5058 break;
5059 #endif
5060 case MO_32:
5061 gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
5062 tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5063 gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0);
5064 break;
5065 case MO_16:
5066 gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX);
5067 tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
5068 gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
5069 break;
5070 default:
5071 tcg_abort();
5072 }
5073 break;
5074 case 0x99: /* CDQ/CWD */
5075 switch (dflag) {
5076 #ifdef TARGET_X86_64
5077 case MO_64:
5078 gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX);
5079 tcg_gen_sari_tl(cpu_T0, cpu_T0, 63);
5080 gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0);
5081 break;
5082 #endif
5083 case MO_32:
5084 gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
5085 tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
5086 tcg_gen_sari_tl(cpu_T0, cpu_T0, 31);
5087 gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0);
5088 break;
5089 case MO_16:
5090 gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
5091 tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5092 tcg_gen_sari_tl(cpu_T0, cpu_T0, 15);
5093 gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
5094 break;
5095 default:
5096 tcg_abort();
5097 }
5098 break;
5099 case 0x1af: /* imul Gv, Ev */
5100 case 0x69: /* imul Gv, Ev, I */
5101 case 0x6b:
5102 ot = dflag;
5103 modrm = cpu_ldub_code(env, s->pc++);
5104 reg = ((modrm >> 3) & 7) | rex_r;
5105 if (b == 0x69)
5106 s->rip_offset = insn_const_size(ot);
5107 else if (b == 0x6b)
5108 s->rip_offset = 1;
5109 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5110 if (b == 0x69) {
5111 val = insn_get(env, s, ot);
5112 tcg_gen_movi_tl(cpu_T1, val);
5113 } else if (b == 0x6b) {
5114 val = (int8_t)insn_get(env, s, MO_8);
5115 tcg_gen_movi_tl(cpu_T1, val);
5116 } else {
5117 gen_op_mov_v_reg(ot, cpu_T1, reg);
5118 }
5119 switch (ot) {
5120 #ifdef TARGET_X86_64
5121 case MO_64:
5122 tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1);
5123 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5124 tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5125 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
5126 break;
5127 #endif
5128 case MO_32:
5129 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
5130 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
5131 tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
5132 cpu_tmp2_i32, cpu_tmp3_i32);
5133 tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
5134 tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
5135 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5136 tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
5137 tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
5138 break;
5139 default:
5140 tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5141 tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
5142 /* XXX: use 32 bit mul which could be faster */
5143 tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
5144 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
5145 tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
5146 tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
5147 gen_op_mov_reg_v(ot, reg, cpu_T0);
5148 break;
5149 }
5150 set_cc_op(s, CC_OP_MULB + ot);
5151 break;
5152 case 0x1c0:
5153 case 0x1c1: /* xadd Ev, Gv */
5154 ot = mo_b_d(b, dflag);
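/* XADD: temp = dest; dest = dest + src; src = temp.  The register form
   below is a plain exchange-and-add, while the memory form with a LOCK
   prefix maps directly onto an atomic fetch-and-add.  */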
5155 modrm = cpu_ldub_code(env, s->pc++);
5156 reg = ((modrm >> 3) & 7) | rex_r;
5157 mod = (modrm >> 6) & 3;
5158 gen_op_mov_v_reg(ot, cpu_T0, reg);
5159 if (mod == 3) {
5160 rm = (modrm & 7) | REX_B(s);
5161 gen_op_mov_v_reg(ot, cpu_T1, rm);
5162 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5163 gen_op_mov_reg_v(ot, reg, cpu_T1);
5164 gen_op_mov_reg_v(ot, rm, cpu_T0);
5165 } else {
5166 gen_lea_modrm(env, s, modrm);
5167 if (s->prefix & PREFIX_LOCK) {
5168 tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0,
5169 s->mem_index, ot | MO_LE);
5170 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5171 } else {
5172 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5173 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5174 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5175 }
5176 gen_op_mov_reg_v(ot, reg, cpu_T1);
5177 }
5178 gen_op_update2_cc();
5179 set_cc_op(s, CC_OP_ADDB + ot);
5180 break;
5181 case 0x1b0:
5182 case 0x1b1: /* cmpxchg Ev, Gv */
5183 {
5184 TCGv oldv, newv, cmpv;
5185
5186 ot = mo_b_d(b, dflag);
5187 modrm = cpu_ldub_code(env, s->pc++);
5188 reg = ((modrm >> 3) & 7) | rex_r;
5189 mod = (modrm >> 6) & 3;
5190 oldv = tcg_temp_new();
5191 newv = tcg_temp_new();
5192 cmpv = tcg_temp_new();
5193 gen_op_mov_v_reg(ot, newv, reg);
5194 tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5195
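/* CMPXCHG: if the accumulator equals the destination, ZF is set and the
   destination receives the source register; otherwise ZF is cleared and
   the accumulator receives the destination.  ZF and the other flags are
   derived later from cmpv - oldv via CC_OP_SUB.  */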
5196 if (s->prefix & PREFIX_LOCK) {
5197 if (mod == 3) {
5198 goto illegal_op;
5199 }
5200 gen_lea_modrm(env, s, modrm);
5201 tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv,
5202 s->mem_index, ot | MO_LE);
5203 gen_op_mov_reg_v(ot, R_EAX, oldv);
5204 } else {
5205 if (mod == 3) {
5206 rm = (modrm & 7) | REX_B(s);
5207 gen_op_mov_v_reg(ot, oldv, rm);
5208 } else {
5209 gen_lea_modrm(env, s, modrm);
5210 gen_op_ld_v(s, ot, oldv, cpu_A0);
5211 rm = 0; /* avoid warning */
5212 }
5213 gen_extu(ot, oldv);
5214 gen_extu(ot, cmpv);
5215 /* store value = (old == cmp ? new : old); */
5216 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5217 if (mod == 3) {
5218 gen_op_mov_reg_v(ot, R_EAX, oldv);
5219 gen_op_mov_reg_v(ot, rm, newv);
5220 } else {
5221 /* Perform an unconditional store cycle like physical cpu;
5222 must be before changing accumulator to ensure
5223 idempotency if the store faults and the instruction
5224 is restarted */
5225 gen_op_st_v(s, ot, newv, cpu_A0);
5226 gen_op_mov_reg_v(ot, R_EAX, oldv);
5227 }
5228 }
5229 tcg_gen_mov_tl(cpu_cc_src, oldv);
5230 tcg_gen_mov_tl(cpu_cc_srcT, cmpv);
5231 tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5232 set_cc_op(s, CC_OP_SUBB + ot);
5233 tcg_temp_free(oldv);
5234 tcg_temp_free(newv);
5235 tcg_temp_free(cmpv);
5236 }
5237 break;
5238 case 0x1c7: /* cmpxchg8b */
5239 modrm = cpu_ldub_code(env, s->pc++);
5240 mod = (modrm >> 6) & 3;
5241 if ((mod == 3) || ((modrm & 0x38) != 0x8))
5242 goto illegal_op;
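/* CMPXCHG8B compares EDX:EAX with the 64-bit memory operand: on a match
   ZF is set and ECX:EBX is stored, otherwise ZF is cleared and the
   operand is loaded into EDX:EAX.  With REX.W this becomes CMPXCHG16B on
   a 16-byte operand, gated on CPUID.CX16.  */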
5243 #ifdef TARGET_X86_64
5244 if (dflag == MO_64) {
5245 if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
5246 goto illegal_op;
5247 gen_lea_modrm(env, s, modrm);
5248 if ((s->prefix & PREFIX_LOCK) && parallel_cpus) {
5249 gen_helper_cmpxchg16b(cpu_env, cpu_A0);
5250 } else {
5251 gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0);
5252 }
5253 } else
5254 #endif
5255 {
5256 if (!(s->cpuid_features & CPUID_CX8))
5257 goto illegal_op;
5258 gen_lea_modrm(env, s, modrm);
5259 if ((s->prefix & PREFIX_LOCK) && parallel_cpus) {
5260 gen_helper_cmpxchg8b(cpu_env, cpu_A0);
5261 } else {
5262 gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0);
5263 }
5264 }
5265 set_cc_op(s, CC_OP_EFLAGS);
5266 break;
5267
5268 /**************************/
5269 /* push/pop */
5270 case 0x50 ... 0x57: /* push */
5271 gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s));
5272 gen_push_v(s, cpu_T0);
5273 break;
5274 case 0x58 ... 0x5f: /* pop */
5275 ot = gen_pop_T0(s);
5276 /* NOTE: order is important for pop %sp */
5277 gen_pop_update(s, ot);
5278 gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0);
5279 break;
5280 case 0x60: /* pusha */
5281 if (CODE64(s))
5282 goto illegal_op;
5283 gen_pusha(s);
5284 break;
5285 case 0x61: /* popa */
5286 if (CODE64(s))
5287 goto illegal_op;
5288 gen_popa(s);
5289 break;
5290 case 0x68: /* push Iv */
5291 case 0x6a:
5292 ot = mo_pushpop(s, dflag);
5293 if (b == 0x68)
5294 val = insn_get(env, s, ot);
5295 else
5296 val = (int8_t)insn_get(env, s, MO_8);
5297 tcg_gen_movi_tl(cpu_T0, val);
5298 gen_push_v(s, cpu_T0);
5299 break;
5300 case 0x8f: /* pop Ev */
5301 modrm = cpu_ldub_code(env, s->pc++);
5302 mod = (modrm >> 6) & 3;
5303 ot = gen_pop_T0(s);
5304 if (mod == 3) {
5305 /* NOTE: order is important for pop %sp */
5306 gen_pop_update(s, ot);
5307 rm = (modrm & 7) | REX_B(s);
5308 gen_op_mov_reg_v(ot, rm, cpu_T0);
5309 } else {
5310 /* NOTE: order is important too for MMU exceptions */
5311 s->popl_esp_hack = 1 << ot;
5312 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5313 s->popl_esp_hack = 0;
5314 gen_pop_update(s, ot);
5315 }
5316 break;
5317 case 0xc8: /* enter */
5318 {
5319 int level;
5320 val = cpu_lduw_code(env, s->pc);
5321 s->pc += 2;
5322 level = cpu_ldub_code(env, s->pc++);
5323 gen_enter(s, val, level);
5324 }
5325 break;
5326 case 0xc9: /* leave */
5327 gen_leave(s);
5328 break;
5329 case 0x06: /* push es */
5330 case 0x0e: /* push cs */
5331 case 0x16: /* push ss */
5332 case 0x1e: /* push ds */
5333 if (CODE64(s))
5334 goto illegal_op;
5335 gen_op_movl_T0_seg(b >> 3);
5336 gen_push_v(s, cpu_T0);
5337 break;
5338 case 0x1a0: /* push fs */
5339 case 0x1a8: /* push gs */
5340 gen_op_movl_T0_seg((b >> 3) & 7);
5341 gen_push_v(s, cpu_T0);
5342 break;
5343 case 0x07: /* pop es */
5344 case 0x17: /* pop ss */
5345 case 0x1f: /* pop ds */
5346 if (CODE64(s))
5347 goto illegal_op;
5348 reg = b >> 3;
5349 ot = gen_pop_T0(s);
5350 gen_movl_seg_T0(s, reg);
5351 gen_pop_update(s, ot);
5352 /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */
5353 if (s->is_jmp) {
5354 gen_jmp_im(s->pc - s->cs_base);
5355 if (reg == R_SS) {
5356 s->tf = 0;
5357 gen_eob_inhibit_irq(s, true);
5358 } else {
5359 gen_eob(s);
5360 }
5361 }
5362 break;
5363 case 0x1a1: /* pop fs */
5364 case 0x1a9: /* pop gs */
5365 ot = gen_pop_T0(s);
5366 gen_movl_seg_T0(s, (b >> 3) & 7);
5367 gen_pop_update(s, ot);
5368 if (s->is_jmp) {
5369 gen_jmp_im(s->pc - s->cs_base);
5370 gen_eob(s);
5371 }
5372 break;
5373
5374 /**************************/
5375 /* mov */
5376 case 0x88:
5377 case 0x89: /* mov Gv, Ev */
5378 ot = mo_b_d(b, dflag);
5379 modrm = cpu_ldub_code(env, s->pc++);
5380 reg = ((modrm >> 3) & 7) | rex_r;
5381
5382 /* generate a generic store */
5383 gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5384 break;
5385 case 0xc6:
5386 case 0xc7: /* mov Ev, Iv */
5387 ot = mo_b_d(b, dflag);
5388 modrm = cpu_ldub_code(env, s->pc++);
5389 mod = (modrm >> 6) & 3;
5390 if (mod != 3) {
5391 s->rip_offset = insn_const_size(ot);
5392 gen_lea_modrm(env, s, modrm);
5393 }
5394 val = insn_get(env, s, ot);
5395 tcg_gen_movi_tl(cpu_T0, val);
5396 if (mod != 3) {
5397 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5398 } else {
5399 gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
5400 }
5401 break;
5402 case 0x8a:
5403 case 0x8b: /* mov Ev, Gv */
5404 ot = mo_b_d(b, dflag);
5405 modrm = cpu_ldub_code(env, s->pc++);
5406 reg = ((modrm >> 3) & 7) | rex_r;
5407
5408 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5409 gen_op_mov_reg_v(ot, reg, cpu_T0);
5410 break;
5411 case 0x8e: /* mov seg, Gv */
5412 modrm = cpu_ldub_code(env, s->pc++);
5413 reg = (modrm >> 3) & 7;
5414 if (reg >= 6 || reg == R_CS)
5415 goto illegal_op;
5416 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5417 gen_movl_seg_T0(s, reg);
5418 /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */
5419 if (s->is_jmp) {
5420 gen_jmp_im(s->pc - s->cs_base);
5421 if (reg == R_SS) {
5422 s->tf = 0;
5423 gen_eob_inhibit_irq(s, true);
5424 } else {
5425 gen_eob(s);
5426 }
5427 }
5428 break;
5429 case 0x8c: /* mov Gv, seg */
5430 modrm = cpu_ldub_code(env, s->pc++);
5431 reg = (modrm >> 3) & 7;
5432 mod = (modrm >> 6) & 3;
5433 if (reg >= 6)
5434 goto illegal_op;
5435 gen_op_movl_T0_seg(reg);
5436 ot = mod == 3 ? dflag : MO_16;
5437 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5438 break;
5439
5440 case 0x1b6: /* movzbS Gv, Eb */
5441 case 0x1b7: /* movzwS Gv, Eb */
5442 case 0x1be: /* movsbS Gv, Eb */
5443 case 0x1bf: /* movswS Gv, Eb */
5444 {
5445 TCGMemOp d_ot;
5446 TCGMemOp s_ot;
5447
5448 /* d_ot is the size of destination */
5449 d_ot = dflag;
5450 /* ot is the size of source */
5451 ot = (b & 1) + MO_8;
5452 /* s_ot is the sign+size of source */
5453 s_ot = b & 8 ? MO_SIGN | ot : ot;
5454
5455 modrm = cpu_ldub_code(env, s->pc++);
5456 reg = ((modrm >> 3) & 7) | rex_r;
5457 mod = (modrm >> 6) & 3;
5458 rm = (modrm & 7) | REX_B(s);
5459
5460 if (mod == 3) {
5461 if (s_ot == MO_SB && byte_reg_is_xH(rm)) {
5462 tcg_gen_sextract_tl(cpu_T0, cpu_regs[rm - 4], 8, 8);
5463 } else {
5464 gen_op_mov_v_reg(ot, cpu_T0, rm);
5465 switch (s_ot) {
5466 case MO_UB:
5467 tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
5468 break;
5469 case MO_SB:
5470 tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
5471 break;
5472 case MO_UW:
5473 tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
5474 break;
5475 default:
5476 case MO_SW:
5477 tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5478 break;
5479 }
5480 }
5481 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
5482 } else {
5483 gen_lea_modrm(env, s, modrm);
5484 gen_op_ld_v(s, s_ot, cpu_T0, cpu_A0);
5485 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
5486 }
5487 }
5488 break;
5489
5490 case 0x8d: /* lea */
5491 modrm = cpu_ldub_code(env, s->pc++);
5492 mod = (modrm >> 6) & 3;
5493 if (mod == 3)
5494 goto illegal_op;
5495 reg = ((modrm >> 3) & 7) | rex_r;
5496 {
5497 AddressParts a = gen_lea_modrm_0(env, s, modrm);
5498 TCGv ea = gen_lea_modrm_1(a);
5499 gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5500 gen_op_mov_reg_v(dflag, reg, cpu_A0);
5501 }
5502 break;
5503
5504 case 0xa0: /* mov EAX, Ov */
5505 case 0xa1:
5506 case 0xa2: /* mov Ov, EAX */
5507 case 0xa3:
5508 {
5509 target_ulong offset_addr;
5510
5511 ot = mo_b_d(b, dflag);
5512 switch (s->aflag) {
5513 #ifdef TARGET_X86_64
5514 case MO_64:
5515 offset_addr = cpu_ldq_code(env, s->pc);
5516 s->pc += 8;
5517 break;
5518 #endif
5519 default:
5520 offset_addr = insn_get(env, s, s->aflag);
5521 break;
5522 }
5523 tcg_gen_movi_tl(cpu_A0, offset_addr);
5524 gen_add_A0_ds_seg(s);
5525 if ((b & 2) == 0) {
5526 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
5527 gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
5528 } else {
5529 gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
5530 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5531 }
5532 }
5533 break;
5534 case 0xd7: /* xlat */
5535 tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
5536 tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
5537 tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T0);
5538 gen_extu(s->aflag, cpu_A0);
5539 gen_add_A0_ds_seg(s);
5540 gen_op_ld_v(s, MO_8, cpu_T0, cpu_A0);
5541 gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
5542 break;
5543 case 0xb0 ... 0xb7: /* mov R, Ib */
5544 val = insn_get(env, s, MO_8);
5545 tcg_gen_movi_tl(cpu_T0, val);
5546 gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T0);
5547 break;
5548 case 0xb8 ... 0xbf: /* mov R, Iv */
5549 #ifdef TARGET_X86_64
5550 if (dflag == MO_64) {
5551 uint64_t tmp;
5552 /* 64 bit case */
5553 tmp = cpu_ldq_code(env, s->pc);
5554 s->pc += 8;
5555 reg = (b & 7) | REX_B(s);
5556 tcg_gen_movi_tl(cpu_T0, tmp);
5557 gen_op_mov_reg_v(MO_64, reg, cpu_T0);
5558 } else
5559 #endif
5560 {
5561 ot = dflag;
5562 val = insn_get(env, s, ot);
5563 reg = (b & 7) | REX_B(s);
5564 tcg_gen_movi_tl(cpu_T0, val);
5565 gen_op_mov_reg_v(ot, reg, cpu_T0);
5566 }
5567 break;
5568
5569 case 0x91 ... 0x97: /* xchg R, EAX */
5570 do_xchg_reg_eax:
5571 ot = dflag;
5572 reg = (b & 7) | REX_B(s);
5573 rm = R_EAX;
5574 goto do_xchg_reg;
5575 case 0x86:
5576 case 0x87: /* xchg Ev, Gv */
5577 ot = mo_b_d(b, dflag);
5578 modrm = cpu_ldub_code(env, s->pc++);
5579 reg = ((modrm >> 3) & 7) | rex_r;
5580 mod = (modrm >> 6) & 3;
5581 if (mod == 3) {
5582 rm = (modrm & 7) | REX_B(s);
5583 do_xchg_reg:
5584 gen_op_mov_v_reg(ot, cpu_T0, reg);
5585 gen_op_mov_v_reg(ot, cpu_T1, rm);
5586 gen_op_mov_reg_v(ot, rm, cpu_T0);
5587 gen_op_mov_reg_v(ot, reg, cpu_T1);
5588 } else {
5589 gen_lea_modrm(env, s, modrm);
5590 gen_op_mov_v_reg(ot, cpu_T0, reg);
5591 /* for xchg, lock is implicit */
5592 tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0,
5593 s->mem_index, ot | MO_LE);
5594 gen_op_mov_reg_v(ot, reg, cpu_T1);
5595 }
5596 break;
5597 case 0xc4: /* les Gv */
5598 /* In CODE64 this is VEX3; see above. */
5599 op = R_ES;
5600 goto do_lxx;
5601 case 0xc5: /* lds Gv */
5602 /* In CODE64 this is VEX2; see above. */
5603 op = R_DS;
5604 goto do_lxx;
5605 case 0x1b2: /* lss Gv */
5606 op = R_SS;
5607 goto do_lxx;
5608 case 0x1b4: /* lfs Gv */
5609 op = R_FS;
5610 goto do_lxx;
5611 case 0x1b5: /* lgs Gv */
5612 op = R_GS;
5613 do_lxx:
5614 ot = dflag != MO_16 ? MO_32 : MO_16;
5615 modrm = cpu_ldub_code(env, s->pc++);
5616 reg = ((modrm >> 3) & 7) | rex_r;
5617 mod = (modrm >> 6) & 3;
5618 if (mod == 3)
5619 goto illegal_op;
5620 gen_lea_modrm(env, s, modrm);
5621 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5622 gen_add_A0_im(s, 1 << ot);
5623 /* load the segment first to handle exceptions properly */
5624 gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
5625 gen_movl_seg_T0(s, op);
5626 /* then put the data */
5627 gen_op_mov_reg_v(ot, reg, cpu_T1);
5628 if (s->is_jmp) {
5629 gen_jmp_im(s->pc - s->cs_base);
5630 gen_eob(s);
5631 }
5632 break;
5633
5634 /************************/
5635 /* shifts */
5636 case 0xc0:
5637 case 0xc1:
5638 /* shift Ev,Ib */
5639 shift = 2;
5640 grp2:
5641 {
5642 ot = mo_b_d(b, dflag);
5643 modrm = cpu_ldub_code(env, s->pc++);
5644 mod = (modrm >> 6) & 3;
5645 op = (modrm >> 3) & 7;
5646
5647 if (mod != 3) {
5648 if (shift == 2) {
5649 s->rip_offset = 1;
5650 }
5651 gen_lea_modrm(env, s, modrm);
5652 opreg = OR_TMP0;
5653 } else {
5654 opreg = (modrm & 7) | REX_B(s);
5655 }
5656
5657 /* simpler op */
5658 if (shift == 0) {
5659 gen_shift(s, op, ot, opreg, OR_ECX);
5660 } else {
5661 if (shift == 2) {
5662 shift = cpu_ldub_code(env, s->pc++);
5663 }
5664 gen_shifti(s, op, ot, opreg, shift);
5665 }
5666 }
5667 break;
5668 case 0xd0:
5669 case 0xd1:
5670 /* shift Ev,1 */
5671 shift = 1;
5672 goto grp2;
5673 case 0xd2:
5674 case 0xd3:
5675 /* shift Ev,cl */
5676 shift = 0;
5677 goto grp2;
5678
5679 case 0x1a4: /* shld imm */
5680 op = 0;
5681 shift = 1;
5682 goto do_shiftd;
5683 case 0x1a5: /* shld cl */
5684 op = 0;
5685 shift = 0;
5686 goto do_shiftd;
5687 case 0x1ac: /* shrd imm */
5688 op = 1;
5689 shift = 1;
5690 goto do_shiftd;
5691 case 0x1ad: /* shrd cl */
5692 op = 1;
5693 shift = 0;
5694 do_shiftd:
5695 ot = dflag;
5696 modrm = cpu_ldub_code(env, s->pc++);
5697 mod = (modrm >> 6) & 3;
5698 rm = (modrm & 7) | REX_B(s);
5699 reg = ((modrm >> 3) & 7) | rex_r;
5700 if (mod != 3) {
5701 gen_lea_modrm(env, s, modrm);
5702 opreg = OR_TMP0;
5703 } else {
5704 opreg = rm;
5705 }
5706 gen_op_mov_v_reg(ot, cpu_T1, reg);
5707
5708 if (shift) {
5709 TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++));
5710 gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5711 tcg_temp_free(imm);
5712 } else {
5713 gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5714 }
5715 break;
5716
5717 /************************/
5718 /* floats */
5719 case 0xd8 ... 0xdf:
5720 if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5721 /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5722 /* XXX: what to do if illegal op ? */
5723 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5724 break;
5725 }
5726 modrm = cpu_ldub_code(env, s->pc++);
5727 mod = (modrm >> 6) & 3;
5728 rm = modrm & 7;
5729 op = ((b & 7) << 3) | ((modrm >> 3) & 7);
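/* op packs the low three bits of the escape opcode (d8..df) with
   ModRM.reg into a 6-bit index, 0x00..0x3f, which the switches below use
   to pick the x87 operation for both the memory and register forms.  */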
5730 if (mod != 3) {
5731 /* memory op */
5732 gen_lea_modrm(env, s, modrm);
5733 switch(op) {
5734 case 0x00 ... 0x07: /* fxxxs */
5735 case 0x10 ... 0x17: /* fixxxl */
5736 case 0x20 ... 0x27: /* fxxxl */
5737 case 0x30 ... 0x37: /* fixxx */
5738 {
5739 int op1;
5740 op1 = op & 7;
5741
5742 switch(op >> 4) {
5743 case 0:
5744 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5745 s->mem_index, MO_LEUL);
5746 gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
5747 break;
5748 case 1:
5749 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5750 s->mem_index, MO_LEUL);
5751 gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5752 break;
5753 case 2:
5754 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5755 s->mem_index, MO_LEQ);
5756 gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
5757 break;
5758 case 3:
5759 default:
5760 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5761 s->mem_index, MO_LESW);
5762 gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5763 break;
5764 }
5765
5766 gen_helper_fp_arith_ST0_FT0(op1);
5767 if (op1 == 3) {
5768 /* fcomp needs pop */
5769 gen_helper_fpop(cpu_env);
5770 }
5771 }
5772 break;
5773 case 0x08: /* flds */
5774 case 0x0a: /* fsts */
5775 case 0x0b: /* fstps */
5776 case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5777 case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5778 case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5779 switch(op & 7) {
5780 case 0:
5781 switch(op >> 4) {
5782 case 0:
5783 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5784 s->mem_index, MO_LEUL);
5785 gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
5786 break;
5787 case 1:
5788 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5789 s->mem_index, MO_LEUL);
5790 gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5791 break;
5792 case 2:
5793 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5794 s->mem_index, MO_LEQ);
5795 gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
5796 break;
5797 case 3:
5798 default:
5799 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5800 s->mem_index, MO_LESW);
5801 gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5802 break;
5803 }
5804 break;
5805 case 1:
5806 /* XXX: the corresponding CPUID bit must be tested ! */
5807 switch(op >> 4) {
5808 case 1:
5809 gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
5810 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5811 s->mem_index, MO_LEUL);
5812 break;
5813 case 2:
5814 gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
5815 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5816 s->mem_index, MO_LEQ);
5817 break;
5818 case 3:
5819 default:
5820 gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
5821 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5822 s->mem_index, MO_LEUW);
5823 break;
5824 }
5825 gen_helper_fpop(cpu_env);
5826 break;
5827 default:
5828 switch(op >> 4) {
5829 case 0:
5830 gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
5831 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5832 s->mem_index, MO_LEUL);
5833 break;
5834 case 1:
5835 gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
5836 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5837 s->mem_index, MO_LEUL);
5838 break;
5839 case 2:
5840 gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
5841 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5842 s->mem_index, MO_LEQ);
5843 break;
5844 case 3:
5845 default:
5846 gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
5847 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5848 s->mem_index, MO_LEUW);
5849 break;
5850 }
5851 if ((op & 7) == 3)
5852 gen_helper_fpop(cpu_env);
5853 break;
5854 }
5855 break;
5856 case 0x0c: /* fldenv mem */
5857 gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5858 break;
5859 case 0x0d: /* fldcw mem */
5860 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5861 s->mem_index, MO_LEUW);
5862 gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
5863 break;
5864 case 0x0e: /* fnstenv mem */
5865 gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5866 break;
5867 case 0x0f: /* fnstcw mem */
5868 gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
5869 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5870 s->mem_index, MO_LEUW);
5871 break;
5872 case 0x1d: /* fldt mem */
5873 gen_helper_fldt_ST0(cpu_env, cpu_A0);
5874 break;
5875 case 0x1f: /* fstpt mem */
5876 gen_helper_fstt_ST0(cpu_env, cpu_A0);
5877 gen_helper_fpop(cpu_env);
5878 break;
5879 case 0x2c: /* frstor mem */
5880 gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5881 break;
5882 case 0x2e: /* fnsave mem */
5883 gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5884 break;
5885 case 0x2f: /* fnstsw mem */
5886 gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
5887 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5888 s->mem_index, MO_LEUW);
5889 break;
5890 case 0x3c: /* fbld */
5891 gen_helper_fbld_ST0(cpu_env, cpu_A0);
5892 break;
5893 case 0x3e: /* fbstp */
5894 gen_helper_fbst_ST0(cpu_env, cpu_A0);
5895 gen_helper_fpop(cpu_env);
5896 break;
5897 case 0x3d: /* fildll */
5898 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5899 gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
5900 break;
5901 case 0x3f: /* fistpll */
5902 gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
5903 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5904 gen_helper_fpop(cpu_env);
5905 break;
5906 default:
5907 goto unknown_op;
5908 }
5909 } else {
5910 /* register float ops */
5911 opreg = rm;
5912
5913 switch(op) {
5914 case 0x08: /* fld sti */
5915 gen_helper_fpush(cpu_env);
5916 gen_helper_fmov_ST0_STN(cpu_env,
5917 tcg_const_i32((opreg + 1) & 7));
5918 break;
5919 case 0x09: /* fxchg sti */
5920 case 0x29: /* fxchg4 sti, undocumented op */
5921 case 0x39: /* fxchg7 sti, undocumented op */
5922 gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
5923 break;
5924 case 0x0a: /* grp d9/2 */
5925 switch(rm) {
5926 case 0: /* fnop */
5927 /* check exceptions (FreeBSD FPU probe) */
5928 gen_helper_fwait(cpu_env);
5929 break;
5930 default:
5931 goto unknown_op;
5932 }
5933 break;
5934 case 0x0c: /* grp d9/4 */
5935 switch(rm) {
5936 case 0: /* fchs */
5937 gen_helper_fchs_ST0(cpu_env);
5938 break;
5939 case 1: /* fabs */
5940 gen_helper_fabs_ST0(cpu_env);
5941 break;
5942 case 4: /* ftst */
5943 gen_helper_fldz_FT0(cpu_env);
5944 gen_helper_fcom_ST0_FT0(cpu_env);
5945 break;
5946 case 5: /* fxam */
5947 gen_helper_fxam_ST0(cpu_env);
5948 break;
5949 default:
5950 goto unknown_op;
5951 }
5952 break;
5953 case 0x0d: /* grp d9/5 */
5954 {
5955 switch(rm) {
5956 case 0:
5957 gen_helper_fpush(cpu_env);
5958 gen_helper_fld1_ST0(cpu_env);
5959 break;
5960 case 1:
5961 gen_helper_fpush(cpu_env);
5962 gen_helper_fldl2t_ST0(cpu_env);
5963 break;
5964 case 2:
5965 gen_helper_fpush(cpu_env);
5966 gen_helper_fldl2e_ST0(cpu_env);
5967 break;
5968 case 3:
5969 gen_helper_fpush(cpu_env);
5970 gen_helper_fldpi_ST0(cpu_env);
5971 break;
5972 case 4:
5973 gen_helper_fpush(cpu_env);
5974 gen_helper_fldlg2_ST0(cpu_env);
5975 break;
5976 case 5:
5977 gen_helper_fpush(cpu_env);
5978 gen_helper_fldln2_ST0(cpu_env);
5979 break;
5980 case 6:
5981 gen_helper_fpush(cpu_env);
5982 gen_helper_fldz_ST0(cpu_env);
5983 break;
5984 default:
5985 goto unknown_op;
5986 }
5987 }
5988 break;
5989 case 0x0e: /* grp d9/6 */
5990 switch(rm) {
5991 case 0: /* f2xm1 */
5992 gen_helper_f2xm1(cpu_env);
5993 break;
5994 case 1: /* fyl2x */
5995 gen_helper_fyl2x(cpu_env);
5996 break;
5997 case 2: /* fptan */
5998 gen_helper_fptan(cpu_env);
5999 break;
6000 case 3: /* fpatan */
6001 gen_helper_fpatan(cpu_env);
6002 break;
6003 case 4: /* fxtract */
6004 gen_helper_fxtract(cpu_env);
6005 break;
6006 case 5: /* fprem1 */
6007 gen_helper_fprem1(cpu_env);
6008 break;
6009 case 6: /* fdecstp */
6010 gen_helper_fdecstp(cpu_env);
6011 break;
6012 default:
6013 case 7: /* fincstp */
6014 gen_helper_fincstp(cpu_env);
6015 break;
6016 }
6017 break;
6018 case 0x0f: /* grp d9/7 */
6019 switch(rm) {
6020 case 0: /* fprem */
6021 gen_helper_fprem(cpu_env);
6022 break;
6023 case 1: /* fyl2xp1 */
6024 gen_helper_fyl2xp1(cpu_env);
6025 break;
6026 case 2: /* fsqrt */
6027 gen_helper_fsqrt(cpu_env);
6028 break;
6029 case 3: /* fsincos */
6030 gen_helper_fsincos(cpu_env);
6031 break;
6032 case 5: /* fscale */
6033 gen_helper_fscale(cpu_env);
6034 break;
6035 case 4: /* frndint */
6036 gen_helper_frndint(cpu_env);
6037 break;
6038 case 6: /* fsin */
6039 gen_helper_fsin(cpu_env);
6040 break;
6041 default:
6042 case 7: /* fcos */
6043 gen_helper_fcos(cpu_env);
6044 break;
6045 }
6046 break;
6047 case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6048 case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6049 case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6050 {
6051 int op1;
6052
6053 op1 = op & 7;
6054 if (op >= 0x20) {
6055 gen_helper_fp_arith_STN_ST0(op1, opreg);
6056 if (op >= 0x30)
6057 gen_helper_fpop(cpu_env);
6058 } else {
6059 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6060 gen_helper_fp_arith_ST0_FT0(op1);
6061 }
6062 }
6063 break;
6064 case 0x02: /* fcom */
6065 case 0x22: /* fcom2, undocumented op */
6066 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6067 gen_helper_fcom_ST0_FT0(cpu_env);
6068 break;
6069 case 0x03: /* fcomp */
6070 case 0x23: /* fcomp3, undocumented op */
6071 case 0x32: /* fcomp5, undocumented op */
6072 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6073 gen_helper_fcom_ST0_FT0(cpu_env);
6074 gen_helper_fpop(cpu_env);
6075 break;
6076 case 0x15: /* da/5 */
6077 switch(rm) {
6078 case 1: /* fucompp */
6079 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6080 gen_helper_fucom_ST0_FT0(cpu_env);
6081 gen_helper_fpop(cpu_env);
6082 gen_helper_fpop(cpu_env);
6083 break;
6084 default:
6085 goto unknown_op;
6086 }
6087 break;
6088 case 0x1c:
6089 switch(rm) {
6090 case 0: /* feni (287 only, just do nop here) */
6091 break;
6092 case 1: /* fdisi (287 only, just do nop here) */
6093 break;
6094 case 2: /* fclex */
6095 gen_helper_fclex(cpu_env);
6096 break;
6097 case 3: /* fninit */
6098 gen_helper_fninit(cpu_env);
6099 break;
6100 case 4: /* fsetpm (287 only, just do nop here) */
6101 break;
6102 default:
6103 goto unknown_op;
6104 }
6105 break;
6106 case 0x1d: /* fucomi */
6107 if (!(s->cpuid_features & CPUID_CMOV)) {
6108 goto illegal_op;
6109 }
6110 gen_update_cc_op(s);
6111 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6112 gen_helper_fucomi_ST0_FT0(cpu_env);
6113 set_cc_op(s, CC_OP_EFLAGS);
6114 break;
6115 case 0x1e: /* fcomi */
6116 if (!(s->cpuid_features & CPUID_CMOV)) {
6117 goto illegal_op;
6118 }
6119 gen_update_cc_op(s);
6120 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6121 gen_helper_fcomi_ST0_FT0(cpu_env);
6122 set_cc_op(s, CC_OP_EFLAGS);
6123 break;
6124 case 0x28: /* ffree sti */
6125 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6126 break;
6127 case 0x2a: /* fst sti */
6128 gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6129 break;
6130 case 0x2b: /* fstp sti */
6131 case 0x0b: /* fstp1 sti, undocumented op */
6132 case 0x3a: /* fstp8 sti, undocumented op */
6133 case 0x3b: /* fstp9 sti, undocumented op */
6134 gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6135 gen_helper_fpop(cpu_env);
6136 break;
6137 case 0x2c: /* fucom st(i) */
6138 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6139 gen_helper_fucom_ST0_FT0(cpu_env);
6140 break;
6141 case 0x2d: /* fucomp st(i) */
6142 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6143 gen_helper_fucom_ST0_FT0(cpu_env);
6144 gen_helper_fpop(cpu_env);
6145 break;
6146 case 0x33: /* de/3 */
6147 switch(rm) {
6148 case 1: /* fcompp */
6149 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6150 gen_helper_fcom_ST0_FT0(cpu_env);
6151 gen_helper_fpop(cpu_env);
6152 gen_helper_fpop(cpu_env);
6153 break;
6154 default:
6155 goto unknown_op;
6156 }
6157 break;
6158 case 0x38: /* ffreep sti, undocumented op */
6159 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6160 gen_helper_fpop(cpu_env);
6161 break;
6162 case 0x3c: /* df/4 */
6163 switch(rm) {
6164 case 0:
6165 gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
6166 tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
6167 gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
6168 break;
6169 default:
6170 goto unknown_op;
6171 }
6172 break;
6173 case 0x3d: /* fucomip */
6174 if (!(s->cpuid_features & CPUID_CMOV)) {
6175 goto illegal_op;
6176 }
6177 gen_update_cc_op(s);
6178 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6179 gen_helper_fucomi_ST0_FT0(cpu_env);
6180 gen_helper_fpop(cpu_env);
6181 set_cc_op(s, CC_OP_EFLAGS);
6182 break;
6183 case 0x3e: /* fcomip */
6184 if (!(s->cpuid_features & CPUID_CMOV)) {
6185 goto illegal_op;
6186 }
6187 gen_update_cc_op(s);
6188 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6189 gen_helper_fcomi_ST0_FT0(cpu_env);
6190 gen_helper_fpop(cpu_env);
6191 set_cc_op(s, CC_OP_EFLAGS);
6192 break;
6193 case 0x10 ... 0x13: /* fcmovxx */
6194 case 0x18 ... 0x1b:
6195 {
6196 int op1;
6197 TCGLabel *l1;
6198 static const uint8_t fcmov_cc[8] = {
6199 (JCC_B << 1),
6200 (JCC_Z << 1),
6201 (JCC_BE << 1),
6202 (JCC_P << 1),
6203 };
6204
6205 if (!(s->cpuid_features & CPUID_CMOV)) {
6206 goto illegal_op;
6207 }
6208 op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
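/* op1 encodes the inverse of the fcmov condition: the conditional jump below skips the register move whenever the condition does not hold. */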
6209 l1 = gen_new_label();
6210 gen_jcc1_noeob(s, op1, l1);
6211 gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6212 gen_set_label(l1);
6213 }
6214 break;
6215 default:
6216 goto unknown_op;
6217 }
6218 }
6219 break;
6220 /************************/
6221 /* string ops */
6222
6223 case 0xa4: /* movsS */
6224 case 0xa5:
6225 ot = mo_b_d(b, dflag);
6226 if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6227 gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6228 } else {
6229 gen_movs(s, ot);
6230 }
6231 break;
6232
6233 case 0xaa: /* stosS */
6234 case 0xab:
6235 ot = mo_b_d(b, dflag);
6236 if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6237 gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6238 } else {
6239 gen_stos(s, ot);
6240 }
6241 break;
6242 case 0xac: /* lodsS */
6243 case 0xad:
6244 ot = mo_b_d(b, dflag);
6245 if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6246 gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6247 } else {
6248 gen_lods(s, ot);
6249 }
6250 break;
6251 case 0xae: /* scasS */
6252 case 0xaf:
6253 ot = mo_b_d(b, dflag);
6254 if (prefixes & PREFIX_REPNZ) {
6255 gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6256 } else if (prefixes & PREFIX_REPZ) {
6257 gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6258 } else {
6259 gen_scas(s, ot);
6260 }
6261 break;
6262
6263 case 0xa6: /* cmpsS */
6264 case 0xa7:
6265 ot = mo_b_d(b, dflag);
6266 if (prefixes & PREFIX_REPNZ) {
6267 gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6268 } else if (prefixes & PREFIX_REPZ) {
6269 gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6270 } else {
6271 gen_cmps(s, ot);
6272 }
6273 break;
6274 case 0x6c: /* insS */
6275 case 0x6d:
6276 ot = mo_b_d32(b, dflag);
6277 tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6278 gen_check_io(s, ot, pc_start - s->cs_base,
6279 SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
6280 if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6281 gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6282 } else {
6283 gen_ins(s, ot);
6284 if (s->tb->cflags & CF_USE_ICOUNT) {
6285 gen_jmp(s, s->pc - s->cs_base);
6286 }
6287 }
6288 break;
6289 case 0x6e: /* outsS */
6290 case 0x6f:
6291 ot = mo_b_d32(b, dflag);
6292 tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6293 gen_check_io(s, ot, pc_start - s->cs_base,
6294 svm_is_rep(prefixes) | 4);
6295 if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6296 gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6297 } else {
6298 gen_outs(s, ot);
6299 if (s->tb->cflags & CF_USE_ICOUNT) {
6300 gen_jmp(s, s->pc - s->cs_base);
6301 }
6302 }
6303 break;
6304
6305 /************************/
6306 /* port I/O */
6307
6308 case 0xe4:
6309 case 0xe5:
6310 ot = mo_b_d32(b, dflag);
6311 val = cpu_ldub_code(env, s->pc++);
6312 tcg_gen_movi_tl(cpu_T0, val);
6313 gen_check_io(s, ot, pc_start - s->cs_base,
6314 SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
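/* With icount, the port access is bracketed by gen_io_start/gen_io_end and the TB is ended right after it. */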
6315 if (s->tb->cflags & CF_USE_ICOUNT) {
6316 gen_io_start();
6317 }
6318 tcg_gen_movi_i32(cpu_tmp2_i32, val);
6319 gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
6320 gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
6321 gen_bpt_io(s, cpu_tmp2_i32, ot);
6322 if (s->tb->cflags & CF_USE_ICOUNT) {
6323 gen_io_end();
6324 gen_jmp(s, s->pc - s->cs_base);
6325 }
6326 break;
6327 case 0xe6:
6328 case 0xe7:
6329 ot = mo_b_d32(b, dflag);
6330 val = cpu_ldub_code(env, s->pc++);
6331 tcg_gen_movi_tl(cpu_T0, val);
6332 gen_check_io(s, ot, pc_start - s->cs_base,
6333 svm_is_rep(prefixes));
6334 gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
6335
6336 if (s->tb->cflags & CF_USE_ICOUNT) {
6337 gen_io_start();
6338 }
6339 tcg_gen_movi_i32(cpu_tmp2_i32, val);
6340 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
6341 gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6342 gen_bpt_io(s, cpu_tmp2_i32, ot);
6343 if (s->tb->cflags & CF_USE_ICOUNT) {
6344 gen_io_end();
6345 gen_jmp(s, s->pc - s->cs_base);
6346 }
6347 break;
6348 case 0xec:
6349 case 0xed:
6350 ot = mo_b_d32(b, dflag);
6351 tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6352 gen_check_io(s, ot, pc_start - s->cs_base,
6353 SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6354 if (s->tb->cflags & CF_USE_ICOUNT) {
6355 gen_io_start();
6356 }
6357 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6358 gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
6359 gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
6360 gen_bpt_io(s, cpu_tmp2_i32, ot);
6361 if (s->tb->cflags & CF_USE_ICOUNT) {
6362 gen_io_end();
6363 gen_jmp(s, s->pc - s->cs_base);
6364 }
6365 break;
6366 case 0xee:
6367 case 0xef:
6368 ot = mo_b_d32(b, dflag);
6369 tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6370 gen_check_io(s, ot, pc_start - s->cs_base,
6371 svm_is_rep(prefixes));
6372 gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
6373
6374 if (s->tb->cflags & CF_USE_ICOUNT) {
6375 gen_io_start();
6376 }
6377 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6378 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
6379 gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6380 gen_bpt_io(s, cpu_tmp2_i32, ot);
6381 if (s->tb->cflags & CF_USE_ICOUNT) {
6382 gen_io_end();
6383 gen_jmp(s, s->pc - s->cs_base);
6384 }
6385 break;
6386
6387 /************************/
6388 /* control */
6389 case 0xc2: /* ret im */
6390 val = cpu_ldsw_code(env, s->pc);
6391 s->pc += 2;
6392 ot = gen_pop_T0(s);
6393 gen_stack_update(s, val + (1 << ot));
6394 /* Note that gen_pop_T0 uses a zero-extending load. */
6395 gen_op_jmp_v(cpu_T0);
6396 gen_bnd_jmp(s);
6397 gen_eob(s);
6398 break;
6399 case 0xc3: /* ret */
6400 ot = gen_pop_T0(s);
6401 gen_pop_update(s, ot);
6402 /* Note that gen_pop_T0 uses a zero-extending load. */
6403 gen_op_jmp_v(cpu_T0);
6404 gen_bnd_jmp(s);
6405 gen_eob(s);
6406 break;
6407 case 0xca: /* lret im */
6408 val = cpu_ldsw_code(env, s->pc);
6409 s->pc += 2;
6410 do_lret:
6411 if (s->pe && !s->vm86) {
6412 gen_update_cc_op(s);
6413 gen_jmp_im(pc_start - s->cs_base);
6414 gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6415 tcg_const_i32(val));
6416 } else {
6417 gen_stack_A0(s);
6418 /* pop offset */
6419 gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
6420 /* NOTE: keeping EIP updated is not a problem in case of
6421 exception */
6422 gen_op_jmp_v(cpu_T0);
6423 /* pop selector */
6424 gen_add_A0_im(s, 1 << dflag);
6425 gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
6426 gen_op_movl_seg_T0_vm(R_CS);
6427 /* add stack offset */
6428 gen_stack_update(s, val + (2 << dflag));
6429 }
6430 gen_eob(s);
6431 break;
6432 case 0xcb: /* lret */
6433 val = 0;
6434 goto do_lret;
6435 case 0xcf: /* iret */
6436 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
6437 if (!s->pe) {
6438 /* real mode */
6439 gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6440 set_cc_op(s, CC_OP_EFLAGS);
6441 } else if (s->vm86) {
6442 if (s->iopl != 3) {
6443 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6444 } else {
6445 gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6446 set_cc_op(s, CC_OP_EFLAGS);
6447 }
6448 } else {
6449 gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6450 tcg_const_i32(s->pc - s->cs_base));
6451 set_cc_op(s, CC_OP_EFLAGS);
6452 }
6453 gen_eob(s);
6454 break;
6455 case 0xe8: /* call im */
6456 {
6457 if (dflag != MO_16) {
6458 tval = (int32_t)insn_get(env, s, MO_32);
6459 } else {
6460 tval = (int16_t)insn_get(env, s, MO_16);
6461 }
6462 next_eip = s->pc - s->cs_base;
6463 tval += next_eip;
6464 if (dflag == MO_16) {
6465 tval &= 0xffff;
6466 } else if (!CODE64(s)) {
6467 tval &= 0xffffffff;
6468 }
6469 tcg_gen_movi_tl(cpu_T0, next_eip);
6470 gen_push_v(s, cpu_T0);
6471 gen_bnd_jmp(s);
6472 gen_jmp(s, tval);
6473 }
6474 break;
6475 case 0x9a: /* lcall im */
6476 {
6477 unsigned int selector, offset;
6478
6479 if (CODE64(s))
6480 goto illegal_op;
6481 ot = dflag;
6482 offset = insn_get(env, s, ot);
6483 selector = insn_get(env, s, MO_16);
6484
6485 tcg_gen_movi_tl(cpu_T0, selector);
6486 tcg_gen_movi_tl(cpu_T1, offset);
6487 }
6488 goto do_lcall;
6489 case 0xe9: /* jmp im */
6490 if (dflag != MO_16) {
6491 tval = (int32_t)insn_get(env, s, MO_32);
6492 } else {
6493 tval = (int16_t)insn_get(env, s, MO_16);
6494 }
6495 tval += s->pc - s->cs_base;
6496 if (dflag == MO_16) {
6497 tval &= 0xffff;
6498 } else if (!CODE64(s)) {
6499 tval &= 0xffffffff;
6500 }
6501 gen_bnd_jmp(s);
6502 gen_jmp(s, tval);
6503 break;
6504 case 0xea: /* ljmp im */
6505 {
6506 unsigned int selector, offset;
6507
6508 if (CODE64(s))
6509 goto illegal_op;
6510 ot = dflag;
6511 offset = insn_get(env, s, ot);
6512 selector = insn_get(env, s, MO_16);
6513
6514 tcg_gen_movi_tl(cpu_T0, selector);
6515 tcg_gen_movi_tl(cpu_T1, offset);
6516 }
6517 goto do_ljmp;
6518 case 0xeb: /* jmp Jb */
6519 tval = (int8_t)insn_get(env, s, MO_8);
6520 tval += s->pc - s->cs_base;
6521 if (dflag == MO_16) {
6522 tval &= 0xffff;
6523 }
6524 gen_jmp(s, tval);
6525 break;
6526 case 0x70 ... 0x7f: /* jcc Jb */
6527 tval = (int8_t)insn_get(env, s, MO_8);
6528 goto do_jcc;
6529 case 0x180 ... 0x18f: /* jcc Jv */
6530 if (dflag != MO_16) {
6531 tval = (int32_t)insn_get(env, s, MO_32);
6532 } else {
6533 tval = (int16_t)insn_get(env, s, MO_16);
6534 }
6535 do_jcc:
6536 next_eip = s->pc - s->cs_base;
6537 tval += next_eip;
6538 if (dflag == MO_16) {
6539 tval &= 0xffff;
6540 }
6541 gen_bnd_jmp(s);
6542 gen_jcc(s, b, tval, next_eip);
6543 break;
6544
6545 case 0x190 ... 0x19f: /* setcc Gv */
6546 modrm = cpu_ldub_code(env, s->pc++);
6547 gen_setcc1(s, b, cpu_T0);
6548 gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6549 break;
6550 case 0x140 ... 0x14f: /* cmov Gv, Ev */
6551 if (!(s->cpuid_features & CPUID_CMOV)) {
6552 goto illegal_op;
6553 }
6554 ot = dflag;
6555 modrm = cpu_ldub_code(env, s->pc++);
6556 reg = ((modrm >> 3) & 7) | rex_r;
6557 gen_cmovcc1(env, s, ot, b, modrm, reg);
6558 break;
6559
6560 /************************/
6561 /* flags */
6562 case 0x9c: /* pushf */
6563 gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6564 if (s->vm86 && s->iopl != 3) {
6565 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6566 } else {
6567 gen_update_cc_op(s);
6568 gen_helper_read_eflags(cpu_T0, cpu_env);
6569 gen_push_v(s, cpu_T0);
6570 }
6571 break;
6572 case 0x9d: /* popf */
6573 gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6574 if (s->vm86 && s->iopl != 3) {
6575 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6576 } else {
6577 ot = gen_pop_T0(s);
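/* The set of writable flag bits depends on privilege: CPL 0 may also update IOPL, CPL <= IOPL may update IF, and otherwise neither is touched. */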
6578 if (s->cpl == 0) {
6579 if (dflag != MO_16) {
6580 gen_helper_write_eflags(cpu_env, cpu_T0,
6581 tcg_const_i32((TF_MASK | AC_MASK |
6582 ID_MASK | NT_MASK |
6583 IF_MASK |
6584 IOPL_MASK)));
6585 } else {
6586 gen_helper_write_eflags(cpu_env, cpu_T0,
6587 tcg_const_i32((TF_MASK | AC_MASK |
6588 ID_MASK | NT_MASK |
6589 IF_MASK | IOPL_MASK)
6590 & 0xffff));
6591 }
6592 } else {
6593 if (s->cpl <= s->iopl) {
6594 if (dflag != MO_16) {
6595 gen_helper_write_eflags(cpu_env, cpu_T0,
6596 tcg_const_i32((TF_MASK |
6597 AC_MASK |
6598 ID_MASK |
6599 NT_MASK |
6600 IF_MASK)));
6601 } else {
6602 gen_helper_write_eflags(cpu_env, cpu_T0,
6603 tcg_const_i32((TF_MASK |
6604 AC_MASK |
6605 ID_MASK |
6606 NT_MASK |
6607 IF_MASK)
6608 & 0xffff));
6609 }
6610 } else {
6611 if (dflag != MO_16) {
6612 gen_helper_write_eflags(cpu_env, cpu_T0,
6613 tcg_const_i32((TF_MASK | AC_MASK |
6614 ID_MASK | NT_MASK)));
6615 } else {
6616 gen_helper_write_eflags(cpu_env, cpu_T0,
6617 tcg_const_i32((TF_MASK | AC_MASK |
6618 ID_MASK | NT_MASK)
6619 & 0xffff));
6620 }
6621 }
6622 }
6623 gen_pop_update(s, ot);
6624 set_cc_op(s, CC_OP_EFLAGS);
6625 /* abort translation because TF/AC flag may change */
6626 gen_jmp_im(s->pc - s->cs_base);
6627 gen_eob(s);
6628 }
6629 break;
6630 case 0x9e: /* sahf */
6631 if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6632 goto illegal_op;
6633 gen_op_mov_v_reg(MO_8, cpu_T0, R_AH);
6634 gen_compute_eflags(s);
6635 tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6636 tcg_gen_andi_tl(cpu_T0, cpu_T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6637 tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T0);
6638 break;
6639 case 0x9f: /* lahf */
6640 if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6641 goto illegal_op;
6642 gen_compute_eflags(s);
6643 /* Note: gen_compute_eflags() only gives the condition codes */
6644 tcg_gen_ori_tl(cpu_T0, cpu_cc_src, 0x02);
6645 gen_op_mov_reg_v(MO_8, R_AH, cpu_T0);
6646 break;
6647 case 0xf5: /* cmc */
6648 gen_compute_eflags(s);
6649 tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6650 break;
6651 case 0xf8: /* clc */
6652 gen_compute_eflags(s);
6653 tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6654 break;
6655 case 0xf9: /* stc */
6656 gen_compute_eflags(s);
6657 tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6658 break;
6659 case 0xfc: /* cld */
6660 tcg_gen_movi_i32(cpu_tmp2_i32, 1);
6661 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6662 break;
6663 case 0xfd: /* std */
6664 tcg_gen_movi_i32(cpu_tmp2_i32, -1);
6665 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6666 break;
6667
6668 /************************/
6669 /* bit operations */
6670 case 0x1ba: /* bt/bts/btr/btc Gv, im */
6671 ot = dflag;
6672 modrm = cpu_ldub_code(env, s->pc++);
6673 op = (modrm >> 3) & 7;
6674 mod = (modrm >> 6) & 3;
6675 rm = (modrm & 7) | REX_B(s);
6676 if (mod != 3) {
6677 s->rip_offset = 1;
6678 gen_lea_modrm(env, s, modrm);
6679 if (!(s->prefix & PREFIX_LOCK)) {
6680 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6681 }
6682 } else {
6683 gen_op_mov_v_reg(ot, cpu_T0, rm);
6684 }
6685 /* load shift */
6686 val = cpu_ldub_code(env, s->pc++);
6687 tcg_gen_movi_tl(cpu_T1, val);
6688 if (op < 4)
6689 goto unknown_op;
6690 op -= 4;
6691 goto bt_op;
6692 case 0x1a3: /* bt Gv, Ev */
6693 op = 0;
6694 goto do_btx;
6695 case 0x1ab: /* bts */
6696 op = 1;
6697 goto do_btx;
6698 case 0x1b3: /* btr */
6699 op = 2;
6700 goto do_btx;
6701 case 0x1bb: /* btc */
6702 op = 3;
6703 do_btx:
6704 ot = dflag;
6705 modrm = cpu_ldub_code(env, s->pc++);
6706 reg = ((modrm >> 3) & 7) | rex_r;
6707 mod = (modrm >> 6) & 3;
6708 rm = (modrm & 7) | REX_B(s);
6709 gen_op_mov_v_reg(MO_32, cpu_T1, reg);
6710 if (mod != 3) {
6711 AddressParts a = gen_lea_modrm_0(env, s, modrm);
6712 /* specific case: we need to add a displacement */
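/* cpu_T1 holds the bit offset: an arithmetic shift by 3 + ot yields the signed word index, and shifting back by ot turns it into a byte displacement. */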
6713 gen_exts(ot, cpu_T1);
6714 tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
6715 tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
6716 tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0);
6717 gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
6718 if (!(s->prefix & PREFIX_LOCK)) {
6719 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6720 }
6721 } else {
6722 gen_op_mov_v_reg(ot, cpu_T0, rm);
6723 }
6724 bt_op:
6725 tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
6726 tcg_gen_movi_tl(cpu_tmp0, 1);
6727 tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
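/* cpu_tmp0 now holds the single-bit mask (1 << bit offset) used by the bts/btr/btc paths below. */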
6728 if (s->prefix & PREFIX_LOCK) {
6729 switch (op) {
6730 case 0: /* bt */
6731 /* Needs no atomic ops; we suppressed the normal
6732 memory load for LOCK above, so do it now. */
6733 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6734 break;
6735 case 1: /* bts */
6736 tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0,
6737 s->mem_index, ot | MO_LE);
6738 break;
6739 case 2: /* btr */
6740 tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
6741 tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0,
6742 s->mem_index, ot | MO_LE);
6743 break;
6744 default:
6745 case 3: /* btc */
6746 tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0,
6747 s->mem_index, ot | MO_LE);
6748 break;
6749 }
6750 tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
6751 } else {
6752 tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
6753 switch (op) {
6754 case 0: /* bt */
6755 /* Data already loaded; nothing to do. */
6756 break;
6757 case 1: /* bts */
6758 tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
6759 break;
6760 case 2: /* btr */
6761 tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
6762 break;
6763 default:
6764 case 3: /* btc */
6765 tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
6766 break;
6767 }
6768 if (op != 0) {
6769 if (mod != 3) {
6770 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
6771 } else {
6772 gen_op_mov_reg_v(ot, rm, cpu_T0);
6773 }
6774 }
6775 }
6776
6777 /* Delay all CC updates until after the store above. Note that
6778 C is the result of the test, Z is unchanged, and the others
6779 are all undefined. */
6780 switch (s->cc_op) {
6781 case CC_OP_MULB ... CC_OP_MULQ:
6782 case CC_OP_ADDB ... CC_OP_ADDQ:
6783 case CC_OP_ADCB ... CC_OP_ADCQ:
6784 case CC_OP_SUBB ... CC_OP_SUBQ:
6785 case CC_OP_SBBB ... CC_OP_SBBQ:
6786 case CC_OP_LOGICB ... CC_OP_LOGICQ:
6787 case CC_OP_INCB ... CC_OP_INCQ:
6788 case CC_OP_DECB ... CC_OP_DECQ:
6789 case CC_OP_SHLB ... CC_OP_SHLQ:
6790 case CC_OP_SARB ... CC_OP_SARQ:
6791 case CC_OP_BMILGB ... CC_OP_BMILGQ:
6792 /* Z was going to be computed from the non-zero status of CC_DST.
6793 We can get that same Z value (and the new C value) by leaving
6794 CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6795 same width. */
6796 tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
6797 set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6798 break;
6799 default:
6800 /* Otherwise, generate EFLAGS and replace the C bit. */
6801 gen_compute_eflags(s);
6802 tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
6803 ctz32(CC_C), 1);
6804 break;
6805 }
6806 break;
6807 case 0x1bc: /* bsf / tzcnt */
6808 case 0x1bd: /* bsr / lzcnt */
6809 ot = dflag;
6810 modrm = cpu_ldub_code(env, s->pc++);
6811 reg = ((modrm >> 3) & 7) | rex_r;
6812 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6813 gen_extu(ot, cpu_T0);
6814
6815 /* Note that lzcnt and tzcnt are in different extensions. */
6816 if ((prefixes & PREFIX_REPZ)
6817 && (b & 1
6818 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6819 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6820 int size = 8 << ot;
6821 /* For lzcnt/tzcnt, the C bit is defined in terms of the input. */
6822 tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
6823 if (b & 1) {
6824 /* For lzcnt, reduce the target_ulong result by the
6825 number of zeros that we expect to find at the top. */
6826 tcg_gen_clzi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS);
6827 tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
6828 } else {
6829 /* For tzcnt, a zero input must return the operand size. */
6830 tcg_gen_ctzi_tl(cpu_T0, cpu_T0, size);
6831 }
6832 /* For lzcnt/tzcnt, the Z bit is defined in terms of the result. */
6833 gen_op_update1_cc();
6834 set_cc_op(s, CC_OP_BMILGB + ot);
6835 } else {
6836 /* For bsr/bsf, only the Z bit is defined, and it depends on
6837 the input rather than the result. */
6838 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
6839 set_cc_op(s, CC_OP_LOGICB + ot);
6840
6841 /* ??? The manual says that the output is undefined when the
6842 input is zero, but real hardware leaves it unchanged, and
6843 real programs appear to depend on that. Accomplish this
6844 by passing the output as the value to return upon zero. */
6845 if (b & 1) {
6846 /* For bsr, return the bit index of the first 1 bit,
6847 not the count of leading zeros. */
6848 tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
6849 tcg_gen_clz_tl(cpu_T0, cpu_T0, cpu_T1);
6850 tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
6851 } else {
6852 tcg_gen_ctz_tl(cpu_T0, cpu_T0, cpu_regs[reg]);
6853 }
6854 }
6855 gen_op_mov_reg_v(ot, reg, cpu_T0);
6856 break;
6857 /************************/
6858 /* bcd */
6859 case 0x27: /* daa */
6860 if (CODE64(s))
6861 goto illegal_op;
6862 gen_update_cc_op(s);
6863 gen_helper_daa(cpu_env);
6864 set_cc_op(s, CC_OP_EFLAGS);
6865 break;
6866 case 0x2f: /* das */
6867 if (CODE64(s))
6868 goto illegal_op;
6869 gen_update_cc_op(s);
6870 gen_helper_das(cpu_env);
6871 set_cc_op(s, CC_OP_EFLAGS);
6872 break;
6873 case 0x37: /* aaa */
6874 if (CODE64(s))
6875 goto illegal_op;
6876 gen_update_cc_op(s);
6877 gen_helper_aaa(cpu_env);
6878 set_cc_op(s, CC_OP_EFLAGS);
6879 break;
6880 case 0x3f: /* aas */
6881 if (CODE64(s))
6882 goto illegal_op;
6883 gen_update_cc_op(s);
6884 gen_helper_aas(cpu_env);
6885 set_cc_op(s, CC_OP_EFLAGS);
6886 break;
6887 case 0xd4: /* aam */
6888 if (CODE64(s))
6889 goto illegal_op;
6890 val = cpu_ldub_code(env, s->pc++);
6891 if (val == 0) {
6892 gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
6893 } else {
6894 gen_helper_aam(cpu_env, tcg_const_i32(val));
6895 set_cc_op(s, CC_OP_LOGICB);
6896 }
6897 break;
6898 case 0xd5: /* aad */
6899 if (CODE64(s))
6900 goto illegal_op;
6901 val = cpu_ldub_code(env, s->pc++);
6902 gen_helper_aad(cpu_env, tcg_const_i32(val));
6903 set_cc_op(s, CC_OP_LOGICB);
6904 break;
6905 /************************/
6906 /* misc */
6907 case 0x90: /* nop */
6908 /* XXX: correct lock test for all insn */
6909 if (prefixes & PREFIX_LOCK) {
6910 goto illegal_op;
6911 }
6912 /* If REX_B is set, then this is xchg eax, r8d, not a nop. */
6913 if (REX_B(s)) {
6914 goto do_xchg_reg_eax;
6915 }
6916 if (prefixes & PREFIX_REPZ) {
6917 gen_update_cc_op(s);
6918 gen_jmp_im(pc_start - s->cs_base);
6919 gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
6920 s->is_jmp = DISAS_TB_JUMP;
6921 }
6922 break;
6923 case 0x9b: /* fwait */
6924 if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
6925 (HF_MP_MASK | HF_TS_MASK)) {
6926 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
6927 } else {
6928 gen_helper_fwait(cpu_env);
6929 }
6930 break;
6931 case 0xcc: /* int3 */
6932 gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
6933 break;
6934 case 0xcd: /* int N */
6935 val = cpu_ldub_code(env, s->pc++);
6936 if (s->vm86 && s->iopl != 3) {
6937 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6938 } else {
6939 gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
6940 }
6941 break;
6942 case 0xce: /* into */
6943 if (CODE64(s))
6944 goto illegal_op;
6945 gen_update_cc_op(s);
6946 gen_jmp_im(pc_start - s->cs_base);
6947 gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
6948 break;
6949 #ifdef WANT_ICEBP
6950 case 0xf1: /* icebp (undocumented, exits to external debugger) */
6951 gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
6952 #if 1
6953 gen_debug(s, pc_start - s->cs_base);
6954 #else
6955 /* start debug */
6956 tb_flush(CPU(x86_env_get_cpu(env)));
6957 qemu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
6958 #endif
6959 break;
6960 #endif
6961 case 0xfa: /* cli */
6962 if (!s->vm86) {
6963 if (s->cpl <= s->iopl) {
6964 gen_helper_cli(cpu_env);
6965 } else {
6966 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6967 }
6968 } else {
6969 if (s->iopl == 3) {
6970 gen_helper_cli(cpu_env);
6971 } else {
6972 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6973 }
6974 }
6975 break;
6976 case 0xfb: /* sti */
6977 if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
6978 gen_helper_sti(cpu_env);
6979 /* interrupts are recognized only after the instruction following sti */
6980 gen_jmp_im(s->pc - s->cs_base);
6981 gen_eob_inhibit_irq(s, true);
6982 } else {
6983 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6984 }
6985 break;
6986 case 0x62: /* bound */
6987 if (CODE64(s))
6988 goto illegal_op;
6989 ot = dflag;
6990 modrm = cpu_ldub_code(env, s->pc++);
6991 reg = (modrm >> 3) & 7;
6992 mod = (modrm >> 6) & 3;
6993 if (mod == 3)
6994 goto illegal_op;
6995 gen_op_mov_v_reg(ot, cpu_T0, reg);
6996 gen_lea_modrm(env, s, modrm);
6997 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6998 if (ot == MO_16) {
6999 gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
7000 } else {
7001 gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
7002 }
7003 break;
7004 case 0x1c8 ... 0x1cf: /* bswap reg */
7005 reg = (b & 7) | REX_B(s);
7006 #ifdef TARGET_X86_64
7007 if (dflag == MO_64) {
7008 gen_op_mov_v_reg(MO_64, cpu_T0, reg);
7009 tcg_gen_bswap64_i64(cpu_T0, cpu_T0);
7010 gen_op_mov_reg_v(MO_64, reg, cpu_T0);
7011 } else
7012 #endif
7013 {
7014 gen_op_mov_v_reg(MO_32, cpu_T0, reg);
7015 tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
7016 tcg_gen_bswap32_tl(cpu_T0, cpu_T0);
7017 gen_op_mov_reg_v(MO_32, reg, cpu_T0);
7018 }
7019 break;
7020 case 0xd6: /* salc */
7021 if (CODE64(s))
7022 goto illegal_op;
7023 gen_compute_eflags_c(s, cpu_T0);
7024 tcg_gen_neg_tl(cpu_T0, cpu_T0);
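/* AL = 0xff if CF was set, 0x00 otherwise. */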
7025 gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
7026 break;
7027 case 0xe0: /* loopnz */
7028 case 0xe1: /* loopz */
7029 case 0xe2: /* loop */
7030 case 0xe3: /* jecxz */
7031 {
7032 TCGLabel *l1, *l2, *l3;
7033
7034 tval = (int8_t)insn_get(env, s, MO_8);
7035 next_eip = s->pc - s->cs_base;
7036 tval += next_eip;
7037 if (dflag == MO_16) {
7038 tval &= 0xffff;
7039 }
7040
7041 l1 = gen_new_label();
7042 l2 = gen_new_label();
7043 l3 = gen_new_label();
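/* l1: branch taken (jump to tval); l3: fall through to next_eip when ECX reaches zero for loopnz/loopz; l2: common exit. */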
7044 b &= 3;
7045 switch(b) {
7046 case 0: /* loopnz */
7047 case 1: /* loopz */
7048 gen_op_add_reg_im(s->aflag, R_ECX, -1);
7049 gen_op_jz_ecx(s->aflag, l3);
7050 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7051 break;
7052 case 2: /* loop */
7053 gen_op_add_reg_im(s->aflag, R_ECX, -1);
7054 gen_op_jnz_ecx(s->aflag, l1);
7055 break;
7056 default:
7057 case 3: /* jcxz */
7058 gen_op_jz_ecx(s->aflag, l1);
7059 break;
7060 }
7061
7062 gen_set_label(l3);
7063 gen_jmp_im(next_eip);
7064 tcg_gen_br(l2);
7065
7066 gen_set_label(l1);
7067 gen_jmp_im(tval);
7068 gen_set_label(l2);
7069 gen_eob(s);
7070 }
7071 break;
7072 case 0x130: /* wrmsr */
7073 case 0x132: /* rdmsr */
7074 if (s->cpl != 0) {
7075 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7076 } else {
7077 gen_update_cc_op(s);
7078 gen_jmp_im(pc_start - s->cs_base);
7079 if (b & 2) {
7080 gen_helper_rdmsr(cpu_env);
7081 } else {
7082 gen_helper_wrmsr(cpu_env);
7083 }
7084 }
7085 break;
7086 case 0x131: /* rdtsc */
7087 gen_update_cc_op(s);
7088 gen_jmp_im(pc_start - s->cs_base);
7089 if (s->tb->cflags & CF_USE_ICOUNT) {
7090 gen_io_start();
7091 }
7092 gen_helper_rdtsc(cpu_env);
7093 if (s->tb->cflags & CF_USE_ICOUNT) {
7094 gen_io_end();
7095 gen_jmp(s, s->pc - s->cs_base);
7096 }
7097 break;
7098 case 0x133: /* rdpmc */
7099 gen_update_cc_op(s);
7100 gen_jmp_im(pc_start - s->cs_base);
7101 gen_helper_rdpmc(cpu_env);
7102 break;
7103 case 0x134: /* sysenter */
7104 /* On Intel CPUs, SYSENTER is also valid in 64-bit mode */
7105 if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7106 goto illegal_op;
7107 if (!s->pe) {
7108 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7109 } else {
7110 gen_helper_sysenter(cpu_env);
7111 gen_eob(s);
7112 }
7113 break;
7114 case 0x135: /* sysexit */
7115 /* On Intel CPUs, SYSEXIT is also valid in 64-bit mode */
7116 if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7117 goto illegal_op;
7118 if (!s->pe) {
7119 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7120 } else {
7121 gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7122 gen_eob(s);
7123 }
7124 break;
7125 #ifdef TARGET_X86_64
7126 case 0x105: /* syscall */
7127 /* XXX: is it usable in real mode? */
7128 gen_update_cc_op(s);
7129 gen_jmp_im(pc_start - s->cs_base);
7130 gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7131 /* TF handling for the syscall insn is different. The TF bit is checked
7132 after the syscall insn completes. This allows #DB to not be
7133 generated after one has entered CPL0 if TF is set in FMASK. */
7134 gen_eob_worker(s, false, true);
7135 break;
7136 case 0x107: /* sysret */
7137 if (!s->pe) {
7138 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7139 } else {
7140 gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7141 /* condition codes are modified only in long mode */
7142 if (s->lma) {
7143 set_cc_op(s, CC_OP_EFLAGS);
7144 }
7145 /* TF handling for the sysret insn is different. The TF bit is
7146 checked after the sysret insn completes. This allows #DB to be
7147 generated "as if" the syscall insn in userspace has just
7148 completed. */
7149 gen_eob_worker(s, false, true);
7150 }
7151 break;
7152 #endif
7153 case 0x1a2: /* cpuid */
7154 gen_update_cc_op(s);
7155 gen_jmp_im(pc_start - s->cs_base);
7156 gen_helper_cpuid(cpu_env);
7157 break;
7158 case 0xf4: /* hlt */
7159 if (s->cpl != 0) {
7160 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7161 } else {
7162 gen_update_cc_op(s);
7163 gen_jmp_im(pc_start - s->cs_base);
7164 gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7165 s->is_jmp = DISAS_TB_JUMP;
7166 }
7167 break;
7168 case 0x100:
7169 modrm = cpu_ldub_code(env, s->pc++);
7170 mod = (modrm >> 6) & 3;
7171 op = (modrm >> 3) & 7;
7172 switch(op) {
7173 case 0: /* sldt */
7174 if (!s->pe || s->vm86)
7175 goto illegal_op;
7176 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7177 tcg_gen_ld32u_tl(cpu_T0, cpu_env,
7178 offsetof(CPUX86State, ldt.selector));
7179 ot = mod == 3 ? dflag : MO_16;
7180 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7181 break;
7182 case 2: /* lldt */
7183 if (!s->pe || s->vm86)
7184 goto illegal_op;
7185 if (s->cpl != 0) {
7186 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7187 } else {
7188 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7189 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7190 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
7191 gen_helper_lldt(cpu_env, cpu_tmp2_i32);
7192 }
7193 break;
7194 case 1: /* str */
7195 if (!s->pe || s->vm86)
7196 goto illegal_op;
7197 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7198 tcg_gen_ld32u_tl(cpu_T0, cpu_env,
7199 offsetof(CPUX86State, tr.selector));
7200 ot = mod == 3 ? dflag : MO_16;
7201 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7202 break;
7203 case 3: /* ltr */
7204 if (!s->pe || s->vm86)
7205 goto illegal_op;
7206 if (s->cpl != 0) {
7207 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7208 } else {
7209 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7210 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7211 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
7212 gen_helper_ltr(cpu_env, cpu_tmp2_i32);
7213 }
7214 break;
7215 case 4: /* verr */
7216 case 5: /* verw */
7217 if (!s->pe || s->vm86)
7218 goto illegal_op;
7219 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7220 gen_update_cc_op(s);
7221 if (op == 4) {
7222 gen_helper_verr(cpu_env, cpu_T0);
7223 } else {
7224 gen_helper_verw(cpu_env, cpu_T0);
7225 }
7226 set_cc_op(s, CC_OP_EFLAGS);
7227 break;
7228 default:
7229 goto unknown_op;
7230 }
7231 break;
7232
7233 case 0x101:
7234 modrm = cpu_ldub_code(env, s->pc++);
7235 switch (modrm) {
7236 CASE_MODRM_MEM_OP(0): /* sgdt */
7237 gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7238 gen_lea_modrm(env, s, modrm);
7239 tcg_gen_ld32u_tl(cpu_T0,
7240 cpu_env, offsetof(CPUX86State, gdt.limit));
7241 gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
7242 gen_add_A0_im(s, 2);
7243 tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
7244 if (dflag == MO_16) {
7245 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7246 }
7247 gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7248 break;
7249
7250 case 0xc8: /* monitor */
7251 if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7252 goto illegal_op;
7253 }
7254 gen_update_cc_op(s);
7255 gen_jmp_im(pc_start - s->cs_base);
7256 tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
7257 gen_extu(s->aflag, cpu_A0);
7258 gen_add_A0_ds_seg(s);
7259 gen_helper_monitor(cpu_env, cpu_A0);
7260 break;
7261
7262 case 0xc9: /* mwait */
7263 if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7264 goto illegal_op;
7265 }
7266 gen_update_cc_op(s);
7267 gen_jmp_im(pc_start - s->cs_base);
7268 gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7269 gen_eob(s);
7270 break;
7271
7272 case 0xca: /* clac */
7273 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7274 || s->cpl != 0) {
7275 goto illegal_op;
7276 }
7277 gen_helper_clac(cpu_env);
7278 gen_jmp_im(s->pc - s->cs_base);
7279 gen_eob(s);
7280 break;
7281
7282 case 0xcb: /* stac */
7283 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7284 || s->cpl != 0) {
7285 goto illegal_op;
7286 }
7287 gen_helper_stac(cpu_env);
7288 gen_jmp_im(s->pc - s->cs_base);
7289 gen_eob(s);
7290 break;
7291
7292 CASE_MODRM_MEM_OP(1): /* sidt */
7293 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7294 gen_lea_modrm(env, s, modrm);
7295 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
7296 gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
7297 gen_add_A0_im(s, 2);
7298 tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
7299 if (dflag == MO_16) {
7300 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7301 }
7302 gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7303 break;
7304
7305 case 0xd0: /* xgetbv */
7306 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7307 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7308 | PREFIX_REPZ | PREFIX_REPNZ))) {
7309 goto illegal_op;
7310 }
7311 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7312 gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
7313 tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
7314 break;
7315
7316 case 0xd1: /* xsetbv */
7317 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7318 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7319 | PREFIX_REPZ | PREFIX_REPNZ))) {
7320 goto illegal_op;
7321 }
7322 if (s->cpl != 0) {
7323 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7324 break;
7325 }
7326 tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7327 cpu_regs[R_EDX]);
7328 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7329 gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
7330 /* End TB because translation flags may change. */
7331 gen_jmp_im(s->pc - s->cs_base);
7332 gen_eob(s);
7333 break;
7334
7335 case 0xd8: /* VMRUN */
7336 if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7337 goto illegal_op;
7338 }
7339 if (s->cpl != 0) {
7340 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7341 break;
7342 }
7343 gen_update_cc_op(s);
7344 gen_jmp_im(pc_start - s->cs_base);
7345 gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7346 tcg_const_i32(s->pc - pc_start));
7347 tcg_gen_exit_tb(0);
7348 s->is_jmp = DISAS_TB_JUMP;
7349 break;
7350
7351 case 0xd9: /* VMMCALL */
7352 if (!(s->flags & HF_SVME_MASK)) {
7353 goto illegal_op;
7354 }
7355 gen_update_cc_op(s);
7356 gen_jmp_im(pc_start - s->cs_base);
7357 gen_helper_vmmcall(cpu_env);
7358 break;
7359
7360 case 0xda: /* VMLOAD */
7361 if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7362 goto illegal_op;
7363 }
7364 if (s->cpl != 0) {
7365 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7366 break;
7367 }
7368 gen_update_cc_op(s);
7369 gen_jmp_im(pc_start - s->cs_base);
7370 gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7371 break;
7372
7373 case 0xdb: /* VMSAVE */
7374 if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7375 goto illegal_op;
7376 }
7377 if (s->cpl != 0) {
7378 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7379 break;
7380 }
7381 gen_update_cc_op(s);
7382 gen_jmp_im(pc_start - s->cs_base);
7383 gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7384 break;
7385
7386 case 0xdc: /* STGI */
7387 if ((!(s->flags & HF_SVME_MASK)
7388 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7389 || !s->pe) {
7390 goto illegal_op;
7391 }
7392 if (s->cpl != 0) {
7393 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7394 break;
7395 }
7396 gen_update_cc_op(s);
7397 gen_jmp_im(pc_start - s->cs_base);
7398 gen_helper_stgi(cpu_env);
7399 break;
7400
7401 case 0xdd: /* CLGI */
7402 if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7403 goto illegal_op;
7404 }
7405 if (s->cpl != 0) {
7406 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7407 break;
7408 }
7409 gen_update_cc_op(s);
7410 gen_jmp_im(pc_start - s->cs_base);
7411 gen_helper_clgi(cpu_env);
7412 break;
7413
7414 case 0xde: /* SKINIT */
7415 if ((!(s->flags & HF_SVME_MASK)
7416 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7417 || !s->pe) {
7418 goto illegal_op;
7419 }
7420 gen_update_cc_op(s);
7421 gen_jmp_im(pc_start - s->cs_base);
7422 gen_helper_skinit(cpu_env);
7423 break;
7424
7425 case 0xdf: /* INVLPGA */
7426 if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7427 goto illegal_op;
7428 }
7429 if (s->cpl != 0) {
7430 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7431 break;
7432 }
7433 gen_update_cc_op(s);
7434 gen_jmp_im(pc_start - s->cs_base);
7435 gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7436 break;
7437
7438 CASE_MODRM_MEM_OP(2): /* lgdt */
7439 if (s->cpl != 0) {
7440 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7441 break;
7442 }
7443 gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7444 gen_lea_modrm(env, s, modrm);
7445 gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
7446 gen_add_A0_im(s, 2);
7447 gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7448 if (dflag == MO_16) {
7449 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7450 }
7451 tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
7452 tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7453 break;
7454
7455 CASE_MODRM_MEM_OP(3): /* lidt */
7456 if (s->cpl != 0) {
7457 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7458 break;
7459 }
7460 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7461 gen_lea_modrm(env, s, modrm);
7462 gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
7463 gen_add_A0_im(s, 2);
7464 gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7465 if (dflag == MO_16) {
7466 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7467 }
7468 tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
7469 tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
7470 break;
7471
7472 CASE_MODRM_OP(4): /* smsw */
7473 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7474 tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
7475 if (CODE64(s)) {
7476 mod = (modrm >> 6) & 3;
7477 ot = (mod != 3 ? MO_16 : s->dflag);
7478 } else {
7479 ot = MO_16;
7480 }
7481 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7482 break;
7483 case 0xee: /* rdpkru */
7484 if (prefixes & PREFIX_LOCK) {
7485 goto illegal_op;
7486 }
7487 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7488 gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
7489 tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
7490 break;
7491 case 0xef: /* wrpkru */
7492 if (prefixes & PREFIX_LOCK) {
7493 goto illegal_op;
7494 }
7495 tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7496 cpu_regs[R_EDX]);
7497 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7498 gen_helper_wrpkru(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
7499 break;
7500 CASE_MODRM_OP(6): /* lmsw */
7501 if (s->cpl != 0) {
7502 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7503 break;
7504 }
7505 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7506 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7507 gen_helper_lmsw(cpu_env, cpu_T0);
7508 gen_jmp_im(s->pc - s->cs_base);
7509 gen_eob(s);
7510 break;
7511
7512 CASE_MODRM_MEM_OP(7): /* invlpg */
7513 if (s->cpl != 0) {
7514 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7515 break;
7516 }
7517 gen_update_cc_op(s);
7518 gen_jmp_im(pc_start - s->cs_base);
7519 gen_lea_modrm(env, s, modrm);
7520 gen_helper_invlpg(cpu_env, cpu_A0);
7521 gen_jmp_im(s->pc - s->cs_base);
7522 gen_eob(s);
7523 break;
7524
7525 case 0xf8: /* swapgs */
7526 #ifdef TARGET_X86_64
7527 if (CODE64(s)) {
7528 if (s->cpl != 0) {
7529 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7530 } else {
7531 tcg_gen_mov_tl(cpu_T0, cpu_seg_base[R_GS]);
7532 tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7533 offsetof(CPUX86State, kernelgsbase));
7534 tcg_gen_st_tl(cpu_T0, cpu_env,
7535 offsetof(CPUX86State, kernelgsbase));
7536 }
7537 break;
7538 }
7539 #endif
7540 goto illegal_op;
7541
7542 case 0xf9: /* rdtscp */
7543 if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7544 goto illegal_op;
7545 }
7546 gen_update_cc_op(s);
7547 gen_jmp_im(pc_start - s->cs_base);
7548 if (s->tb->cflags & CF_USE_ICOUNT) {
7549 gen_io_start();
7550 }
7551 gen_helper_rdtscp(cpu_env);
7552 if (s->tb->cflags & CF_USE_ICOUNT) {
7553 gen_io_end();
7554 gen_jmp(s, s->pc - s->cs_base);
7555 }
7556 break;
7557
7558 default:
7559 goto unknown_op;
7560 }
7561 break;
7562
7563 case 0x108: /* invd */
7564 case 0x109: /* wbinvd */
7565 if (s->cpl != 0) {
7566 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7567 } else {
7568 gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7569 /* nothing to do */
7570 }
7571 break;
7572 case 0x63: /* arpl or movslS (x86_64) */
7573 #ifdef TARGET_X86_64
7574 if (CODE64(s)) {
7575 int d_ot;
7576 /* d_ot is the size of the destination */
7577 d_ot = dflag;
7578
7579 modrm = cpu_ldub_code(env, s->pc++);
7580 reg = ((modrm >> 3) & 7) | rex_r;
7581 mod = (modrm >> 6) & 3;
7582 rm = (modrm & 7) | REX_B(s);
7583
7584 if (mod == 3) {
7585 gen_op_mov_v_reg(MO_32, cpu_T0, rm);
7586 /* sign extend */
7587 if (d_ot == MO_64) {
7588 tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
7589 }
7590 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
7591 } else {
7592 gen_lea_modrm(env, s, modrm);
7593 gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, cpu_A0);
7594 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
7595 }
7596 } else
7597 #endif
7598 {
7599 TCGLabel *label1;
7600 TCGv t0, t1, t2, a0;
7601
7602 if (!s->pe || s->vm86)
7603 goto illegal_op;
7604 t0 = tcg_temp_local_new();
7605 t1 = tcg_temp_local_new();
7606 t2 = tcg_temp_local_new();
7607 ot = MO_16;
7608 modrm = cpu_ldub_code(env, s->pc++);
7609 reg = (modrm >> 3) & 7;
7610 mod = (modrm >> 6) & 3;
7611 rm = modrm & 7;
7612 if (mod != 3) {
7613 gen_lea_modrm(env, s, modrm);
7614 gen_op_ld_v(s, ot, t0, cpu_A0);
7615 a0 = tcg_temp_local_new();
7616 tcg_gen_mov_tl(a0, cpu_A0);
7617 } else {
7618 gen_op_mov_v_reg(ot, t0, rm);
7619 TCGV_UNUSED(a0);
7620 }
7621 gen_op_mov_v_reg(ot, t1, reg);
7622 tcg_gen_andi_tl(cpu_tmp0, t0, 3);
7623 tcg_gen_andi_tl(t1, t1, 3);
7624 tcg_gen_movi_tl(t2, 0);
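/* arpl: if the destination RPL (low two bits of t0) is below the source RPL, raise it to the source RPL and record ZF in t2. */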
7625 label1 = gen_new_label();
7626 tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
7627 tcg_gen_andi_tl(t0, t0, ~3);
7628 tcg_gen_or_tl(t0, t0, t1);
7629 tcg_gen_movi_tl(t2, CC_Z);
7630 gen_set_label(label1);
7631 if (mod != 3) {
7632 gen_op_st_v(s, ot, t0, a0);
7633 tcg_temp_free(a0);
7634 } else {
7635 gen_op_mov_reg_v(ot, rm, t0);
7636 }
7637 gen_compute_eflags(s);
7638 tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7639 tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7640 tcg_temp_free(t0);
7641 tcg_temp_free(t1);
7642 tcg_temp_free(t2);
7643 }
7644 break;
7645 case 0x102: /* lar */
7646 case 0x103: /* lsl */
7647 {
7648 TCGLabel *label1;
7649 TCGv t0;
7650 if (!s->pe || s->vm86)
7651 goto illegal_op;
7652 ot = dflag != MO_16 ? MO_32 : MO_16;
7653 modrm = cpu_ldub_code(env, s->pc++);
7654 reg = ((modrm >> 3) & 7) | rex_r;
7655 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7656 t0 = tcg_temp_local_new();
7657 gen_update_cc_op(s);
7658 if (b == 0x102) {
7659 gen_helper_lar(t0, cpu_env, cpu_T0);
7660 } else {
7661 gen_helper_lsl(t0, cpu_env, cpu_T0);
7662 }
7663 tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
7664 label1 = gen_new_label();
7665 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
7666 gen_op_mov_reg_v(ot, reg, t0);
7667 gen_set_label(label1);
7668 set_cc_op(s, CC_OP_EFLAGS);
7669 tcg_temp_free(t0);
7670 }
7671 break;
7672 case 0x118:
7673 modrm = cpu_ldub_code(env, s->pc++);
7674 mod = (modrm >> 6) & 3;
7675 op = (modrm >> 3) & 7;
7676 switch(op) {
7677 case 0: /* prefetchnta */
7678 case 1: /* prefetcht0 */
7679 case 2: /* prefetcht1 */
7680 case 3: /* prefetcht2 */
7681 if (mod == 3)
7682 goto illegal_op;
7683 gen_nop_modrm(env, s, modrm);
7684 /* nothing more to do */
7685 break;
7686 default: /* nop (multi byte) */
7687 gen_nop_modrm(env, s, modrm);
7688 break;
7689 }
7690 break;
7691 case 0x11a:
7692 modrm = cpu_ldub_code(env, s->pc++);
7693 if (s->flags & HF_MPX_EN_MASK) {
7694 mod = (modrm >> 6) & 3;
7695 reg = ((modrm >> 3) & 7) | rex_r;
7696 if (prefixes & PREFIX_REPZ) {
7697 /* bndcl */
7698 if (reg >= 4
7699 || (prefixes & PREFIX_LOCK)
7700 || s->aflag == MO_16) {
7701 goto illegal_op;
7702 }
7703 gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7704 } else if (prefixes & PREFIX_REPNZ) {
7705 /* bndcu */
7706 if (reg >= 4
7707 || (prefixes & PREFIX_LOCK)
7708 || s->aflag == MO_16) {
7709 goto illegal_op;
7710 }
7711 TCGv_i64 notu = tcg_temp_new_i64();
7712 tcg_gen_not_i64(notu, cpu_bndu[reg]);
7713 gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7714 tcg_temp_free_i64(notu);
7715 } else if (prefixes & PREFIX_DATA) {
7716 /* bndmov -- from reg/mem */
7717 if (reg >= 4 || s->aflag == MO_16) {
7718 goto illegal_op;
7719 }
7720 if (mod == 3) {
7721 int reg2 = (modrm & 7) | REX_B(s);
7722 if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7723 goto illegal_op;
7724 }
7725 if (s->flags & HF_MPX_IU_MASK) {
7726 tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7727 tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7728 }
7729 } else {
7730 gen_lea_modrm(env, s, modrm);
7731 if (CODE64(s)) {
7732 tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
7733 s->mem_index, MO_LEQ);
7734 tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
7735 tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
7736 s->mem_index, MO_LEQ);
7737 } else {
7738 tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
7739 s->mem_index, MO_LEUL);
7740 tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
7741 tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
7742 s->mem_index, MO_LEUL);
7743 }
7744 /* bnd registers are now in-use */
7745 gen_set_hflag(s, HF_MPX_IU_MASK);
7746 }
7747 } else if (mod != 3) {
7748 /* bndldx */
7749 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7750 if (reg >= 4
7751 || (prefixes & PREFIX_LOCK)
7752 || s->aflag == MO_16
7753 || a.base < -1) {
7754 goto illegal_op;
7755 }
7756 if (a.base >= 0) {
7757 tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
7758 } else {
7759 tcg_gen_movi_tl(cpu_A0, 0);
7760 }
7761 gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
7762 if (a.index >= 0) {
7763 tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
7764 } else {
7765 tcg_gen_movi_tl(cpu_T0, 0);
7766 }
7767 if (CODE64(s)) {
7768 gen_helper_bndldx64(cpu_bndl[reg], cpu_env, cpu_A0, cpu_T0);
7769 tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7770 offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7771 } else {
7772 gen_helper_bndldx32(cpu_bndu[reg], cpu_env, cpu_A0, cpu_T0);
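/* the 32-bit helper returns the bounds packed into one i64 (upper:lower); split them below */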
7773 tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7774 tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7775 }
7776 gen_set_hflag(s, HF_MPX_IU_MASK);
7777 }
7778 }
7779 gen_nop_modrm(env, s, modrm);
7780 break;
7781 case 0x11b:
7782 modrm = cpu_ldub_code(env, s->pc++);
7783 if (s->flags & HF_MPX_EN_MASK) {
7784 mod = (modrm >> 6) & 3;
7785 reg = ((modrm >> 3) & 7) | rex_r;
7786 if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7787 /* bndmk */
7788 if (reg >= 4
7789 || (prefixes & PREFIX_LOCK)
7790 || s->aflag == MO_16) {
7791 goto illegal_op;
7792 }
7793 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7794 if (a.base >= 0) {
7795 tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7796 if (!CODE64(s)) {
7797 tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7798 }
7799 } else if (a.base == -1) {
7800 /* with no base register, the lower bound is 0 */
7801 tcg_gen_movi_i64(cpu_bndl[reg], 0);
7802 } else {
7803 /* rip-relative generates #ud */
7804 goto illegal_op;
7805 }
7806 tcg_gen_not_tl(cpu_A0, gen_lea_modrm_1(a));
7807 if (!CODE64(s)) {
7808 tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
7809 }
7810 tcg_gen_extu_tl_i64(cpu_bndu[reg], cpu_A0);
7811 /* bnd registers are now in-use */
7812 gen_set_hflag(s, HF_MPX_IU_MASK);
7813 break;
7814 } else if (prefixes & PREFIX_REPNZ) {
7815 /* bndcn */
7816 if (reg >= 4
7817 || (prefixes & PREFIX_LOCK)
7818 || s->aflag == MO_16) {
7819 goto illegal_op;
7820 }
7821 gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7822 } else if (prefixes & PREFIX_DATA) {
7823 /* bndmov -- to reg/mem */
7824 if (reg >= 4 || s->aflag == MO_16) {
7825 goto illegal_op;
7826 }
7827 if (mod == 3) {
7828 int reg2 = (modrm & 7) | REX_B(s);
7829 if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7830 goto illegal_op;
7831 }
7832 if (s->flags & HF_MPX_IU_MASK) {
7833 tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7834 tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7835 }
7836 } else {
7837 gen_lea_modrm(env, s, modrm);
7838 if (CODE64(s)) {
7839 tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
7840 s->mem_index, MO_LEQ);
7841 tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
7842 tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
7843 s->mem_index, MO_LEQ);
7844 } else {
7845 tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
7846 s->mem_index, MO_LEUL);
7847 tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
7848 tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
7849 s->mem_index, MO_LEUL);
7850 }
7851 }
7852 } else if (mod != 3) {
7853 /* bndstx */
7854 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7855 if (reg >= 4
7856 || (prefixes & PREFIX_LOCK)
7857 || s->aflag == MO_16
7858 || a.base < -1) {
7859 goto illegal_op;
7860 }
7861 if (a.base >= 0) {
7862 tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
7863 } else {
7864 tcg_gen_movi_tl(cpu_A0, 0);
7865 }
7866 gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
7867 if (a.index >= 0) {
7868 tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
7869 } else {
7870 tcg_gen_movi_tl(cpu_T0, 0);
7871 }
7872 if (CODE64(s)) {
7873 gen_helper_bndstx64(cpu_env, cpu_A0, cpu_T0,
7874 cpu_bndl[reg], cpu_bndu[reg]);
7875 } else {
7876 gen_helper_bndstx32(cpu_env, cpu_A0, cpu_T0,
7877 cpu_bndl[reg], cpu_bndu[reg]);
7878 }
7879 }
7880 }
7881 gen_nop_modrm(env, s, modrm);
7882 break;
7883 case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
7884 modrm = cpu_ldub_code(env, s->pc++);
7885 gen_nop_modrm(env, s, modrm);
7886 break;
7887 case 0x120: /* mov reg, crN */
7888 case 0x122: /* mov crN, reg */
7889 if (s->cpl != 0) {
7890 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7891 } else {
7892 modrm = cpu_ldub_code(env, s->pc++);
7893 /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7894 * AMD documentation (24594.pdf) and testing of
7895              * Intel 386 and 486 processors all show that the mod bits
7896 * are assumed to be 1's, regardless of actual values.
7897 */
7898 rm = (modrm & 7) | REX_B(s);
7899 reg = ((modrm >> 3) & 7) | rex_r;
7900 if (CODE64(s))
7901 ot = MO_64;
7902 else
7903 ot = MO_32;
7904 if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
7905 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
7906 reg = 8;
7907 }
7908 switch(reg) {
7909 case 0:
7910 case 2:
7911 case 3:
7912 case 4:
7913 case 8:
7914 gen_update_cc_op(s);
7915 gen_jmp_im(pc_start - s->cs_base);
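                /* b & 2 set means the 0f 22 form, a write to the control
                   register; clear means the 0f 20 read form. */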
7916 if (b & 2) {
7917 gen_op_mov_v_reg(ot, cpu_T0, rm);
7918 gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
7919 cpu_T0);
7920 gen_jmp_im(s->pc - s->cs_base);
7921 gen_eob(s);
7922 } else {
7923 gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg));
7924 gen_op_mov_reg_v(ot, rm, cpu_T0);
7925 }
7926 break;
7927 default:
7928 goto unknown_op;
7929 }
7930 }
7931 break;
7932 case 0x121: /* mov reg, drN */
7933 case 0x123: /* mov drN, reg */
7934 if (s->cpl != 0) {
7935 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7936 } else {
7937 modrm = cpu_ldub_code(env, s->pc++);
7938 /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7939 * AMD documentation (24594.pdf) and testing of
7940              * Intel 386 and 486 processors all show that the mod bits
7941 * are assumed to be 1's, regardless of actual values.
7942 */
7943 rm = (modrm & 7) | REX_B(s);
7944 reg = ((modrm >> 3) & 7) | rex_r;
7945 if (CODE64(s))
7946 ot = MO_64;
7947 else
7948 ot = MO_32;
7949 if (reg >= 8) {
7950 goto illegal_op;
7951 }
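            /* As with the control registers above, b & 2 selects the
               write form (0f 23) over the read form (0f 21). */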
7952 if (b & 2) {
7953 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
7954 gen_op_mov_v_reg(ot, cpu_T0, rm);
7955 tcg_gen_movi_i32(cpu_tmp2_i32, reg);
7956 gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T0);
7957 gen_jmp_im(s->pc - s->cs_base);
7958 gen_eob(s);
7959 } else {
7960 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
7961 tcg_gen_movi_i32(cpu_tmp2_i32, reg);
7962 gen_helper_get_dr(cpu_T0, cpu_env, cpu_tmp2_i32);
7963 gen_op_mov_reg_v(ot, rm, cpu_T0);
7964 }
7965 }
7966 break;
7967 case 0x106: /* clts */
7968 if (s->cpl != 0) {
7969 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7970 } else {
7971 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7972 gen_helper_clts(cpu_env);
7973 /* abort block because static cpu state changed */
7974 gen_jmp_im(s->pc - s->cs_base);
7975 gen_eob(s);
7976 }
7977 break;
7978 /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
7979 case 0x1c3: /* MOVNTI reg, mem */
7980 if (!(s->cpuid_features & CPUID_SSE2))
7981 goto illegal_op;
7982 ot = mo_64_32(dflag);
7983 modrm = cpu_ldub_code(env, s->pc++);
7984 mod = (modrm >> 6) & 3;
7985 if (mod == 3)
7986 goto illegal_op;
7987 reg = ((modrm >> 3) & 7) | rex_r;
7988 /* generate a generic store */
7989 gen_ldst_modrm(env, s, modrm, ot, reg, 1);
7990 break;
7991 case 0x1ae:
7992 modrm = cpu_ldub_code(env, s->pc++);
7993 switch (modrm) {
7994 CASE_MODRM_MEM_OP(0): /* fxsave */
7995 if (!(s->cpuid_features & CPUID_FXSR)
7996 || (prefixes & PREFIX_LOCK)) {
7997 goto illegal_op;
7998 }
7999 if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8000 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8001 break;
8002 }
8003 gen_lea_modrm(env, s, modrm);
8004 gen_helper_fxsave(cpu_env, cpu_A0);
8005 break;
8006
8007 CASE_MODRM_MEM_OP(1): /* fxrstor */
8008 if (!(s->cpuid_features & CPUID_FXSR)
8009 || (prefixes & PREFIX_LOCK)) {
8010 goto illegal_op;
8011 }
8012 if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8013 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8014 break;
8015 }
8016 gen_lea_modrm(env, s, modrm);
8017 gen_helper_fxrstor(cpu_env, cpu_A0);
8018 break;
8019
8020 CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8021 if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8022 goto illegal_op;
8023 }
8024 if (s->flags & HF_TS_MASK) {
8025 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8026 break;
8027 }
8028 gen_lea_modrm(env, s, modrm);
8029 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL);
8030 gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
8031 break;
8032
8033 CASE_MODRM_MEM_OP(3): /* stmxcsr */
8034 if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8035 goto illegal_op;
8036 }
8037 if (s->flags & HF_TS_MASK) {
8038 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8039 break;
8040 }
8041 gen_lea_modrm(env, s, modrm);
8042 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
8043 gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
8044 break;
8045
8046 CASE_MODRM_MEM_OP(4): /* xsave */
8047 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8048 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8049 | PREFIX_REPZ | PREFIX_REPNZ))) {
8050 goto illegal_op;
8051 }
8052 gen_lea_modrm(env, s, modrm);
8053 tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8054 cpu_regs[R_EDX]);
8055 gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
8056 break;
8057
8058 CASE_MODRM_MEM_OP(5): /* xrstor */
8059 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8060 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8061 | PREFIX_REPZ | PREFIX_REPNZ))) {
8062 goto illegal_op;
8063 }
8064 gen_lea_modrm(env, s, modrm);
8065 tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8066 cpu_regs[R_EDX]);
8067 gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
8068 /* XRSTOR is how MPX is enabled, which changes how
8069 we translate. Thus we need to end the TB. */
8070 gen_update_cc_op(s);
8071 gen_jmp_im(s->pc - s->cs_base);
8072 gen_eob(s);
8073 break;
8074
8075 CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8076 if (prefixes & PREFIX_LOCK) {
8077 goto illegal_op;
8078 }
8079 if (prefixes & PREFIX_DATA) {
8080 /* clwb */
8081 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8082 goto illegal_op;
8083 }
8084 gen_nop_modrm(env, s, modrm);
8085 } else {
8086 /* xsaveopt */
8087 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8088 || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8089 || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8090 goto illegal_op;
8091 }
8092 gen_lea_modrm(env, s, modrm);
8093 tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8094 cpu_regs[R_EDX]);
8095 gen_helper_xsaveopt(cpu_env, cpu_A0, cpu_tmp1_i64);
8096 }
8097 break;
8098
8099 CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8100 if (prefixes & PREFIX_LOCK) {
8101 goto illegal_op;
8102 }
8103 if (prefixes & PREFIX_DATA) {
8104 /* clflushopt */
8105 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8106 goto illegal_op;
8107 }
8108 } else {
8109 /* clflush */
8110 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8111 || !(s->cpuid_features & CPUID_CLFLUSH)) {
8112 goto illegal_op;
8113 }
8114 }
8115 gen_nop_modrm(env, s, modrm);
8116 break;
8117
8118 case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8119         case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8120 case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8121         case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8122 if (CODE64(s)
8123 && (prefixes & PREFIX_REPZ)
8124 && !(prefixes & PREFIX_LOCK)
8125 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8126 TCGv base, treg, src, dst;
8127
8128 /* Preserve hflags bits by testing CR4 at runtime. */
8129 tcg_gen_movi_i32(cpu_tmp2_i32, CR4_FSGSBASE_MASK);
8130 gen_helper_cr4_testbit(cpu_env, cpu_tmp2_i32);
8131
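                /* Bit 3 of the modrm byte selects GS vs FS, and bit 4
                   selects the wr*base (store to base) form over the
                   rd*base (load from base) form. */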
8132 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8133 treg = cpu_regs[(modrm & 7) | REX_B(s)];
8134
8135 if (modrm & 0x10) {
8136 /* wr*base */
8137 dst = base, src = treg;
8138 } else {
8139 /* rd*base */
8140 dst = treg, src = base;
8141 }
8142
8143 if (s->dflag == MO_32) {
8144 tcg_gen_ext32u_tl(dst, src);
8145 } else {
8146 tcg_gen_mov_tl(dst, src);
8147 }
8148 break;
8149 }
8150 goto unknown_op;
8151
8152 case 0xf8: /* sfence / pcommit */
8153 if (prefixes & PREFIX_DATA) {
8154 /* pcommit */
8155 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8156 || (prefixes & PREFIX_LOCK)) {
8157 goto illegal_op;
8158 }
8159 break;
8160 }
8161 /* fallthru */
8162 case 0xf9 ... 0xff: /* sfence */
8163 if (!(s->cpuid_features & CPUID_SSE)
8164 || (prefixes & PREFIX_LOCK)) {
8165 goto illegal_op;
8166 }
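            /* SFENCE only orders stores against stores, so a
               store-store barrier is sufficient here. */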
8167 tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8168 break;
8169 case 0xe8 ... 0xef: /* lfence */
8170 if (!(s->cpuid_features & CPUID_SSE)
8171 || (prefixes & PREFIX_LOCK)) {
8172 goto illegal_op;
8173 }
8174 tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8175 break;
8176 case 0xf0 ... 0xf7: /* mfence */
8177 if (!(s->cpuid_features & CPUID_SSE2)
8178 || (prefixes & PREFIX_LOCK)) {
8179 goto illegal_op;
8180 }
8181 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8182 break;
8183
8184 default:
8185 goto unknown_op;
8186 }
8187 break;
8188
8189 case 0x10d: /* 3DNow! prefetch(w) */
8190 modrm = cpu_ldub_code(env, s->pc++);
8191 mod = (modrm >> 6) & 3;
8192 if (mod == 3)
8193 goto illegal_op;
8194 gen_nop_modrm(env, s, modrm);
8195 break;
8196 case 0x1aa: /* rsm */
8197 gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8198 if (!(s->flags & HF_SMM_MASK))
8199 goto illegal_op;
8200 gen_update_cc_op(s);
8201 gen_jmp_im(s->pc - s->cs_base);
8202 gen_helper_rsm(cpu_env);
8203 gen_eob(s);
8204 break;
8205 case 0x1b8: /* SSE4.2 popcnt */
8206 if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8207 PREFIX_REPZ)
8208 goto illegal_op;
8209 if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8210 goto illegal_op;
8211
8212 modrm = cpu_ldub_code(env, s->pc++);
8213 reg = ((modrm >> 3) & 7) | rex_r;
8214
8215 if (s->prefix & PREFIX_DATA) {
8216 ot = MO_16;
8217 } else {
8218 ot = mo_64_32(dflag);
8219 }
8220
8221 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8222 gen_extu(ot, cpu_T0);
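        /* POPCNT clears all arithmetic flags except ZF, which is set when
           the source is zero; stash the source in cc_src so CC_OP_POPCNT
           can recompute the flags lazily. */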
8223 tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
8224 tcg_gen_ctpop_tl(cpu_T0, cpu_T0);
8225 gen_op_mov_reg_v(ot, reg, cpu_T0);
8226
8227 set_cc_op(s, CC_OP_POPCNT);
8228 break;
8229 case 0x10e ... 0x10f:
8230 /* 3DNow! instructions, ignore prefixes */
8231 s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8232 case 0x110 ... 0x117:
8233 case 0x128 ... 0x12f:
8234 case 0x138 ... 0x13a:
8235 case 0x150 ... 0x179:
8236 case 0x17c ... 0x17f:
8237 case 0x1c2:
8238 case 0x1c4 ... 0x1c6:
8239 case 0x1d0 ... 0x1fe:
8240 gen_sse(env, s, b, pc_start, rex_r);
8241 break;
8242 default:
8243 goto unknown_op;
8244 }
8245 return s->pc;
8246 illegal_op:
8247 gen_illegal_opcode(s);
8248 return s->pc;
8249 unknown_op:
8250 gen_unknown_opcode(env, s);
8251 return s->pc;
8252 }
8253
8254 void tcg_x86_init(void)
8255 {
8256 static const char reg_names[CPU_NB_REGS][4] = {
8257 #ifdef TARGET_X86_64
8258 [R_EAX] = "rax",
8259 [R_EBX] = "rbx",
8260 [R_ECX] = "rcx",
8261 [R_EDX] = "rdx",
8262 [R_ESI] = "rsi",
8263 [R_EDI] = "rdi",
8264 [R_EBP] = "rbp",
8265 [R_ESP] = "rsp",
8266 [8] = "r8",
8267 [9] = "r9",
8268 [10] = "r10",
8269 [11] = "r11",
8270 [12] = "r12",
8271 [13] = "r13",
8272 [14] = "r14",
8273 [15] = "r15",
8274 #else
8275 [R_EAX] = "eax",
8276 [R_EBX] = "ebx",
8277 [R_ECX] = "ecx",
8278 [R_EDX] = "edx",
8279 [R_ESI] = "esi",
8280 [R_EDI] = "edi",
8281 [R_EBP] = "ebp",
8282 [R_ESP] = "esp",
8283 #endif
8284 };
8285 static const char seg_base_names[6][8] = {
8286 [R_CS] = "cs_base",
8287 [R_DS] = "ds_base",
8288 [R_ES] = "es_base",
8289 [R_FS] = "fs_base",
8290 [R_GS] = "gs_base",
8291 [R_SS] = "ss_base",
8292 };
8293 static const char bnd_regl_names[4][8] = {
8294 "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8295 };
8296 static const char bnd_regu_names[4][8] = {
8297 "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8298 };
8299 int i;
8300 static bool initialized;
8301
8302 if (initialized) {
8303 return;
8304 }
8305 initialized = true;
8306
8307 cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
8308 tcg_ctx.tcg_env = cpu_env;
8309 cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8310 offsetof(CPUX86State, cc_op), "cc_op");
8311 cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8312 "cc_dst");
8313 cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8314 "cc_src");
8315 cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8316 "cc_src2");
8317
8318 for (i = 0; i < CPU_NB_REGS; ++i) {
8319 cpu_regs[i] = tcg_global_mem_new(cpu_env,
8320 offsetof(CPUX86State, regs[i]),
8321 reg_names[i]);
8322 }
8323
8324 for (i = 0; i < 6; ++i) {
8325 cpu_seg_base[i]
8326 = tcg_global_mem_new(cpu_env,
8327 offsetof(CPUX86State, segs[i].base),
8328 seg_base_names[i]);
8329 }
8330
8331 for (i = 0; i < 4; ++i) {
8332 cpu_bndl[i]
8333 = tcg_global_mem_new_i64(cpu_env,
8334 offsetof(CPUX86State, bnd_regs[i].lb),
8335 bnd_regl_names[i]);
8336 cpu_bndu[i]
8337 = tcg_global_mem_new_i64(cpu_env,
8338 offsetof(CPUX86State, bnd_regs[i].ub),
8339 bnd_regu_names[i]);
8340 }
8341 }
8342
8343 /* generate intermediate code for basic block 'tb'. */
8344 void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
8345 {
8346 X86CPU *cpu = x86_env_get_cpu(env);
8347 CPUState *cs = CPU(cpu);
8348 DisasContext dc1, *dc = &dc1;
8349 target_ulong pc_ptr;
8350 uint32_t flags;
8351 target_ulong pc_start;
8352 target_ulong cs_base;
8353 int num_insns;
8354 int max_insns;
8355
8356 /* generate intermediate code */
8357 pc_start = tb->pc;
8358 cs_base = tb->cs_base;
8359 flags = tb->flags;
8360
8361 dc->pe = (flags >> HF_PE_SHIFT) & 1;
8362 dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8363 dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8364 dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8365 dc->f_st = 0;
8366 dc->vm86 = (flags >> VM_SHIFT) & 1;
8367 dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8368 dc->iopl = (flags >> IOPL_SHIFT) & 3;
8369 dc->tf = (flags >> TF_SHIFT) & 1;
8370 dc->singlestep_enabled = cs->singlestep_enabled;
8371 dc->cc_op = CC_OP_DYNAMIC;
8372 dc->cc_op_dirty = false;
8373 dc->cs_base = cs_base;
8374 dc->tb = tb;
8375 dc->popl_esp_hack = 0;
8376 /* select memory access functions */
8377 dc->mem_index = 0;
8378 #ifdef CONFIG_SOFTMMU
8379 dc->mem_index = cpu_mmu_index(env, false);
8380 #endif
8381 dc->cpuid_features = env->features[FEAT_1_EDX];
8382 dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8383 dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8384 dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8385 dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8386 dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8387 #ifdef TARGET_X86_64
8388 dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8389 dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8390 #endif
8391 dc->flags = flags;
8392 dc->jmp_opt = !(dc->tf || cs->singlestep_enabled ||
8393 (flags & HF_INHIBIT_IRQ_MASK));
8394     /* Do not optimize repz jumps at all in icount mode, because
8395        rep movsS instructions are executed with different paths
8396        in the !repz_opt and repz_opt modes.  The first path used to be
8397        taken always, except in single-step mode.  Disabling the jump
8398        optimization makes the control paths equivalent in normal run
8399        and single-step modes.
8400        Now there is no jump optimization for repz in
8401        record/replay modes, and there is always an
8402        additional step for ecx=0 when icount is enabled.
8403      */
8404 dc->repz_opt = !dc->jmp_opt && !(tb->cflags & CF_USE_ICOUNT);
8405 #if 0
8406 /* check addseg logic */
8407 if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8408 printf("ERROR addseg\n");
8409 #endif
8410
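    /* These TCG temporaries are (re)created for each translation block. */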
8411 cpu_T0 = tcg_temp_new();
8412 cpu_T1 = tcg_temp_new();
8413 cpu_A0 = tcg_temp_new();
8414
8415 cpu_tmp0 = tcg_temp_new();
8416 cpu_tmp1_i64 = tcg_temp_new_i64();
8417 cpu_tmp2_i32 = tcg_temp_new_i32();
8418 cpu_tmp3_i32 = tcg_temp_new_i32();
8419 cpu_tmp4 = tcg_temp_new();
8420 cpu_ptr0 = tcg_temp_new_ptr();
8421 cpu_ptr1 = tcg_temp_new_ptr();
8422 cpu_cc_srcT = tcg_temp_local_new();
8423
8424 dc->is_jmp = DISAS_NEXT;
8425 pc_ptr = pc_start;
8426 num_insns = 0;
8427 max_insns = tb->cflags & CF_COUNT_MASK;
8428 if (max_insns == 0) {
8429 max_insns = CF_COUNT_MASK;
8430 }
8431 if (max_insns > TCG_MAX_INSNS) {
8432 max_insns = TCG_MAX_INSNS;
8433 }
8434
8435 gen_tb_start(tb);
8436 for(;;) {
8437 tcg_gen_insn_start(pc_ptr, dc->cc_op);
8438 num_insns++;
8439
8440 /* If RF is set, suppress an internally generated breakpoint. */
8441 if (unlikely(cpu_breakpoint_test(cs, pc_ptr,
8442 tb->flags & HF_RF_MASK
8443 ? BP_GDB : BP_ANY))) {
8444 gen_debug(dc, pc_ptr - dc->cs_base);
8445 /* The address covered by the breakpoint must be included in
8446                [tb->pc, tb->pc + tb->size) in order for it to be
8447 properly cleared -- thus we increment the PC here so that
8448 the logic setting tb->size below does the right thing. */
8449 pc_ptr += 1;
8450 goto done_generating;
8451 }
8452 if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
8453 gen_io_start();
8454 }
8455
8456 pc_ptr = disas_insn(env, dc, pc_ptr);
8457 /* stop translation if indicated */
8458 if (dc->is_jmp)
8459 break;
8460         /* in single-step mode, we generate only one instruction and
8461            then generate an exception */
8462         /* if irqs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8463            the flag and abort the translation to give the irqs a
8464            chance to happen */
8465 if (dc->tf || dc->singlestep_enabled ||
8466 (flags & HF_INHIBIT_IRQ_MASK)) {
8467 gen_jmp_im(pc_ptr - dc->cs_base);
8468 gen_eob(dc);
8469 break;
8470 }
8471         /* Do not cross a page boundary in icount mode, since doing so
8472            can cause an exception.  Stop only when the boundary is
8473            crossed by the first instruction in the block.
8474            If the current instruction already crossed the boundary,
8475            that is fine, because an exception has not stopped this code.
8476          */
8477 if ((tb->cflags & CF_USE_ICOUNT)
8478 && ((pc_ptr & TARGET_PAGE_MASK)
8479 != ((pc_ptr + TARGET_MAX_INSN_SIZE - 1) & TARGET_PAGE_MASK)
8480 || (pc_ptr & ~TARGET_PAGE_MASK) == 0)) {
8481 gen_jmp_im(pc_ptr - dc->cs_base);
8482 gen_eob(dc);
8483 break;
8484 }
8485 /* if too long translation, stop generation too */
8486 if (tcg_op_buf_full() ||
8487 (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
8488 num_insns >= max_insns) {
8489 gen_jmp_im(pc_ptr - dc->cs_base);
8490 gen_eob(dc);
8491 break;
8492 }
8493 if (singlestep) {
8494 gen_jmp_im(pc_ptr - dc->cs_base);
8495 gen_eob(dc);
8496 break;
8497 }
8498 }
8499 if (tb->cflags & CF_LAST_IO)
8500 gen_io_end();
8501 done_generating:
8502 gen_tb_end(tb, num_insns);
8503
8504 #ifdef DEBUG_DISAS
8505 if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
8506 && qemu_log_in_addr_range(pc_start)) {
8507 int disas_flags;
8508 qemu_log_lock();
8509 qemu_log("----------------\n");
8510 qemu_log("IN: %s\n", lookup_symbol(pc_start));
8511 #ifdef TARGET_X86_64
8512 if (dc->code64)
8513 disas_flags = 2;
8514 else
8515 #endif
8516 disas_flags = !dc->code32;
8517 log_target_disas(cs, pc_start, pc_ptr - pc_start, disas_flags);
8518 qemu_log("\n");
8519 qemu_log_unlock();
8520 }
8521 #endif
8522
8523 tb->size = pc_ptr - pc_start;
8524 tb->icount = num_insns;
8525 }
8526
8527 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8528 target_ulong *data)
8529 {
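    /* data[] holds the values recorded by tcg_gen_insn_start() above:
       data[0] is the pc (eip + cs_base) and data[1] the cc_op in effect. */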
8530 int cc_op = data[1];
8531 env->eip = data[0] - tb->cs_base;
8532 if (cc_op != CC_OP_DYNAMIC) {
8533 env->cc_op = cc_op;
8534 }
8535 }