/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "tcg-be-ldst.h"

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32-bit mode uses a stack-based calling convention (GCC default). */
#endif
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};

/* Constants we accept. */
#define TCG_CT_CONST_S32 0x100
#define TCG_CT_CONST_U32 0x200
#define TCG_CT_CONST_I32 0x400

/* Registers used with the L constraint, which are the first two argument
   registers on x86_64, and two arbitrary call-clobbered registers on
   i386. */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/* The host compiler should supply <cpuid.h> to enable runtime feature
   detection, as we're not going to go so far as our own inline assembly.
   If not available, default values will be assumed. */
#if defined(CONFIG_CPUID_H)
#include <cpuid.h>
#endif

/* For 32-bit, we are going to attempt to determine at runtime whether cmov
   is available. */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H) && defined(bit_CMOV)
static bool have_cmov;
#else
# define have_cmov 0
#endif

/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
   going to attempt to determine at runtime whether movbe is available. */
#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
static bool have_movbe;
#else
# define have_movbe 0
#endif

/* We need this symbol in tcg-target.h, and we can't properly conditionalize
   it there.  Therefore we always define the variable. */
bool have_bmi1;

#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
static bool have_bmi2;
#else
# define have_bmi2 0
#endif

static tcg_insn_unit *tb_ret_addr;

static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch (type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        tcg_patch32(code_ptr, value);
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        tcg_patch8(code_ptr, value);
        break;
    default:
        tcg_abort();
    }
}
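
/* A note on the relocation arithmetic above (restating what the code does,
   for clarity): the rel32 branches below pass addend -4 to tcg_out_reloc()
   with code_ptr pointing at the 4-byte displacement field, so patch_reloc()
   stores label_value - 4 - code_ptr, i.e. label_value - (code_ptr + 4).
   That is exactly the x86 rel32 convention of "target minus the address of
   the next instruction".  The R_386_PC8 case works the same way with
   addend -1.  */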

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch (ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
    case_c:
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
    case_r:
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;
    case 'C':
        /* With SHRX et al, we need not use ECX as shift count register. */
        if (have_bmi2) {
            goto case_r;
        } else {
            goto case_c;
        }

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;
    case 'I':
        ct->ct |= TCG_CT_CONST_I32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
        return 1;
    }
    return 0;
}

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT           0x100           /* 0x0f opcode prefix */
#define P_EXT38         0x200           /* 0x0f 0x38 opcode prefix */
#define P_DATA16        0x400           /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       0x800           /* 0x67 opcode prefix */
# define P_REXW         0x1000          /* Set REX.W = 1 */
# define P_REXB_R       0x2000          /* REG field as byte register */
# define P_REXB_RM      0x4000          /* R/M field as byte register */
# define P_GS           0x8000          /* gs segment override */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
# define P_GS           0
#endif
#define P_SIMDF3        0x10000         /* 0xf3 opcode prefix */
#define P_SIMDF2        0x20000         /* 0xf2 opcode prefix */

#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN        (0xf2 | P_EXT38)
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVBE_GyMy  (0xf0 | P_EXT38)
#define OPC_MOVBE_MyGy  (0xf1 | P_EXT38)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3)
#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH. */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3. */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5. */
#define EXT5_INC_Ev   0
#define EXT5_DEC_Ev   1
#define EXT5_CALLN_Ev 2
#define EXT5_JMPN_Ev  4

/* Condition codes to be added to OPC_JCC_{long,short}. */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);
    }
    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        tcg_debug_assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }

    tcg_out8(s, opc);
}
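
/* A worked example of the REX logic above (illustration only, derived from
   the rules in tcg_out_opc): storing %r8 into %rax with OPC_MOVL_EvGv (0x89)
   and P_REXW sets REX.W (0x8) and REX.R (%r8 has bit 3 set in the reg
   field), giving rex = 0x4c; with ModRM 0xc0 from tcg_out_modrm this is
       4c 89 c0                 movq %r8, %rax  */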
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
#endif

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}

static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
    int tmp;

    if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
        /* Three byte VEX prefix.  */
        tcg_out8(s, 0xc4);

        /* VEX.m-mmmm */
        if (opc & P_EXT38) {
            tmp = 2;
        } else if (opc & P_EXT) {
            tmp = 1;
        } else {
            tcg_abort();
        }
        tmp |= 0x40;                    /* VEX.X */
        tmp |= (r & 8 ? 0 : 0x80);      /* VEX.R */
        tmp |= (rm & 8 ? 0 : 0x20);     /* VEX.B */
        tcg_out8(s, tmp);

        tmp = (opc & P_REXW ? 0x80 : 0);  /* VEX.W */
    } else {
        /* Two byte VEX prefix.  */
        tcg_out8(s, 0xc5);

        tmp = (r & 8 ? 0 : 0x80);       /* VEX.R */
    }
    /* VEX.pp */
    if (opc & P_DATA16) {
        tmp |= 1;                       /* 0x66 */
    } else if (opc & P_SIMDF3) {
        tmp |= 2;                       /* 0xf3 */
    } else if (opc & P_SIMDF2) {
        tmp |= 3;                       /* 0xf2 */
    }
    tmp |= (~v & 15) << 3;              /* VEX.vvvv */
    tcg_out8(s, tmp);
    tcg_out8(s, opc);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
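
/* A sketch of the 3-byte VEX form above, worked through for one case (my
   reading of the field layout; illustration only): OPC_SHLX is 0xf7 with
   P_EXT38 | P_DATA16, so VEX.m-mmmm = 2 and VEX.pp = 1.  With dest %eax in
   the reg field, source %ebx in r/m, and shift count %ecx in vvvv:
       c4 e2 71 f7 c3           shlx %ecx, %ebx, %eax
   Here 0xe2 is the inverted R/X/B bits plus m-mmmm = 2, and 0x71 is
   (~1 & 15) << 3 for vvvv = %ecx, plus pp = 1.  */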

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   Either RM or INDEX may be omitted by passing a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single-byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two-byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            tcg_debug_assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
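
/* A worked example of the mod/len selection above (illustration only):
   loading 8(%esp) into %eax must use the two-byte form, because %esp in
   the r/m field is the escape to MODRM+SIB.  With mod = 0x40 (disp8),
   ModRM = 0x44 and SIB = 0x24 (no index, base = %esp), this emits
       8b 44 24 08              movl 8(%esp), %eax  */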

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}
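
/* Recapping tcg_out_movi's strategy, cheapest first (approximate sizes,
   without REX.B for low registers; this summarizes the cases above):
     xor   r, r                  2 bytes    arg == 0
     movl  $imm32, r             5 bytes    value zero-extends from 32 bits
     movq  $simm32, r (c7 /0)    7 bytes    value sign-extends from 32 bits
     lea   disp32(%rip), r       7 bytes    value within +/- 2GB of the code
     movabsq  $imm64, r         10 bytes    anything else  */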

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    /* Given the strength of x86 memory ordering, we only need to care
       about store-load ordering.  Experimentally, "lock orl $0,0(%esp)"
       is faster than "mfence", so don't bother with the SSE insn.  */
    if (a0 & TCG_MO_ST_LD) {
        tcg_out8(s, 0xf0);
        tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0);
        tcg_out8(s, 0);
    }
}
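
/* For reference, the barrier above assembles to five bytes:
       f0 83 0c 24 00           lock orl $0, (%esp)
   A LOCK-prefixed read-modify-write is a full barrier on x86, which
   covers the store-load ordering that TCG_MO_ST_LD requires.  */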

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs)
{
    int rexw = 0;
    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
        if (val != (int32_t)val) {
            return false;
        }
        rexw = P_REXW;
    }
    tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
    tcg_out32(s, val);
    return true;
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
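
/* Two of the special cases above, as concrete encodings (worked examples
   only): "and $0xff, %ecx" is rewritten as the zero-extension
       0f b6 c9                 movzbl %cl, %ecx
   and on x86_64 "and $0xffffffff, %rax" becomes the 2-byte
       8b c0                    movl %eax, %eax
   since 32-bit moves implicitly zero-extend to 64 bits.  */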

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
{
    int32_t val, val1;

    if (l->has_value) {
        val = tcg_pcrel_diff(s, l->u.value_ptr);
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
        s->code_ptr += 4;
    }
}

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross-basic-block temporaries */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    TCGLabel *label_next = gen_new_label();
    TCGLabel *label_this = arg_label(args[5]);

    switch (args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_this, small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
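
/* Restating the double-word scheme above for clarity: for the ordered
   conditions, first branch on the signed (or unsigned) relation of the
   high words; if the high words are unequal the result is already decided,
   so the "jne label_next" skips the low-word test; only when the high
   words compare equal do we fall through and decide on an unsigned
   comparison of the low words.  */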
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    TCGLabel *label_true, *label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_arg(label_true);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_arg(label_over);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    if (have_cmov) {
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
    } else {
        TCGLabel *over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);
    }
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
}
#endif

static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
{
    intptr_t disp = tcg_pcrel_diff(s, dest) - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 0, dest);
}

static void tcg_out_nopn(TCGContext *s, int n)
{
    int i;
    /* Emit 1 or 2 operand size prefixes for the standard one byte nop,
     * "xchg %eax,%eax", forming "xchg %ax,%ax".  All cores accept the
     * duplicate prefix, and all of the interesting recent cores can
     * decode and discard the duplicates in a single cycle.
     */
    tcg_debug_assert(n >= 1);
    for (i = 1; i < n; ++i) {
        tcg_out8(s, 0x66);
    }
    tcg_out8(s, 0x90);
}
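
/* For reference, the nop sequences this produces (a direct reading of
   the loop above):
       n = 1:    90             nop
       n = 2:    66 90          xchg %ax, %ax
       n = 3:    66 66 90       (duplicated prefix, still a single nop)  */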

#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO and ADDRHI contain the low and high part of the address.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   Second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   First argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp opc,
                                    tcg_insn_unit **label_ptr, int which)
{
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType tlbtype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0, tlbrexw = 0;
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1 << a_bits) - 1;
    unsigned s_mask = (1 << s_bits) - 1;
    target_ulong tlb_mask;

    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
            trexw = P_REXW;
        }
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            hrexw = P_REXW;
            if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
                tlbtype = TCG_TYPE_I64;
                tlbrexw = P_REXW;
            }
        }
    }

    tcg_out_mov(s, tlbtype, r0, addrlo);
    /* If the required alignment is at least as large as the access, simply
       copy the address and mask.  For lesser alignments, check that we don't
       cross pages for the complete access.  */
    if (a_bits >= s_bits) {
        tcg_out_mov(s, ttype, r1, addrlo);
    } else {
        tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
    }
    tlb_mask = TARGET_PAGE_MASK | a_mask;

    tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);
    tgen_arithi(s, ARITH_AND + tlbrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r0), r1 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);

    /* Prepare for both the fast path add of the tlb addend, and the slow
       path function argument setup.  There are two cases worth noting:
       For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
       before the fastpath ADDQ below.  For 64-bit guest and x32 host, MOVQ
       copies the entire guest address for the slow path, while truncation
       for the 32-bit host happens with the fastpath ADDL below.  */
    tcg_out_mov(s, ttype, r1, addrlo);

    /* jne slow_path */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);

        /* jne slow_path */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB Hit.  */

    /* add addend(r0), r1 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
}
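
/* Putting tcg_out_tlb_load together, the fast path for an aligned access
   on a 64-bit host looks roughly like this (a sketch; registers, prefixes
   and the second compare vary with the cases above):
       mov   addrlo, r0
       mov   addrlo, r1          # or lea s_mask-a_mask(addrlo), r1
       shr   $(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), r0
       and   $(TARGET_PAGE_MASK | a_mask), r1
       and   $((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), r0
       lea   tlb_table_ofs(env, r0), r0
       cmp   (r0), r1
       mov   addrlo, r1          # mov does not clobber the flags
       jne   slow_path
       add   addend_ofs(r0), r1  # r1 is now the host address  */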

/*
 * Record the context of a call to the out-of-line helper code for the slow
 * path for a load or store, so that we can later generate the correct
 * helper code.
 */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGReg datalo, TCGReg datahi,
                                TCGReg addrlo, TCGReg addrhi,
                                tcg_insn_unit *raddr,
                                tcg_insn_unit **label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr[0];
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];
    }
}

/*
 * Generate code for the slow path for a load at the end of the block.
 */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGReg data_reg;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];

    /* resolve label address */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);
    }

    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);

    data_reg = l->datalo_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case MO_SW:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case MO_UB:
    case MO_UW:
        /* Note that the helpers have zero-extended to tcg_target_long.  */
    case MO_UL:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* Jump back to the code following the qemu_ld.  */
    tcg_out_jmp(s, l->raddr);
}

/*
 * Generate code for the slow path for a store at the end of the block.
 */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp s_bits = opc & MO_SIZE;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];
    TCGReg retaddr;

    /* resolve label address */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (s_bits == MO_64) {
            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
        ofs += 4;

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);

        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        } else {
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
                       TCG_TARGET_CALL_STACK_OFFSET);
        }
    }

    /* "Tail call" to the helper, with the return address back inline.  */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
}
#elif defined(__x86_64__) && defined(__linux__)
# include <asm/prctl.h>
# include <sys/prctl.h>

int arch_prctl(int code, unsigned long addr);

static int guest_base_flags;
static inline void setup_guest_base_seg(void)
{
    if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
        guest_base_flags = P_GS;
    }
}
#else
# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
#endif /* SOFTMMU */

static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, int index, intptr_t ofs,
                                   int seg, TCGMemOp memop)
{
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_GvEv;

    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_GyMy;
    }

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_SB:
        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_UW:
        tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                 base, index, 0, ofs);
        if (real_bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case MO_SW:
        if (real_bswap) {
            if (have_movbe) {
                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
                                         datalo, base, index, 0, ofs);
            } else {
                tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_rolw_8(s, datalo);
            }
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
                                     datalo, base, index, 0, ofs);
        }
        break;
    case MO_UL:
        tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        if (real_bswap) {
            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap32(s, datalo);
            }
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
                                     base, index, 0, ofs);
        }
        break;
#endif
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
            } else {
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  It will be useful once fixed-register globals are less
   common. */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    tcg_insn_unit *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    mem_index = get_mmuidx(oi);

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  */
    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);

    /* Record the current context of a load into ldst label */
    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else
    {
        int32_t offset = guest_base;
        TCGReg base = addrlo;
        int index = -1;
        int seg = 0;

        /* For a 32-bit guest, the high 32 bits may contain garbage.
           We can do this with the ADDR32 prefix if we're not using
           a guest base, or when using segmentation.  Otherwise we
           need to zero-extend manually.  */
        if (guest_base == 0 || guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
                seg |= P_ADDR32;
            }
        } else if (TCG_TARGET_REG_BITS == 64) {
            if (TARGET_LONG_BITS == 32) {
                tcg_out_ext32u(s, TCG_REG_L0, base);
                base = TCG_REG_L0;
            }
            if (offset != guest_base) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                index = TCG_REG_L1;
                offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, datalo, datahi,
                               base, index, offset, seg, opc);
    }
#endif
}

static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
                                   TCGMemOp memop)
{
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that TCG_REG_L0 is definitely free here.  */
    const TCGReg scratch = TCG_REG_L0;
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_EvGv;

    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_MyGy;
    }

    switch (memop & MO_SIZE) {
    case MO_8:
        /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
           Use the scratch register if necessary.  */
        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
                             datalo, base, ofs);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
        break;
    case MO_32:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
        break;
    case MO_64:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
        } else {
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
            tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    tcg_insn_unit *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    mem_index = get_mmuidx(oi);

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a store into ldst label */
    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else
    {
        int32_t offset = guest_base;
        TCGReg base = addrlo;
        int seg = 0;

        /* See comment in tcg_out_qemu_ld re zero-extension of addrlo.  */
        if (guest_base == 0 || guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
                seg |= P_ADDR32;
            }
        } else if (TCG_TARGET_REG_BITS == 64) {
            /* ??? Note that we can't use the same SIB addressing scheme
               as for loads, since we require L0 free for bswap.  */
            if (offset != guest_base) {
                if (TARGET_LONG_BITS == 32) {
                    tcg_out_ext32u(s, TCG_REG_L0, base);
                    base = TCG_REG_L0;
                }
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
                base = TCG_REG_L1;
                offset = 0;
            } else if (TARGET_LONG_BITS == 32) {
                tcg_out_ext32u(s, TCG_REG_L1, base);
                base = TCG_REG_L1;
            }
        }

        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
    }
#endif
}

1795 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1796 const TCGArg *args, const int *const_args)
1797 {
1798 int c, vexop, rexw = 0;
1799
1800 #if TCG_TARGET_REG_BITS == 64
1801 # define OP_32_64(x) \
1802 case glue(glue(INDEX_op_, x), _i64): \
1803 rexw = P_REXW; /* FALLTHRU */ \
1804 case glue(glue(INDEX_op_, x), _i32)
1805 #else
1806 # define OP_32_64(x) \
1807 case glue(glue(INDEX_op_, x), _i32)
1808 #endif
1809
1810 switch(opc) {
1811 case INDEX_op_exit_tb:
1812 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1813 tcg_out_jmp(s, tb_ret_addr);
1814 break;
1815 case INDEX_op_goto_tb:
1816 if (s->tb_jmp_insn_offset) {
1817 /* direct jump method */
1818 int gap;
1819 /* jump displacement must be aligned for atomic patching;
1820 * see if we need to add extra nops before jump
1821 */
1822 gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4));
1823 if (gap != 1) {
1824 tcg_out_nopn(s, gap - 1);
1825 }
1826 tcg_out8(s, OPC_JMP_long); /* jmp im */
1827 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
1828 tcg_out32(s, 0);
1829 } else {
1830 /* indirect jump method */
1831 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1832 (intptr_t)(s->tb_jmp_target_addr + args[0]));
1833 }
1834 s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
1835 break;
1836 case INDEX_op_br:
1837 tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0);
1838 break;
1839 OP_32_64(ld8u):
1840 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1841 tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1842 break;
1843 OP_32_64(ld8s):
1844 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1845 break;
1846 OP_32_64(ld16u):
1847 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1848 tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1849 break;
1850 OP_32_64(ld16s):
1851 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1852 break;
1853 #if TCG_TARGET_REG_BITS == 64
1854 case INDEX_op_ld32u_i64:
1855 #endif
1856 case INDEX_op_ld_i32:
1857 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1858 break;
1859
1860 OP_32_64(st8):
1861 if (const_args[0]) {
1862 tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
1863 0, args[1], args[2]);
1864 tcg_out8(s, args[0]);
1865 } else {
1866 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1867 args[0], args[1], args[2]);
1868 }
1869 break;
1870 OP_32_64(st16):
1871 if (const_args[0]) {
1872 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
1873 0, args[1], args[2]);
1874 tcg_out16(s, args[0]);
1875 } else {
1876 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1877 args[0], args[1], args[2]);
1878 }
1879 break;
1880 #if TCG_TARGET_REG_BITS == 64
1881 case INDEX_op_st32_i64:
1882 #endif
1883 case INDEX_op_st_i32:
1884 if (const_args[0]) {
1885 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
1886 tcg_out32(s, args[0]);
1887 } else {
1888 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1889 }
1890 break;
1891
1892 OP_32_64(add):
1893 /* For 3-operand addition, use LEA. */
1894 if (args[0] != args[1]) {
1895 TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1896
1897 if (const_args[2]) {
1898 c3 = a2, a2 = -1;
1899 } else if (a0 == a2) {
1900 /* Watch out for dest = src + dest, since we've removed
1901 the matching constraint on the add. */
1902 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1903 break;
1904 }
1905
1906 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1907 break;
1908 }
1909 c = ARITH_ADD;
1910 goto gen_arith;
1911 OP_32_64(sub):
1912 c = ARITH_SUB;
1913 goto gen_arith;
1914 OP_32_64(and):
1915 c = ARITH_AND;
1916 goto gen_arith;
1917 OP_32_64(or):
1918 c = ARITH_OR;
1919 goto gen_arith;
1920 OP_32_64(xor):
1921 c = ARITH_XOR;
1922 goto gen_arith;
1923 gen_arith:
1924 if (const_args[2]) {
1925 tgen_arithi(s, c + rexw, args[0], args[2], 0);
1926 } else {
1927 tgen_arithr(s, c + rexw, args[0], args[2]);
1928 }
1929 break;
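/*
 * For example (illustrative registers), a three-operand add with a
 * constant becomes a single LEA, leaving both sources intact:
 *
 *     lea 0x10(%esi), %eax         eax = esi + 16
 *     lea (%esi,%edi), %eax        eax = esi + edi
 *
 * whereas the two-operand forms above go through tgen_arithi or
 * tgen_arithr as ADD reg,imm / ADD reg,reg.
 */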
1930
1931 OP_32_64(andc):
1932 if (const_args[2]) {
1933 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
1934 args[0], args[1]);
1935 tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
1936 } else {
1937 tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
1938 }
1939 break;
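/* For reference: VEX-encoded ANDN dest,v,rm computes dest = rm & ~v,
   so the call above yields args[0] = args[1] & ~args[2] directly,
   with no matching-operand constraint; the constant path instead
   folds the inversion into a plain AND immediate. */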
1940
1941 OP_32_64(mul):
1942 if (const_args[2]) {
1943 int32_t val;
1944 val = args[2];
1945 if (val == (int8_t)val) {
1946 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1947 tcg_out8(s, val);
1948 } else {
1949 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1950 tcg_out32(s, val);
1951 }
1952 } else {
1953 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1954 }
1955 break;
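/* E.g. (illustrative): a constant multiplier uses the immediate IMUL
   forms -- "imul $5, %eax, %eax" with a sign-extended 8-bit immediate
   when the value fits in int8_t, else the 32-bit immediate form. */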
1956
1957 OP_32_64(div2):
1958 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1959 break;
1960 OP_32_64(divu2):
1961 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1962 break;
1963
1964 OP_32_64(shl):
1965 c = SHIFT_SHL;
1966 vexop = OPC_SHLX;
1967 goto gen_shift_maybe_vex;
1968 OP_32_64(shr):
1969 c = SHIFT_SHR;
1970 vexop = OPC_SHRX;
1971 goto gen_shift_maybe_vex;
1972 OP_32_64(sar):
1973 c = SHIFT_SAR;
1974 vexop = OPC_SARX;
1975 goto gen_shift_maybe_vex;
1976 OP_32_64(rotl):
1977 c = SHIFT_ROL;
1978 goto gen_shift;
1979 OP_32_64(rotr):
1980 c = SHIFT_ROR;
1981 goto gen_shift;
1982 gen_shift_maybe_vex:
1983 if (have_bmi2 && !const_args[2]) {
1984 tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
1985 break;
1986 }
1987 /* FALLTHRU */
1988 gen_shift:
1989 if (const_args[2]) {
1990 tcg_out_shifti(s, c + rexw, args[0], args[2]);
1991 } else {
1992 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1993 }
1994 break;
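/*
 * For reference: in the BMI2 path the shift count (args[2]) travels
 * in the VEX.vvvv field, so it may live in any register rather than
 * being pinned to CL, and SHLX/SHRX/SARX leave the flags untouched.
 * The legacy path below still requires an immediate count or CL.
 */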
1995
1996 case INDEX_op_brcond_i32:
1997 tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1998 arg_label(args[3]), 0);
1999 break;
2000 case INDEX_op_setcond_i32:
2001 tcg_out_setcond32(s, args[3], args[0], args[1],
2002 args[2], const_args[2]);
2003 break;
2004 case INDEX_op_movcond_i32:
2005 tcg_out_movcond32(s, args[5], args[0], args[1],
2006 args[2], const_args[2], args[3]);
2007 break;
2008
2009 OP_32_64(bswap16):
2010 tcg_out_rolw_8(s, args[0]);
2011 break;
2012 OP_32_64(bswap32):
2013 tcg_out_bswap32(s, args[0]);
2014 break;
2015
2016 OP_32_64(neg):
2017 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
2018 break;
2019 OP_32_64(not):
2020 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
2021 break;
2022
2023 OP_32_64(ext8s):
2024 tcg_out_ext8s(s, args[0], args[1], rexw);
2025 break;
2026 OP_32_64(ext16s):
2027 tcg_out_ext16s(s, args[0], args[1], rexw);
2028 break;
2029 OP_32_64(ext8u):
2030 tcg_out_ext8u(s, args[0], args[1]);
2031 break;
2032 OP_32_64(ext16u):
2033 tcg_out_ext16u(s, args[0], args[1]);
2034 break;
2035
2036 case INDEX_op_qemu_ld_i32:
2037 tcg_out_qemu_ld(s, args, 0);
2038 break;
2039 case INDEX_op_qemu_ld_i64:
2040 tcg_out_qemu_ld(s, args, 1);
2041 break;
2042 case INDEX_op_qemu_st_i32:
2043 tcg_out_qemu_st(s, args, 0);
2044 break;
2045 case INDEX_op_qemu_st_i64:
2046 tcg_out_qemu_st(s, args, 1);
2047 break;
2048
2049 OP_32_64(mulu2):
2050 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
2051 break;
2052 OP_32_64(muls2):
2053 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
2054 break;
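/* Both widening multiplies use the one-operand MUL/IMUL forms, which
   implicitly compute EDX:EAX = EAX * r/m (RDX:RAX for i64); hence the
   fixed "a"/"d" constraints in x86_op_defs below. */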
2055 OP_32_64(add2):
2056 if (const_args[4]) {
2057 tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
2058 } else {
2059 tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
2060 }
2061 if (const_args[5]) {
2062 tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
2063 } else {
2064 tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
2065 }
2066 break;
2067 OP_32_64(sub2):
2068 if (const_args[4]) {
2069 tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
2070 } else {
2071 tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
2072 }
2073 if (const_args[5]) {
2074 tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
2075 } else {
2076 tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
2077 }
2078 break;
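/*
 * E.g. (illustrative) a 64-bit add on a 32-bit host:
 *
 *     add %ecx, %eax        low halves, sets carry
 *     adc %ebx, %edx        high halves plus carry
 *
 * The "0"/"1" constraints below alias the outputs to the low/high
 * inputs, and sub2 pairs SUB with SBB in the same way.
 */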
2079
2080 #if TCG_TARGET_REG_BITS == 32
2081 case INDEX_op_brcond2_i32:
2082 tcg_out_brcond2(s, args, const_args, 0);
2083 break;
2084 case INDEX_op_setcond2_i32:
2085 tcg_out_setcond2(s, args, const_args);
2086 break;
2087 #else /* TCG_TARGET_REG_BITS == 64 */
2088 case INDEX_op_ld32s_i64:
2089 tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
2090 break;
2091 case INDEX_op_ld_i64:
2092 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2093 break;
2094 case INDEX_op_st_i64:
2095 if (const_args[0]) {
2096 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
2097 0, args[1], args[2]);
2098 tcg_out32(s, args[0]);
2099 } else {
2100 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2101 }
2102 break;
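/* The REX.W immediate store sign-extends its 32-bit immediate to 64
   bits, which is why st_i64 takes the "re" (register or sign-extended
   imm32) constraint in x86_op_defs below. */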
2103
2104 case INDEX_op_brcond_i64:
2105 tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
2106 arg_label(args[3]), 0);
2107 break;
2108 case INDEX_op_setcond_i64:
2109 tcg_out_setcond64(s, args[3], args[0], args[1],
2110 args[2], const_args[2]);
2111 break;
2112 case INDEX_op_movcond_i64:
2113 tcg_out_movcond64(s, args[5], args[0], args[1],
2114 args[2], const_args[2], args[3]);
2115 break;
2116
2117 case INDEX_op_bswap64_i64:
2118 tcg_out_bswap64(s, args[0]);
2119 break;
2120 case INDEX_op_extu_i32_i64:
2121 case INDEX_op_ext32u_i64:
2122 tcg_out_ext32u(s, args[0], args[1]);
2123 break;
2124 case INDEX_op_ext_i32_i64:
2125 case INDEX_op_ext32s_i64:
2126 tcg_out_ext32s(s, args[0], args[1]);
2127 break;
2128 #endif
2129
2130 OP_32_64(deposit):
2131 if (args[3] == 0 && args[4] == 8) {
2132 /* store args[2] into bits 0..7 of args[0] */
2133 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
2134 args[2], args[0]);
2135 } else if (args[3] == 8 && args[4] == 8) {
2136 /* store args[2] into bits 8..15 via the high-byte register */
2137 tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
2138 } else if (args[3] == 0 && args[4] == 16) {
2139 /* store args[2] into bits 0..15 of args[0] */
2140 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
2141 } else {
2142 tcg_abort();
2143 }
2144 break;
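/*
 * Note that args[0] + 4 above is register arithmetic, not an address:
 * in a REX-less byte operation, ModRM register numbers 4..7 select
 * the legacy high-byte registers, so e.g. EAX (0) + 4 names AH and
 * the MOVB stores into bits 8..15.  This is also why the "Q"
 * constraint limits deposit to the four byte-addressable registers.
 */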
2145
2146 case INDEX_op_mb:
2147 tcg_out_mb(s, args[0]);
2148 break;
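/* Given x86's strong (TSO) ordering, only a store-load barrier needs
   a real fence instruction here; load-load, load-store and
   store-store orderings already hold, so those barriers can compile
   to nothing. */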
2149 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2150 case INDEX_op_mov_i64:
2151 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2152 case INDEX_op_movi_i64:
2153 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2154 default:
2155 tcg_abort();
2156 }
2157
2158 #undef OP_32_64
2159 }
2160
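/*
 * A reader's legend for the constraint strings below, summarizing
 * target_parse_constraint earlier in this file: "r" any register;
 * "q" a byte-addressable register (any register on 64-bit); "Q" a
 * register with an addressable second byte (EAX, ECX, EDX, EBX);
 * "a"/"d" fixed EAX/EDX; "c" fixed ECX; "C" the shift count (ECX, or
 * any register once BMI2 SHLX/SHRX/SARX apply); "L" a qemu_ld/st
 * register excluding L0/L1; "0"/"1" alias of that input operand;
 * "i" any immediate; "e" a sign-extended 32-bit immediate; "Z" a
 * zero-extended 32-bit immediate; "I" an immediate whose complement
 * sign-extends from 32 bits (for andc).
 */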
2161 static const TCGTargetOpDef x86_op_defs[] = {
2162 { INDEX_op_exit_tb, { } },
2163 { INDEX_op_goto_tb, { } },
2164 { INDEX_op_br, { } },
2165 { INDEX_op_ld8u_i32, { "r", "r" } },
2166 { INDEX_op_ld8s_i32, { "r", "r" } },
2167 { INDEX_op_ld16u_i32, { "r", "r" } },
2168 { INDEX_op_ld16s_i32, { "r", "r" } },
2169 { INDEX_op_ld_i32, { "r", "r" } },
2170 { INDEX_op_st8_i32, { "qi", "r" } },
2171 { INDEX_op_st16_i32, { "ri", "r" } },
2172 { INDEX_op_st_i32, { "ri", "r" } },
2173
2174 { INDEX_op_add_i32, { "r", "r", "ri" } },
2175 { INDEX_op_sub_i32, { "r", "0", "ri" } },
2176 { INDEX_op_mul_i32, { "r", "0", "ri" } },
2177 { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
2178 { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
2179 { INDEX_op_and_i32, { "r", "0", "ri" } },
2180 { INDEX_op_or_i32, { "r", "0", "ri" } },
2181 { INDEX_op_xor_i32, { "r", "0", "ri" } },
2182 { INDEX_op_andc_i32, { "r", "r", "ri" } },
2183
2184 { INDEX_op_shl_i32, { "r", "0", "Ci" } },
2185 { INDEX_op_shr_i32, { "r", "0", "Ci" } },
2186 { INDEX_op_sar_i32, { "r", "0", "Ci" } },
2187 { INDEX_op_rotl_i32, { "r", "0", "ci" } },
2188 { INDEX_op_rotr_i32, { "r", "0", "ci" } },
2189
2190 { INDEX_op_brcond_i32, { "r", "ri" } },
2191
2192 { INDEX_op_bswap16_i32, { "r", "0" } },
2193 { INDEX_op_bswap32_i32, { "r", "0" } },
2194
2195 { INDEX_op_neg_i32, { "r", "0" } },
2196
2197 { INDEX_op_not_i32, { "r", "0" } },
2198
2199 { INDEX_op_ext8s_i32, { "r", "q" } },
2200 { INDEX_op_ext16s_i32, { "r", "r" } },
2201 { INDEX_op_ext8u_i32, { "r", "q" } },
2202 { INDEX_op_ext16u_i32, { "r", "r" } },
2203
2204 { INDEX_op_setcond_i32, { "q", "r", "ri" } },
2205
2206 { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
2207 { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
2208
2209 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
2210 { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
2211 { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2212 { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2213
2214 { INDEX_op_mb, { } },
2215
2216 #if TCG_TARGET_REG_BITS == 32
2217 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
2218 { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
2219 #else
2220 { INDEX_op_ld8u_i64, { "r", "r" } },
2221 { INDEX_op_ld8s_i64, { "r", "r" } },
2222 { INDEX_op_ld16u_i64, { "r", "r" } },
2223 { INDEX_op_ld16s_i64, { "r", "r" } },
2224 { INDEX_op_ld32u_i64, { "r", "r" } },
2225 { INDEX_op_ld32s_i64, { "r", "r" } },
2226 { INDEX_op_ld_i64, { "r", "r" } },
2227 { INDEX_op_st8_i64, { "ri", "r" } },
2228 { INDEX_op_st16_i64, { "ri", "r" } },
2229 { INDEX_op_st32_i64, { "ri", "r" } },
2230 { INDEX_op_st_i64, { "re", "r" } },
2231
2232 { INDEX_op_add_i64, { "r", "r", "re" } },
2233 { INDEX_op_mul_i64, { "r", "0", "re" } },
2234 { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
2235 { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
2236 { INDEX_op_sub_i64, { "r", "0", "re" } },
2237 { INDEX_op_and_i64, { "r", "0", "reZ" } },
2238 { INDEX_op_or_i64, { "r", "0", "re" } },
2239 { INDEX_op_xor_i64, { "r", "0", "re" } },
2240 { INDEX_op_andc_i64, { "r", "r", "rI" } },
2241
2242 { INDEX_op_shl_i64, { "r", "0", "Ci" } },
2243 { INDEX_op_shr_i64, { "r", "0", "Ci" } },
2244 { INDEX_op_sar_i64, { "r", "0", "Ci" } },
2245 { INDEX_op_rotl_i64, { "r", "0", "ci" } },
2246 { INDEX_op_rotr_i64, { "r", "0", "ci" } },
2247
2248 { INDEX_op_brcond_i64, { "r", "re" } },
2249 { INDEX_op_setcond_i64, { "r", "r", "re" } },
2250
2251 { INDEX_op_bswap16_i64, { "r", "0" } },
2252 { INDEX_op_bswap32_i64, { "r", "0" } },
2253 { INDEX_op_bswap64_i64, { "r", "0" } },
2254 { INDEX_op_neg_i64, { "r", "0" } },
2255 { INDEX_op_not_i64, { "r", "0" } },
2256
2257 { INDEX_op_ext8s_i64, { "r", "r" } },
2258 { INDEX_op_ext16s_i64, { "r", "r" } },
2259 { INDEX_op_ext32s_i64, { "r", "r" } },
2260 { INDEX_op_ext8u_i64, { "r", "r" } },
2261 { INDEX_op_ext16u_i64, { "r", "r" } },
2262 { INDEX_op_ext32u_i64, { "r", "r" } },
2263
2264 { INDEX_op_ext_i32_i64, { "r", "r" } },
2265 { INDEX_op_extu_i32_i64, { "r", "r" } },
2266
2267 { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
2268 { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
2269
2270 { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
2271 { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
2272 { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
2273 { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
2274 #endif
2275
2276 #if TCG_TARGET_REG_BITS == 64
2277 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2278 { INDEX_op_qemu_st_i32, { "L", "L" } },
2279 { INDEX_op_qemu_ld_i64, { "r", "L" } },
2280 { INDEX_op_qemu_st_i64, { "L", "L" } },
2281 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2282 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2283 { INDEX_op_qemu_st_i32, { "L", "L" } },
2284 { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
2285 { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
2286 #else
2287 { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
2288 { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
2289 { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
2290 { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
2291 #endif
2292 { -1 },
2293 };
2294
2295 static int tcg_target_callee_save_regs[] = {
2296 #if TCG_TARGET_REG_BITS == 64
2297 TCG_REG_RBP,
2298 TCG_REG_RBX,
2299 #if defined(_WIN64)
2300 TCG_REG_RDI,
2301 TCG_REG_RSI,
2302 #endif
2303 TCG_REG_R12,
2304 TCG_REG_R13,
2305 TCG_REG_R14, /* Currently used for the global env. */
2306 TCG_REG_R15,
2307 #else
2308 TCG_REG_EBP, /* Currently used for the global env. */
2309 TCG_REG_EBX,
2310 TCG_REG_ESI,
2311 TCG_REG_EDI,
2312 #endif
2313 };
2314
2315 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2316 and tcg_register_jit. */
2317
2318 #define PUSH_SIZE \
2319 ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2320 * (TCG_TARGET_REG_BITS / 8))
2321
2322 #define FRAME_SIZE \
2323 ((PUSH_SIZE \
2324 + TCG_STATIC_CALL_ARGS_SIZE \
2325 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2326 + TCG_TARGET_STACK_ALIGN - 1) \
2327 & ~(TCG_TARGET_STACK_ALIGN - 1))
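/*
 * Worked example, assuming the usual values TCG_STATIC_CALL_ARGS_SIZE
 * == 128 and CPU_TEMP_BUF_NLONGS == 128, on an x86_64 SysV host with
 * six callee-saved registers:
 *
 *     PUSH_SIZE  = (1 + 6) * 8                  =   56
 *     FRAME_SIZE = align16(56 + 128 + 128 * 8)  = 1216
 *
 * comfortably below the 1 << 14 bound asserted for the DWARF uleb128
 * encoding further down.
 */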
2328
2329 /* Generate global QEMU prologue and epilogue code */
2330 static void tcg_target_qemu_prologue(TCGContext *s)
2331 {
2332 int i, stack_addend;
2333
2334 /* TB prologue */
2335
2336 /* Reserve some stack space, also for TCG temps. */
2337 stack_addend = FRAME_SIZE - PUSH_SIZE;
2338 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2339 CPU_TEMP_BUF_NLONGS * sizeof(long));
2340
2341 /* Save all callee saved registers. */
2342 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2343 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2344 }
2345
2346 #if TCG_TARGET_REG_BITS == 32
2347 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
2348 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
2349 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2350 /* jmp *tb. */
2351 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
2352 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
2353 + stack_addend);
2354 #else
2355 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2356 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2357 /* jmp *tb. */
2358 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
2359 #endif
2360
2361 /* TB epilogue */
2362 tb_ret_addr = s->code_ptr;
2363
2364 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
2365
2366 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2367 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2368 }
2369 tcg_out_opc(s, OPC_RET, 0, 0, 0);
2370
2371 #if !defined(CONFIG_SOFTMMU)
2372 /* Try to set up a segment register to point to guest_base. */
2373 if (guest_base) {
2374 setup_guest_base_seg();
2375 }
2376 #endif
2377 }
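/*
 * Sketch of the generated code on an x86_64 SysV host (illustrative;
 * actual registers follow the tables above):
 *
 *     prologue:                        epilogue (tb_ret_addr):
 *         push %rbp ... push %r15          add  $stack_addend, %rsp
 *         mov  %rdi, %r14    env           pop  %r15 ... pop %rbp
 *         sub  $stack_addend, %rsp         ret
 *         jmp  *%rsi         enter TB
 */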
2378
2379 static void tcg_target_init(TCGContext *s)
2380 {
2381 #ifdef CONFIG_CPUID_H
2382 unsigned a, b, c, d;
2383 int max = __get_cpuid_max(0, 0);
2384
2385 if (max >= 1) {
2386 __cpuid(1, a, b, c, d);
2387 #ifndef have_cmov
2388 /* For 32-bit, 99% certainty that we're running on hardware that
2389 supports cmov, but we still need to check. In case cmov is not
2390 available, we'll use a small forward branch. */
2391 have_cmov = (d & bit_CMOV) != 0;
2392 #endif
2393 #ifndef have_movbe
2394 /* MOVBE first appeared on Intel Atom and Haswell CPUs, so we
2395 need to probe for it at runtime. */
2396 have_movbe = (c & bit_MOVBE) != 0;
2397 #endif
2398 }
2399
2400 if (max >= 7) {
2401 /* BMI1 first appeared on AMD Piledriver and Intel Haswell CPUs. */
2402 __cpuid_count(7, 0, a, b, c, d);
2403 #ifdef bit_BMI
2404 have_bmi1 = (b & bit_BMI) != 0;
2405 #endif
2406 #ifndef have_bmi2
2407 have_bmi2 = (b & bit_BMI2) != 0;
2408 #endif
2409 }
2410 #endif
2411
2412 if (TCG_TARGET_REG_BITS == 64) {
2413 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2414 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2415 } else {
2416 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2417 }
2418
2419 tcg_regset_clear(tcg_target_call_clobber_regs);
2420 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2421 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2422 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2423 if (TCG_TARGET_REG_BITS == 64) {
2424 #if !defined(_WIN64)
2425 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2426 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2427 #endif
2428 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2429 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2430 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2431 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2432 }
2433
2434 tcg_regset_clear(s->reserved_regs);
2435 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2436
2437 tcg_add_target_add_op_defs(x86_op_defs);
2438 }
2439
2440 typedef struct {
2441 DebugFrameHeader h;
2442 uint8_t fde_def_cfa[4];
2443 uint8_t fde_reg_ofs[14];
2444 } DebugFrame;
2445
2446 /* We're expecting a 2-byte uleb128 encoded value. */
2447 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
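/*
 * The two immediate bytes in fde_def_cfa below encode FRAME_SIZE as a
 * two-byte uleb128: the low seven bits with the continuation bit set,
 * then the remaining bits.  E.g. a 1216-byte frame:
 *
 *     1216 = 0b100_1100_0000  ->  0xc0, 0x09
 */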
2448
2449 #if !defined(__ELF__)
2450 /* Host machine without ELF. */
2451 #elif TCG_TARGET_REG_BITS == 64
2452 #define ELF_HOST_MACHINE EM_X86_64
2453 static const DebugFrame debug_frame = {
2454 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2455 .h.cie.id = -1,
2456 .h.cie.version = 1,
2457 .h.cie.code_align = 1,
2458 .h.cie.data_align = 0x78, /* sleb128 -8 */
2459 .h.cie.return_column = 16,
2460
2461 /* Total FDE size does not include the "len" member. */
2462 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2463
2464 .fde_def_cfa = {
2465 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2466 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2467 (FRAME_SIZE >> 7)
2468 },
2469 .fde_reg_ofs = {
2470 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2471 /* The following ordering must match tcg_target_callee_save_regs. */
2472 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2473 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2474 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2475 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2476 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2477 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2478 }
2479 };
2480 #else
2481 #define ELF_HOST_MACHINE EM_386
2482 static const DebugFrame debug_frame = {
2483 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2484 .h.cie.id = -1,
2485 .h.cie.version = 1,
2486 .h.cie.code_align = 1,
2487 .h.cie.data_align = 0x7c, /* sleb128 -4 */
2488 .h.cie.return_column = 8,
2489
2490 /* Total FDE size does not include the "len" member. */
2491 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2492
2493 .fde_def_cfa = {
2494 12, 4, /* DW_CFA_def_cfa %esp, ... */
2495 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2496 (FRAME_SIZE >> 7)
2497 },
2498 .fde_reg_ofs = {
2499 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2500 /* The following ordering must match tcg_target_callee_save_regs. */
2501 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2502 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2503 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2504 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2505 }
2506 };
2507 #endif
2508
2509 #if defined(ELF_HOST_MACHINE)
2510 void tcg_register_jit(void *buf, size_t buf_size)
2511 {
2512 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2513 }
2514 #endif