tcg/i386: Fully convert tcg_target_op_def
[mirror_qemu.git] tcg/i386/tcg-target.inc.c
1 /*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "tcg-be-ldst.h"
26
27 #ifdef CONFIG_DEBUG_TCG
28 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
29 #if TCG_TARGET_REG_BITS == 64
30 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
31 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
32 #else
33 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
34 #endif
35 };
36 #endif
37
38 static const int tcg_target_reg_alloc_order[] = {
39 #if TCG_TARGET_REG_BITS == 64
40 TCG_REG_RBP,
41 TCG_REG_RBX,
42 TCG_REG_R12,
43 TCG_REG_R13,
44 TCG_REG_R14,
45 TCG_REG_R15,
46 TCG_REG_R10,
47 TCG_REG_R11,
48 TCG_REG_R9,
49 TCG_REG_R8,
50 TCG_REG_RCX,
51 TCG_REG_RDX,
52 TCG_REG_RSI,
53 TCG_REG_RDI,
54 TCG_REG_RAX,
55 #else
56 TCG_REG_EBX,
57 TCG_REG_ESI,
58 TCG_REG_EDI,
59 TCG_REG_EBP,
60 TCG_REG_ECX,
61 TCG_REG_EDX,
62 TCG_REG_EAX,
63 #endif
64 };
65
66 static const int tcg_target_call_iarg_regs[] = {
67 #if TCG_TARGET_REG_BITS == 64
68 #if defined(_WIN64)
69 TCG_REG_RCX,
70 TCG_REG_RDX,
71 #else
72 TCG_REG_RDI,
73 TCG_REG_RSI,
74 TCG_REG_RDX,
75 TCG_REG_RCX,
76 #endif
77 TCG_REG_R8,
78 TCG_REG_R9,
79 #else
80 /* 32 bit mode uses stack based calling convention (GCC default). */
81 #endif
82 };
83
84 static const int tcg_target_call_oarg_regs[] = {
85 TCG_REG_EAX,
86 #if TCG_TARGET_REG_BITS == 32
87 TCG_REG_EDX
88 #endif
89 };
90
91 /* Constants we accept. */
92 #define TCG_CT_CONST_S32 0x100  /* value fits in a sign-extended 32-bit field */
93 #define TCG_CT_CONST_U32 0x200  /* value fits in a zero-extended 32-bit field */
94 #define TCG_CT_CONST_I32 0x400  /* ~value fits in a sign-extended 32-bit field */
95
96 /* Registers used with L constraint, which are the first argument
97 registers on x86_64, and two random call clobbered registers on
98 i386. */
99 #if TCG_TARGET_REG_BITS == 64
100 # define TCG_REG_L0 tcg_target_call_iarg_regs[0]
101 # define TCG_REG_L1 tcg_target_call_iarg_regs[1]
102 #else
103 # define TCG_REG_L0 TCG_REG_EAX
104 # define TCG_REG_L1 TCG_REG_EDX
105 #endif
106
107 /* The host compiler should supply <cpuid.h> to enable runtime feature
108 detection, as we're not going to go so far as our own inline assembly.
109 If not available, default values will be assumed. */
110 #if defined(CONFIG_CPUID_H)
111 #include <cpuid.h>
112 #endif
113
114 /* For 32-bit, we are going to attempt to determine at runtime whether cmov
115 is available. */
116 #if TCG_TARGET_REG_BITS == 64
117 # define have_cmov 1
118 #elif defined(CONFIG_CPUID_H) && defined(bit_CMOV)
119 static bool have_cmov;
120 #else
121 # define have_cmov 0
122 #endif
123
124 /* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
125 going to attempt to determine at runtime whether movbe is available. */
126 #if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
127 static bool have_movbe;
128 #else
129 # define have_movbe 0
130 #endif
131
132 /* We need this symbol in tcg-target.h, and we can't properly conditionalize
133 it there. Therefore we always define the variable. */
134 bool have_bmi1;
135
136 #if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
137 static bool have_bmi2;
138 #else
139 # define have_bmi2 0
140 #endif
141
142 static tcg_insn_unit *tb_ret_addr;
143
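/* Apply a relocation: add ADDEND to VALUE and patch the result into the code
   at CODE_PTR as a 32-bit or 8-bit pc-relative displacement.  */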
144 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
145 intptr_t value, intptr_t addend)
146 {
147 value += addend;
148 switch(type) {
149 case R_386_PC32:
150 value -= (uintptr_t)code_ptr;
151 if (value != (int32_t)value) {
152 tcg_abort();
153 }
154 tcg_patch32(code_ptr, value);
155 break;
156 case R_386_PC8:
157 value -= (uintptr_t)code_ptr;
158 if (value != (int8_t)value) {
159 tcg_abort();
160 }
161 tcg_patch8(code_ptr, value);
162 break;
163 default:
164 tcg_abort();
165 }
166 }
167
168 /* parse target specific constraints */
169 static const char *target_parse_constraint(TCGArgConstraint *ct,
170 const char *ct_str, TCGType type)
171 {
172 switch(*ct_str++) {
173 case 'a':
174 ct->ct |= TCG_CT_REG;
175 tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
176 break;
177 case 'b':
178 ct->ct |= TCG_CT_REG;
179 tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
180 break;
181 case 'c':
182 case_c:
183 ct->ct |= TCG_CT_REG;
184 tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
185 break;
186 case 'd':
187 ct->ct |= TCG_CT_REG;
188 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
189 break;
190 case 'S':
191 ct->ct |= TCG_CT_REG;
192 tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
193 break;
194 case 'D':
195 ct->ct |= TCG_CT_REG;
196 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
197 break;
198 case 'q':
199 ct->ct |= TCG_CT_REG;
200 if (TCG_TARGET_REG_BITS == 64) {
201 tcg_regset_set32(ct->u.regs, 0, 0xffff);
202 } else {
203 tcg_regset_set32(ct->u.regs, 0, 0xf);
204 }
205 break;
206 case 'Q':
207 ct->ct |= TCG_CT_REG;
208 tcg_regset_set32(ct->u.regs, 0, 0xf);
209 break;
210 case 'r':
211 case_r:
212 ct->ct |= TCG_CT_REG;
213 if (TCG_TARGET_REG_BITS == 64) {
214 tcg_regset_set32(ct->u.regs, 0, 0xffff);
215 } else {
216 tcg_regset_set32(ct->u.regs, 0, 0xff);
217 }
218 break;
219 case 'C':
220 /* With SHRX et al, we need not use ECX as shift count register. */
221 if (have_bmi2) {
222 goto case_r;
223 } else {
224 goto case_c;
225 }
226
227 /* qemu_ld/st address constraint */
228 case 'L':
229 ct->ct |= TCG_CT_REG;
230 if (TCG_TARGET_REG_BITS == 64) {
231 tcg_regset_set32(ct->u.regs, 0, 0xffff);
232 } else {
233 tcg_regset_set32(ct->u.regs, 0, 0xff);
234 }
235 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
236 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
237 break;
238
239 case 'e':
240 ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_S32);
241 break;
242 case 'Z':
243 ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_U32);
244 break;
245 case 'I':
246 ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_I32);
247 break;
248
249 default:
250 return NULL;
251 }
252 return ct_str;
253 }
254
255 /* test if a constant matches the constraint */
256 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
257 const TCGArgConstraint *arg_ct)
258 {
259 int ct = arg_ct->ct;
260 if (ct & TCG_CT_CONST) {
261 return 1;
262 }
263 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
264 return 1;
265 }
266 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
267 return 1;
268 }
269 if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
270 return 1;
271 }
272 return 0;
273 }
274
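/* The low three bits of a register number: the part encoded in a ModRM or
   SIB field.  On x86_64 the fourth bit travels in the REX prefix instead.  */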
275 #if TCG_TARGET_REG_BITS == 64
276 # define LOWREGMASK(x) ((x) & 7)
277 #else
278 # define LOWREGMASK(x) (x)
279 #endif
280
281 #define P_EXT 0x100 /* 0x0f opcode prefix */
282 #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */
283 #define P_DATA16 0x400 /* 0x66 opcode prefix */
284 #if TCG_TARGET_REG_BITS == 64
285 # define P_ADDR32 0x800 /* 0x67 opcode prefix */
286 # define P_REXW 0x1000 /* Set REX.W = 1 */
287 # define P_REXB_R 0x2000 /* REG field as byte register */
288 # define P_REXB_RM 0x4000 /* R/M field as byte register */
289 # define P_GS 0x8000 /* gs segment override */
290 #else
291 # define P_ADDR32 0
292 # define P_REXW 0
293 # define P_REXB_R 0
294 # define P_REXB_RM 0
295 # define P_GS 0
296 #endif
297 #define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */
298 #define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */
299
300 #define OPC_ARITH_EvIz (0x81)
301 #define OPC_ARITH_EvIb (0x83)
302 #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
303 #define OPC_ANDN (0xf2 | P_EXT38)
304 #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
305 #define OPC_BSWAP (0xc8 | P_EXT)
306 #define OPC_CALL_Jz (0xe8)
307 #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
308 #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
309 #define OPC_DEC_r32 (0x48)
310 #define OPC_IMUL_GvEv (0xaf | P_EXT)
311 #define OPC_IMUL_GvEvIb (0x6b)
312 #define OPC_IMUL_GvEvIz (0x69)
313 #define OPC_INC_r32 (0x40)
314 #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
315 #define OPC_JCC_short (0x70) /* ... plus condition code */
316 #define OPC_JMP_long (0xe9)
317 #define OPC_JMP_short (0xeb)
318 #define OPC_LEA (0x8d)
319 #define OPC_MOVB_EvGv (0x88) /* stores, more or less */
320 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
321 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
322 #define OPC_MOVB_EvIz (0xc6)
323 #define OPC_MOVL_EvIz (0xc7)
324 #define OPC_MOVL_Iv (0xb8)
325 #define OPC_MOVBE_GyMy (0xf0 | P_EXT38)
326 #define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
327 #define OPC_MOVSBL (0xbe | P_EXT)
328 #define OPC_MOVSWL (0xbf | P_EXT)
329 #define OPC_MOVSLQ (0x63 | P_REXW)
330 #define OPC_MOVZBL (0xb6 | P_EXT)
331 #define OPC_MOVZWL (0xb7 | P_EXT)
332 #define OPC_POP_r32 (0x58)
333 #define OPC_PUSH_r32 (0x50)
334 #define OPC_PUSH_Iv (0x68)
335 #define OPC_PUSH_Ib (0x6a)
336 #define OPC_RET (0xc3)
337 #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
338 #define OPC_SHIFT_1 (0xd1)
339 #define OPC_SHIFT_Ib (0xc1)
340 #define OPC_SHIFT_cl (0xd3)
341 #define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
342 #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
343 #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
344 #define OPC_TESTL (0x85)
345 #define OPC_XCHG_ax_r32 (0x90)
346
347 #define OPC_GRP3_Ev (0xf7)
348 #define OPC_GRP5 (0xff)
349
350 /* Group 1 opcode extensions for 0x80-0x83.
351 These are also used as modifiers for OPC_ARITH. */
352 #define ARITH_ADD 0
353 #define ARITH_OR 1
354 #define ARITH_ADC 2
355 #define ARITH_SBB 3
356 #define ARITH_AND 4
357 #define ARITH_SUB 5
358 #define ARITH_XOR 6
359 #define ARITH_CMP 7
360
361 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
362 #define SHIFT_ROL 0
363 #define SHIFT_ROR 1
364 #define SHIFT_SHL 4
365 #define SHIFT_SHR 5
366 #define SHIFT_SAR 7
367
368 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
369 #define EXT3_NOT 2
370 #define EXT3_NEG 3
371 #define EXT3_MUL 4
372 #define EXT3_IMUL 5
373 #define EXT3_DIV 6
374 #define EXT3_IDIV 7
375
376 /* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
377 #define EXT5_INC_Ev 0
378 #define EXT5_DEC_Ev 1
379 #define EXT5_CALLN_Ev 2
380 #define EXT5_JMPN_Ev 4
381
382 /* Condition codes to be added to OPC_JCC_{long,short}. */
383 #define JCC_JMP (-1)
384 #define JCC_JO 0x0
385 #define JCC_JNO 0x1
386 #define JCC_JB 0x2
387 #define JCC_JAE 0x3
388 #define JCC_JE 0x4
389 #define JCC_JNE 0x5
390 #define JCC_JBE 0x6
391 #define JCC_JA 0x7
392 #define JCC_JS 0x8
393 #define JCC_JNS 0x9
394 #define JCC_JP 0xa
395 #define JCC_JNP 0xb
396 #define JCC_JL 0xc
397 #define JCC_JGE 0xd
398 #define JCC_JLE 0xe
399 #define JCC_JG 0xf
400
401 static const uint8_t tcg_cond_to_jcc[] = {
402 [TCG_COND_EQ] = JCC_JE,
403 [TCG_COND_NE] = JCC_JNE,
404 [TCG_COND_LT] = JCC_JL,
405 [TCG_COND_GE] = JCC_JGE,
406 [TCG_COND_LE] = JCC_JLE,
407 [TCG_COND_GT] = JCC_JG,
408 [TCG_COND_LTU] = JCC_JB,
409 [TCG_COND_GEU] = JCC_JAE,
410 [TCG_COND_LEU] = JCC_JBE,
411 [TCG_COND_GTU] = JCC_JA,
412 };
413
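/* Emit the opcode byte(s) for OPC, preceded by any prefixes requested via its
   P_* flags.  In 64-bit mode, R, RM and X carry the register numbers destined
   for the ModRM reg, r/m and SIB index fields, so that the high bit of each
   can be folded into a REX prefix when required.  */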
414 #if TCG_TARGET_REG_BITS == 64
415 static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
416 {
417 int rex;
418
419 if (opc & P_GS) {
420 tcg_out8(s, 0x65);
421 }
422 if (opc & P_DATA16) {
423 /* We should never be asking for both 16 and 64-bit operation. */
424 tcg_debug_assert((opc & P_REXW) == 0);
425 tcg_out8(s, 0x66);
426 }
427 if (opc & P_ADDR32) {
428 tcg_out8(s, 0x67);
429 }
430
431 rex = 0;
432 rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
433 rex |= (r & 8) >> 1; /* REX.R */
434 rex |= (x & 8) >> 2; /* REX.X */
435 rex |= (rm & 8) >> 3; /* REX.B */
436
437 /* P_REXB_{R,RM} indicates that the given register is the low byte.
438 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
439 as otherwise the encoding indicates %[abcd]h. Note that the values
440 that are ORed in merely indicate that the REX byte must be present;
441 those bits get discarded in output. */
442 rex |= opc & (r >= 4 ? P_REXB_R : 0);
443 rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
444
445 if (rex) {
446 tcg_out8(s, (uint8_t)(rex | 0x40));
447 }
448
449 if (opc & (P_EXT | P_EXT38)) {
450 tcg_out8(s, 0x0f);
451 if (opc & P_EXT38) {
452 tcg_out8(s, 0x38);
453 }
454 }
455
456 tcg_out8(s, opc);
457 }
458 #else
459 static void tcg_out_opc(TCGContext *s, int opc)
460 {
461 if (opc & P_DATA16) {
462 tcg_out8(s, 0x66);
463 }
464 if (opc & (P_EXT | P_EXT38)) {
465 tcg_out8(s, 0x0f);
466 if (opc & P_EXT38) {
467 tcg_out8(s, 0x38);
468 }
469 }
470 tcg_out8(s, opc);
471 }
472 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
473 the 32-bit compilation paths. This method works with all versions of gcc,
474 whereas relying on optimization may not be able to exclude them. */
475 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
476 #endif
477
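/* Emit OPC with a register-direct ModRM byte (mod = 3): R in the reg field,
   RM in the r/m field.  */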
478 static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
479 {
480 tcg_out_opc(s, opc, r, rm, 0);
481 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
482 }
483
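/* As tcg_out_modrm, but emit a VEX prefix instead of legacy/REX prefixes.
   V is the extra (non-destructive) source register, encoded in VEX.vvvv.  */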
484 static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
485 {
486 int tmp;
487
488 if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
489 /* Three byte VEX prefix. */
490 tcg_out8(s, 0xc4);
491
492 /* VEX.m-mmmm */
493 if (opc & P_EXT38) {
494 tmp = 2;
495 } else if (opc & P_EXT) {
496 tmp = 1;
497 } else {
498 tcg_abort();
499 }
500 tmp |= 0x40; /* VEX.X */
501 tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */
502 tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */
503 tcg_out8(s, tmp);
504
505 tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */
506 } else {
507 /* Two byte VEX prefix. */
508 tcg_out8(s, 0xc5);
509
510 tmp = (r & 8 ? 0 : 0x80); /* VEX.R */
511 }
512 /* VEX.pp */
513 if (opc & P_DATA16) {
514 tmp |= 1; /* 0x66 */
515 } else if (opc & P_SIMDF3) {
516 tmp |= 2; /* 0xf3 */
517 } else if (opc & P_SIMDF2) {
518 tmp |= 3; /* 0xf2 */
519 }
520 tmp |= (~v & 15) << 3; /* VEX.vvvv */
521 tcg_out8(s, tmp);
522 tcg_out8(s, opc);
523 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
524 }
525
526 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
527    A missing RM or INDEX is indicated by a negative value.  In 64-bit
528 mode for absolute addresses, ~RM is the size of the immediate operand
529 that will follow the instruction. */
530
531 static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
532 int index, int shift, intptr_t offset)
533 {
534 int mod, len;
535
536 if (index < 0 && rm < 0) {
537 if (TCG_TARGET_REG_BITS == 64) {
538 /* Try for a rip-relative addressing mode. This has replaced
539 the 32-bit-mode absolute addressing encoding. */
540 intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
541 intptr_t disp = offset - pc;
542 if (disp == (int32_t)disp) {
543 tcg_out_opc(s, opc, r, 0, 0);
544 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
545 tcg_out32(s, disp);
546 return;
547 }
548
549 /* Try for an absolute address encoding. This requires the
550 use of the MODRM+SIB encoding and is therefore larger than
551 rip-relative addressing. */
552 if (offset == (int32_t)offset) {
553 tcg_out_opc(s, opc, r, 0, 0);
554 tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
555 tcg_out8(s, (4 << 3) | 5);
556 tcg_out32(s, offset);
557 return;
558 }
559
560 /* ??? The memory isn't directly addressable. */
561 tcg_abort();
562 } else {
563 /* Absolute address. */
564 tcg_out_opc(s, opc, r, 0, 0);
565 tcg_out8(s, (r << 3) | 5);
566 tcg_out32(s, offset);
567 return;
568 }
569 }
570
571 /* Find the length of the immediate addend. Note that the encoding
572 that would be used for (%ebp) indicates absolute addressing. */
573 if (rm < 0) {
574 mod = 0, len = 4, rm = 5;
575 } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
576 mod = 0, len = 0;
577 } else if (offset == (int8_t)offset) {
578 mod = 0x40, len = 1;
579 } else {
580 mod = 0x80, len = 4;
581 }
582
583 /* Use a single byte MODRM format if possible. Note that the encoding
584 that would be used for %esp is the escape to the two byte form. */
585 if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
586 /* Single byte MODRM format. */
587 tcg_out_opc(s, opc, r, rm, 0);
588 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
589 } else {
590 /* Two byte MODRM+SIB format. */
591
592 /* Note that the encoding that would place %esp into the index
593 field indicates no index register. In 64-bit mode, the REX.X
594 bit counts, so %r12 can be used as the index. */
595 if (index < 0) {
596 index = 4;
597 } else {
598 tcg_debug_assert(index != TCG_REG_ESP);
599 }
600
601 tcg_out_opc(s, opc, r, rm, index);
602 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
603 tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
604 }
605
606 if (len == 1) {
607 tcg_out8(s, offset);
608 } else if (len == 4) {
609 tcg_out32(s, offset);
610 }
611 }
612
613 /* A simplification of the above with no index or shift. */
614 static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
615 int rm, intptr_t offset)
616 {
617 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
618 }
619
620 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
621 static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
622 {
623 /* Propagate an opcode prefix, such as P_REXW. */
624 int ext = subop & ~0x7;
625 subop &= 0x7;
626
627 tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
628 }
629
630 static inline void tcg_out_mov(TCGContext *s, TCGType type,
631 TCGReg ret, TCGReg arg)
632 {
633 if (arg != ret) {
634 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
635 tcg_out_modrm(s, opc, ret, arg);
636 }
637 }
638
639 static void tcg_out_movi(TCGContext *s, TCGType type,
640 TCGReg ret, tcg_target_long arg)
641 {
642 tcg_target_long diff;
643
644 if (arg == 0) {
645 tgen_arithr(s, ARITH_XOR, ret, ret);
646 return;
647 }
648 if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
649 tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
650 tcg_out32(s, arg);
651 return;
652 }
653 if (arg == (int32_t)arg) {
654 tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
655 tcg_out32(s, arg);
656 return;
657 }
658
659 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
660 diff = arg - ((uintptr_t)s->code_ptr + 7);
661 if (diff == (int32_t)diff) {
662 tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
663 tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
664 tcg_out32(s, diff);
665 return;
666 }
667
668 tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
669 tcg_out64(s, arg);
670 }
671
672 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
673 {
674 if (val == (int8_t)val) {
675 tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
676 tcg_out8(s, val);
677 } else if (val == (int32_t)val) {
678 tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
679 tcg_out32(s, val);
680 } else {
681 tcg_abort();
682 }
683 }
684
685 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
686 {
687    /* Given the strength of x86 memory ordering, we only need to care
688       about store-load ordering.  Experimentally, "lock orl $0,0(%esp)" is
689 faster than "mfence", so don't bother with the sse insn. */
690 if (a0 & TCG_MO_ST_LD) {
691 tcg_out8(s, 0xf0);
692 tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0);
693 tcg_out8(s, 0);
694 }
695 }
696
697 static inline void tcg_out_push(TCGContext *s, int reg)
698 {
699 tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
700 }
701
702 static inline void tcg_out_pop(TCGContext *s, int reg)
703 {
704 tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
705 }
706
707 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
708 TCGReg arg1, intptr_t arg2)
709 {
710 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
711 tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
712 }
713
714 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
715 TCGReg arg1, intptr_t arg2)
716 {
717 int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
718 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
719 }
720
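/* Store the immediate VAL of the given TYPE to BASE+OFS.  Returns false if
   VAL does not fit in the 32-bit immediate field of the MOV instruction.  */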
721 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
722 TCGReg base, intptr_t ofs)
723 {
724 int rexw = 0;
725 if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
726 if (val != (int32_t)val) {
727 return false;
728 }
729 rexw = P_REXW;
730 }
731 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
732 tcg_out32(s, val);
733 return true;
734 }
735
736 static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
737 {
738 /* Propagate an opcode prefix, such as P_DATA16. */
739 int ext = subopc & ~0x7;
740 subopc &= 0x7;
741
742 if (count == 1) {
743 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
744 } else {
745 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
746 tcg_out8(s, count);
747 }
748 }
749
750 static inline void tcg_out_bswap32(TCGContext *s, int reg)
751 {
752 tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
753 }
754
755 static inline void tcg_out_rolw_8(TCGContext *s, int reg)
756 {
757 tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
758 }
759
760 static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
761 {
762 /* movzbl */
763 tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
764 tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
765 }
766
767 static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
768 {
769 /* movsbl */
770 tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
771 tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
772 }
773
774 static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
775 {
776 /* movzwl */
777 tcg_out_modrm(s, OPC_MOVZWL, dest, src);
778 }
779
780 static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
781 {
782 /* movsw[lq] */
783 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
784 }
785
786 static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
787 {
788 /* 32-bit mov zero extends. */
789 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
790 }
791
792 static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
793 {
794 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
795 }
796
797 static inline void tcg_out_bswap64(TCGContext *s, int reg)
798 {
799 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
800 }
801
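/* Emit the ARITH_* operation C (optionally ORed with P_REXW) with immediate
   VAL applied to register R0.  A nonzero CF means the carry flag result is
   needed, which disables the INC/DEC shortcut below since those insns do not
   update CF.  */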
802 static void tgen_arithi(TCGContext *s, int c, int r0,
803 tcg_target_long val, int cf)
804 {
805 int rexw = 0;
806
807 if (TCG_TARGET_REG_BITS == 64) {
808 rexw = c & -8;
809 c &= 7;
810 }
811
812 /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
813 partial flags update stalls on Pentium4 and are not recommended
814 by current Intel optimization manuals. */
815 if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
816 int is_inc = (c == ARITH_ADD) ^ (val < 0);
817 if (TCG_TARGET_REG_BITS == 64) {
818 /* The single-byte increment encodings are re-tasked as the
819 REX prefixes. Use the MODRM encoding. */
820 tcg_out_modrm(s, OPC_GRP5 + rexw,
821 (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
822 } else {
823 tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
824 }
825 return;
826 }
827
828 if (c == ARITH_AND) {
829 if (TCG_TARGET_REG_BITS == 64) {
830 if (val == 0xffffffffu) {
831 tcg_out_ext32u(s, r0, r0);
832 return;
833 }
834 if (val == (uint32_t)val) {
835 /* AND with no high bits set can use a 32-bit operation. */
836 rexw = 0;
837 }
838 }
839 if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
840 tcg_out_ext8u(s, r0, r0);
841 return;
842 }
843 if (val == 0xffffu) {
844 tcg_out_ext16u(s, r0, r0);
845 return;
846 }
847 }
848
849 if (val == (int8_t)val) {
850 tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
851 tcg_out8(s, val);
852 return;
853 }
854 if (rexw == 0 || val == (int32_t)val) {
855 tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
856 tcg_out32(s, val);
857 return;
858 }
859
860 tcg_abort();
861 }
862
863 static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
864 {
865 if (val != 0) {
866 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
867 }
868 }
869
870 /* Use SMALL != 0 to force a short forward branch. */
871 static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
872 {
873 int32_t val, val1;
874
875 if (l->has_value) {
876 val = tcg_pcrel_diff(s, l->u.value_ptr);
877 val1 = val - 2;
878 if ((int8_t)val1 == val1) {
879 if (opc == -1) {
880 tcg_out8(s, OPC_JMP_short);
881 } else {
882 tcg_out8(s, OPC_JCC_short + opc);
883 }
884 tcg_out8(s, val1);
885 } else {
886 if (small) {
887 tcg_abort();
888 }
889 if (opc == -1) {
890 tcg_out8(s, OPC_JMP_long);
891 tcg_out32(s, val - 5);
892 } else {
893 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
894 tcg_out32(s, val - 6);
895 }
896 }
897 } else if (small) {
898 if (opc == -1) {
899 tcg_out8(s, OPC_JMP_short);
900 } else {
901 tcg_out8(s, OPC_JCC_short + opc);
902 }
903 tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
904 s->code_ptr += 1;
905 } else {
906 if (opc == -1) {
907 tcg_out8(s, OPC_JMP_long);
908 } else {
909 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
910 }
911 tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
912 s->code_ptr += 4;
913 }
914 }
915
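/* Set the condition flags for ARG1 versus ARG2: a TEST of the register
   against itself when comparing with constant zero, otherwise a CMP.  */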
916 static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
917 int const_arg2, int rexw)
918 {
919 if (const_arg2) {
920 if (arg2 == 0) {
921 /* test r, r */
922 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
923 } else {
924 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
925 }
926 } else {
927 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
928 }
929 }
930
931 static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
932 TCGArg arg1, TCGArg arg2, int const_arg2,
933 TCGLabel *label, int small)
934 {
935 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
936 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
937 }
938
939 #if TCG_TARGET_REG_BITS == 64
940 static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
941 TCGArg arg1, TCGArg arg2, int const_arg2,
942 TCGLabel *label, int small)
943 {
944 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
945 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
946 }
947 #else
948 /* XXX: we implement it at the target level to avoid having to
949    handle cross-basic-block temporaries */
950 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
951 const int *const_args, int small)
952 {
953 TCGLabel *label_next = gen_new_label();
954 TCGLabel *label_this = arg_label(args[5]);
955
956 switch(args[4]) {
957 case TCG_COND_EQ:
958 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
959 label_next, 1);
960 tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
961 label_this, small);
962 break;
963 case TCG_COND_NE:
964 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
965 label_this, small);
966 tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
967 label_this, small);
968 break;
969 case TCG_COND_LT:
970 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
971 label_this, small);
972 tcg_out_jxx(s, JCC_JNE, label_next, 1);
973 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
974 label_this, small);
975 break;
976 case TCG_COND_LE:
977 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
978 label_this, small);
979 tcg_out_jxx(s, JCC_JNE, label_next, 1);
980 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
981 label_this, small);
982 break;
983 case TCG_COND_GT:
984 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
985 label_this, small);
986 tcg_out_jxx(s, JCC_JNE, label_next, 1);
987 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
988 label_this, small);
989 break;
990 case TCG_COND_GE:
991 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
992 label_this, small);
993 tcg_out_jxx(s, JCC_JNE, label_next, 1);
994 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
995 label_this, small);
996 break;
997 case TCG_COND_LTU:
998 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
999 label_this, small);
1000 tcg_out_jxx(s, JCC_JNE, label_next, 1);
1001 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
1002 label_this, small);
1003 break;
1004 case TCG_COND_LEU:
1005 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
1006 label_this, small);
1007 tcg_out_jxx(s, JCC_JNE, label_next, 1);
1008 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
1009 label_this, small);
1010 break;
1011 case TCG_COND_GTU:
1012 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
1013 label_this, small);
1014 tcg_out_jxx(s, JCC_JNE, label_next, 1);
1015 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
1016 label_this, small);
1017 break;
1018 case TCG_COND_GEU:
1019 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
1020 label_this, small);
1021 tcg_out_jxx(s, JCC_JNE, label_next, 1);
1022 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
1023 label_this, small);
1024 break;
1025 default:
1026 tcg_abort();
1027 }
1028 tcg_out_label(s, label_next, s->code_ptr);
1029 }
1030 #endif
1031
1032 static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
1033 TCGArg arg1, TCGArg arg2, int const_arg2)
1034 {
1035 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
1036 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
1037 tcg_out_ext8u(s, dest, dest);
1038 }
1039
1040 #if TCG_TARGET_REG_BITS == 64
1041 static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
1042 TCGArg arg1, TCGArg arg2, int const_arg2)
1043 {
1044 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
1045 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
1046 tcg_out_ext8u(s, dest, dest);
1047 }
1048 #else
1049 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1050 const int *const_args)
1051 {
1052 TCGArg new_args[6];
1053 TCGLabel *label_true, *label_over;
1054
1055 memcpy(new_args, args+1, 5*sizeof(TCGArg));
1056
1057 if (args[0] == args[1] || args[0] == args[2]
1058 || (!const_args[3] && args[0] == args[3])
1059 || (!const_args[4] && args[0] == args[4])) {
1060 /* When the destination overlaps with one of the argument
1061 registers, don't do anything tricky. */
1062 label_true = gen_new_label();
1063 label_over = gen_new_label();
1064
1065 new_args[5] = label_arg(label_true);
1066 tcg_out_brcond2(s, new_args, const_args+1, 1);
1067
1068 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1069 tcg_out_jxx(s, JCC_JMP, label_over, 1);
1070 tcg_out_label(s, label_true, s->code_ptr);
1071
1072 tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
1073 tcg_out_label(s, label_over, s->code_ptr);
1074 } else {
1075 /* When the destination does not overlap one of the arguments,
1076 clear the destination first, jump if cond false, and emit an
1077 increment in the true case. This results in smaller code. */
1078
1079 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1080
1081 label_over = gen_new_label();
1082 new_args[4] = tcg_invert_cond(new_args[4]);
1083 new_args[5] = label_arg(label_over);
1084 tcg_out_brcond2(s, new_args, const_args+1, 1);
1085
1086 tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
1087 tcg_out_label(s, label_over, s->code_ptr);
1088 }
1089 }
1090 #endif
1091
1092 static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
1093 TCGArg c1, TCGArg c2, int const_c2,
1094 TCGArg v1)
1095 {
1096 tcg_out_cmp(s, c1, c2, const_c2, 0);
1097 if (have_cmov) {
1098 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
1099 } else {
1100 TCGLabel *over = gen_new_label();
1101 tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
1102 tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
1103 tcg_out_label(s, over, s->code_ptr);
1104 }
1105 }
1106
1107 #if TCG_TARGET_REG_BITS == 64
1108 static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
1109 TCGArg c1, TCGArg c2, int const_c2,
1110 TCGArg v1)
1111 {
1112 tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
1113 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
1114 }
1115 #endif
1116
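/* Emit a direct CALL (call != 0) or JMP to DEST when the 32-bit relative
   displacement reaches; otherwise jump or call indirectly through R10
   (64-bit hosts only -- a 32-bit displacement always reaches on i386).  */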
1117 static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
1118 {
1119 intptr_t disp = tcg_pcrel_diff(s, dest) - 5;
1120
1121 if (disp == (int32_t)disp) {
1122 tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
1123 tcg_out32(s, disp);
1124 } else {
1125 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest);
1126 tcg_out_modrm(s, OPC_GRP5,
1127 call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
1128 }
1129 }
1130
1131 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
1132 {
1133 tcg_out_branch(s, 1, dest);
1134 }
1135
1136 static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
1137 {
1138 tcg_out_branch(s, 0, dest);
1139 }
1140
1141 static void tcg_out_nopn(TCGContext *s, int n)
1142 {
1143 int i;
1144 /* Emit 1 or 2 operand size prefixes for the standard one byte nop,
1145 * "xchg %eax,%eax", forming "xchg %ax,%ax". All cores accept the
1146 * duplicate prefix, and all of the interesting recent cores can
1147 * decode and discard the duplicates in a single cycle.
1148 */
1149 tcg_debug_assert(n >= 1);
1150 for (i = 1; i < n; ++i) {
1151 tcg_out8(s, 0x66);
1152 }
1153 tcg_out8(s, 0x90);
1154 }
1155
1156 #if defined(CONFIG_SOFTMMU)
1157 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1158 * int mmu_idx, uintptr_t ra)
1159 */
1160 static void * const qemu_ld_helpers[16] = {
1161 [MO_UB] = helper_ret_ldub_mmu,
1162 [MO_LEUW] = helper_le_lduw_mmu,
1163 [MO_LEUL] = helper_le_ldul_mmu,
1164 [MO_LEQ] = helper_le_ldq_mmu,
1165 [MO_BEUW] = helper_be_lduw_mmu,
1166 [MO_BEUL] = helper_be_ldul_mmu,
1167 [MO_BEQ] = helper_be_ldq_mmu,
1168 };
1169
1170 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1171 * uintxx_t val, int mmu_idx, uintptr_t ra)
1172 */
1173 static void * const qemu_st_helpers[16] = {
1174 [MO_UB] = helper_ret_stb_mmu,
1175 [MO_LEUW] = helper_le_stw_mmu,
1176 [MO_LEUL] = helper_le_stl_mmu,
1177 [MO_LEQ] = helper_le_stq_mmu,
1178 [MO_BEUW] = helper_be_stw_mmu,
1179 [MO_BEUL] = helper_be_stl_mmu,
1180 [MO_BEQ] = helper_be_stq_mmu,
1181 };
1182
1183 /* Perform the TLB load and compare.
1184
1185 Inputs:
1186 ADDRLO and ADDRHI contain the low and high part of the address.
1187
1188 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1189
1190 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1191 This should be offsetof addr_read or addr_write.
1192
1193 Outputs:
1194 LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
1195 positions of the displacements of forward jumps to the TLB miss case.
1196
1197 Second argument register is loaded with the low part of the address.
1198 In the TLB hit case, it has been adjusted as indicated by the TLB
1199 and so is a host address. In the TLB miss case, it continues to
1200 hold a guest address.
1201
1202 First argument register is clobbered. */
1203
1204 static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1205 int mem_index, TCGMemOp opc,
1206 tcg_insn_unit **label_ptr, int which)
1207 {
1208 const TCGReg r0 = TCG_REG_L0;
1209 const TCGReg r1 = TCG_REG_L1;
1210 TCGType ttype = TCG_TYPE_I32;
1211 TCGType tlbtype = TCG_TYPE_I32;
1212 int trexw = 0, hrexw = 0, tlbrexw = 0;
1213 unsigned a_bits = get_alignment_bits(opc);
1214 unsigned s_bits = opc & MO_SIZE;
1215 unsigned a_mask = (1 << a_bits) - 1;
1216 unsigned s_mask = (1 << s_bits) - 1;
1217 target_ulong tlb_mask;
1218
1219 if (TCG_TARGET_REG_BITS == 64) {
1220 if (TARGET_LONG_BITS == 64) {
1221 ttype = TCG_TYPE_I64;
1222 trexw = P_REXW;
1223 }
1224 if (TCG_TYPE_PTR == TCG_TYPE_I64) {
1225 hrexw = P_REXW;
1226 if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
1227 tlbtype = TCG_TYPE_I64;
1228 tlbrexw = P_REXW;
1229 }
1230 }
1231 }
1232
1233 tcg_out_mov(s, tlbtype, r0, addrlo);
1234 /* If the required alignment is at least as large as the access, simply
1235 copy the address and mask. For lesser alignments, check that we don't
1236 cross pages for the complete access. */
1237 if (a_bits >= s_bits) {
1238 tcg_out_mov(s, ttype, r1, addrlo);
1239 } else {
1240 tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
1241 }
1242 tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
1243
1244 tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
1245 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1246
1247 tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);
1248 tgen_arithi(s, ARITH_AND + tlbrexw, r0,
1249 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1250
1251 tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
1252 offsetof(CPUArchState, tlb_table[mem_index][0])
1253 + which);
1254
1255 /* cmp 0(r0), r1 */
1256 tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
1257
1258 /* Prepare for both the fast path add of the tlb addend, and the slow
1259       path function argument setup.  There are two cases worth noting:
1260 For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
1261 before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
1262 copies the entire guest address for the slow path, while truncation
1263 for the 32-bit host happens with the fastpath ADDL below. */
1264 tcg_out_mov(s, ttype, r1, addrlo);
1265
1266 /* jne slow_path */
1267 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1268 label_ptr[0] = s->code_ptr;
1269 s->code_ptr += 4;
1270
1271 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1272 /* cmp 4(r0), addrhi */
1273 tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);
1274
1275 /* jne slow_path */
1276 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1277 label_ptr[1] = s->code_ptr;
1278 s->code_ptr += 4;
1279 }
1280
1281 /* TLB Hit. */
1282
1283 /* add addend(r0), r1 */
1284 tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
1285 offsetof(CPUTLBEntry, addend) - which);
1286 }
1287
1288 /*
1289 * Record the context of a call to the out of line helper code for the slow path
1290 * for a load or store, so that we can later generate the correct helper code
1291 */
1292 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1293 TCGReg datalo, TCGReg datahi,
1294 TCGReg addrlo, TCGReg addrhi,
1295 tcg_insn_unit *raddr,
1296 tcg_insn_unit **label_ptr)
1297 {
1298 TCGLabelQemuLdst *label = new_ldst_label(s);
1299
1300 label->is_ld = is_ld;
1301 label->oi = oi;
1302 label->datalo_reg = datalo;
1303 label->datahi_reg = datahi;
1304 label->addrlo_reg = addrlo;
1305 label->addrhi_reg = addrhi;
1306 label->raddr = raddr;
1307 label->label_ptr[0] = label_ptr[0];
1308 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1309 label->label_ptr[1] = label_ptr[1];
1310 }
1311 }
1312
1313 /*
1314 * Generate code for the slow path for a load at the end of block
1315 */
1316 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1317 {
1318 TCGMemOpIdx oi = l->oi;
1319 TCGMemOp opc = get_memop(oi);
1320 TCGReg data_reg;
1321 tcg_insn_unit **label_ptr = &l->label_ptr[0];
1322
1323 /* resolve label address */
1324 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
1325 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1326 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
1327 }
1328
1329 if (TCG_TARGET_REG_BITS == 32) {
1330 int ofs = 0;
1331
1332 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1333 ofs += 4;
1334
1335 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1336 ofs += 4;
1337
1338 if (TARGET_LONG_BITS == 64) {
1339 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1340 ofs += 4;
1341 }
1342
1343 tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
1344 ofs += 4;
1345
1346 tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
1347 } else {
1348 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1349 /* The second argument is already loaded with addrlo. */
1350 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
1351 tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
1352 (uintptr_t)l->raddr);
1353 }
1354
1355 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1356
1357 data_reg = l->datalo_reg;
1358 switch (opc & MO_SSIZE) {
1359 case MO_SB:
1360 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1361 break;
1362 case MO_SW:
1363 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1364 break;
1365 #if TCG_TARGET_REG_BITS == 64
1366 case MO_SL:
1367 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1368 break;
1369 #endif
1370 case MO_UB:
1371 case MO_UW:
1372 /* Note that the helpers have zero-extended to tcg_target_long. */
1373 case MO_UL:
1374 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1375 break;
1376 case MO_Q:
1377 if (TCG_TARGET_REG_BITS == 64) {
1378 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1379 } else if (data_reg == TCG_REG_EDX) {
1380 /* xchg %edx, %eax */
1381 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1382 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
1383 } else {
1384 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1385 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
1386 }
1387 break;
1388 default:
1389 tcg_abort();
1390 }
1391
1392     /* Jump to the code corresponding to next IR of qemu_ld */
1393 tcg_out_jmp(s, l->raddr);
1394 }
1395
1396 /*
1397 * Generate code for the slow path for a store at the end of block
1398 */
1399 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1400 {
1401 TCGMemOpIdx oi = l->oi;
1402 TCGMemOp opc = get_memop(oi);
1403 TCGMemOp s_bits = opc & MO_SIZE;
1404 tcg_insn_unit **label_ptr = &l->label_ptr[0];
1405 TCGReg retaddr;
1406
1407 /* resolve label address */
1408 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
1409 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1410 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
1411 }
1412
1413 if (TCG_TARGET_REG_BITS == 32) {
1414 int ofs = 0;
1415
1416 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1417 ofs += 4;
1418
1419 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1420 ofs += 4;
1421
1422 if (TARGET_LONG_BITS == 64) {
1423 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1424 ofs += 4;
1425 }
1426
1427 tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
1428 ofs += 4;
1429
1430 if (s_bits == MO_64) {
1431 tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
1432 ofs += 4;
1433 }
1434
1435 tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
1436 ofs += 4;
1437
1438 retaddr = TCG_REG_EAX;
1439 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1440 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
1441 } else {
1442 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1443 /* The second argument is already loaded with addrlo. */
1444 tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1445 tcg_target_call_iarg_regs[2], l->datalo_reg);
1446 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
1447
1448 if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
1449 retaddr = tcg_target_call_iarg_regs[4];
1450 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1451 } else {
1452 retaddr = TCG_REG_RAX;
1453 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1454 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
1455 TCG_TARGET_CALL_STACK_OFFSET);
1456 }
1457 }
1458
1459 /* "Tail call" to the helper, with the return address back inline. */
1460 tcg_out_push(s, retaddr);
1461 tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1462 }
1463 #elif defined(__x86_64__) && defined(__linux__)
1464 # include <asm/prctl.h>
1465 # include <sys/prctl.h>
1466
1467 int arch_prctl(int code, unsigned long addr);
1468
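/* For user-only emulation on an x86_64 Linux host, stash guest_base in the
   %gs segment base via arch_prctl, so that guest addresses can be accessed
   with a gs segment override (P_GS) instead of an explicit add.  */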
1469 static int guest_base_flags;
1470 static inline void setup_guest_base_seg(void)
1471 {
1472 if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
1473 guest_base_flags = P_GS;
1474 }
1475 }
1476 #else
1477 # define guest_base_flags 0
1478 static inline void setup_guest_base_seg(void) { }
1479 #endif /* SOFTMMU */
1480
1481 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1482 TCGReg base, int index, intptr_t ofs,
1483 int seg, TCGMemOp memop)
1484 {
1485 const TCGMemOp real_bswap = memop & MO_BSWAP;
1486 TCGMemOp bswap = real_bswap;
1487 int movop = OPC_MOVL_GvEv;
1488
1489 if (have_movbe && real_bswap) {
1490 bswap = 0;
1491 movop = OPC_MOVBE_GyMy;
1492 }
1493
1494 switch (memop & MO_SSIZE) {
1495 case MO_UB:
1496 tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
1497 base, index, 0, ofs);
1498 break;
1499 case MO_SB:
1500 tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
1501 base, index, 0, ofs);
1502 break;
1503 case MO_UW:
1504 tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
1505 base, index, 0, ofs);
1506 if (real_bswap) {
1507 tcg_out_rolw_8(s, datalo);
1508 }
1509 break;
1510 case MO_SW:
1511 if (real_bswap) {
1512 if (have_movbe) {
1513 tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
1514 datalo, base, index, 0, ofs);
1515 } else {
1516 tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
1517 base, index, 0, ofs);
1518 tcg_out_rolw_8(s, datalo);
1519 }
1520 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1521 } else {
1522 tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
1523 datalo, base, index, 0, ofs);
1524 }
1525 break;
1526 case MO_UL:
1527 tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
1528 if (bswap) {
1529 tcg_out_bswap32(s, datalo);
1530 }
1531 break;
1532 #if TCG_TARGET_REG_BITS == 64
1533 case MO_SL:
1534 if (real_bswap) {
1535 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1536 base, index, 0, ofs);
1537 if (bswap) {
1538 tcg_out_bswap32(s, datalo);
1539 }
1540 tcg_out_ext32s(s, datalo, datalo);
1541 } else {
1542 tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
1543 base, index, 0, ofs);
1544 }
1545 break;
1546 #endif
1547 case MO_Q:
1548 if (TCG_TARGET_REG_BITS == 64) {
1549 tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
1550 base, index, 0, ofs);
1551 if (bswap) {
1552 tcg_out_bswap64(s, datalo);
1553 }
1554 } else {
1555 if (real_bswap) {
1556 int t = datalo;
1557 datalo = datahi;
1558 datahi = t;
1559 }
1560 if (base != datalo) {
1561 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1562 base, index, 0, ofs);
1563 tcg_out_modrm_sib_offset(s, movop + seg, datahi,
1564 base, index, 0, ofs + 4);
1565 } else {
1566 tcg_out_modrm_sib_offset(s, movop + seg, datahi,
1567 base, index, 0, ofs + 4);
1568 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1569 base, index, 0, ofs);
1570 }
1571 if (bswap) {
1572 tcg_out_bswap32(s, datalo);
1573 tcg_out_bswap32(s, datahi);
1574 }
1575 }
1576 break;
1577 default:
1578 tcg_abort();
1579 }
1580 }
1581
1582 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1583    EAX.  It will be useful once fixed-register globals are less
1584 common. */
1585 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1586 {
1587 TCGReg datalo, datahi, addrlo;
1588 TCGReg addrhi __attribute__((unused));
1589 TCGMemOpIdx oi;
1590 TCGMemOp opc;
1591 #if defined(CONFIG_SOFTMMU)
1592 int mem_index;
1593 tcg_insn_unit *label_ptr[2];
1594 #endif
1595
1596 datalo = *args++;
1597 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1598 addrlo = *args++;
1599 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1600 oi = *args++;
1601 opc = get_memop(oi);
1602
1603 #if defined(CONFIG_SOFTMMU)
1604 mem_index = get_mmuidx(oi);
1605
1606 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
1607 label_ptr, offsetof(CPUTLBEntry, addr_read));
1608
1609 /* TLB Hit. */
1610 tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
1611
1612 /* Record the current context of a load into ldst label */
1613 add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1614 s->code_ptr, label_ptr);
1615 #else
1616 {
1617 int32_t offset = guest_base;
1618 TCGReg base = addrlo;
1619 int index = -1;
1620 int seg = 0;
1621
1622         /* For a 32-bit guest, the high 32 bits may contain garbage and
1623            must be ignored.  We can do that with the ADDR32 prefix if
1624            we're not using a guest base, or when using segmentation.
1625            Otherwise we need to zero-extend manually. */
1626 if (guest_base == 0 || guest_base_flags) {
1627 seg = guest_base_flags;
1628 offset = 0;
1629 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1630 seg |= P_ADDR32;
1631 }
1632 } else if (TCG_TARGET_REG_BITS == 64) {
1633 if (TARGET_LONG_BITS == 32) {
1634 tcg_out_ext32u(s, TCG_REG_L0, base);
1635 base = TCG_REG_L0;
1636 }
1637 if (offset != guest_base) {
1638 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
1639 index = TCG_REG_L1;
1640 offset = 0;
1641 }
1642 }
1643
1644 tcg_out_qemu_ld_direct(s, datalo, datahi,
1645 base, index, offset, seg, opc);
1646 }
1647 #endif
1648 }
1649
1650 static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1651 TCGReg base, intptr_t ofs, int seg,
1652 TCGMemOp memop)
1653 {
1654 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1655 we could perform the bswap twice to restore the original value
1656 instead of moving to the scratch. But as it is, the L constraint
1657 means that TCG_REG_L0 is definitely free here. */
1658 const TCGReg scratch = TCG_REG_L0;
1659 const TCGMemOp real_bswap = memop & MO_BSWAP;
1660 TCGMemOp bswap = real_bswap;
1661 int movop = OPC_MOVL_EvGv;
1662
1663 if (have_movbe && real_bswap) {
1664 bswap = 0;
1665 movop = OPC_MOVBE_MyGy;
1666 }
1667
1668 switch (memop & MO_SIZE) {
1669 case MO_8:
1670 /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
1671 Use the scratch register if necessary. */
1672 if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
1673 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1674 datalo = scratch;
1675 }
1676 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
1677 datalo, base, ofs);
1678 break;
1679 case MO_16:
1680 if (bswap) {
1681 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1682 tcg_out_rolw_8(s, scratch);
1683 datalo = scratch;
1684 }
1685 tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
1686 break;
1687 case MO_32:
1688 if (bswap) {
1689 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1690 tcg_out_bswap32(s, scratch);
1691 datalo = scratch;
1692 }
1693 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1694 break;
1695 case MO_64:
1696 if (TCG_TARGET_REG_BITS == 64) {
1697 if (bswap) {
1698 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1699 tcg_out_bswap64(s, scratch);
1700 datalo = scratch;
1701 }
1702 tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
1703 } else if (bswap) {
1704 tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
1705 tcg_out_bswap32(s, scratch);
1706 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
1707 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1708 tcg_out_bswap32(s, scratch);
1709 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
1710 } else {
1711 if (real_bswap) {
1712 int t = datalo;
1713 datalo = datahi;
1714 datahi = t;
1715 }
1716 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1717 tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
1718 }
1719 break;
1720 default:
1721 tcg_abort();
1722 }
1723 }
1724
1725 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1726 {
1727 TCGReg datalo, datahi, addrlo;
1728 TCGReg addrhi __attribute__((unused));
1729 TCGMemOpIdx oi;
1730 TCGMemOp opc;
1731 #if defined(CONFIG_SOFTMMU)
1732 int mem_index;
1733 tcg_insn_unit *label_ptr[2];
1734 #endif
1735
1736 datalo = *args++;
1737 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1738 addrlo = *args++;
1739 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1740 oi = *args++;
1741 opc = get_memop(oi);
1742
1743 #if defined(CONFIG_SOFTMMU)
1744 mem_index = get_mmuidx(oi);
1745
1746 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
1747 label_ptr, offsetof(CPUTLBEntry, addr_write));
1748
1749 /* TLB Hit. */
1750 tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
1751
1752 /* Record the current context of a store into ldst label */
1753 add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
1754 s->code_ptr, label_ptr);
1755 #else
1756 {
1757 int32_t offset = guest_base;
1758 TCGReg base = addrlo;
1759 int seg = 0;
1760
1761 /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */
1762 if (guest_base == 0 || guest_base_flags) {
1763 seg = guest_base_flags;
1764 offset = 0;
1765 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1766 seg |= P_ADDR32;
1767 }
1768 } else if (TCG_TARGET_REG_BITS == 64) {
1769 /* ??? Note that we can't use the same SIB addressing scheme
1770 as for loads, since we require L0 free for bswap. */
1771 if (offset != guest_base) {
1772 if (TARGET_LONG_BITS == 32) {
1773 tcg_out_ext32u(s, TCG_REG_L0, base);
1774 base = TCG_REG_L0;
1775 }
1776 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
1777 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1778 base = TCG_REG_L1;
1779 offset = 0;
1780 } else if (TARGET_LONG_BITS == 32) {
1781 tcg_out_ext32u(s, TCG_REG_L1, base);
1782 base = TCG_REG_L1;
1783 }
1784 }
1785
1786 tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
1787 }
1788 #endif
1789 }
1790
1791 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1792 const TCGArg *args, const int *const_args)
1793 {
1794 int c, vexop, rexw = 0;
1795
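/* OP_32_64(x) expands to the case labels for both INDEX_op_x_i64 and
   INDEX_op_x_i32 on 64-bit hosts, with the _i64 case setting REXW and
   falling through; on 32-bit hosts only the _i32 case exists.  */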
1796 #if TCG_TARGET_REG_BITS == 64
1797 # define OP_32_64(x) \
1798 case glue(glue(INDEX_op_, x), _i64): \
1799 rexw = P_REXW; /* FALLTHRU */ \
1800 case glue(glue(INDEX_op_, x), _i32)
1801 #else
1802 # define OP_32_64(x) \
1803 case glue(glue(INDEX_op_, x), _i32)
1804 #endif
1805
1806 switch(opc) {
1807 case INDEX_op_exit_tb:
1808 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1809 tcg_out_jmp(s, tb_ret_addr);
1810 break;
1811 case INDEX_op_goto_tb:
1812 if (s->tb_jmp_insn_offset) {
1813 /* direct jump method */
1814 int gap;
1815 /* jump displacement must be aligned for atomic patching;
1816              * see if we need to add extra nops before the jump
1817 */
1818 gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4));
1819 if (gap != 1) {
1820 tcg_out_nopn(s, gap - 1);
1821 }
1822 tcg_out8(s, OPC_JMP_long); /* jmp im */
1823 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
1824 tcg_out32(s, 0);
1825 } else {
1826 /* indirect jump method */
1827 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1828 (intptr_t)(s->tb_jmp_target_addr + args[0]));
1829 }
1830 s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
1831 break;
1832 case INDEX_op_br:
1833 tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0);
1834 break;
1835 OP_32_64(ld8u):
1836 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1837 tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1838 break;
1839 OP_32_64(ld8s):
1840 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1841 break;
1842 OP_32_64(ld16u):
1843 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1844 tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1845 break;
1846 OP_32_64(ld16s):
1847 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1848 break;
1849 #if TCG_TARGET_REG_BITS == 64
1850 case INDEX_op_ld32u_i64:
1851 #endif
1852 case INDEX_op_ld_i32:
1853 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1854 break;
1855
1856 OP_32_64(st8):
1857 if (const_args[0]) {
1858 tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
1859 0, args[1], args[2]);
1860 tcg_out8(s, args[0]);
1861 } else {
1862 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1863 args[0], args[1], args[2]);
1864 }
1865 break;
1866 OP_32_64(st16):
1867 if (const_args[0]) {
1868 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
1869 0, args[1], args[2]);
1870 tcg_out16(s, args[0]);
1871 } else {
1872 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1873 args[0], args[1], args[2]);
1874 }
1875 break;
1876 #if TCG_TARGET_REG_BITS == 64
1877 case INDEX_op_st32_i64:
1878 #endif
1879 case INDEX_op_st_i32:
1880 if (const_args[0]) {
1881 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
1882 tcg_out32(s, args[0]);
1883 } else {
1884 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1885 }
1886 break;
1887
1888 OP_32_64(add):
1889 /* For 3-operand addition, use LEA. */
1890 if (args[0] != args[1]) {
1891 TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1892
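/* A constant addend becomes the LEA displacement (c3); an index of -1
   means no index register in the SIB encoding. */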
1893 if (const_args[2]) {
1894 c3 = a2, a2 = -1;
1895 } else if (a0 == a2) {
1896 /* Watch out for dest = src + dest, since we've removed
1897 the matching constraint on the add. */
1898 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1899 break;
1900 }
1901
1902 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1903 break;
1904 }
1905 c = ARITH_ADD;
1906 goto gen_arith;
1907 OP_32_64(sub):
1908 c = ARITH_SUB;
1909 goto gen_arith;
1910 OP_32_64(and):
1911 c = ARITH_AND;
1912 goto gen_arith;
1913 OP_32_64(or):
1914 c = ARITH_OR;
1915 goto gen_arith;
1916 OP_32_64(xor):
1917 c = ARITH_XOR;
1918 goto gen_arith;
1919 gen_arith:
1920 if (const_args[2]) {
1921 tgen_arithi(s, c + rexw, args[0], args[2], 0);
1922 } else {
1923 tgen_arithr(s, c + rexw, args[0], args[2]);
1924 }
1925 break;
1926
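/* The register-register form of andc uses BMI1's ANDN; the opcode is
   presumably only advertised when have_bmi1 is set (TCG_TARGET_HAS_andc_*
   in tcg-target.h).  A constant operand is instead handled with a move
   followed by AND with the inverted immediate. */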
1927 OP_32_64(andc):
1928 if (const_args[2]) {
1929 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
1930 args[0], args[1]);
1931 tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
1932 } else {
1933 tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
1934 }
1935 break;
1936
1937 OP_32_64(mul):
1938 if (const_args[2]) {
1939 int32_t val;
1940 val = args[2];
1941 if (val == (int8_t)val) {
1942 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1943 tcg_out8(s, val);
1944 } else {
1945 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1946 tcg_out32(s, val);
1947 }
1948 } else {
1949 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1950 }
1951 break;
1952
1953 OP_32_64(div2):
1954 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1955 break;
1956 OP_32_64(divu2):
1957 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1958 break;
1959
1960 OP_32_64(shl):
1961 c = SHIFT_SHL;
1962 vexop = OPC_SHLX;
1963 goto gen_shift_maybe_vex;
1964 OP_32_64(shr):
1965 c = SHIFT_SHR;
1966 vexop = OPC_SHRX;
1967 goto gen_shift_maybe_vex;
1968 OP_32_64(sar):
1969 c = SHIFT_SAR;
1970 vexop = OPC_SARX;
1971 goto gen_shift_maybe_vex;
1972 OP_32_64(rotl):
1973 c = SHIFT_ROL;
1974 goto gen_shift;
1975 OP_32_64(rotr):
1976 c = SHIFT_ROR;
1977 goto gen_shift;
1978 gen_shift_maybe_vex:
1979 if (have_bmi2 && !const_args[2]) {
1980 tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
1981 break;
1982 }
1983 /* FALLTHRU */
1984 gen_shift:
1985 if (const_args[2]) {
1986 tcg_out_shifti(s, c + rexw, args[0], args[2]);
1987 } else {
1988 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1989 }
1990 break;
1991
1992 case INDEX_op_brcond_i32:
1993 tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1994 arg_label(args[3]), 0);
1995 break;
1996 case INDEX_op_setcond_i32:
1997 tcg_out_setcond32(s, args[3], args[0], args[1],
1998 args[2], const_args[2]);
1999 break;
2000 case INDEX_op_movcond_i32:
2001 tcg_out_movcond32(s, args[5], args[0], args[1],
2002 args[2], const_args[2], args[3]);
2003 break;
2004
2005 OP_32_64(bswap16):
2006 tcg_out_rolw_8(s, args[0]);
2007 break;
2008 OP_32_64(bswap32):
2009 tcg_out_bswap32(s, args[0]);
2010 break;
2011
2012 OP_32_64(neg):
2013 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
2014 break;
2015 OP_32_64(not):
2016 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
2017 break;
2018
2019 OP_32_64(ext8s):
2020 tcg_out_ext8s(s, args[0], args[1], rexw);
2021 break;
2022 OP_32_64(ext16s):
2023 tcg_out_ext16s(s, args[0], args[1], rexw);
2024 break;
2025 OP_32_64(ext8u):
2026 tcg_out_ext8u(s, args[0], args[1]);
2027 break;
2028 OP_32_64(ext16u):
2029 tcg_out_ext16u(s, args[0], args[1]);
2030 break;
2031
2032 case INDEX_op_qemu_ld_i32:
2033 tcg_out_qemu_ld(s, args, 0);
2034 break;
2035 case INDEX_op_qemu_ld_i64:
2036 tcg_out_qemu_ld(s, args, 1);
2037 break;
2038 case INDEX_op_qemu_st_i32:
2039 tcg_out_qemu_st(s, args, 0);
2040 break;
2041 case INDEX_op_qemu_st_i64:
2042 tcg_out_qemu_st(s, args, 1);
2043 break;
2044
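/* mulu2/muls2 use the one-operand MUL/IMUL, which implicitly multiplies
   by EAX and leaves the double-width product in EDX:EAX; hence the "a"
   and "d" output constraints below. */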
2045 OP_32_64(mulu2):
2046 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
2047 break;
2048 OP_32_64(muls2):
2049 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
2050 break;
2051 OP_32_64(add2):
2052 if (const_args[4]) {
2053 tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
2054 } else {
2055 tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
2056 }
2057 if (const_args[5]) {
2058 tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
2059 } else {
2060 tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
2061 }
2062 break;
2063 OP_32_64(sub2):
2064 if (const_args[4]) {
2065 tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
2066 } else {
2067 tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
2068 }
2069 if (const_args[5]) {
2070 tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
2071 } else {
2072 tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
2073 }
2074 break;
2075
2076 #if TCG_TARGET_REG_BITS == 32
2077 case INDEX_op_brcond2_i32:
2078 tcg_out_brcond2(s, args, const_args, 0);
2079 break;
2080 case INDEX_op_setcond2_i32:
2081 tcg_out_setcond2(s, args, const_args);
2082 break;
2083 #else /* TCG_TARGET_REG_BITS == 64 */
2084 case INDEX_op_ld32s_i64:
2085 tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
2086 break;
2087 case INDEX_op_ld_i64:
2088 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2089 break;
2090 case INDEX_op_st_i64:
2091 if (const_args[0]) {
2092 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
2093 0, args[1], args[2]);
2094 tcg_out32(s, args[0]);
2095 } else {
2096 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2097 }
2098 break;
2099
2100 case INDEX_op_brcond_i64:
2101 tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
2102 arg_label(args[3]), 0);
2103 break;
2104 case INDEX_op_setcond_i64:
2105 tcg_out_setcond64(s, args[3], args[0], args[1],
2106 args[2], const_args[2]);
2107 break;
2108 case INDEX_op_movcond_i64:
2109 tcg_out_movcond64(s, args[5], args[0], args[1],
2110 args[2], const_args[2], args[3]);
2111 break;
2112
2113 case INDEX_op_bswap64_i64:
2114 tcg_out_bswap64(s, args[0]);
2115 break;
2116 case INDEX_op_extu_i32_i64:
2117 case INDEX_op_ext32u_i64:
2118 tcg_out_ext32u(s, args[0], args[1]);
2119 break;
2120 case INDEX_op_ext_i32_i64:
2121 case INDEX_op_ext32s_i64:
2122 tcg_out_ext32s(s, args[0], args[1]);
2123 break;
2124 #endif
2125
2126 OP_32_64(deposit):
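/* Only the byte/word insertions that x86 can express as partial-register
   stores are accepted (see the "Q" constraint); args[0] + 4 selects the
   corresponding high-byte register (%ah/%ch/%dh/%bh) in the ModRM byte. */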
2127 if (args[3] == 0 && args[4] == 8) {
2128 /* load bits 0..7 */
2129 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
2130 args[2], args[0]);
2131 } else if (args[3] == 8 && args[4] == 8) {
2132 /* load bits 8..15 */
2133 tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
2134 } else if (args[3] == 0 && args[4] == 16) {
2135 /* load bits 0..15 */
2136 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
2137 } else {
2138 tcg_abort();
2139 }
2140 break;
2141
2142 case INDEX_op_extract_i64:
2143 if (args[2] + args[3] == 32) {
2144 /* This is a 32-bit zero-extending right shift. */
2145 tcg_out_mov(s, TCG_TYPE_I32, args[0], args[1]);
2146 tcg_out_shifti(s, SHIFT_SHR, args[0], args[2]);
2147 break;
2148 }
2149 /* FALLTHRU */
2150 case INDEX_op_extract_i32:
2151 /* Use the high-byte registers when we can; otherwise emit the same
2152 ext16 + shift pattern that we would have gotten from the normal
2153 tcg-op.c expansion. */
2154 tcg_debug_assert(args[2] == 8 && args[3] == 8);
2155 if (args[1] < 4 && args[0] < 8) {
2156 tcg_out_modrm(s, OPC_MOVZBL, args[0], args[1] + 4);
2157 } else {
2158 tcg_out_ext16u(s, args[0], args[1]);
2159 tcg_out_shifti(s, SHIFT_SHR, args[0], 8);
2160 }
2161 break;
2162
2163 case INDEX_op_sextract_i32:
2164 /* We don't implement sextract_i64, as we cannot sign-extend to
2165 64 bits without using the REX prefix, which explicitly excludes
2166 access to the high-byte registers. */
2167 tcg_debug_assert(args[2] == 8 && args[3] == 8);
2168 if (args[1] < 4 && args[0] < 8) {
2169 tcg_out_modrm(s, OPC_MOVSBL, args[0], args[1] + 4);
2170 } else {
2171 tcg_out_ext16s(s, args[0], args[1], 0);
2172 tcg_out_shifti(s, SHIFT_SAR, args[0], 8);
2173 }
2174 break;
2175
2176 case INDEX_op_mb:
2177 tcg_out_mb(s, args[0]);
2178 break;
2179 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2180 case INDEX_op_mov_i64:
2181 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2182 case INDEX_op_movi_i64:
2183 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2184 default:
2185 tcg_abort();
2186 }
2187
2188 #undef OP_32_64
2189 }
2190
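/* Return the operand constraints for "op" (outputs listed first), or NULL
   if this backend does not implement the opcode.  The letters are decoded
   by the backend's constraint parser earlier in this file; roughly: 'r'
   any integer register, 'q'/'Q' a byte-addressable register, 'L' a
   register usable around the qemu_ld/st slow-path calls, 'a'/'d' EAX/EDX,
   '0'/'1' alias the corresponding output, 'i' any immediate, and
   'e'/'Z'/'I' one of the 32-bit immediate classes (TCG_CT_CONST_S32/U32/I32)
   defined near the top of the file. */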
2191 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2192 {
2193 static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } };
2194 static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } };
2195 static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } };
2196 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2197 static const TCGTargetOpDef r_q = { .args_ct_str = { "r", "q" } };
2198 static const TCGTargetOpDef r_re = { .args_ct_str = { "r", "re" } };
2199 static const TCGTargetOpDef r_0 = { .args_ct_str = { "r", "0" } };
2200 static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } };
2201 static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } };
2202 static const TCGTargetOpDef r_0_Ci = { .args_ct_str = { "r", "0", "Ci" } };
2203 static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
2204 static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
2205 static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
2206 static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
2207 static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
2208 static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
2209 static const TCGTargetOpDef r_r_L_L
2210 = { .args_ct_str = { "r", "r", "L", "L" } };
2211 static const TCGTargetOpDef L_L_L_L
2212 = { .args_ct_str = { "L", "L", "L", "L" } };
2213
2214 switch (op) {
2215 case INDEX_op_ld8u_i32:
2216 case INDEX_op_ld8u_i64:
2217 case INDEX_op_ld8s_i32:
2218 case INDEX_op_ld8s_i64:
2219 case INDEX_op_ld16u_i32:
2220 case INDEX_op_ld16u_i64:
2221 case INDEX_op_ld16s_i32:
2222 case INDEX_op_ld16s_i64:
2223 case INDEX_op_ld_i32:
2224 case INDEX_op_ld32u_i64:
2225 case INDEX_op_ld32s_i64:
2226 case INDEX_op_ld_i64:
2227 return &r_r;
2228
2229 case INDEX_op_st8_i32:
2230 case INDEX_op_st8_i64:
2231 return &qi_r;
2232 case INDEX_op_st16_i32:
2233 case INDEX_op_st16_i64:
2234 case INDEX_op_st_i32:
2235 case INDEX_op_st32_i64:
2236 return &ri_r;
2237 case INDEX_op_st_i64:
2238 return &re_r;
2239
2240 case INDEX_op_add_i32:
2241 case INDEX_op_add_i64:
2242 return &r_r_re;
2243 case INDEX_op_sub_i32:
2244 case INDEX_op_sub_i64:
2245 case INDEX_op_mul_i32:
2246 case INDEX_op_mul_i64:
2247 case INDEX_op_or_i32:
2248 case INDEX_op_or_i64:
2249 case INDEX_op_xor_i32:
2250 case INDEX_op_xor_i64:
2251 return &r_0_re;
2252
2253 case INDEX_op_and_i32:
2254 case INDEX_op_and_i64:
2255 {
2256 static const TCGTargetOpDef and
2257 = { .args_ct_str = { "r", "0", "reZ" } };
2258 return &and;
2259 }
2260 break;
2261 case INDEX_op_andc_i32:
2262 case INDEX_op_andc_i64:
2263 {
2264 static const TCGTargetOpDef andc
2265 = { .args_ct_str = { "r", "r", "rI" } };
2266 return &andc;
2267 }
2268 break;
2269
2270 case INDEX_op_shl_i32:
2271 case INDEX_op_shl_i64:
2272 case INDEX_op_shr_i32:
2273 case INDEX_op_shr_i64:
2274 case INDEX_op_sar_i32:
2275 case INDEX_op_sar_i64:
2276 return &r_0_Ci;
2277 case INDEX_op_rotl_i32:
2278 case INDEX_op_rotl_i64:
2279 case INDEX_op_rotr_i32:
2280 case INDEX_op_rotr_i64:
2281 return &r_0_ci;
2282
2283 case INDEX_op_brcond_i32:
2284 case INDEX_op_brcond_i64:
2285 return &r_re;
2286
2287 case INDEX_op_bswap16_i32:
2288 case INDEX_op_bswap16_i64:
2289 case INDEX_op_bswap32_i32:
2290 case INDEX_op_bswap32_i64:
2291 case INDEX_op_bswap64_i64:
2292 case INDEX_op_neg_i32:
2293 case INDEX_op_neg_i64:
2294 case INDEX_op_not_i32:
2295 case INDEX_op_not_i64:
2296 return &r_0;
2297
2298 case INDEX_op_ext8s_i32:
2299 case INDEX_op_ext8s_i64:
2300 case INDEX_op_ext8u_i32:
2301 case INDEX_op_ext8u_i64:
2302 return &r_q;
2303 case INDEX_op_ext16s_i32:
2304 case INDEX_op_ext16s_i64:
2305 case INDEX_op_ext16u_i32:
2306 case INDEX_op_ext16u_i64:
2307 case INDEX_op_ext32s_i64:
2308 case INDEX_op_ext32u_i64:
2309 case INDEX_op_ext_i32_i64:
2310 case INDEX_op_extu_i32_i64:
2311 case INDEX_op_extract_i32:
2312 case INDEX_op_extract_i64:
2313 case INDEX_op_sextract_i32:
2314 return &r_r;
2315
2316 case INDEX_op_deposit_i32:
2317 case INDEX_op_deposit_i64:
2318 {
2319 static const TCGTargetOpDef dep
2320 = { .args_ct_str = { "Q", "0", "Q" } };
2321 return &dep;
2322 }
2323 case INDEX_op_setcond_i32:
2324 case INDEX_op_setcond_i64:
2325 {
2326 static const TCGTargetOpDef setc
2327 = { .args_ct_str = { "q", "r", "re" } };
2328 return &setc;
2329 }
2330 case INDEX_op_movcond_i32:
2331 case INDEX_op_movcond_i64:
2332 {
2333 static const TCGTargetOpDef movc
2334 = { .args_ct_str = { "r", "r", "re", "r", "0" } };
2335 return &movc;
2336 }
2337 case INDEX_op_div2_i32:
2338 case INDEX_op_div2_i64:
2339 case INDEX_op_divu2_i32:
2340 case INDEX_op_divu2_i64:
2341 {
2342 static const TCGTargetOpDef div2
2343 = { .args_ct_str = { "a", "d", "0", "1", "r" } };
2344 return &div2;
2345 }
2346 case INDEX_op_mulu2_i32:
2347 case INDEX_op_mulu2_i64:
2348 case INDEX_op_muls2_i32:
2349 case INDEX_op_muls2_i64:
2350 {
2351 static const TCGTargetOpDef mul2
2352 = { .args_ct_str = { "a", "d", "a", "r" } };
2353 return &mul2;
2354 }
2355 case INDEX_op_add2_i32:
2356 case INDEX_op_add2_i64:
2357 case INDEX_op_sub2_i32:
2358 case INDEX_op_sub2_i64:
2359 {
2360 static const TCGTargetOpDef arith2
2361 = { .args_ct_str = { "r", "r", "0", "1", "re", "re" } };
2362 return &arith2;
2363 }
2364
2365 case INDEX_op_qemu_ld_i32:
2366 return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
2367 case INDEX_op_qemu_st_i32:
2368 return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
2369 case INDEX_op_qemu_ld_i64:
2370 return (TCG_TARGET_REG_BITS == 64 ? &r_L
2371 : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
2372 : &r_r_L_L);
2373 case INDEX_op_qemu_st_i64:
2374 return (TCG_TARGET_REG_BITS == 64 ? &L_L
2375 : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L_L
2376 : &L_L_L_L);
2377
2378 case INDEX_op_brcond2_i32:
2379 {
2380 static const TCGTargetOpDef b2
2381 = { .args_ct_str = { "r", "r", "ri", "ri" } };
2382 return &b2;
2383 }
2384 case INDEX_op_setcond2_i32:
2385 {
2386 static const TCGTargetOpDef s2
2387 = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
2388 return &s2;
2389 }
2390
2391 default:
2392 break;
2393 }
2394 return NULL;
2395 }
2396
2397 static int tcg_target_callee_save_regs[] = {
2398 #if TCG_TARGET_REG_BITS == 64
2399 TCG_REG_RBP,
2400 TCG_REG_RBX,
2401 #if defined(_WIN64)
2402 TCG_REG_RDI,
2403 TCG_REG_RSI,
2404 #endif
2405 TCG_REG_R12,
2406 TCG_REG_R13,
2407 TCG_REG_R14, /* Currently used for the global env. */
2408 TCG_REG_R15,
2409 #else
2410 TCG_REG_EBP, /* Currently used for the global env. */
2411 TCG_REG_EBX,
2412 TCG_REG_ESI,
2413 TCG_REG_EDI,
2414 #endif
2415 };
2416
2417 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2418 and tcg_register_jit. */
2419
2420 #define PUSH_SIZE \
2421 ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2422 * (TCG_TARGET_REG_BITS / 8))
2423
2424 #define FRAME_SIZE \
2425 ((PUSH_SIZE \
2426 + TCG_STATIC_CALL_ARGS_SIZE \
2427 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2428 + TCG_TARGET_STACK_ALIGN - 1) \
2429 & ~(TCG_TARGET_STACK_ALIGN - 1))
2430
2431 /* Generate global QEMU prologue and epilogue code */
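/* Stack layout established below, from the stack pointer upward: the
   TCG_STATIC_CALL_ARGS_SIZE area for outgoing helper-call arguments, the
   CPU_TEMP_BUF_NLONGS temporary buffer registered with tcg_set_frame(),
   padding up to TCG_TARGET_STACK_ALIGN, and finally the pushed
   callee-saved registers and the return address. */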
2432 static void tcg_target_qemu_prologue(TCGContext *s)
2433 {
2434 int i, stack_addend;
2435
2436 /* TB prologue */
2437
2438 /* Reserve some stack space, also for TCG temps. */
2439 stack_addend = FRAME_SIZE - PUSH_SIZE;
2440 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2441 CPU_TEMP_BUF_NLONGS * sizeof(long));
2442
2443 /* Save all callee saved registers. */
2444 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2445 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2446 }
2447
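/* Load env (TCG_AREG0) from the first incoming argument and tail-jump to
   the translation-block address passed as the second argument: both live
   on the stack for 32-bit hosts and in argument registers for 64-bit. */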
2448 #if TCG_TARGET_REG_BITS == 32
2449 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
2450 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
2451 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2452 /* jmp *tb. */
2453 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
2454 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
2455 + stack_addend);
2456 #else
2457 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2458 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2459 /* jmp *tb. */
2460 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
2461 #endif
2462
2463 /* TB epilogue */
2464 tb_ret_addr = s->code_ptr;
2465
2466 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
2467
2468 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2469 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2470 }
2471 tcg_out_opc(s, OPC_RET, 0, 0, 0);
2472
2473 #if !defined(CONFIG_SOFTMMU)
2474 /* Try to set up a segment register to point to guest_base. */
2475 if (guest_base) {
2476 setup_guest_base_seg();
2477 }
2478 #endif
2479 }
2480
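/* One-time backend initialization: probe CPU features with CPUID where
   <cpuid.h> is available, then populate the available, call-clobbered and
   reserved register sets used by the register allocator. */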
2481 static void tcg_target_init(TCGContext *s)
2482 {
2483 #ifdef CONFIG_CPUID_H
2484 unsigned a, b, c, d;
2485 int max = __get_cpuid_max(0, 0);
2486
2487 if (max >= 1) {
2488 __cpuid(1, a, b, c, d);
2489 #ifndef have_cmov
2490 /* For 32-bit, 99% certainty that we're running on hardware that
2491 supports cmov, but we still need to check. In case cmov is not
2492 available, we'll use a small forward branch. */
2493 have_cmov = (d & bit_CMOV) != 0;
2494 #endif
2495 #ifndef have_movbe
2496 /* MOVBE is available only on some CPUs (e.g. Intel Atom and Haswell
2497 and later), so we need to probe for it at runtime. */
2498 have_movbe = (c & bit_MOVBE) != 0;
2499 #endif
2500 }
2501
2502 if (max >= 7) {
2503 /* BMI1 is available on AMD Piledriver and Intel Haswell and newer CPUs. */
2504 __cpuid_count(7, 0, a, b, c, d);
2505 #ifdef bit_BMI
2506 have_bmi1 = (b & bit_BMI) != 0;
2507 #endif
2508 #ifndef have_bmi2
2509 have_bmi2 = (b & bit_BMI2) != 0;
2510 #endif
2511 }
2512 #endif
2513
2514 if (TCG_TARGET_REG_BITS == 64) {
2515 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2516 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2517 } else {
2518 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2519 }
2520
2521 tcg_regset_clear(tcg_target_call_clobber_regs);
2522 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2523 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2524 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2525 if (TCG_TARGET_REG_BITS == 64) {
2526 #if !defined(_WIN64)
2527 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2528 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2529 #endif
2530 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2531 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2532 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2533 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2534 }
2535
2536 tcg_regset_clear(s->reserved_regs);
2537 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2538 }
2539
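/* Minimal DWARF call-frame information describing the prologue above, so
   that tcg_register_jit() can let a debugger unwind through generated
   code. */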
2540 typedef struct {
2541 DebugFrameHeader h;
2542 uint8_t fde_def_cfa[4];
2543 uint8_t fde_reg_ofs[14];
2544 } DebugFrame;
2545
2546 /* We're expecting a 2 byte uleb128 encoded value. */
2547 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2548
2549 #if !defined(__ELF__)
2550 /* Host machine without ELF. */
2551 #elif TCG_TARGET_REG_BITS == 64
2552 #define ELF_HOST_MACHINE EM_X86_64
2553 static const DebugFrame debug_frame = {
2554 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2555 .h.cie.id = -1,
2556 .h.cie.version = 1,
2557 .h.cie.code_align = 1,
2558 .h.cie.data_align = 0x78, /* sleb128 -8 */
2559 .h.cie.return_column = 16,
2560
2561 /* Total FDE size does not include the "len" member. */
2562 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2563
2564 .fde_def_cfa = {
2565 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2566 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2567 (FRAME_SIZE >> 7)
2568 },
2569 .fde_reg_ofs = {
2570 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2571 /* The following ordering must match tcg_target_callee_save_regs. */
2572 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2573 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2574 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2575 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2576 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2577 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2578 }
2579 };
2580 #else
2581 #define ELF_HOST_MACHINE EM_386
2582 static const DebugFrame debug_frame = {
2583 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2584 .h.cie.id = -1,
2585 .h.cie.version = 1,
2586 .h.cie.code_align = 1,
2587 .h.cie.data_align = 0x7c, /* sleb128 -4 */
2588 .h.cie.return_column = 8,
2589
2590 /* Total FDE size does not include the "len" member. */
2591 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2592
2593 .fde_def_cfa = {
2594 12, 4, /* DW_CFA_def_cfa %esp, ... */
2595 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2596 (FRAME_SIZE >> 7)
2597 },
2598 .fde_reg_ofs = {
2599 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2600 /* The following ordering must match tcg_target_callee_save_regs. */
2601 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2602 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2603 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2604 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2605 }
2606 };
2607 #endif
2608
2609 #if defined(ELF_HOST_MACHINE)
2610 void tcg_register_jit(void *buf, size_t buf_size)
2611 {
2612 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2613 }
2614 #endif