[mirror_qemu.git] tcg/i386/tcg-target.c
1 /*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #ifndef NDEBUG
26 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
27 #if TCG_TARGET_REG_BITS == 64
28 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
29 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
30 #else
31 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
32 #endif
33 };
34 #endif
35
36 static const int tcg_target_reg_alloc_order[] = {
37 #if TCG_TARGET_REG_BITS == 64
38 TCG_REG_RBP,
39 TCG_REG_RBX,
40 TCG_REG_R12,
41 TCG_REG_R13,
42 TCG_REG_R14,
43 TCG_REG_R15,
44 TCG_REG_R10,
45 TCG_REG_R11,
46 TCG_REG_R9,
47 TCG_REG_R8,
48 TCG_REG_RCX,
49 TCG_REG_RDX,
50 TCG_REG_RSI,
51 TCG_REG_RDI,
52 TCG_REG_RAX,
53 #else
54 TCG_REG_EBX,
55 TCG_REG_ESI,
56 TCG_REG_EDI,
57 TCG_REG_EBP,
58 TCG_REG_ECX,
59 TCG_REG_EDX,
60 TCG_REG_EAX,
61 #endif
62 };
63
64 static const int tcg_target_call_iarg_regs[] = {
65 #if TCG_TARGET_REG_BITS == 64
66 #if defined(_WIN64)
67 TCG_REG_RCX,
68 TCG_REG_RDX,
69 #else
70 TCG_REG_RDI,
71 TCG_REG_RSI,
72 TCG_REG_RDX,
73 TCG_REG_RCX,
74 #endif
75 TCG_REG_R8,
76 TCG_REG_R9,
77 #else
78 /* 32-bit mode uses the stack-based calling convention (GCC default). */
79 #endif
80 };
81
82 static const int tcg_target_call_oarg_regs[] = {
83 TCG_REG_EAX,
84 #if TCG_TARGET_REG_BITS == 32
85 TCG_REG_EDX
86 #endif
87 };
88
89 /* Registers used with L constraint, which are the first argument
90 registers on x86_64, and two arbitrary call-clobbered registers on
91 i386. */
92 #if TCG_TARGET_REG_BITS == 64
93 # define TCG_REG_L0 tcg_target_call_iarg_regs[0]
94 # define TCG_REG_L1 tcg_target_call_iarg_regs[1]
95 #else
96 # define TCG_REG_L0 TCG_REG_EAX
97 # define TCG_REG_L1 TCG_REG_EDX
98 #endif
99
100 /* For 32-bit, we are going to attempt to determine at runtime whether cmov
101 is available. However, the host compiler must supply <cpuid.h>, as we're
102 not going to go so far as to write our own inline assembly. */
103 #if TCG_TARGET_REG_BITS == 64
104 # define have_cmov 1
105 #elif defined(CONFIG_CPUID_H)
106 #include <cpuid.h>
107 static bool have_cmov;
108 #else
109 # define have_cmov 0
110 #endif
111
112 static uint8_t *tb_ret_addr;
113
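/* Both relocation types handled here (R_386_PC32, R_386_PC8) are
   pc-relative: the displacement is computed against the patch location,
   with the caller-supplied addend folded in, and must fit the field width. */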
114 static void patch_reloc(uint8_t *code_ptr, int type,
115 tcg_target_long value, tcg_target_long addend)
116 {
117 value += addend;
118 switch(type) {
119 case R_386_PC32:
120 value -= (uintptr_t)code_ptr;
121 if (value != (int32_t)value) {
122 tcg_abort();
123 }
124 *(uint32_t *)code_ptr = value;
125 break;
126 case R_386_PC8:
127 value -= (uintptr_t)code_ptr;
128 if (value != (int8_t)value) {
129 tcg_abort();
130 }
131 *(uint8_t *)code_ptr = value;
132 break;
133 default:
134 tcg_abort();
135 }
136 }
137
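/* Summary of the constraint letters handled below: 'a', 'b', 'c', 'd', 'S'
   and 'D' each pin a single register; 'q' allows only registers with byte
   sub-registers (EAX/ECX/EDX/EBX on i386, any register in 64-bit mode);
   'Q' is always the four legacy byte registers; 'r' is any general
   register; 'L' is like 'r' but with TCG_REG_L0/L1 reserved for qemu_ld/st;
   'e' and 'Z' accept constants that fit in a sign- or zero-extended
   32-bit immediate. */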
138 /* parse target specific constraints */
139 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
140 {
141 const char *ct_str;
142
143 ct_str = *pct_str;
144 switch(ct_str[0]) {
145 case 'a':
146 ct->ct |= TCG_CT_REG;
147 tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
148 break;
149 case 'b':
150 ct->ct |= TCG_CT_REG;
151 tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
152 break;
153 case 'c':
154 ct->ct |= TCG_CT_REG;
155 tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
156 break;
157 case 'd':
158 ct->ct |= TCG_CT_REG;
159 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
160 break;
161 case 'S':
162 ct->ct |= TCG_CT_REG;
163 tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
164 break;
165 case 'D':
166 ct->ct |= TCG_CT_REG;
167 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
168 break;
169 case 'q':
170 ct->ct |= TCG_CT_REG;
171 if (TCG_TARGET_REG_BITS == 64) {
172 tcg_regset_set32(ct->u.regs, 0, 0xffff);
173 } else {
174 tcg_regset_set32(ct->u.regs, 0, 0xf);
175 }
176 break;
177 case 'Q':
178 ct->ct |= TCG_CT_REG;
179 tcg_regset_set32(ct->u.regs, 0, 0xf);
180 break;
181 case 'r':
182 ct->ct |= TCG_CT_REG;
183 if (TCG_TARGET_REG_BITS == 64) {
184 tcg_regset_set32(ct->u.regs, 0, 0xffff);
185 } else {
186 tcg_regset_set32(ct->u.regs, 0, 0xff);
187 }
188 break;
189
190 /* qemu_ld/st address constraint */
191 case 'L':
192 ct->ct |= TCG_CT_REG;
193 if (TCG_TARGET_REG_BITS == 64) {
194 tcg_regset_set32(ct->u.regs, 0, 0xffff);
195 } else {
196 tcg_regset_set32(ct->u.regs, 0, 0xff);
197 }
198 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
199 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
200 break;
201
202 case 'e':
203 ct->ct |= TCG_CT_CONST_S32;
204 break;
205 case 'Z':
206 ct->ct |= TCG_CT_CONST_U32;
207 break;
208
209 default:
210 return -1;
211 }
212 ct_str++;
213 *pct_str = ct_str;
214 return 0;
215 }
216
217 /* test if a constant matches the constraint */
218 static inline int tcg_target_const_match(tcg_target_long val,
219 const TCGArgConstraint *arg_ct)
220 {
221 int ct = arg_ct->ct;
222 if (ct & TCG_CT_CONST) {
223 return 1;
224 }
225 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
226 return 1;
227 }
228 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
229 return 1;
230 }
231 return 0;
232 }
233
234 #if TCG_TARGET_REG_BITS == 64
235 # define LOWREGMASK(x) ((x) & 7)
236 #else
237 # define LOWREGMASK(x) (x)
238 #endif
239
240 #define P_EXT 0x100 /* 0x0f opcode prefix */
241 #define P_DATA16 0x200 /* 0x66 opcode prefix */
242 #if TCG_TARGET_REG_BITS == 64
243 # define P_ADDR32 0x400 /* 0x67 opcode prefix */
244 # define P_REXW 0x800 /* Set REX.W = 1 */
245 # define P_REXB_R 0x1000 /* REG field as byte register */
246 # define P_REXB_RM 0x2000 /* R/M field as byte register */
247 # define P_GS 0x4000 /* gs segment override */
248 #else
249 # define P_ADDR32 0
250 # define P_REXW 0
251 # define P_REXB_R 0
252 # define P_REXB_RM 0
253 # define P_GS 0
254 #endif
255
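/* The OPC_* values below encode the primary opcode byte, optionally ORed
   with the P_* flags above; tcg_out_opc strips those flags and emits the
   corresponding prefix bytes (segment, 0x66, 0x67, REX, 0x0f) before the
   opcode itself. */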
256 #define OPC_ARITH_EvIz (0x81)
257 #define OPC_ARITH_EvIb (0x83)
258 #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
259 #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
260 #define OPC_BSWAP (0xc8 | P_EXT)
261 #define OPC_CALL_Jz (0xe8)
262 #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
263 #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
264 #define OPC_DEC_r32 (0x48)
265 #define OPC_IMUL_GvEv (0xaf | P_EXT)
266 #define OPC_IMUL_GvEvIb (0x6b)
267 #define OPC_IMUL_GvEvIz (0x69)
268 #define OPC_INC_r32 (0x40)
269 #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
270 #define OPC_JCC_short (0x70) /* ... plus condition code */
271 #define OPC_JMP_long (0xe9)
272 #define OPC_JMP_short (0xeb)
273 #define OPC_LEA (0x8d)
274 #define OPC_MOVB_EvGv (0x88) /* stores, more or less */
275 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
276 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
277 #define OPC_MOVB_EvIz (0xc6)
278 #define OPC_MOVL_EvIz (0xc7)
279 #define OPC_MOVL_Iv (0xb8)
280 #define OPC_MOVSBL (0xbe | P_EXT)
281 #define OPC_MOVSWL (0xbf | P_EXT)
282 #define OPC_MOVSLQ (0x63 | P_REXW)
283 #define OPC_MOVZBL (0xb6 | P_EXT)
284 #define OPC_MOVZWL (0xb7 | P_EXT)
285 #define OPC_POP_r32 (0x58)
286 #define OPC_PUSH_r32 (0x50)
287 #define OPC_PUSH_Iv (0x68)
288 #define OPC_PUSH_Ib (0x6a)
289 #define OPC_RET (0xc3)
290 #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
291 #define OPC_SHIFT_1 (0xd1)
292 #define OPC_SHIFT_Ib (0xc1)
293 #define OPC_SHIFT_cl (0xd3)
294 #define OPC_TESTL (0x85)
295 #define OPC_XCHG_ax_r32 (0x90)
296
297 #define OPC_GRP3_Ev (0xf7)
298 #define OPC_GRP5 (0xff)
299
300 /* Group 1 opcode extensions for 0x80-0x83.
301 These are also used as modifiers for OPC_ARITH. */
302 #define ARITH_ADD 0
303 #define ARITH_OR 1
304 #define ARITH_ADC 2
305 #define ARITH_SBB 3
306 #define ARITH_AND 4
307 #define ARITH_SUB 5
308 #define ARITH_XOR 6
309 #define ARITH_CMP 7
310
311 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
312 #define SHIFT_ROL 0
313 #define SHIFT_ROR 1
314 #define SHIFT_SHL 4
315 #define SHIFT_SHR 5
316 #define SHIFT_SAR 7
317
318 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
319 #define EXT3_NOT 2
320 #define EXT3_NEG 3
321 #define EXT3_MUL 4
322 #define EXT3_IMUL 5
323 #define EXT3_DIV 6
324 #define EXT3_IDIV 7
325
326 /* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
327 #define EXT5_INC_Ev 0
328 #define EXT5_DEC_Ev 1
329 #define EXT5_CALLN_Ev 2
330 #define EXT5_JMPN_Ev 4
331
332 /* Condition codes to be added to OPC_JCC_{long,short}. */
333 #define JCC_JMP (-1)
334 #define JCC_JO 0x0
335 #define JCC_JNO 0x1
336 #define JCC_JB 0x2
337 #define JCC_JAE 0x3
338 #define JCC_JE 0x4
339 #define JCC_JNE 0x5
340 #define JCC_JBE 0x6
341 #define JCC_JA 0x7
342 #define JCC_JS 0x8
343 #define JCC_JNS 0x9
344 #define JCC_JP 0xa
345 #define JCC_JNP 0xb
346 #define JCC_JL 0xc
347 #define JCC_JGE 0xd
348 #define JCC_JLE 0xe
349 #define JCC_JG 0xf
350
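/* Map TCG comparison conditions onto the x86 condition-code nibble that is
   added to OPC_JCC_{long,short}, OPC_SETCC and OPC_CMOVCC. */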
351 static const uint8_t tcg_cond_to_jcc[] = {
352 [TCG_COND_EQ] = JCC_JE,
353 [TCG_COND_NE] = JCC_JNE,
354 [TCG_COND_LT] = JCC_JL,
355 [TCG_COND_GE] = JCC_JGE,
356 [TCG_COND_LE] = JCC_JLE,
357 [TCG_COND_GT] = JCC_JG,
358 [TCG_COND_LTU] = JCC_JB,
359 [TCG_COND_GEU] = JCC_JAE,
360 [TCG_COND_LEU] = JCC_JBE,
361 [TCG_COND_GTU] = JCC_JA,
362 };
363
364 #if TCG_TARGET_REG_BITS == 64
365 static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
366 {
367 int rex;
368
369 if (opc & P_GS) {
370 tcg_out8(s, 0x65);
371 }
372 if (opc & P_DATA16) {
373 /* We should never be asking for both 16 and 64-bit operation. */
374 assert((opc & P_REXW) == 0);
375 tcg_out8(s, 0x66);
376 }
377 if (opc & P_ADDR32) {
378 tcg_out8(s, 0x67);
379 }
380
381 rex = 0;
382 rex |= (opc & P_REXW) >> 8; /* REX.W */
383 rex |= (r & 8) >> 1; /* REX.R */
384 rex |= (x & 8) >> 2; /* REX.X */
385 rex |= (rm & 8) >> 3; /* REX.B */
386
387 /* P_REXB_{R,RM} indicates that the given register is the low byte.
388 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
389 as otherwise the encoding indicates %[abcd]h. Note that the values
390 that are ORed in merely indicate that the REX byte must be present;
391 those bits get discarded in output. */
392 rex |= opc & (r >= 4 ? P_REXB_R : 0);
393 rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
394
395 if (rex) {
396 tcg_out8(s, (uint8_t)(rex | 0x40));
397 }
398
399 if (opc & P_EXT) {
400 tcg_out8(s, 0x0f);
401 }
402 tcg_out8(s, opc);
403 }
404 #else
405 static void tcg_out_opc(TCGContext *s, int opc)
406 {
407 if (opc & P_DATA16) {
408 tcg_out8(s, 0x66);
409 }
410 if (opc & P_EXT) {
411 tcg_out8(s, 0x0f);
412 }
413 tcg_out8(s, opc);
414 }
415 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
416 the 32-bit compilation paths. This method works with all versions of gcc,
417 whereas relying on the optimizer to remove them may not always succeed. */
418 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
419 #endif
420
421 static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
422 {
423 tcg_out_opc(s, opc, r, rm, 0);
424 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
425 }
426
427 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
428 We handle RM or INDEX being omitted, indicated by a negative value. In 64-bit
429 mode for absolute addresses, ~RM is the size of the immediate operand
430 that will follow the instruction. */
431
432 static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
433 int index, int shift,
434 tcg_target_long offset)
435 {
436 int mod, len;
437
438 if (index < 0 && rm < 0) {
439 if (TCG_TARGET_REG_BITS == 64) {
440 /* Try for a rip-relative addressing mode. This has replaced
441 the 32-bit-mode absolute addressing encoding. */
442 tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
443 tcg_target_long disp = offset - pc;
444 if (disp == (int32_t)disp) {
445 tcg_out_opc(s, opc, r, 0, 0);
446 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
447 tcg_out32(s, disp);
448 return;
449 }
450
451 /* Try for an absolute address encoding. This requires the
452 use of the MODRM+SIB encoding and is therefore larger than
453 rip-relative addressing. */
454 if (offset == (int32_t)offset) {
455 tcg_out_opc(s, opc, r, 0, 0);
456 tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
457 tcg_out8(s, (4 << 3) | 5);
458 tcg_out32(s, offset);
459 return;
460 }
461
462 /* ??? The memory isn't directly addressable. */
463 tcg_abort();
464 } else {
465 /* Absolute address. */
466 tcg_out_opc(s, opc, r, 0, 0);
467 tcg_out8(s, (r << 3) | 5);
468 tcg_out32(s, offset);
469 return;
470 }
471 }
472
473 /* Find the length of the immediate addend. Note that the encoding
474 that would be used for (%ebp) indicates absolute addressing. */
475 if (rm < 0) {
476 mod = 0, len = 4, rm = 5;
477 } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
478 mod = 0, len = 0;
479 } else if (offset == (int8_t)offset) {
480 mod = 0x40, len = 1;
481 } else {
482 mod = 0x80, len = 4;
483 }
484
485 /* Use a single byte MODRM format if possible. Note that the encoding
486 that would be used for %esp is the escape to the two byte form. */
487 if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
488 /* Single byte MODRM format. */
489 tcg_out_opc(s, opc, r, rm, 0);
490 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
491 } else {
492 /* Two byte MODRM+SIB format. */
493
494 /* Note that the encoding that would place %esp into the index
495 field indicates no index register. In 64-bit mode, the REX.X
496 bit counts, so %r12 can be used as the index. */
497 if (index < 0) {
498 index = 4;
499 } else {
500 assert(index != TCG_REG_ESP);
501 }
502
503 tcg_out_opc(s, opc, r, rm, index);
504 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
505 tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
506 }
507
508 if (len == 1) {
509 tcg_out8(s, offset);
510 } else if (len == 4) {
511 tcg_out32(s, offset);
512 }
513 }
514
515 /* A simplification of the above with no index or shift. */
516 static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
517 int rm, tcg_target_long offset)
518 {
519 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
520 }
521
522 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
523 static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
524 {
525 /* Propagate an opcode prefix, such as P_REXW. */
526 int ext = subop & ~0x7;
527 subop &= 0x7;
528
529 tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
530 }
531
532 static inline void tcg_out_mov(TCGContext *s, TCGType type,
533 TCGReg ret, TCGReg arg)
534 {
535 if (arg != ret) {
536 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
537 tcg_out_modrm(s, opc, ret, arg);
538 }
539 }
540
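/* Load a constant into a register, choosing roughly the shortest encoding:
   XOR for zero, a plain 32-bit move when the value zero-extends, a
   sign-extended 32-bit immediate, a pc-relative LEA, and finally the full
   10-byte movq with a 64-bit immediate. */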
541 static void tcg_out_movi(TCGContext *s, TCGType type,
542 TCGReg ret, tcg_target_long arg)
543 {
544 tcg_target_long diff;
545
546 if (arg == 0) {
547 tgen_arithr(s, ARITH_XOR, ret, ret);
548 return;
549 }
550 if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
551 tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
552 tcg_out32(s, arg);
553 return;
554 }
555 if (arg == (int32_t)arg) {
556 tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
557 tcg_out32(s, arg);
558 return;
559 }
560
561 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
562 diff = arg - ((tcg_target_long)s->code_ptr + 7);
563 if (diff == (int32_t)diff) {
564 tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
565 tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
566 tcg_out32(s, diff);
567 return;
568 }
569
570 tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
571 tcg_out64(s, arg);
572 }
573
574 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
575 {
576 if (val == (int8_t)val) {
577 tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
578 tcg_out8(s, val);
579 } else if (val == (int32_t)val) {
580 tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
581 tcg_out32(s, val);
582 } else {
583 tcg_abort();
584 }
585 }
586
587 static inline void tcg_out_push(TCGContext *s, int reg)
588 {
589 tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
590 }
591
592 static inline void tcg_out_pop(TCGContext *s, int reg)
593 {
594 tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
595 }
596
597 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
598 TCGReg arg1, tcg_target_long arg2)
599 {
600 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
601 tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
602 }
603
604 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
605 TCGReg arg1, tcg_target_long arg2)
606 {
607 int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
608 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
609 }
610
611 static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
612 tcg_target_long ofs, tcg_target_long val)
613 {
614 int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
615 tcg_out_modrm_offset(s, opc, 0, base, ofs);
616 tcg_out32(s, val);
617 }
618
619 static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
620 {
621 /* Propagate an opcode prefix, such as P_DATA16. */
622 int ext = subopc & ~0x7;
623 subopc &= 0x7;
624
625 if (count == 1) {
626 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
627 } else {
628 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
629 tcg_out8(s, count);
630 }
631 }
632
633 static inline void tcg_out_bswap32(TCGContext *s, int reg)
634 {
635 tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
636 }
637
638 static inline void tcg_out_rolw_8(TCGContext *s, int reg)
639 {
640 tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
641 }
642
643 static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
644 {
645 /* movzbl */
646 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
647 tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
648 }
649
650 static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
651 {
652 /* movsbl */
653 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
654 tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
655 }
656
657 static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
658 {
659 /* movzwl */
660 tcg_out_modrm(s, OPC_MOVZWL, dest, src);
661 }
662
663 static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
664 {
665 /* movsw[lq] */
666 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
667 }
668
669 static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
670 {
671 /* 32-bit mov zero extends. */
672 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
673 }
674
675 static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
676 {
677 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
678 }
679
680 static inline void tcg_out_bswap64(TCGContext *s, int reg)
681 {
682 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
683 }
684
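/* C is one of the ARITH_* codes, possibly ORed with a P_* prefix such as
   P_REXW; the prefix bits are split off into REXW below, while the low
   three bits select the ALU operation. */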
685 static void tgen_arithi(TCGContext *s, int c, int r0,
686 tcg_target_long val, int cf)
687 {
688 int rexw = 0;
689
690 if (TCG_TARGET_REG_BITS == 64) {
691 rexw = c & -8;
692 c &= 7;
693 }
694
695 /* ??? While INC and DEC are 2 bytes shorter than ADD/SUB $1, they also
696 induce partial-flags-update stalls on Pentium 4 and are not recommended
697 by current Intel optimization manuals. */
698 if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
699 int is_inc = (c == ARITH_ADD) ^ (val < 0);
700 if (TCG_TARGET_REG_BITS == 64) {
701 /* The single-byte increment encodings are re-tasked as the
702 REX prefixes. Use the MODRM encoding. */
703 tcg_out_modrm(s, OPC_GRP5 + rexw,
704 (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
705 } else {
706 tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
707 }
708 return;
709 }
710
711 if (c == ARITH_AND) {
712 if (TCG_TARGET_REG_BITS == 64) {
713 if (val == 0xffffffffu) {
714 tcg_out_ext32u(s, r0, r0);
715 return;
716 }
717 if (val == (uint32_t)val) {
718 /* AND with no high bits set can use a 32-bit operation. */
719 rexw = 0;
720 }
721 }
722 if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
723 tcg_out_ext8u(s, r0, r0);
724 return;
725 }
726 if (val == 0xffffu) {
727 tcg_out_ext16u(s, r0, r0);
728 return;
729 }
730 }
731
732 if (val == (int8_t)val) {
733 tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
734 tcg_out8(s, val);
735 return;
736 }
737 if (rexw == 0 || val == (int32_t)val) {
738 tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
739 tcg_out32(s, val);
740 return;
741 }
742
743 tcg_abort();
744 }
745
746 static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
747 {
748 if (val != 0) {
749 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
750 }
751 }
752
753 /* Use SMALL != 0 to force a short forward branch. */
754 static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
755 {
756 int32_t val, val1;
757 TCGLabel *l = &s->labels[label_index];
758
759 if (l->has_value) {
760 val = l->u.value - (tcg_target_long)s->code_ptr;
761 val1 = val - 2;
762 if ((int8_t)val1 == val1) {
763 if (opc == -1) {
764 tcg_out8(s, OPC_JMP_short);
765 } else {
766 tcg_out8(s, OPC_JCC_short + opc);
767 }
768 tcg_out8(s, val1);
769 } else {
770 if (small) {
771 tcg_abort();
772 }
773 if (opc == -1) {
774 tcg_out8(s, OPC_JMP_long);
775 tcg_out32(s, val - 5);
776 } else {
777 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
778 tcg_out32(s, val - 6);
779 }
780 }
781 } else if (small) {
782 if (opc == -1) {
783 tcg_out8(s, OPC_JMP_short);
784 } else {
785 tcg_out8(s, OPC_JCC_short + opc);
786 }
787 tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
788 s->code_ptr += 1;
789 } else {
790 if (opc == -1) {
791 tcg_out8(s, OPC_JMP_long);
792 } else {
793 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
794 }
795 tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
796 s->code_ptr += 4;
797 }
798 }
799
800 static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
801 int const_arg2, int rexw)
802 {
803 if (const_arg2) {
804 if (arg2 == 0) {
805 /* test r, r */
806 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
807 } else {
808 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
809 }
810 } else {
811 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
812 }
813 }
814
815 static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
816 TCGArg arg1, TCGArg arg2, int const_arg2,
817 int label_index, int small)
818 {
819 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
820 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
821 }
822
823 #if TCG_TARGET_REG_BITS == 64
824 static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
825 TCGArg arg1, TCGArg arg2, int const_arg2,
826 int label_index, int small)
827 {
828 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
829 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
830 }
831 #else
832 /* XXX: we implement it at the target level to avoid having to
833 handle temporaries that cross basic blocks. */
834 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
835 const int *const_args, int small)
836 {
837 int label_next;
838 label_next = gen_new_label();
839 switch(args[4]) {
840 case TCG_COND_EQ:
841 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
842 label_next, 1);
843 tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
844 args[5], small);
845 break;
846 case TCG_COND_NE:
847 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
848 args[5], small);
849 tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
850 args[5], small);
851 break;
852 case TCG_COND_LT:
853 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
854 args[5], small);
855 tcg_out_jxx(s, JCC_JNE, label_next, 1);
856 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
857 args[5], small);
858 break;
859 case TCG_COND_LE:
860 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
861 args[5], small);
862 tcg_out_jxx(s, JCC_JNE, label_next, 1);
863 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
864 args[5], small);
865 break;
866 case TCG_COND_GT:
867 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
868 args[5], small);
869 tcg_out_jxx(s, JCC_JNE, label_next, 1);
870 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
871 args[5], small);
872 break;
873 case TCG_COND_GE:
874 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
875 args[5], small);
876 tcg_out_jxx(s, JCC_JNE, label_next, 1);
877 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
878 args[5], small);
879 break;
880 case TCG_COND_LTU:
881 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
882 args[5], small);
883 tcg_out_jxx(s, JCC_JNE, label_next, 1);
884 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
885 args[5], small);
886 break;
887 case TCG_COND_LEU:
888 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
889 args[5], small);
890 tcg_out_jxx(s, JCC_JNE, label_next, 1);
891 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
892 args[5], small);
893 break;
894 case TCG_COND_GTU:
895 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
896 args[5], small);
897 tcg_out_jxx(s, JCC_JNE, label_next, 1);
898 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
899 args[5], small);
900 break;
901 case TCG_COND_GEU:
902 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
903 args[5], small);
904 tcg_out_jxx(s, JCC_JNE, label_next, 1);
905 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
906 args[5], small);
907 break;
908 default:
909 tcg_abort();
910 }
911 tcg_out_label(s, label_next, s->code_ptr);
912 }
913 #endif
914
915 static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
916 TCGArg arg1, TCGArg arg2, int const_arg2)
917 {
918 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
919 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
920 tcg_out_ext8u(s, dest, dest);
921 }
922
923 #if TCG_TARGET_REG_BITS == 64
924 static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
925 TCGArg arg1, TCGArg arg2, int const_arg2)
926 {
927 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
928 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
929 tcg_out_ext8u(s, dest, dest);
930 }
931 #else
932 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
933 const int *const_args)
934 {
935 TCGArg new_args[6];
936 int label_true, label_over;
937
938 memcpy(new_args, args+1, 5*sizeof(TCGArg));
939
940 if (args[0] == args[1] || args[0] == args[2]
941 || (!const_args[3] && args[0] == args[3])
942 || (!const_args[4] && args[0] == args[4])) {
943 /* When the destination overlaps with one of the argument
944 registers, don't do anything tricky. */
945 label_true = gen_new_label();
946 label_over = gen_new_label();
947
948 new_args[5] = label_true;
949 tcg_out_brcond2(s, new_args, const_args+1, 1);
950
951 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
952 tcg_out_jxx(s, JCC_JMP, label_over, 1);
953 tcg_out_label(s, label_true, s->code_ptr);
954
955 tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
956 tcg_out_label(s, label_over, s->code_ptr);
957 } else {
958 /* When the destination does not overlap one of the arguments,
959 clear the destination first, jump if cond false, and emit an
960 increment in the true case. This results in smaller code. */
961
962 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
963
964 label_over = gen_new_label();
965 new_args[4] = tcg_invert_cond(new_args[4]);
966 new_args[5] = label_over;
967 tcg_out_brcond2(s, new_args, const_args+1, 1);
968
969 tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
970 tcg_out_label(s, label_over, s->code_ptr);
971 }
972 }
973 #endif
974
975 static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
976 TCGArg c1, TCGArg c2, int const_c2,
977 TCGArg v1)
978 {
979 tcg_out_cmp(s, c1, c2, const_c2, 0);
980 if (have_cmov) {
981 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
982 } else {
983 int over = gen_new_label();
984 tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
985 tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
986 tcg_out_label(s, over, s->code_ptr);
987 }
988 }
989
990 #if TCG_TARGET_REG_BITS == 64
991 static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
992 TCGArg c1, TCGArg c2, int const_c2,
993 TCGArg v1)
994 {
995 tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
996 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
997 }
998 #endif
999
1000 static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
1001 {
1002 tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
1003
1004 if (disp == (int32_t)disp) {
1005 tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
1006 tcg_out32(s, disp);
1007 } else {
1008 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
1009 tcg_out_modrm(s, OPC_GRP5,
1010 call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
1011 }
1012 }
1013
1014 static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
1015 {
1016 tcg_out_branch(s, 1, dest);
1017 }
1018
1019 static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
1020 {
1021 tcg_out_branch(s, 0, dest);
1022 }
1023
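/* With the software MMU, each guest memory access is split into an inline
   fast path (the TLB load and compare emitted by tcg_out_tlb_load) and a
   slow path emitted at the end of the TB that calls the helpers below. */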
1024 #if defined(CONFIG_SOFTMMU)
1025
1026 #include "exec/softmmu_defs.h"
1027
1028 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1029 * int mmu_idx, uintptr_t ra)
1030 */
1031 static const void * const qemu_ld_helpers[4] = {
1032 helper_ret_ldb_mmu,
1033 helper_ret_ldw_mmu,
1034 helper_ret_ldl_mmu,
1035 helper_ret_ldq_mmu,
1036 };
1037
1038 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1039 * uintxx_t val, int mmu_idx, uintptr_t ra)
1040 */
1041 static const void * const qemu_st_helpers[4] = {
1042 helper_ret_stb_mmu,
1043 helper_ret_stw_mmu,
1044 helper_ret_stl_mmu,
1045 helper_ret_stq_mmu,
1046 };
1047
1048 static void add_qemu_ldst_label(TCGContext *s,
1049 int is_ld,
1050 int opc,
1051 int data_reg,
1052 int data_reg2,
1053 int addrlo_reg,
1054 int addrhi_reg,
1055 int mem_index,
1056 uint8_t *raddr,
1057 uint8_t **label_ptr);
1058
1059 /* Perform the TLB load and compare.
1060
1061 Inputs:
1062 ADDRLO_IDX contains the index into ARGS of the low part of the
1063 address; the high part of the address is at ADDRLO_IDX+1.
1064
1065 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1066
1067 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1068 This should be offsetof addr_read or addr_write.
1069
1070 Outputs:
1071 LABEL_PTR is filled with the positions of the displacements of the forward
1072 jumps to the TLB miss case: one for 32-bit, two for 64-bit guest addresses.
1073
1074 Second argument register is loaded with the low part of the address.
1075 In the TLB hit case, it has been adjusted as indicated by the TLB
1076 and so is a host address. In the TLB miss case, it continues to
1077 hold a guest address.
1078
1079 First argument register is clobbered. */
1080
1081 static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
1082 int mem_index, int s_bits,
1083 const TCGArg *args,
1084 uint8_t **label_ptr, int which)
1085 {
1086 const int addrlo = args[addrlo_idx];
1087 const int r0 = TCG_REG_L0;
1088 const int r1 = TCG_REG_L1;
1089 TCGType type = TCG_TYPE_I32;
1090 int rexw = 0;
1091
1092 if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
1093 type = TCG_TYPE_I64;
1094 rexw = P_REXW;
1095 }
1096
1097 tcg_out_mov(s, type, r0, addrlo);
1098 tcg_out_mov(s, type, r1, addrlo);
1099
1100 tcg_out_shifti(s, SHIFT_SHR + rexw, r0,
1101 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1102
1103 tgen_arithi(s, ARITH_AND + rexw, r1,
1104 TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
1105 tgen_arithi(s, ARITH_AND + rexw, r0,
1106 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1107
1108 tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r0, TCG_AREG0, r0, 0,
1109 offsetof(CPUArchState, tlb_table[mem_index][0])
1110 + which);
1111
1112 /* cmp 0(r0), r1 */
1113 tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r1, r0, 0);
1114
1115 tcg_out_mov(s, type, r1, addrlo);
1116
1117 /* jne slow_path */
1118 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1119 label_ptr[0] = s->code_ptr;
1120 s->code_ptr += 4;
1121
1122 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1123 /* cmp 4(r0), addrhi */
1124 tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r0, 4);
1125
1126 /* jne slow_path */
1127 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1128 label_ptr[1] = s->code_ptr;
1129 s->code_ptr += 4;
1130 }
1131
1132 /* TLB Hit. */
1133
1134 /* add addend(r0), r1 */
1135 tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r1, r0,
1136 offsetof(CPUTLBEntry, addend) - which);
1137 }
1138 #elif defined(__x86_64__) && defined(__linux__)
1139 # include <asm/prctl.h>
1140 # include <sys/prctl.h>
1141
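/* For 64-bit user-only emulation on Linux, try to install GUEST_BASE as the
   %gs segment base via arch_prctl; qemu_ld/st can then use a gs segment
   override (P_GS) instead of adding the guest base explicitly. */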
1142 int arch_prctl(int code, unsigned long addr);
1143
1144 static int guest_base_flags;
1145 static inline void setup_guest_base_seg(void)
1146 {
1147 if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
1148 guest_base_flags = P_GS;
1149 }
1150 }
1151 #else
1152 # define guest_base_flags 0
1153 static inline void setup_guest_base_seg(void) { }
1154 #endif /* SOFTMMU */
1155
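/* SIZEOP encodes the memory access: bits 0-1 give the log2 of the access
   size in bytes (0 = 8-bit ... 3 = 64-bit) and bit 2 (the "| 4" cases)
   requests sign extension of the loaded value. */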
1156 static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
1157 int base, tcg_target_long ofs, int seg,
1158 int sizeop)
1159 {
1160 #ifdef TARGET_WORDS_BIGENDIAN
1161 const int bswap = 1;
1162 #else
1163 const int bswap = 0;
1164 #endif
1165 switch (sizeop) {
1166 case 0:
1167 tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
1168 break;
1169 case 0 | 4:
1170 tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
1171 break;
1172 case 1:
1173 tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
1174 if (bswap) {
1175 tcg_out_rolw_8(s, datalo);
1176 }
1177 break;
1178 case 1 | 4:
1179 if (bswap) {
1180 tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
1181 tcg_out_rolw_8(s, datalo);
1182 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1183 } else {
1184 tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
1185 datalo, base, ofs);
1186 }
1187 break;
1188 case 2:
1189 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
1190 if (bswap) {
1191 tcg_out_bswap32(s, datalo);
1192 }
1193 break;
1194 #if TCG_TARGET_REG_BITS == 64
1195 case 2 | 4:
1196 if (bswap) {
1197 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
1198 tcg_out_bswap32(s, datalo);
1199 tcg_out_ext32s(s, datalo, datalo);
1200 } else {
1201 tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
1202 }
1203 break;
1204 #endif
1205 case 3:
1206 if (TCG_TARGET_REG_BITS == 64) {
1207 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
1208 datalo, base, ofs);
1209 if (bswap) {
1210 tcg_out_bswap64(s, datalo);
1211 }
1212 } else {
1213 if (bswap) {
1214 int t = datalo;
1215 datalo = datahi;
1216 datahi = t;
1217 }
1218 if (base != datalo) {
1219 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1220 datalo, base, ofs);
1221 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1222 datahi, base, ofs + 4);
1223 } else {
1224 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1225 datahi, base, ofs + 4);
1226 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1227 datalo, base, ofs);
1228 }
1229 if (bswap) {
1230 tcg_out_bswap32(s, datalo);
1231 tcg_out_bswap32(s, datahi);
1232 }
1233 }
1234 break;
1235 default:
1236 tcg_abort();
1237 }
1238 }
1239
1240 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1241 EAX. It will be useful once fixed-register globals are less
1242 common. */
1243 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
1244 int opc)
1245 {
1246 int data_reg, data_reg2 = 0;
1247 int addrlo_idx;
1248 #if defined(CONFIG_SOFTMMU)
1249 int mem_index, s_bits;
1250 uint8_t *label_ptr[2];
1251 #endif
1252
1253 data_reg = args[0];
1254 addrlo_idx = 1;
1255 if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
1256 data_reg2 = args[1];
1257 addrlo_idx = 2;
1258 }
1259
1260 #if defined(CONFIG_SOFTMMU)
1261 mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
1262 s_bits = opc & 3;
1263
1264 tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
1265 label_ptr, offsetof(CPUTLBEntry, addr_read));
1266
1267 /* TLB Hit. */
1268 tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc);
1269
1270 /* Record the current context of a load into ldst label */
1271 add_qemu_ldst_label(s,
1272 1,
1273 opc,
1274 data_reg,
1275 data_reg2,
1276 args[addrlo_idx],
1277 args[addrlo_idx + 1],
1278 mem_index,
1279 s->code_ptr,
1280 label_ptr);
1281 #else
1282 {
1283 int32_t offset = GUEST_BASE;
1284 int base = args[addrlo_idx];
1285 int seg = 0;
1286
1287 /* ??? We assume all operations have left us with register contents
1288 that are zero extended. So far this appears to be true. If we
1289 want to enforce this, we can either do an explicit zero-extension
1290 here, or (if GUEST_BASE == 0, or a segment register is in use)
1291 use the ADDR32 prefix. For now, do nothing. */
1292 if (GUEST_BASE && guest_base_flags) {
1293 seg = guest_base_flags;
1294 offset = 0;
1295 } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
1296 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
1297 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1298 base = TCG_REG_L1;
1299 offset = 0;
1300 }
1301
1302 tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, seg, opc);
1303 }
1304 #endif
1305 }
1306
1307 static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
1308 int base, tcg_target_long ofs, int seg,
1309 int sizeop)
1310 {
1311 #ifdef TARGET_WORDS_BIGENDIAN
1312 const int bswap = 1;
1313 #else
1314 const int bswap = 0;
1315 #endif
1316 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1317 we could perform the bswap twice to restore the original value
1318 instead of moving to the scratch. But as it is, the L constraint
1319 means that TCG_REG_L0 is definitely free here. */
1320 const int scratch = TCG_REG_L0;
1321
1322 switch (sizeop) {
1323 case 0:
1324 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
1325 datalo, base, ofs);
1326 break;
1327 case 1:
1328 if (bswap) {
1329 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1330 tcg_out_rolw_8(s, scratch);
1331 datalo = scratch;
1332 }
1333 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
1334 datalo, base, ofs);
1335 break;
1336 case 2:
1337 if (bswap) {
1338 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1339 tcg_out_bswap32(s, scratch);
1340 datalo = scratch;
1341 }
1342 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
1343 break;
1344 case 3:
1345 if (TCG_TARGET_REG_BITS == 64) {
1346 if (bswap) {
1347 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1348 tcg_out_bswap64(s, scratch);
1349 datalo = scratch;
1350 }
1351 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
1352 datalo, base, ofs);
1353 } else if (bswap) {
1354 tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
1355 tcg_out_bswap32(s, scratch);
1356 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
1357 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1358 tcg_out_bswap32(s, scratch);
1359 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
1360 } else {
1361 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
1362 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
1363 }
1364 break;
1365 default:
1366 tcg_abort();
1367 }
1368 }
1369
1370 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
1371 int opc)
1372 {
1373 int data_reg, data_reg2 = 0;
1374 int addrlo_idx;
1375 #if defined(CONFIG_SOFTMMU)
1376 int mem_index, s_bits;
1377 uint8_t *label_ptr[2];
1378 #endif
1379
1380 data_reg = args[0];
1381 addrlo_idx = 1;
1382 if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
1383 data_reg2 = args[1];
1384 addrlo_idx = 2;
1385 }
1386
1387 #if defined(CONFIG_SOFTMMU)
1388 mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
1389 s_bits = opc;
1390
1391 tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
1392 label_ptr, offsetof(CPUTLBEntry, addr_write));
1393
1394 /* TLB Hit. */
1395 tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc);
1396
1397 /* Record the current context of a store into ldst label */
1398 add_qemu_ldst_label(s,
1399 0,
1400 opc,
1401 data_reg,
1402 data_reg2,
1403 args[addrlo_idx],
1404 args[addrlo_idx + 1],
1405 mem_index,
1406 s->code_ptr,
1407 label_ptr);
1408 #else
1409 {
1410 int32_t offset = GUEST_BASE;
1411 int base = args[addrlo_idx];
1412 int seg = 0;
1413
1414 /* ??? We assume all operations have left us with register contents
1415 that are zero extended. So far this appears to be true. If we
1416 want to enforce this, we can either do an explicit zero-extension
1417 here, or (if GUEST_BASE == 0, or a segment register is in use)
1418 use the ADDR32 prefix. For now, do nothing. */
1419 if (GUEST_BASE && guest_base_flags) {
1420 seg = guest_base_flags;
1421 offset = 0;
1422 } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
1423 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
1424 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1425 base = TCG_REG_L1;
1426 offset = 0;
1427 }
1428
1429 tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, seg, opc);
1430 }
1431 #endif
1432 }
1433
1434 #if defined(CONFIG_SOFTMMU)
1435 /*
1436 * Record the context of a call to the out-of-line helper code for the slow path
1437 * of a load or store, so that we can later generate the correct helper code.
1438 */
1439 static void add_qemu_ldst_label(TCGContext *s,
1440 int is_ld,
1441 int opc,
1442 int data_reg,
1443 int data_reg2,
1444 int addrlo_reg,
1445 int addrhi_reg,
1446 int mem_index,
1447 uint8_t *raddr,
1448 uint8_t **label_ptr)
1449 {
1450 int idx;
1451 TCGLabelQemuLdst *label;
1452
1453 if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) {
1454 tcg_abort();
1455 }
1456
1457 idx = s->nb_qemu_ldst_labels++;
1458 label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx];
1459 label->is_ld = is_ld;
1460 label->opc = opc;
1461 label->datalo_reg = data_reg;
1462 label->datahi_reg = data_reg2;
1463 label->addrlo_reg = addrlo_reg;
1464 label->addrhi_reg = addrhi_reg;
1465 label->mem_index = mem_index;
1466 label->raddr = raddr;
1467 label->label_ptr[0] = label_ptr[0];
1468 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1469 label->label_ptr[1] = label_ptr[1];
1470 }
1471 }
1472
1473 /* See the GETPC definition in include/exec/exec-all.h. */
1474 static inline uintptr_t do_getpc(uint8_t *raddr)
1475 {
1476 return (uintptr_t)raddr - 1;
1477 }
1478
1479 /*
1480 * Generate code for the slow path for a load at the end of block
1481 * Generate code for the slow path of a load at the end of the block.
1482 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1483 {
1484 int opc = l->opc;
1485 int s_bits = opc & 3;
1486 TCGReg data_reg;
1487 uint8_t **label_ptr = &l->label_ptr[0];
1488
1489 /* resolve label address */
1490 *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
1491 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1492 *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
1493 }
1494
1495 if (TCG_TARGET_REG_BITS == 32) {
1496 int ofs = 0;
1497
1498 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1499 ofs += 4;
1500
1501 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1502 ofs += 4;
1503
1504 if (TARGET_LONG_BITS == 64) {
1505 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1506 ofs += 4;
1507 }
1508
1509 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
1510 ofs += 4;
1511
1512 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, do_getpc(l->raddr));
1513 } else {
1514 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1515 /* The second argument is already loaded with addrlo. */
1516 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
1517 l->mem_index);
1518 tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
1519 do_getpc(l->raddr));
1520 }
1521
1522 tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
1523
1524 data_reg = l->datalo_reg;
1525 switch(opc) {
1526 case 0 | 4:
1527 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1528 break;
1529 case 1 | 4:
1530 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1531 break;
1532 case 0:
1533 tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
1534 break;
1535 case 1:
1536 tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
1537 break;
1538 case 2:
1539 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1540 break;
1541 #if TCG_TARGET_REG_BITS == 64
1542 case 2 | 4:
1543 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1544 break;
1545 #endif
1546 case 3:
1547 if (TCG_TARGET_REG_BITS == 64) {
1548 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1549 } else if (data_reg == TCG_REG_EDX) {
1550 /* xchg %edx, %eax */
1551 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1552 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
1553 } else {
1554 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1555 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
1556 }
1557 break;
1558 default:
1559 tcg_abort();
1560 }
1561
1562 /* Jump to the code corresponding to the next IR of qemu_ld. */
1563 tcg_out_jmp(s, (tcg_target_long)l->raddr);
1564 }
1565
1566 /*
1567 * Generate code for the slow path for a store at the end of block
1568 * Generate code for the slow path of a store at the end of the block.
1569 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1570 {
1571 int opc = l->opc;
1572 int s_bits = opc & 3;
1573 uint8_t **label_ptr = &l->label_ptr[0];
1574
1575 /* resolve label address */
1576 *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
1577 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1578 *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
1579 }
1580
1581 if (TCG_TARGET_REG_BITS == 32) {
1582 int ofs = 0;
1583
1584 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1585 ofs += 4;
1586
1587 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1588 ofs += 4;
1589
1590 if (TARGET_LONG_BITS == 64) {
1591 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1592 ofs += 4;
1593 }
1594
1595 tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
1596 ofs += 4;
1597
1598 if (opc == 3) {
1599 tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
1600 ofs += 4;
1601 }
1602
1603 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
1604 ofs += 4;
1605
1606 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, do_getpc(l->raddr));
1607 } else {
1608 uintptr_t pc;
1609
1610 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1611 /* The second argument is already loaded with addrlo. */
1612 tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1613 tcg_target_call_iarg_regs[2], l->datalo_reg);
1614 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
1615 l->mem_index);
1616
1617 pc = do_getpc(l->raddr);
1618 if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
1619 tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[4], pc);
1620 } else if (pc == (int32_t)pc) {
1621 tcg_out_sti(s, TCG_TYPE_PTR, TCG_REG_ESP, 0, pc);
1622 } else {
1623 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RAX, pc);
1624 tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_RAX, TCG_REG_ESP, 0);
1625 }
1626 }
1627
1628 tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
1629
1630 tcg_out_jmp(s, (tcg_target_long)l->raddr);
1631 }
1632
1633 /*
1634 * Generate TB finalization at the end of block
1635 * Generate TB finalization at the end of the block.
1636 void tcg_out_tb_finalize(TCGContext *s)
1637 {
1638 int i;
1639 TCGLabelQemuLdst *label;
1640
1641 /* qemu_ld/st slow paths */
1642 for (i = 0; i < s->nb_qemu_ldst_labels; i++) {
1643 label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[i];
1644 if (label->is_ld) {
1645 tcg_out_qemu_ld_slow_path(s, label);
1646 } else {
1647 tcg_out_qemu_st_slow_path(s, label);
1648 }
1649 }
1650 }
1651 #endif /* CONFIG_SOFTMMU */
1652
1653 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1654 const TCGArg *args, const int *const_args)
1655 {
1656 int c, rexw = 0;
1657
1658 #if TCG_TARGET_REG_BITS == 64
1659 # define OP_32_64(x) \
1660 case glue(glue(INDEX_op_, x), _i64): \
1661 rexw = P_REXW; /* FALLTHRU */ \
1662 case glue(glue(INDEX_op_, x), _i32)
1663 #else
1664 # define OP_32_64(x) \
1665 case glue(glue(INDEX_op_, x), _i32)
1666 #endif
1667
1668 switch(opc) {
1669 case INDEX_op_exit_tb:
1670 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1671 tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
1672 break;
1673 case INDEX_op_goto_tb:
1674 if (s->tb_jmp_offset) {
1675 /* direct jump method */
1676 tcg_out8(s, OPC_JMP_long); /* jmp im */
1677 s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1678 tcg_out32(s, 0);
1679 } else {
1680 /* indirect jump method */
1681 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1682 (tcg_target_long)(s->tb_next + args[0]));
1683 }
1684 s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1685 break;
1686 case INDEX_op_call:
1687 if (const_args[0]) {
1688 tcg_out_calli(s, args[0]);
1689 } else {
1690 /* call *reg */
1691 tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
1692 }
1693 break;
1694 case INDEX_op_br:
1695 tcg_out_jxx(s, JCC_JMP, args[0], 0);
1696 break;
1697 case INDEX_op_movi_i32:
1698 tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1699 break;
1700 OP_32_64(ld8u):
1701 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1702 tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1703 break;
1704 OP_32_64(ld8s):
1705 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1706 break;
1707 OP_32_64(ld16u):
1708 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1709 tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1710 break;
1711 OP_32_64(ld16s):
1712 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1713 break;
1714 #if TCG_TARGET_REG_BITS == 64
1715 case INDEX_op_ld32u_i64:
1716 #endif
1717 case INDEX_op_ld_i32:
1718 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1719 break;
1720
1721 OP_32_64(st8):
1722 if (const_args[0]) {
1723 tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
1724 0, args[1], args[2]);
1725 tcg_out8(s, args[0]);
1726 } else {
1727 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1728 args[0], args[1], args[2]);
1729 }
1730 break;
1731 OP_32_64(st16):
1732 if (const_args[0]) {
1733 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
1734 0, args[1], args[2]);
1735 tcg_out16(s, args[0]);
1736 } else {
1737 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1738 args[0], args[1], args[2]);
1739 }
1740 break;
1741 #if TCG_TARGET_REG_BITS == 64
1742 case INDEX_op_st32_i64:
1743 #endif
1744 case INDEX_op_st_i32:
1745 if (const_args[0]) {
1746 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
1747 tcg_out32(s, args[0]);
1748 } else {
1749 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1750 }
1751 break;
1752
1753 OP_32_64(add):
1754 /* For 3-operand addition, use LEA. */
1755 if (args[0] != args[1]) {
1756 TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1757
1758 if (const_args[2]) {
1759 c3 = a2, a2 = -1;
1760 } else if (a0 == a2) {
1761 /* Watch out for dest = src + dest, since we've removed
1762 the matching constraint on the add. */
1763 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1764 break;
1765 }
1766
1767 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1768 break;
1769 }
1770 c = ARITH_ADD;
1771 goto gen_arith;
1772 OP_32_64(sub):
1773 c = ARITH_SUB;
1774 goto gen_arith;
1775 OP_32_64(and):
1776 c = ARITH_AND;
1777 goto gen_arith;
1778 OP_32_64(or):
1779 c = ARITH_OR;
1780 goto gen_arith;
1781 OP_32_64(xor):
1782 c = ARITH_XOR;
1783 goto gen_arith;
1784 gen_arith:
1785 if (const_args[2]) {
1786 tgen_arithi(s, c + rexw, args[0], args[2], 0);
1787 } else {
1788 tgen_arithr(s, c + rexw, args[0], args[2]);
1789 }
1790 break;
1791
1792 OP_32_64(mul):
1793 if (const_args[2]) {
1794 int32_t val;
1795 val = args[2];
1796 if (val == (int8_t)val) {
1797 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1798 tcg_out8(s, val);
1799 } else {
1800 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1801 tcg_out32(s, val);
1802 }
1803 } else {
1804 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1805 }
1806 break;
1807
1808 OP_32_64(div2):
1809 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1810 break;
1811 OP_32_64(divu2):
1812 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1813 break;
1814
1815 OP_32_64(shl):
1816 c = SHIFT_SHL;
1817 goto gen_shift;
1818 OP_32_64(shr):
1819 c = SHIFT_SHR;
1820 goto gen_shift;
1821 OP_32_64(sar):
1822 c = SHIFT_SAR;
1823 goto gen_shift;
1824 OP_32_64(rotl):
1825 c = SHIFT_ROL;
1826 goto gen_shift;
1827 OP_32_64(rotr):
1828 c = SHIFT_ROR;
1829 goto gen_shift;
1830 gen_shift:
1831 if (const_args[2]) {
1832 tcg_out_shifti(s, c + rexw, args[0], args[2]);
1833 } else {
1834 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1835 }
1836 break;
1837
1838 case INDEX_op_brcond_i32:
1839 tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1840 args[3], 0);
1841 break;
1842 case INDEX_op_setcond_i32:
1843 tcg_out_setcond32(s, args[3], args[0], args[1],
1844 args[2], const_args[2]);
1845 break;
1846 case INDEX_op_movcond_i32:
1847 tcg_out_movcond32(s, args[5], args[0], args[1],
1848 args[2], const_args[2], args[3]);
1849 break;
1850
1851 OP_32_64(bswap16):
1852 tcg_out_rolw_8(s, args[0]);
1853 break;
1854 OP_32_64(bswap32):
1855 tcg_out_bswap32(s, args[0]);
1856 break;
1857
1858 OP_32_64(neg):
1859 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
1860 break;
1861 OP_32_64(not):
1862 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
1863 break;
1864
1865 OP_32_64(ext8s):
1866 tcg_out_ext8s(s, args[0], args[1], rexw);
1867 break;
1868 OP_32_64(ext16s):
1869 tcg_out_ext16s(s, args[0], args[1], rexw);
1870 break;
1871 OP_32_64(ext8u):
1872 tcg_out_ext8u(s, args[0], args[1]);
1873 break;
1874 OP_32_64(ext16u):
1875 tcg_out_ext16u(s, args[0], args[1]);
1876 break;
1877
1878 case INDEX_op_qemu_ld8u:
1879 tcg_out_qemu_ld(s, args, 0);
1880 break;
1881 case INDEX_op_qemu_ld8s:
1882 tcg_out_qemu_ld(s, args, 0 | 4);
1883 break;
1884 case INDEX_op_qemu_ld16u:
1885 tcg_out_qemu_ld(s, args, 1);
1886 break;
1887 case INDEX_op_qemu_ld16s:
1888 tcg_out_qemu_ld(s, args, 1 | 4);
1889 break;
1890 #if TCG_TARGET_REG_BITS == 64
1891 case INDEX_op_qemu_ld32u:
1892 #endif
1893 case INDEX_op_qemu_ld32:
1894 tcg_out_qemu_ld(s, args, 2);
1895 break;
1896 case INDEX_op_qemu_ld64:
1897 tcg_out_qemu_ld(s, args, 3);
1898 break;
1899
1900 case INDEX_op_qemu_st8:
1901 tcg_out_qemu_st(s, args, 0);
1902 break;
1903 case INDEX_op_qemu_st16:
1904 tcg_out_qemu_st(s, args, 1);
1905 break;
1906 case INDEX_op_qemu_st32:
1907 tcg_out_qemu_st(s, args, 2);
1908 break;
1909 case INDEX_op_qemu_st64:
1910 tcg_out_qemu_st(s, args, 3);
1911 break;
1912
1913 OP_32_64(mulu2):
1914 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
1915 break;
1916 OP_32_64(muls2):
1917 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
1918 break;
1919 OP_32_64(add2):
1920 if (const_args[4]) {
1921 tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
1922 } else {
1923 tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
1924 }
1925 if (const_args[5]) {
1926 tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
1927 } else {
1928 tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
1929 }
1930 break;
1931 OP_32_64(sub2):
1932 if (const_args[4]) {
1933 tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
1934 } else {
1935 tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
1936 }
1937 if (const_args[5]) {
1938 tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
1939 } else {
1940 tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
1941 }
1942 break;
1943
1944 #if TCG_TARGET_REG_BITS == 32
1945 case INDEX_op_brcond2_i32:
1946 tcg_out_brcond2(s, args, const_args, 0);
1947 break;
1948 case INDEX_op_setcond2_i32:
1949 tcg_out_setcond2(s, args, const_args);
1950 break;
1951 #else /* TCG_TARGET_REG_BITS == 64 */
1952 case INDEX_op_movi_i64:
1953 tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
1954 break;
1955 case INDEX_op_ld32s_i64:
1956 tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
1957 break;
1958 case INDEX_op_ld_i64:
1959 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1960 break;
1961 case INDEX_op_st_i64:
1962 if (const_args[0]) {
1963 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
1964 0, args[1], args[2]);
1965 tcg_out32(s, args[0]);
1966 } else {
1967 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1968 }
1969 break;
1970 case INDEX_op_qemu_ld32s:
1971 tcg_out_qemu_ld(s, args, 2 | 4);
1972 break;
1973
1974 case INDEX_op_brcond_i64:
1975 tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
1976 args[3], 0);
1977 break;
1978 case INDEX_op_setcond_i64:
1979 tcg_out_setcond64(s, args[3], args[0], args[1],
1980 args[2], const_args[2]);
1981 break;
1982 case INDEX_op_movcond_i64:
1983 tcg_out_movcond64(s, args[5], args[0], args[1],
1984 args[2], const_args[2], args[3]);
1985 break;
1986
1987 case INDEX_op_bswap64_i64:
1988 tcg_out_bswap64(s, args[0]);
1989 break;
1990 case INDEX_op_ext32u_i64:
1991 tcg_out_ext32u(s, args[0], args[1]);
1992 break;
1993 case INDEX_op_ext32s_i64:
1994 tcg_out_ext32s(s, args[0], args[1]);
1995 break;
1996 #endif
1997
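/* Only the deposit forms that map onto x86 partial-register stores are
   accepted: the low byte, the second byte (bits 8..15, reached via the
   args[0] + 4 AH/BH/CH/DH encoding) and the low 16 bits.  The "Q"
   constraints below keep both registers in the eax..ebx range so these
   sub-registers are encodable.  */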
1998 OP_32_64(deposit):
1999 if (args[3] == 0 && args[4] == 8) {
2000 /* load bits 0..7 */
2001 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
2002 args[2], args[0]);
2003 } else if (args[3] == 8 && args[4] == 8) {
2004 /* load bits 8..15 */
2005 tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
2006 } else if (args[3] == 0 && args[4] == 16) {
2007 /* load bits 0..15 */
2008 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
2009 } else {
2010 tcg_abort();
2011 }
2012 break;
2013
2014 default:
2015 tcg_abort();
2016 }
2017
2018 #undef OP_32_64
2019 }
2020
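/* A rough legend for the constraint letters used below (the target-specific
   ones are decoded by target_parse_constraint earlier in this file): 'r' any
   register; 'q' a byte-addressable register; 'Q' a register with an
   addressable second byte (eax..ebx); 'a'/'b'/'c'/'d' the fixed registers
   eax/ebx/ecx/edx; 'L' a register usable for qemu_ld/st addressing,
   excluding those reserved for the slow-path call; 'e' a sign-extended
   32-bit immediate; 'Z' a zero-extended 32-bit immediate; 'i' any
   immediate; a digit ties the operand to the output with that index.  */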
2021 static const TCGTargetOpDef x86_op_defs[] = {
2022 { INDEX_op_exit_tb, { } },
2023 { INDEX_op_goto_tb, { } },
2024 { INDEX_op_call, { "ri" } },
2025 { INDEX_op_br, { } },
2026 { INDEX_op_mov_i32, { "r", "r" } },
2027 { INDEX_op_movi_i32, { "r" } },
2028 { INDEX_op_ld8u_i32, { "r", "r" } },
2029 { INDEX_op_ld8s_i32, { "r", "r" } },
2030 { INDEX_op_ld16u_i32, { "r", "r" } },
2031 { INDEX_op_ld16s_i32, { "r", "r" } },
2032 { INDEX_op_ld_i32, { "r", "r" } },
2033 { INDEX_op_st8_i32, { "qi", "r" } },
2034 { INDEX_op_st16_i32, { "ri", "r" } },
2035 { INDEX_op_st_i32, { "ri", "r" } },
2036
2037 { INDEX_op_add_i32, { "r", "r", "ri" } },
2038 { INDEX_op_sub_i32, { "r", "0", "ri" } },
2039 { INDEX_op_mul_i32, { "r", "0", "ri" } },
2040 { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
2041 { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
2042 { INDEX_op_and_i32, { "r", "0", "ri" } },
2043 { INDEX_op_or_i32, { "r", "0", "ri" } },
2044 { INDEX_op_xor_i32, { "r", "0", "ri" } },
2045
2046 { INDEX_op_shl_i32, { "r", "0", "ci" } },
2047 { INDEX_op_shr_i32, { "r", "0", "ci" } },
2048 { INDEX_op_sar_i32, { "r", "0", "ci" } },
2049 { INDEX_op_rotl_i32, { "r", "0", "ci" } },
2050 { INDEX_op_rotr_i32, { "r", "0", "ci" } },
2051
2052 { INDEX_op_brcond_i32, { "r", "ri" } },
2053
2054 { INDEX_op_bswap16_i32, { "r", "0" } },
2055 { INDEX_op_bswap32_i32, { "r", "0" } },
2056
2057 { INDEX_op_neg_i32, { "r", "0" } },
2058
2059 { INDEX_op_not_i32, { "r", "0" } },
2060
2061 { INDEX_op_ext8s_i32, { "r", "q" } },
2062 { INDEX_op_ext16s_i32, { "r", "r" } },
2063 { INDEX_op_ext8u_i32, { "r", "q" } },
2064 { INDEX_op_ext16u_i32, { "r", "r" } },
2065
2066 { INDEX_op_setcond_i32, { "q", "r", "ri" } },
2067
2068 { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
2069 #if TCG_TARGET_HAS_movcond_i32
2070 { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
2071 #endif
2072
2073 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
2074 { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
2075 { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2076 { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2077
2078 #if TCG_TARGET_REG_BITS == 32
2079 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
2080 { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
2081 #else
2082 { INDEX_op_mov_i64, { "r", "r" } },
2083 { INDEX_op_movi_i64, { "r" } },
2084 { INDEX_op_ld8u_i64, { "r", "r" } },
2085 { INDEX_op_ld8s_i64, { "r", "r" } },
2086 { INDEX_op_ld16u_i64, { "r", "r" } },
2087 { INDEX_op_ld16s_i64, { "r", "r" } },
2088 { INDEX_op_ld32u_i64, { "r", "r" } },
2089 { INDEX_op_ld32s_i64, { "r", "r" } },
2090 { INDEX_op_ld_i64, { "r", "r" } },
2091 { INDEX_op_st8_i64, { "ri", "r" } },
2092 { INDEX_op_st16_i64, { "ri", "r" } },
2093 { INDEX_op_st32_i64, { "ri", "r" } },
2094 { INDEX_op_st_i64, { "re", "r" } },
2095
2096 { INDEX_op_add_i64, { "r", "r", "re" } },
2097 { INDEX_op_mul_i64, { "r", "0", "re" } },
2098 { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
2099 { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
2100 { INDEX_op_sub_i64, { "r", "0", "re" } },
2101 { INDEX_op_and_i64, { "r", "0", "reZ" } },
2102 { INDEX_op_or_i64, { "r", "0", "re" } },
2103 { INDEX_op_xor_i64, { "r", "0", "re" } },
2104
2105 { INDEX_op_shl_i64, { "r", "0", "ci" } },
2106 { INDEX_op_shr_i64, { "r", "0", "ci" } },
2107 { INDEX_op_sar_i64, { "r", "0", "ci" } },
2108 { INDEX_op_rotl_i64, { "r", "0", "ci" } },
2109 { INDEX_op_rotr_i64, { "r", "0", "ci" } },
2110
2111 { INDEX_op_brcond_i64, { "r", "re" } },
2112 { INDEX_op_setcond_i64, { "r", "r", "re" } },
2113
2114 { INDEX_op_bswap16_i64, { "r", "0" } },
2115 { INDEX_op_bswap32_i64, { "r", "0" } },
2116 { INDEX_op_bswap64_i64, { "r", "0" } },
2117 { INDEX_op_neg_i64, { "r", "0" } },
2118 { INDEX_op_not_i64, { "r", "0" } },
2119
2120 { INDEX_op_ext8s_i64, { "r", "r" } },
2121 { INDEX_op_ext16s_i64, { "r", "r" } },
2122 { INDEX_op_ext32s_i64, { "r", "r" } },
2123 { INDEX_op_ext8u_i64, { "r", "r" } },
2124 { INDEX_op_ext16u_i64, { "r", "r" } },
2125 { INDEX_op_ext32u_i64, { "r", "r" } },
2126
2127 { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
2128 { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
2129
2130 { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
2131 { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
2132 { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
2133 { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
2134 #endif
2135
2136 #if TCG_TARGET_REG_BITS == 64
2137 { INDEX_op_qemu_ld8u, { "r", "L" } },
2138 { INDEX_op_qemu_ld8s, { "r", "L" } },
2139 { INDEX_op_qemu_ld16u, { "r", "L" } },
2140 { INDEX_op_qemu_ld16s, { "r", "L" } },
2141 { INDEX_op_qemu_ld32, { "r", "L" } },
2142 { INDEX_op_qemu_ld32u, { "r", "L" } },
2143 { INDEX_op_qemu_ld32s, { "r", "L" } },
2144 { INDEX_op_qemu_ld64, { "r", "L" } },
2145
2146 { INDEX_op_qemu_st8, { "L", "L" } },
2147 { INDEX_op_qemu_st16, { "L", "L" } },
2148 { INDEX_op_qemu_st32, { "L", "L" } },
2149 { INDEX_op_qemu_st64, { "L", "L" } },
2150 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2151 { INDEX_op_qemu_ld8u, { "r", "L" } },
2152 { INDEX_op_qemu_ld8s, { "r", "L" } },
2153 { INDEX_op_qemu_ld16u, { "r", "L" } },
2154 { INDEX_op_qemu_ld16s, { "r", "L" } },
2155 { INDEX_op_qemu_ld32, { "r", "L" } },
2156 { INDEX_op_qemu_ld64, { "r", "r", "L" } },
2157
2158 { INDEX_op_qemu_st8, { "cb", "L" } },
2159 { INDEX_op_qemu_st16, { "L", "L" } },
2160 { INDEX_op_qemu_st32, { "L", "L" } },
2161 { INDEX_op_qemu_st64, { "L", "L", "L" } },
2162 #else
2163 { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
2164 { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
2165 { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
2166 { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
2167 { INDEX_op_qemu_ld32, { "r", "L", "L" } },
2168 { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
2169
2170 { INDEX_op_qemu_st8, { "cb", "L", "L" } },
2171 { INDEX_op_qemu_st16, { "L", "L", "L" } },
2172 { INDEX_op_qemu_st32, { "L", "L", "L" } },
2173 { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
2174 #endif
2175 { -1 },
2176 };
2177
2178 static int tcg_target_callee_save_regs[] = {
2179 #if TCG_TARGET_REG_BITS == 64
2180 TCG_REG_RBP,
2181 TCG_REG_RBX,
2182 #if defined(_WIN64)
2183 TCG_REG_RDI,
2184 TCG_REG_RSI,
2185 #endif
2186 TCG_REG_R12,
2187 TCG_REG_R13,
2188 TCG_REG_R14, /* Currently used for the global env. */
2189 TCG_REG_R15,
2190 #else
2191 TCG_REG_EBP, /* Currently used for the global env. */
2192 TCG_REG_EBX,
2193 TCG_REG_ESI,
2194 TCG_REG_EDI,
2195 #endif
2196 };
2197
2198 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2199 and tcg_register_jit. */
2200
2201 #define PUSH_SIZE \
2202 ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2203 * (TCG_TARGET_REG_BITS / 8))
2204
2205 #define FRAME_SIZE \
2206 ((PUSH_SIZE \
2207 + TCG_STATIC_CALL_ARGS_SIZE \
2208 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2209 + TCG_TARGET_STACK_ALIGN - 1) \
2210 & ~(TCG_TARGET_STACK_ALIGN - 1))
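/* For example, on a 64-bit non-Windows host the list above names six
   callee-saved registers, so PUSH_SIZE = (1 + 6) * 8 = 56 bytes, the extra
   slot being the return address pushed by the caller.  FRAME_SIZE then adds
   the static call-argument area and the TCG temp buffer and rounds the
   total up to TCG_TARGET_STACK_ALIGN.  */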
2211
2212 /* Generate global QEMU prologue and epilogue code */
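/* Roughly, on a 64-bit SysV host the emitted code looks like this (a
   sketch only; register choices follow the tables above):

       push %rbp; push %rbx; push %r12 .. push %r15   # callee-saved regs
       mov  %rdi, %r14                  # env argument -> TCG_AREG0
       sub  $stack_addend, %rsp         # call-arg area + TCG temps
       jmp  *%rsi                       # enter the translation block
   tb_ret_addr:
       add  $stack_addend, %rsp
       pop  %r15 .. pop %r12; pop %rbx; pop %rbp
       ret                              # %eax carries the TB exit value
*/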
2213 static void tcg_target_qemu_prologue(TCGContext *s)
2214 {
2215 int i, stack_addend;
2216
2217 /* TB prologue */
2218
2219 /* Reserve some stack space, also for TCG temps. */
2220 stack_addend = FRAME_SIZE - PUSH_SIZE;
2221 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2222 CPU_TEMP_BUF_NLONGS * sizeof(long));
2223
2224 /* Save all callee saved registers. */
2225 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2226 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2227 }
2228
2229 #if TCG_TARGET_REG_BITS == 32
2230 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
2231 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
2232 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2233 /* jmp *tb. */
2234 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
2235 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
2236 + stack_addend);
2237 #else
2238 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2239 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2240 /* jmp *tb. */
2241 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
2242 #endif
2243
2244 /* TB epilogue */
2245 tb_ret_addr = s->code_ptr;
2246
2247 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
2248
2249 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2250 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2251 }
2252 tcg_out_opc(s, OPC_RET, 0, 0, 0);
2253
2254 #if !defined(CONFIG_SOFTMMU)
2255 /* Try to set up a segment register to point to GUEST_BASE. */
2256 if (GUEST_BASE) {
2257 setup_guest_base_seg();
2258 }
2259 #endif
2260 }
2261
2262 static void tcg_target_init(TCGContext *s)
2263 {
2264 /* For 32-bit, it is overwhelmingly likely that the host supports cmov,
2265 but we still need to check at runtime; if cmov turns out to be
2266 unavailable, we use a small forward branch instead. */
2267 #ifndef have_cmov
2268 {
2269 unsigned a, b, c, d;
2270 have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
2271 }
2272 #endif
2273
2274 if (TCG_TARGET_REG_BITS == 64) {
2275 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2276 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2277 } else {
2278 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2279 }
2280
2281 tcg_regset_clear(tcg_target_call_clobber_regs);
2282 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2283 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2284 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2285 if (TCG_TARGET_REG_BITS == 64) {
2286 #if !defined(_WIN64)
2287 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2288 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2289 #endif
2290 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2291 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2292 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2293 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2294 }
2295
2296 tcg_regset_clear(s->reserved_regs);
2297 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2298
2299 tcg_add_target_add_op_defs(x86_op_defs);
2300 }
2301
2302 typedef struct {
2303 DebugFrameCIE cie;
2304 DebugFrameFDEHeader fde;
2305 uint8_t fde_def_cfa[4];
2306 uint8_t fde_reg_ofs[14];
2307 } DebugFrame;
2308
2309 /* We're expecting a 2-byte uleb128 encoded value. */
2310 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
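/* In the FDEs below FRAME_SIZE is emitted as a two-byte uleb128:
   (FRAME_SIZE & 0x7f) | 0x80 gives the low seven bits with the
   continuation bit set, and FRAME_SIZE >> 7 supplies the rest, hence the
   build-time check that the value fits in 14 bits.  Each DW_CFA_offset
   entry is 0x80 | dwarf-register-number, followed by the register's save
   slot offset from the CFA expressed in units of the data alignment.  */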
2311
2312 #if !defined(__ELF__)
2313 /* Host machine without ELF. */
2314 #elif TCG_TARGET_REG_BITS == 64
2315 #define ELF_HOST_MACHINE EM_X86_64
2316 static DebugFrame debug_frame = {
2317 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2318 .cie.id = -1,
2319 .cie.version = 1,
2320 .cie.code_align = 1,
2321 .cie.data_align = 0x78, /* sleb128 -8 */
2322 .cie.return_column = 16,
2323
2324 /* Total FDE size does not include the "len" member. */
2325 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2326
2327 .fde_def_cfa = {
2328 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2329 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2330 (FRAME_SIZE >> 7)
2331 },
2332 .fde_reg_ofs = {
2333 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2334 /* The following ordering must match tcg_target_callee_save_regs. */
2335 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2336 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2337 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2338 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2339 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2340 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2341 }
2342 };
2343 #else
2344 #define ELF_HOST_MACHINE EM_386
2345 static DebugFrame debug_frame = {
2346 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2347 .cie.id = -1,
2348 .cie.version = 1,
2349 .cie.code_align = 1,
2350 .cie.data_align = 0x7c, /* sleb128 -4 */
2351 .cie.return_column = 8,
2352
2353 /* Total FDE size does not include the "len" member. */
2354 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2355
2356 .fde_def_cfa = {
2357 12, 4, /* DW_CFA_def_cfa %esp, ... */
2358 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2359 (FRAME_SIZE >> 7)
2360 },
2361 .fde_reg_ofs = {
2362 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2363 /* The following ordering must match tcg_target_callee_save_regs. */
2364 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2365 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2366 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2367 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2368 }
2369 };
2370 #endif
2371
2372 #if defined(ELF_HOST_MACHINE)
2373 void tcg_register_jit(void *buf, size_t buf_size)
2374 {
2375 debug_frame.fde.func_start = (tcg_target_long) buf;
2376 debug_frame.fde.func_len = buf_size;
2377
2378 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2379 }
2380 #endif