tcg/i386/tcg-target.c
1 /*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "tcg-be-ldst.h"
26
27 #ifndef NDEBUG
28 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
29 #if TCG_TARGET_REG_BITS == 64
30 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
31 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
32 #else
33 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
34 #endif
35 };
36 #endif
37
38 static const int tcg_target_reg_alloc_order[] = {
39 #if TCG_TARGET_REG_BITS == 64
40 TCG_REG_RBP,
41 TCG_REG_RBX,
42 TCG_REG_R12,
43 TCG_REG_R13,
44 TCG_REG_R14,
45 TCG_REG_R15,
46 TCG_REG_R10,
47 TCG_REG_R11,
48 TCG_REG_R9,
49 TCG_REG_R8,
50 TCG_REG_RCX,
51 TCG_REG_RDX,
52 TCG_REG_RSI,
53 TCG_REG_RDI,
54 TCG_REG_RAX,
55 #else
56 TCG_REG_EBX,
57 TCG_REG_ESI,
58 TCG_REG_EDI,
59 TCG_REG_EBP,
60 TCG_REG_ECX,
61 TCG_REG_EDX,
62 TCG_REG_EAX,
63 #endif
64 };
65
66 static const int tcg_target_call_iarg_regs[] = {
67 #if TCG_TARGET_REG_BITS == 64
68 #if defined(_WIN64)
69 TCG_REG_RCX,
70 TCG_REG_RDX,
71 #else
72 TCG_REG_RDI,
73 TCG_REG_RSI,
74 TCG_REG_RDX,
75 TCG_REG_RCX,
76 #endif
77 TCG_REG_R8,
78 TCG_REG_R9,
79 #else
80 /* 32 bit mode uses stack based calling convention (GCC default). */
81 #endif
82 };
83
84 static const int tcg_target_call_oarg_regs[] = {
85 TCG_REG_EAX,
86 #if TCG_TARGET_REG_BITS == 32
87 TCG_REG_EDX
88 #endif
89 };
90
91 /* Registers used with L constraint, which are the first argument
92 registers on x86_64, and two arbitrary call-clobbered registers on
93 i386. */
94 #if TCG_TARGET_REG_BITS == 64
95 # define TCG_REG_L0 tcg_target_call_iarg_regs[0]
96 # define TCG_REG_L1 tcg_target_call_iarg_regs[1]
97 #else
98 # define TCG_REG_L0 TCG_REG_EAX
99 # define TCG_REG_L1 TCG_REG_EDX
100 #endif
101
102 /* For 32-bit, we are going to attempt to determine at runtime whether cmov
103 is available. However, the host compiler must supply <cpuid.h>, as we're
104 not going to go so far as our own inline assembly. */
105 #if TCG_TARGET_REG_BITS == 64
106 # define have_cmov 1
107 #elif defined(CONFIG_CPUID_H)
108 #include <cpuid.h>
109 static bool have_cmov;
110 #else
111 # define have_cmov 0
112 #endif
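
/* When CONFIG_CPUID_H is available, have_cmov is left false here and is
   expected to be filled in during target initialisation.  A minimal sketch
   of such a probe, assuming GCC's <cpuid.h> interface (__get_cpuid and
   bit_CMOV, i.e. CPUID leaf 1, EDX bit 15):

       unsigned a, b, c, d;
       have_cmov = __get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV);
*/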
113
114 static uint8_t *tb_ret_addr;
115
116 static void patch_reloc(uint8_t *code_ptr, int type,
117 intptr_t value, intptr_t addend)
118 {
119 value += addend;
120 switch(type) {
121 case R_386_PC32:
122 value -= (uintptr_t)code_ptr;
123 if (value != (int32_t)value) {
124 tcg_abort();
125 }
126 *(uint32_t *)code_ptr = value;
127 break;
128 case R_386_PC8:
129 value -= (uintptr_t)code_ptr;
130 if (value != (int8_t)value) {
131 tcg_abort();
132 }
133 *(uint8_t *)code_ptr = value;
134 break;
135 default:
136 tcg_abort();
137 }
138 }
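
/* Both relocation kinds are PC-relative.  The addend supplied by the caller
   (via tcg_out_reloc) is expected to account for the displacement being
   measured from the end of the instruction rather than from the start of
   the displacement field itself, e.g. -4 for R_386_PC32 and -1 for
   R_386_PC8 as used by tcg_out_jxx() below.  */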
139
140 /* parse target specific constraints */
141 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
142 {
143 const char *ct_str;
144
145 ct_str = *pct_str;
146 switch(ct_str[0]) {
147 case 'a':
148 ct->ct |= TCG_CT_REG;
149 tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
150 break;
151 case 'b':
152 ct->ct |= TCG_CT_REG;
153 tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
154 break;
155 case 'c':
156 ct->ct |= TCG_CT_REG;
157 tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
158 break;
159 case 'd':
160 ct->ct |= TCG_CT_REG;
161 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
162 break;
163 case 'S':
164 ct->ct |= TCG_CT_REG;
165 tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
166 break;
167 case 'D':
168 ct->ct |= TCG_CT_REG;
169 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
170 break;
171 case 'q':
172 ct->ct |= TCG_CT_REG;
173 if (TCG_TARGET_REG_BITS == 64) {
174 tcg_regset_set32(ct->u.regs, 0, 0xffff);
175 } else {
176 tcg_regset_set32(ct->u.regs, 0, 0xf);
177 }
178 break;
179 case 'Q':
180 ct->ct |= TCG_CT_REG;
181 tcg_regset_set32(ct->u.regs, 0, 0xf);
182 break;
183 case 'r':
184 ct->ct |= TCG_CT_REG;
185 if (TCG_TARGET_REG_BITS == 64) {
186 tcg_regset_set32(ct->u.regs, 0, 0xffff);
187 } else {
188 tcg_regset_set32(ct->u.regs, 0, 0xff);
189 }
190 break;
191
192 /* qemu_ld/st address constraint */
193 case 'L':
194 ct->ct |= TCG_CT_REG;
195 if (TCG_TARGET_REG_BITS == 64) {
196 tcg_regset_set32(ct->u.regs, 0, 0xffff);
197 } else {
198 tcg_regset_set32(ct->u.regs, 0, 0xff);
199 }
200 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
201 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
202 break;
203
204 case 'e':
205 ct->ct |= TCG_CT_CONST_S32;
206 break;
207 case 'Z':
208 ct->ct |= TCG_CT_CONST_U32;
209 break;
210
211 default:
212 return -1;
213 }
214 ct_str++;
215 *pct_str = ct_str;
216 return 0;
217 }
218
219 /* test if a constant matches the constraint */
220 static inline int tcg_target_const_match(tcg_target_long val,
221 const TCGArgConstraint *arg_ct)
222 {
223 int ct = arg_ct->ct;
224 if (ct & TCG_CT_CONST) {
225 return 1;
226 }
227 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
228 return 1;
229 }
230 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
231 return 1;
232 }
233 return 0;
234 }
235
236 #if TCG_TARGET_REG_BITS == 64
237 # define LOWREGMASK(x) ((x) & 7)
238 #else
239 # define LOWREGMASK(x) (x)
240 #endif
241
242 #define P_EXT 0x100 /* 0x0f opcode prefix */
243 #define P_DATA16 0x200 /* 0x66 opcode prefix */
244 #if TCG_TARGET_REG_BITS == 64
245 # define P_ADDR32 0x400 /* 0x67 opcode prefix */
246 # define P_REXW 0x800 /* Set REX.W = 1 */
247 # define P_REXB_R 0x1000 /* REG field as byte register */
248 # define P_REXB_RM 0x2000 /* R/M field as byte register */
249 # define P_GS 0x4000 /* gs segment override */
250 #else
251 # define P_ADDR32 0
252 # define P_REXW 0
253 # define P_REXB_R 0
254 # define P_REXB_RM 0
255 # define P_GS 0
256 #endif
257
258 #define OPC_ARITH_EvIz (0x81)
259 #define OPC_ARITH_EvIb (0x83)
260 #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
261 #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
262 #define OPC_BSWAP (0xc8 | P_EXT)
263 #define OPC_CALL_Jz (0xe8)
264 #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
265 #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
266 #define OPC_DEC_r32 (0x48)
267 #define OPC_IMUL_GvEv (0xaf | P_EXT)
268 #define OPC_IMUL_GvEvIb (0x6b)
269 #define OPC_IMUL_GvEvIz (0x69)
270 #define OPC_INC_r32 (0x40)
271 #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
272 #define OPC_JCC_short (0x70) /* ... plus condition code */
273 #define OPC_JMP_long (0xe9)
274 #define OPC_JMP_short (0xeb)
275 #define OPC_LEA (0x8d)
276 #define OPC_MOVB_EvGv (0x88) /* stores, more or less */
277 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
278 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
279 #define OPC_MOVB_EvIz (0xc6)
280 #define OPC_MOVL_EvIz (0xc7)
281 #define OPC_MOVL_Iv (0xb8)
282 #define OPC_MOVSBL (0xbe | P_EXT)
283 #define OPC_MOVSWL (0xbf | P_EXT)
284 #define OPC_MOVSLQ (0x63 | P_REXW)
285 #define OPC_MOVZBL (0xb6 | P_EXT)
286 #define OPC_MOVZWL (0xb7 | P_EXT)
287 #define OPC_POP_r32 (0x58)
288 #define OPC_PUSH_r32 (0x50)
289 #define OPC_PUSH_Iv (0x68)
290 #define OPC_PUSH_Ib (0x6a)
291 #define OPC_RET (0xc3)
292 #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
293 #define OPC_SHIFT_1 (0xd1)
294 #define OPC_SHIFT_Ib (0xc1)
295 #define OPC_SHIFT_cl (0xd3)
296 #define OPC_TESTL (0x85)
297 #define OPC_XCHG_ax_r32 (0x90)
298
299 #define OPC_GRP3_Ev (0xf7)
300 #define OPC_GRP5 (0xff)
301
302 /* Group 1 opcode extensions for 0x80-0x83.
303 These are also used as modifiers for OPC_ARITH. */
304 #define ARITH_ADD 0
305 #define ARITH_OR 1
306 #define ARITH_ADC 2
307 #define ARITH_SBB 3
308 #define ARITH_AND 4
309 #define ARITH_SUB 5
310 #define ARITH_XOR 6
311 #define ARITH_CMP 7
312
313 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
314 #define SHIFT_ROL 0
315 #define SHIFT_ROR 1
316 #define SHIFT_SHL 4
317 #define SHIFT_SHR 5
318 #define SHIFT_SAR 7
319
320 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
321 #define EXT3_NOT 2
322 #define EXT3_NEG 3
323 #define EXT3_MUL 4
324 #define EXT3_IMUL 5
325 #define EXT3_DIV 6
326 #define EXT3_IDIV 7
327
328 /* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
329 #define EXT5_INC_Ev 0
330 #define EXT5_DEC_Ev 1
331 #define EXT5_CALLN_Ev 2
332 #define EXT5_JMPN_Ev 4
333
334 /* Condition codes to be added to OPC_JCC_{long,short}. */
335 #define JCC_JMP (-1)
336 #define JCC_JO 0x0
337 #define JCC_JNO 0x1
338 #define JCC_JB 0x2
339 #define JCC_JAE 0x3
340 #define JCC_JE 0x4
341 #define JCC_JNE 0x5
342 #define JCC_JBE 0x6
343 #define JCC_JA 0x7
344 #define JCC_JS 0x8
345 #define JCC_JNS 0x9
346 #define JCC_JP 0xa
347 #define JCC_JNP 0xb
348 #define JCC_JL 0xc
349 #define JCC_JGE 0xd
350 #define JCC_JLE 0xe
351 #define JCC_JG 0xf
352
353 static const uint8_t tcg_cond_to_jcc[] = {
354 [TCG_COND_EQ] = JCC_JE,
355 [TCG_COND_NE] = JCC_JNE,
356 [TCG_COND_LT] = JCC_JL,
357 [TCG_COND_GE] = JCC_JGE,
358 [TCG_COND_LE] = JCC_JLE,
359 [TCG_COND_GT] = JCC_JG,
360 [TCG_COND_LTU] = JCC_JB,
361 [TCG_COND_GEU] = JCC_JAE,
362 [TCG_COND_LEU] = JCC_JBE,
363 [TCG_COND_GTU] = JCC_JA,
364 };
365
366 #if TCG_TARGET_REG_BITS == 64
367 static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
368 {
369 int rex;
370
371 if (opc & P_GS) {
372 tcg_out8(s, 0x65);
373 }
374 if (opc & P_DATA16) {
375 /* We should never be asking for both 16 and 64-bit operation. */
376 assert((opc & P_REXW) == 0);
377 tcg_out8(s, 0x66);
378 }
379 if (opc & P_ADDR32) {
380 tcg_out8(s, 0x67);
381 }
382
383 rex = 0;
384 rex |= (opc & P_REXW) >> 8; /* REX.W */
385 rex |= (r & 8) >> 1; /* REX.R */
386 rex |= (x & 8) >> 2; /* REX.X */
387 rex |= (rm & 8) >> 3; /* REX.B */
388
389 /* P_REXB_{R,RM} indicates that the given register is the low byte.
390 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
391 as otherwise the encoding indicates %[abcd]h. Note that the values
392 that are ORed in merely indicate that the REX byte must be present;
393 those bits get discarded in output. */
394 rex |= opc & (r >= 4 ? P_REXB_R : 0);
395 rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
396
397 if (rex) {
398 tcg_out8(s, (uint8_t)(rex | 0x40));
399 }
400
401 if (opc & P_EXT) {
402 tcg_out8(s, 0x0f);
403 }
404 tcg_out8(s, opc);
405 }
406 #else
407 static void tcg_out_opc(TCGContext *s, int opc)
408 {
409 if (opc & P_DATA16) {
410 tcg_out8(s, 0x66);
411 }
412 if (opc & P_EXT) {
413 tcg_out8(s, 0x0f);
414 }
415 tcg_out8(s, opc);
416 }
417 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
418 the 32-bit compilation paths. This method works with all versions of gcc,
419 whereas relying on the optimizer to eliminate them may not. */
420 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
421 #endif
422
423 static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
424 {
425 tcg_out_opc(s, opc, r, rm, 0);
426 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
427 }
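
/* Worked example of the byte sequence produced by tcg_out_modrm(): for a
   64-bit register-to-register add, tcg_out_modrm(s, OPC_ADD_GvEv + P_REXW,
   TCG_REG_R8, TCG_REG_RDX) emits 0x4c 0x03 0xc2: a REX prefix with W and R
   set, the ADD Gv,Ev opcode, and a ModRM byte with mod=11, reg=%r8,
   rm=%rdx, i.e. "addq %rdx, %r8".  */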
428
429 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
430 Either RM or INDEX may be omitted by passing a negative value. In 64-bit
431 mode for absolute addresses, ~RM is the size of the immediate operand
432 that will follow the instruction. */
433
434 static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
435 int index, int shift, intptr_t offset)
436 {
437 int mod, len;
438
439 if (index < 0 && rm < 0) {
440 if (TCG_TARGET_REG_BITS == 64) {
441 /* Try for a rip-relative addressing mode. This has replaced
442 the 32-bit-mode absolute addressing encoding. */
443 intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
444 intptr_t disp = offset - pc;
445 if (disp == (int32_t)disp) {
446 tcg_out_opc(s, opc, r, 0, 0);
447 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
448 tcg_out32(s, disp);
449 return;
450 }
451
452 /* Try for an absolute address encoding. This requires the
453 use of the MODRM+SIB encoding and is therefore larger than
454 rip-relative addressing. */
455 if (offset == (int32_t)offset) {
456 tcg_out_opc(s, opc, r, 0, 0);
457 tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
458 tcg_out8(s, (4 << 3) | 5);
459 tcg_out32(s, offset);
460 return;
461 }
462
463 /* ??? The memory isn't directly addressable. */
464 tcg_abort();
465 } else {
466 /* Absolute address. */
467 tcg_out_opc(s, opc, r, 0, 0);
468 tcg_out8(s, (r << 3) | 5);
469 tcg_out32(s, offset);
470 return;
471 }
472 }
473
474 /* Find the length of the immediate addend. Note that the encoding
475 that would be used for (%ebp) indicates absolute addressing. */
476 if (rm < 0) {
477 mod = 0, len = 4, rm = 5;
478 } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
479 mod = 0, len = 0;
480 } else if (offset == (int8_t)offset) {
481 mod = 0x40, len = 1;
482 } else {
483 mod = 0x80, len = 4;
484 }
485
486 /* Use a single byte MODRM format if possible. Note that the encoding
487 that would be used for %esp is the escape to the two byte form. */
488 if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
489 /* Single byte MODRM format. */
490 tcg_out_opc(s, opc, r, rm, 0);
491 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
492 } else {
493 /* Two byte MODRM+SIB format. */
494
495 /* Note that the encoding that would place %esp into the index
496 field indicates no index register. In 64-bit mode, the REX.X
497 bit counts, so %r12 can be used as the index. */
498 if (index < 0) {
499 index = 4;
500 } else {
501 assert(index != TCG_REG_ESP);
502 }
503
504 tcg_out_opc(s, opc, r, rm, index);
505 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
506 tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
507 }
508
509 if (len == 1) {
510 tcg_out8(s, offset);
511 } else if (len == 4) {
512 tcg_out32(s, offset);
513 }
514 }
515
516 /* A simplification of the above with no index or shift. */
517 static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
518 int rm, intptr_t offset)
519 {
520 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
521 }
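
/* Example: on a 32-bit host, tcg_out_modrm_offset(s, OPC_MOVL_GvEv,
   TCG_REG_EAX, TCG_REG_EBX, 0x10) takes the single-byte ModRM path above
   (no index, base is not %esp, 8-bit displacement) and emits
   0x8b 0x43 0x10, i.e. "movl 0x10(%ebx), %eax".  */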
522
523 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
524 static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
525 {
526 /* Propagate an opcode prefix, such as P_REXW. */
527 int ext = subop & ~0x7;
528 subop &= 0x7;
529
530 tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
531 }
532
533 static inline void tcg_out_mov(TCGContext *s, TCGType type,
534 TCGReg ret, TCGReg arg)
535 {
536 if (arg != ret) {
537 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
538 tcg_out_modrm(s, opc, ret, arg);
539 }
540 }
541
542 static void tcg_out_movi(TCGContext *s, TCGType type,
543 TCGReg ret, tcg_target_long arg)
544 {
545 tcg_target_long diff;
546
547 if (arg == 0) {
548 tgen_arithr(s, ARITH_XOR, ret, ret);
549 return;
550 }
551 if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
552 tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
553 tcg_out32(s, arg);
554 return;
555 }
556 if (arg == (int32_t)arg) {
557 tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
558 tcg_out32(s, arg);
559 return;
560 }
561
562 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
563 diff = arg - ((uintptr_t)s->code_ptr + 7);
564 if (diff == (int32_t)diff) {
565 tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
566 tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
567 tcg_out32(s, diff);
568 return;
569 }
570
571 tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
572 tcg_out64(s, arg);
573 }
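
/* To summarise the constant-loading strategies above, roughly in order of
   size: zero becomes a 2-byte "xor r,r" (plus REX where needed); a value
   with no high bits set uses the 5-byte "movl $imm,r", which zero-extends
   on x86_64; a negative value that fits in 32 bits uses the 7-byte
   sign-extending movq (C7 /0); then a 7-byte RIP-relative LEA is tried
   before falling back to the 10-byte movabsq with a full 64-bit
   immediate.  */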
574
575 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
576 {
577 if (val == (int8_t)val) {
578 tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
579 tcg_out8(s, val);
580 } else if (val == (int32_t)val) {
581 tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
582 tcg_out32(s, val);
583 } else {
584 tcg_abort();
585 }
586 }
587
588 static inline void tcg_out_push(TCGContext *s, int reg)
589 {
590 tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
591 }
592
593 static inline void tcg_out_pop(TCGContext *s, int reg)
594 {
595 tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
596 }
597
598 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
599 TCGReg arg1, intptr_t arg2)
600 {
601 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
602 tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
603 }
604
605 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
606 TCGReg arg1, intptr_t arg2)
607 {
608 int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
609 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
610 }
611
612 static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
613 tcg_target_long ofs, tcg_target_long val)
614 {
615 int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
616 tcg_out_modrm_offset(s, opc, 0, base, ofs);
617 tcg_out32(s, val);
618 }
619
620 static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
621 {
622 /* Propagate an opcode prefix, such as P_DATA16. */
623 int ext = subopc & ~0x7;
624 subopc &= 0x7;
625
626 if (count == 1) {
627 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
628 } else {
629 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
630 tcg_out8(s, count);
631 }
632 }
633
634 static inline void tcg_out_bswap32(TCGContext *s, int reg)
635 {
636 tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
637 }
638
639 static inline void tcg_out_rolw_8(TCGContext *s, int reg)
640 {
641 tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
642 }
643
644 static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
645 {
646 /* movzbl */
647 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
648 tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
649 }
650
651 static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
652 {
653 /* movsbl */
654 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
655 tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
656 }
657
658 static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
659 {
660 /* movzwl */
661 tcg_out_modrm(s, OPC_MOVZWL, dest, src);
662 }
663
664 static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
665 {
666 /* movsw[lq] */
667 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
668 }
669
670 static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
671 {
672 /* 32-bit mov zero extends. */
673 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
674 }
675
676 static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
677 {
678 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
679 }
680
681 static inline void tcg_out_bswap64(TCGContext *s, int reg)
682 {
683 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
684 }
685
686 static void tgen_arithi(TCGContext *s, int c, int r0,
687 tcg_target_long val, int cf)
688 {
689 int rexw = 0;
690
691 if (TCG_TARGET_REG_BITS == 64) {
692 rexw = c & -8;
693 c &= 7;
694 }
695
696 /* ??? While INC/DEC are 2 bytes shorter than ADDL $1, they also induce
697 partial flags update stalls on Pentium4 and are not recommended
698 by current Intel optimization manuals. */
699 if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
700 int is_inc = (c == ARITH_ADD) ^ (val < 0);
701 if (TCG_TARGET_REG_BITS == 64) {
702 /* The single-byte increment encodings are re-tasked as the
703 REX prefixes. Use the MODRM encoding. */
704 tcg_out_modrm(s, OPC_GRP5 + rexw,
705 (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
706 } else {
707 tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
708 }
709 return;
710 }
711
712 if (c == ARITH_AND) {
713 if (TCG_TARGET_REG_BITS == 64) {
714 if (val == 0xffffffffu) {
715 tcg_out_ext32u(s, r0, r0);
716 return;
717 }
718 if (val == (uint32_t)val) {
719 /* AND with no high bits set can use a 32-bit operation. */
720 rexw = 0;
721 }
722 }
723 if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
724 tcg_out_ext8u(s, r0, r0);
725 return;
726 }
727 if (val == 0xffffu) {
728 tcg_out_ext16u(s, r0, r0);
729 return;
730 }
731 }
732
733 if (val == (int8_t)val) {
734 tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
735 tcg_out8(s, val);
736 return;
737 }
738 if (rexw == 0 || val == (int32_t)val) {
739 tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
740 tcg_out32(s, val);
741 return;
742 }
743
744 tcg_abort();
745 }
746
747 static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
748 {
749 if (val != 0) {
750 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
751 }
752 }
753
754 /* Use SMALL != 0 to force a short forward branch. */
755 static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
756 {
757 int32_t val, val1;
758 TCGLabel *l = &s->labels[label_index];
759
760 if (l->has_value) {
761 val = l->u.value - (intptr_t)s->code_ptr;
762 val1 = val - 2;
763 if ((int8_t)val1 == val1) {
764 if (opc == -1) {
765 tcg_out8(s, OPC_JMP_short);
766 } else {
767 tcg_out8(s, OPC_JCC_short + opc);
768 }
769 tcg_out8(s, val1);
770 } else {
771 if (small) {
772 tcg_abort();
773 }
774 if (opc == -1) {
775 tcg_out8(s, OPC_JMP_long);
776 tcg_out32(s, val - 5);
777 } else {
778 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
779 tcg_out32(s, val - 6);
780 }
781 }
782 } else if (small) {
783 if (opc == -1) {
784 tcg_out8(s, OPC_JMP_short);
785 } else {
786 tcg_out8(s, OPC_JCC_short + opc);
787 }
788 tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
789 s->code_ptr += 1;
790 } else {
791 if (opc == -1) {
792 tcg_out8(s, OPC_JMP_long);
793 } else {
794 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
795 }
796 tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
797 s->code_ptr += 4;
798 }
799 }
800
801 static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
802 int const_arg2, int rexw)
803 {
804 if (const_arg2) {
805 if (arg2 == 0) {
806 /* test r, r */
807 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
808 } else {
809 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
810 }
811 } else {
812 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
813 }
814 }
815
816 static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
817 TCGArg arg1, TCGArg arg2, int const_arg2,
818 int label_index, int small)
819 {
820 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
821 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
822 }
823
824 #if TCG_TARGET_REG_BITS == 64
825 static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
826 TCGArg arg1, TCGArg arg2, int const_arg2,
827 int label_index, int small)
828 {
829 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
830 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
831 }
832 #else
833 /* XXX: we implement it at the target level to avoid having to
834 handle temporaries that live across basic blocks. */
835 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
836 const int *const_args, int small)
837 {
838 int label_next;
839 label_next = gen_new_label();
840 switch(args[4]) {
841 case TCG_COND_EQ:
842 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
843 label_next, 1);
844 tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
845 args[5], small);
846 break;
847 case TCG_COND_NE:
848 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
849 args[5], small);
850 tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
851 args[5], small);
852 break;
853 case TCG_COND_LT:
854 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
855 args[5], small);
856 tcg_out_jxx(s, JCC_JNE, label_next, 1);
857 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
858 args[5], small);
859 break;
860 case TCG_COND_LE:
861 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
862 args[5], small);
863 tcg_out_jxx(s, JCC_JNE, label_next, 1);
864 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
865 args[5], small);
866 break;
867 case TCG_COND_GT:
868 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
869 args[5], small);
870 tcg_out_jxx(s, JCC_JNE, label_next, 1);
871 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
872 args[5], small);
873 break;
874 case TCG_COND_GE:
875 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
876 args[5], small);
877 tcg_out_jxx(s, JCC_JNE, label_next, 1);
878 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
879 args[5], small);
880 break;
881 case TCG_COND_LTU:
882 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
883 args[5], small);
884 tcg_out_jxx(s, JCC_JNE, label_next, 1);
885 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
886 args[5], small);
887 break;
888 case TCG_COND_LEU:
889 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
890 args[5], small);
891 tcg_out_jxx(s, JCC_JNE, label_next, 1);
892 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
893 args[5], small);
894 break;
895 case TCG_COND_GTU:
896 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
897 args[5], small);
898 tcg_out_jxx(s, JCC_JNE, label_next, 1);
899 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
900 args[5], small);
901 break;
902 case TCG_COND_GEU:
903 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
904 args[5], small);
905 tcg_out_jxx(s, JCC_JNE, label_next, 1);
906 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
907 args[5], small);
908 break;
909 default:
910 tcg_abort();
911 }
912 tcg_out_label(s, label_next, s->code_ptr);
913 }
914 #endif
915
916 static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
917 TCGArg arg1, TCGArg arg2, int const_arg2)
918 {
919 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
920 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
921 tcg_out_ext8u(s, dest, dest);
922 }
923
924 #if TCG_TARGET_REG_BITS == 64
925 static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
926 TCGArg arg1, TCGArg arg2, int const_arg2)
927 {
928 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
929 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
930 tcg_out_ext8u(s, dest, dest);
931 }
932 #else
933 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
934 const int *const_args)
935 {
936 TCGArg new_args[6];
937 int label_true, label_over;
938
939 memcpy(new_args, args+1, 5*sizeof(TCGArg));
940
941 if (args[0] == args[1] || args[0] == args[2]
942 || (!const_args[3] && args[0] == args[3])
943 || (!const_args[4] && args[0] == args[4])) {
944 /* When the destination overlaps with one of the argument
945 registers, don't do anything tricky. */
946 label_true = gen_new_label();
947 label_over = gen_new_label();
948
949 new_args[5] = label_true;
950 tcg_out_brcond2(s, new_args, const_args+1, 1);
951
952 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
953 tcg_out_jxx(s, JCC_JMP, label_over, 1);
954 tcg_out_label(s, label_true, s->code_ptr);
955
956 tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
957 tcg_out_label(s, label_over, s->code_ptr);
958 } else {
959 /* When the destination does not overlap one of the arguments,
960 clear the destination first, jump if cond false, and emit an
961 increment in the true case. This results in smaller code. */
962
963 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
964
965 label_over = gen_new_label();
966 new_args[4] = tcg_invert_cond(new_args[4]);
967 new_args[5] = label_over;
968 tcg_out_brcond2(s, new_args, const_args+1, 1);
969
970 tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
971 tcg_out_label(s, label_over, s->code_ptr);
972 }
973 }
974 #endif
975
976 static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
977 TCGArg c1, TCGArg c2, int const_c2,
978 TCGArg v1)
979 {
980 tcg_out_cmp(s, c1, c2, const_c2, 0);
981 if (have_cmov) {
982 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
983 } else {
984 int over = gen_new_label();
985 tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
986 tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
987 tcg_out_label(s, over, s->code_ptr);
988 }
989 }
990
991 #if TCG_TARGET_REG_BITS == 64
992 static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
993 TCGArg c1, TCGArg c2, int const_c2,
994 TCGArg v1)
995 {
996 tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
997 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
998 }
999 #endif
1000
1001 static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest)
1002 {
1003 intptr_t disp = dest - (intptr_t)s->code_ptr - 5;
1004
1005 if (disp == (int32_t)disp) {
1006 tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
1007 tcg_out32(s, disp);
1008 } else {
1009 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
1010 tcg_out_modrm(s, OPC_GRP5,
1011 call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
1012 }
1013 }
1014
1015 static inline void tcg_out_calli(TCGContext *s, uintptr_t dest)
1016 {
1017 tcg_out_branch(s, 1, dest);
1018 }
1019
1020 static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
1021 {
1022 tcg_out_branch(s, 0, dest);
1023 }
1024
1025 #if defined(CONFIG_SOFTMMU)
1026 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1027 * int mmu_idx, uintptr_t ra)
1028 */
1029 static const void * const qemu_ld_helpers[16] = {
1030 [MO_UB] = helper_ret_ldub_mmu,
1031 [MO_LEUW] = helper_le_lduw_mmu,
1032 [MO_LEUL] = helper_le_ldul_mmu,
1033 [MO_LEQ] = helper_le_ldq_mmu,
1034 [MO_BEUW] = helper_be_lduw_mmu,
1035 [MO_BEUL] = helper_be_ldul_mmu,
1036 [MO_BEQ] = helper_be_ldq_mmu,
1037 };
1038
1039 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1040 * uintxx_t val, int mmu_idx, uintptr_t ra)
1041 */
1042 static const void * const qemu_st_helpers[16] = {
1043 [MO_UB] = helper_ret_stb_mmu,
1044 [MO_LEUW] = helper_le_stw_mmu,
1045 [MO_LEUL] = helper_le_stl_mmu,
1046 [MO_LEQ] = helper_le_stq_mmu,
1047 [MO_BEUW] = helper_be_stw_mmu,
1048 [MO_BEUL] = helper_be_stl_mmu,
1049 [MO_BEQ] = helper_be_stq_mmu,
1050 };
1051
1052 /* Perform the TLB load and compare.
1053
1054 Inputs:
1055 ADDRLO and ADDRHI contain the low and high part of the address.
1056
1057 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1058
1059 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1060 This should be offsetof addr_read or addr_write.
1061
1062 Outputs:
1063 LABEL_PTRS is filled with the positions of the displacements of the one
1064 (or, for a 64-bit guest on a 32-bit host, two) forward jumps to the TLB miss case.
1065
1066 Second argument register is loaded with the low part of the address.
1067 In the TLB hit case, it has been adjusted as indicated by the TLB
1068 and so is a host address. In the TLB miss case, it continues to
1069 hold a guest address.
1070
1071 First argument register is clobbered. */
1072
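/* In outline, the code below computes

       r0 = env + offsetof(tlb_table[mem_index][0]) + WHICH
              + ((addrlo >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS))
                 & ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS))

   i.e. the address of the addr_read/addr_write slot of the TLB entry
   covering the access, and

       r1 = addrlo & (TARGET_PAGE_MASK | ((1 << s_bits) - 1))

   which is compared against that slot.  Keeping the low (1 << s_bits) - 1
   bits set in the mask means an access that is not aligned to its size
   also fails the compare and takes the slow-path helper.  */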
1073 static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1074 int mem_index, TCGMemOp s_bits,
1075 uint8_t **label_ptr, int which)
1076 {
1077 const TCGReg r0 = TCG_REG_L0;
1078 const TCGReg r1 = TCG_REG_L1;
1079 TCGType ttype = TCG_TYPE_I32;
1080 TCGType htype = TCG_TYPE_I32;
1081 int trexw = 0, hrexw = 0;
1082
1083 if (TCG_TARGET_REG_BITS == 64) {
1084 if (TARGET_LONG_BITS == 64) {
1085 ttype = TCG_TYPE_I64;
1086 trexw = P_REXW;
1087 }
1088 if (TCG_TYPE_PTR == TCG_TYPE_I64) {
1089 htype = TCG_TYPE_I64;
1090 hrexw = P_REXW;
1091 }
1092 }
1093
1094 tcg_out_mov(s, htype, r0, addrlo);
1095 tcg_out_mov(s, ttype, r1, addrlo);
1096
1097 tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
1098 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1099
1100 tgen_arithi(s, ARITH_AND + trexw, r1,
1101 TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
1102 tgen_arithi(s, ARITH_AND + hrexw, r0,
1103 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1104
1105 tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
1106 offsetof(CPUArchState, tlb_table[mem_index][0])
1107 + which);
1108
1109 /* cmp 0(r0), r1 */
1110 tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
1111
1112 /* Prepare for both the fast path add of the tlb addend, and the slow
1113 path function argument setup. There are two cases worth noting:
1114 For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
1115 before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
1116 copies the entire guest address for the slow path, while truncation
1117 for the 32-bit host happens with the fastpath ADDL below. */
1118 tcg_out_mov(s, ttype, r1, addrlo);
1119
1120 /* jne slow_path */
1121 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1122 label_ptr[0] = s->code_ptr;
1123 s->code_ptr += 4;
1124
1125 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1126 /* cmp 4(r0), addrhi */
1127 tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);
1128
1129 /* jne slow_path */
1130 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1131 label_ptr[1] = s->code_ptr;
1132 s->code_ptr += 4;
1133 }
1134
1135 /* TLB Hit. */
1136
1137 /* add addend(r0), r1 */
1138 tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
1139 offsetof(CPUTLBEntry, addend) - which);
1140 }
1141
1142 /*
1143 * Record the context of a call to the out of line helper code for the slow path
1144 * for a load or store, so that we can later generate the correct helper code
1145 */
1146 static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
1147 TCGReg datalo, TCGReg datahi,
1148 TCGReg addrlo, TCGReg addrhi,
1149 int mem_index, uint8_t *raddr,
1150 uint8_t **label_ptr)
1151 {
1152 TCGLabelQemuLdst *label = new_ldst_label(s);
1153
1154 label->is_ld = is_ld;
1155 label->opc = opc;
1156 label->datalo_reg = datalo;
1157 label->datahi_reg = datahi;
1158 label->addrlo_reg = addrlo;
1159 label->addrhi_reg = addrhi;
1160 label->mem_index = mem_index;
1161 label->raddr = raddr;
1162 label->label_ptr[0] = label_ptr[0];
1163 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1164 label->label_ptr[1] = label_ptr[1];
1165 }
1166 }
1167
1168 /*
1169 * Generate code for the slow path for a load at the end of block
1170 */
1171 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1172 {
1173 TCGMemOp opc = l->opc;
1174 TCGReg data_reg;
1175 uint8_t **label_ptr = &l->label_ptr[0];
1176
1177 /* resolve label address */
1178 *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
1179 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1180 *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
1181 }
1182
1183 if (TCG_TARGET_REG_BITS == 32) {
1184 int ofs = 0;
1185
1186 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1187 ofs += 4;
1188
1189 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1190 ofs += 4;
1191
1192 if (TARGET_LONG_BITS == 64) {
1193 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1194 ofs += 4;
1195 }
1196
1197 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
1198 ofs += 4;
1199
1200 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
1201 } else {
1202 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1203 /* The second argument is already loaded with addrlo. */
1204 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
1205 l->mem_index);
1206 tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
1207 (uintptr_t)l->raddr);
1208 }
1209
1210 tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN]);
1211
1212 data_reg = l->datalo_reg;
1213 switch (opc & MO_SSIZE) {
1214 case MO_SB:
1215 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1216 break;
1217 case MO_SW:
1218 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1219 break;
1220 #if TCG_TARGET_REG_BITS == 64
1221 case MO_SL:
1222 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1223 break;
1224 #endif
1225 case MO_UB:
1226 case MO_UW:
1227 /* Note that the helpers have zero-extended to tcg_target_long. */
1228 case MO_UL:
1229 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1230 break;
1231 case MO_Q:
1232 if (TCG_TARGET_REG_BITS == 64) {
1233 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1234 } else if (data_reg == TCG_REG_EDX) {
1235 /* xchg %edx, %eax */
1236 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1237 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
1238 } else {
1239 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1240 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
1241 }
1242 break;
1243 default:
1244 tcg_abort();
1245 }
1246
1247 /* Jump back to the code following the qemu_ld */
1248 tcg_out_jmp(s, (uintptr_t)l->raddr);
1249 }
1250
1251 /*
1252 * Generate code for the slow path for a store at the end of block
1253 */
1254 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1255 {
1256 TCGMemOp opc = l->opc;
1257 TCGMemOp s_bits = opc & MO_SIZE;
1258 uint8_t **label_ptr = &l->label_ptr[0];
1259 TCGReg retaddr;
1260
1261 /* resolve label address */
1262 *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
1263 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1264 *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
1265 }
1266
1267 if (TCG_TARGET_REG_BITS == 32) {
1268 int ofs = 0;
1269
1270 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1271 ofs += 4;
1272
1273 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1274 ofs += 4;
1275
1276 if (TARGET_LONG_BITS == 64) {
1277 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1278 ofs += 4;
1279 }
1280
1281 tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
1282 ofs += 4;
1283
1284 if (s_bits == MO_64) {
1285 tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
1286 ofs += 4;
1287 }
1288
1289 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
1290 ofs += 4;
1291
1292 retaddr = TCG_REG_EAX;
1293 tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
1294 tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
1295 } else {
1296 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1297 /* The second argument is already loaded with addrlo. */
1298 tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1299 tcg_target_call_iarg_regs[2], l->datalo_reg);
1300 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
1301 l->mem_index);
1302
1303 if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
1304 retaddr = tcg_target_call_iarg_regs[4];
1305 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1306 } else {
1307 retaddr = TCG_REG_RAX;
1308 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1309 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);
1310 }
1311 }
1312
1313 /* "Tail call" to the helper, with the return address back inline. */
1314 tcg_out_push(s, retaddr);
1315 tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[opc]);
1316 }
1317 #elif defined(__x86_64__) && defined(__linux__)
1318 # include <asm/prctl.h>
1319 # include <sys/prctl.h>
1320
1321 int arch_prctl(int code, unsigned long addr);
1322
1323 static int guest_base_flags;
1324 static inline void setup_guest_base_seg(void)
1325 {
1326 if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
1327 guest_base_flags = P_GS;
1328 }
1329 }
1330 #else
1331 # define guest_base_flags 0
1332 static inline void setup_guest_base_seg(void) { }
1333 #endif /* SOFTMMU */
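
/* For user-only emulation on an x86_64 Linux host, the guest address space
   can be reached through the %gs segment: setup_guest_base_seg() points
   the GS base at GUEST_BASE via arch_prctl(), and the qemu_ld/st fast
   paths below then use guest_base_flags (P_GS) as the segment, so a 0x65
   segment-override prefix is emitted instead of materialising GUEST_BASE
   in a register; the hardware adds the segment base to every such
   access.  */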
1334
1335 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1336 TCGReg base, intptr_t ofs, int seg,
1337 TCGMemOp memop)
1338 {
1339 const TCGMemOp bswap = memop & MO_BSWAP;
1340
1341 switch (memop & MO_SSIZE) {
1342 case MO_UB:
1343 tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
1344 break;
1345 case MO_SB:
1346 tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
1347 break;
1348 case MO_UW:
1349 tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
1350 if (bswap) {
1351 tcg_out_rolw_8(s, datalo);
1352 }
1353 break;
1354 case MO_SW:
1355 if (bswap) {
1356 tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
1357 tcg_out_rolw_8(s, datalo);
1358 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1359 } else {
1360 tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
1361 datalo, base, ofs);
1362 }
1363 break;
1364 case MO_UL:
1365 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
1366 if (bswap) {
1367 tcg_out_bswap32(s, datalo);
1368 }
1369 break;
1370 #if TCG_TARGET_REG_BITS == 64
1371 case MO_SL:
1372 if (bswap) {
1373 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
1374 tcg_out_bswap32(s, datalo);
1375 tcg_out_ext32s(s, datalo, datalo);
1376 } else {
1377 tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
1378 }
1379 break;
1380 #endif
1381 case MO_Q:
1382 if (TCG_TARGET_REG_BITS == 64) {
1383 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
1384 datalo, base, ofs);
1385 if (bswap) {
1386 tcg_out_bswap64(s, datalo);
1387 }
1388 } else {
1389 if (bswap) {
1390 int t = datalo;
1391 datalo = datahi;
1392 datahi = t;
1393 }
1394 if (base != datalo) {
1395 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1396 datalo, base, ofs);
1397 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1398 datahi, base, ofs + 4);
1399 } else {
1400 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1401 datahi, base, ofs + 4);
1402 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1403 datalo, base, ofs);
1404 }
1405 if (bswap) {
1406 tcg_out_bswap32(s, datalo);
1407 tcg_out_bswap32(s, datahi);
1408 }
1409 }
1410 break;
1411 default:
1412 tcg_abort();
1413 }
1414 }
1415
1416 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1417 EAX. This will be useful once fixed-register globals are less
1418 common. */
1419 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1420 {
1421 TCGReg datalo, datahi, addrlo;
1422 TCGReg addrhi __attribute__((unused));
1423 TCGMemOp opc;
1424 #if defined(CONFIG_SOFTMMU)
1425 int mem_index;
1426 TCGMemOp s_bits;
1427 uint8_t *label_ptr[2];
1428 #endif
1429
1430 datalo = *args++;
1431 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1432 addrlo = *args++;
1433 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1434 opc = *args++;
1435
1436 #if defined(CONFIG_SOFTMMU)
1437 mem_index = *args++;
1438 s_bits = opc & MO_SIZE;
1439
1440 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
1441 label_ptr, offsetof(CPUTLBEntry, addr_read));
1442
1443 /* TLB Hit. */
1444 tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
1445
1446 /* Record the current context of a load into ldst label */
1447 add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi,
1448 mem_index, s->code_ptr, label_ptr);
1449 #else
1450 {
1451 int32_t offset = GUEST_BASE;
1452 TCGReg base = addrlo;
1453 int seg = 0;
1454
1455 /* ??? We assume all operations have left us with register contents
1456 that are zero extended. So far this appears to be true. If we
1457 want to enforce this, we can either do an explicit zero-extension
1458 here, or (if GUEST_BASE == 0, or a segment register is in use)
1459 use the ADDR32 prefix. For now, do nothing. */
1460 if (GUEST_BASE && guest_base_flags) {
1461 seg = guest_base_flags;
1462 offset = 0;
1463 } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
1464 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
1465 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1466 base = TCG_REG_L1;
1467 offset = 0;
1468 }
1469
1470 tcg_out_qemu_ld_direct(s, datalo, datahi, base, offset, seg, opc);
1471 }
1472 #endif
1473 }
1474
1475 static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1476 TCGReg base, intptr_t ofs, int seg,
1477 TCGMemOp memop)
1478 {
1479 const TCGMemOp bswap = memop & MO_BSWAP;
1480
1481 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1482 we could perform the bswap twice to restore the original value
1483 instead of moving to the scratch. But as it is, the L constraint
1484 means that TCG_REG_L0 is definitely free here. */
1485 const TCGReg scratch = TCG_REG_L0;
1486
1487 switch (memop & MO_SIZE) {
1488 case MO_8:
1489 /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
1490 Use the scratch register if necessary. */
1491 if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
1492 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1493 datalo = scratch;
1494 }
1495 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
1496 datalo, base, ofs);
1497 break;
1498 case MO_16:
1499 if (bswap) {
1500 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1501 tcg_out_rolw_8(s, scratch);
1502 datalo = scratch;
1503 }
1504 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
1505 datalo, base, ofs);
1506 break;
1507 case MO_32:
1508 if (bswap) {
1509 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1510 tcg_out_bswap32(s, scratch);
1511 datalo = scratch;
1512 }
1513 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
1514 break;
1515 case MO_64:
1516 if (TCG_TARGET_REG_BITS == 64) {
1517 if (bswap) {
1518 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1519 tcg_out_bswap64(s, scratch);
1520 datalo = scratch;
1521 }
1522 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
1523 datalo, base, ofs);
1524 } else if (bswap) {
1525 tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
1526 tcg_out_bswap32(s, scratch);
1527 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
1528 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1529 tcg_out_bswap32(s, scratch);
1530 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
1531 } else {
1532 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
1533 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
1534 }
1535 break;
1536 default:
1537 tcg_abort();
1538 }
1539 }
1540
1541 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1542 {
1543 TCGReg datalo, datahi, addrlo;
1544 TCGReg addrhi __attribute__((unused));
1545 TCGMemOp opc;
1546 #if defined(CONFIG_SOFTMMU)
1547 int mem_index;
1548 TCGMemOp s_bits;
1549 uint8_t *label_ptr[2];
1550 #endif
1551
1552 datalo = *args++;
1553 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1554 addrlo = *args++;
1555 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1556 opc = *args++;
1557
1558 #if defined(CONFIG_SOFTMMU)
1559 mem_index = *args++;
1560 s_bits = opc & MO_SIZE;
1561
1562 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
1563 label_ptr, offsetof(CPUTLBEntry, addr_write));
1564
1565 /* TLB Hit. */
1566 tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
1567
1568 /* Record the current context of a store into ldst label */
1569 add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi,
1570 mem_index, s->code_ptr, label_ptr);
1571 #else
1572 {
1573 int32_t offset = GUEST_BASE;
1574 TCGReg base = addrlo;
1575 int seg = 0;
1576
1577 /* ??? We assume all operations have left us with register contents
1578 that are zero extended. So far this appears to be true. If we
1579 want to enforce this, we can either do an explicit zero-extension
1580 here, or (if GUEST_BASE == 0, or a segment register is in use)
1581 use the ADDR32 prefix. For now, do nothing. */
1582 if (GUEST_BASE && guest_base_flags) {
1583 seg = guest_base_flags;
1584 offset = 0;
1585 } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
1586 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
1587 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1588 base = TCG_REG_L1;
1589 offset = 0;
1590 }
1591
1592 tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
1593 }
1594 #endif
1595 }
1596
1597 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1598 const TCGArg *args, const int *const_args)
1599 {
1600 int c, rexw = 0;
1601
1602 #if TCG_TARGET_REG_BITS == 64
1603 # define OP_32_64(x) \
1604 case glue(glue(INDEX_op_, x), _i64): \
1605 rexw = P_REXW; /* FALLTHRU */ \
1606 case glue(glue(INDEX_op_, x), _i32)
1607 #else
1608 # define OP_32_64(x) \
1609 case glue(glue(INDEX_op_, x), _i32)
1610 #endif
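
/* For example, on a 64-bit host "OP_32_64(add):" expands to

       case INDEX_op_add_i64:
           rexw = P_REXW;      (falls through)
       case INDEX_op_add_i32:

   so a single switch arm handles both operand widths, with REXW selecting
   the 64-bit form of the emitted instruction.  */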
1611
1612 switch(opc) {
1613 case INDEX_op_exit_tb:
1614 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1615 tcg_out_jmp(s, (uintptr_t)tb_ret_addr);
1616 break;
1617 case INDEX_op_goto_tb:
1618 if (s->tb_jmp_offset) {
1619 /* direct jump method */
1620 tcg_out8(s, OPC_JMP_long); /* jmp im */
1621 s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1622 tcg_out32(s, 0);
1623 } else {
1624 /* indirect jump method */
1625 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1626 (intptr_t)(s->tb_next + args[0]));
1627 }
1628 s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1629 break;
1630 case INDEX_op_call:
1631 if (const_args[0]) {
1632 tcg_out_calli(s, args[0]);
1633 } else {
1634 /* call *reg */
1635 tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
1636 }
1637 break;
1638 case INDEX_op_br:
1639 tcg_out_jxx(s, JCC_JMP, args[0], 0);
1640 break;
1641 case INDEX_op_movi_i32:
1642 tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1643 break;
1644 OP_32_64(ld8u):
1645 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1646 tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1647 break;
1648 OP_32_64(ld8s):
1649 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1650 break;
1651 OP_32_64(ld16u):
1652 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1653 tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1654 break;
1655 OP_32_64(ld16s):
1656 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1657 break;
1658 #if TCG_TARGET_REG_BITS == 64
1659 case INDEX_op_ld32u_i64:
1660 #endif
1661 case INDEX_op_ld_i32:
1662 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1663 break;
1664
1665 OP_32_64(st8):
1666 if (const_args[0]) {
1667 tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
1668 0, args[1], args[2]);
1669 tcg_out8(s, args[0]);
1670 } else {
1671 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1672 args[0], args[1], args[2]);
1673 }
1674 break;
1675 OP_32_64(st16):
1676 if (const_args[0]) {
1677 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
1678 0, args[1], args[2]);
1679 tcg_out16(s, args[0]);
1680 } else {
1681 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1682 args[0], args[1], args[2]);
1683 }
1684 break;
1685 #if TCG_TARGET_REG_BITS == 64
1686 case INDEX_op_st32_i64:
1687 #endif
1688 case INDEX_op_st_i32:
1689 if (const_args[0]) {
1690 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
1691 tcg_out32(s, args[0]);
1692 } else {
1693 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1694 }
1695 break;
1696
1697 OP_32_64(add):
1698 /* For 3-operand addition, use LEA. */
1699 if (args[0] != args[1]) {
1700 TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1701
1702 if (const_args[2]) {
1703 c3 = a2, a2 = -1;
1704 } else if (a0 == a2) {
1705 /* Watch out for dest = src + dest, since we've removed
1706 the matching constraint on the add. */
1707 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1708 break;
1709 }
1710
1711 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1712 break;
1713 }
1714 c = ARITH_ADD;
1715 goto gen_arith;
1716 OP_32_64(sub):
1717 c = ARITH_SUB;
1718 goto gen_arith;
1719 OP_32_64(and):
1720 c = ARITH_AND;
1721 goto gen_arith;
1722 OP_32_64(or):
1723 c = ARITH_OR;
1724 goto gen_arith;
1725 OP_32_64(xor):
1726 c = ARITH_XOR;
1727 goto gen_arith;
1728 gen_arith:
1729 if (const_args[2]) {
1730 tgen_arithi(s, c + rexw, args[0], args[2], 0);
1731 } else {
1732 tgen_arithr(s, c + rexw, args[0], args[2]);
1733 }
1734 break;
1735
1736 OP_32_64(mul):
1737 if (const_args[2]) {
1738 int32_t val;
1739 val = args[2];
1740 if (val == (int8_t)val) {
1741 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1742 tcg_out8(s, val);
1743 } else {
1744 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1745 tcg_out32(s, val);
1746 }
1747 } else {
1748 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1749 }
1750 break;
1751
1752 OP_32_64(div2):
1753 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1754 break;
1755 OP_32_64(divu2):
1756 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1757 break;
1758
1759 OP_32_64(shl):
1760 c = SHIFT_SHL;
1761 goto gen_shift;
1762 OP_32_64(shr):
1763 c = SHIFT_SHR;
1764 goto gen_shift;
1765 OP_32_64(sar):
1766 c = SHIFT_SAR;
1767 goto gen_shift;
1768 OP_32_64(rotl):
1769 c = SHIFT_ROL;
1770 goto gen_shift;
1771 OP_32_64(rotr):
1772 c = SHIFT_ROR;
1773 goto gen_shift;
1774 gen_shift:
1775 if (const_args[2]) {
1776 tcg_out_shifti(s, c + rexw, args[0], args[2]);
1777 } else {
1778 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1779 }
1780 break;
1781
1782 case INDEX_op_brcond_i32:
1783 tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1784 args[3], 0);
1785 break;
1786 case INDEX_op_setcond_i32:
1787 tcg_out_setcond32(s, args[3], args[0], args[1],
1788 args[2], const_args[2]);
1789 break;
1790 case INDEX_op_movcond_i32:
1791 tcg_out_movcond32(s, args[5], args[0], args[1],
1792 args[2], const_args[2], args[3]);
1793 break;
1794
1795 OP_32_64(bswap16):
1796 tcg_out_rolw_8(s, args[0]);
1797 break;
1798 OP_32_64(bswap32):
1799 tcg_out_bswap32(s, args[0]);
1800 break;
1801
1802 OP_32_64(neg):
1803 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
1804 break;
1805 OP_32_64(not):
1806 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
1807 break;
1808
1809 OP_32_64(ext8s):
1810 tcg_out_ext8s(s, args[0], args[1], rexw);
1811 break;
1812 OP_32_64(ext16s):
1813 tcg_out_ext16s(s, args[0], args[1], rexw);
1814 break;
1815 OP_32_64(ext8u):
1816 tcg_out_ext8u(s, args[0], args[1]);
1817 break;
1818 OP_32_64(ext16u):
1819 tcg_out_ext16u(s, args[0], args[1]);
1820 break;
1821
1822 case INDEX_op_qemu_ld_i32:
1823 tcg_out_qemu_ld(s, args, 0);
1824 break;
1825 case INDEX_op_qemu_ld_i64:
1826 tcg_out_qemu_ld(s, args, 1);
1827 break;
1828 case INDEX_op_qemu_st_i32:
1829 tcg_out_qemu_st(s, args, 0);
1830 break;
1831 case INDEX_op_qemu_st_i64:
1832 tcg_out_qemu_st(s, args, 1);
1833 break;
1834
1835 OP_32_64(mulu2):
1836 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
1837 break;
1838 OP_32_64(muls2):
1839 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
1840 break;
1841 OP_32_64(add2):
1842 if (const_args[4]) {
1843 tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
1844 } else {
1845 tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
1846 }
1847 if (const_args[5]) {
1848 tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
1849 } else {
1850 tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
1851 }
1852 break;
1853 OP_32_64(sub2):
1854 if (const_args[4]) {
1855 tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
1856 } else {
1857 tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
1858 }
1859 if (const_args[5]) {
1860 tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
1861 } else {
1862 tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
1863 }
1864 break;
1865
1866 #if TCG_TARGET_REG_BITS == 32
1867 case INDEX_op_brcond2_i32:
1868 tcg_out_brcond2(s, args, const_args, 0);
1869 break;
1870 case INDEX_op_setcond2_i32:
1871 tcg_out_setcond2(s, args, const_args);
1872 break;
1873 #else /* TCG_TARGET_REG_BITS == 64 */
1874 case INDEX_op_movi_i64:
1875 tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
1876 break;
1877 case INDEX_op_ld32s_i64:
1878 tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
1879 break;
1880 case INDEX_op_ld_i64:
1881 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1882 break;
1883 case INDEX_op_st_i64:
1884 if (const_args[0]) {
1885 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
1886 0, args[1], args[2]);
1887 tcg_out32(s, args[0]);
1888 } else {
1889 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1890 }
1891 break;
1892
1893 case INDEX_op_brcond_i64:
1894 tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
1895 args[3], 0);
1896 break;
1897 case INDEX_op_setcond_i64:
1898 tcg_out_setcond64(s, args[3], args[0], args[1],
1899 args[2], const_args[2]);
1900 break;
1901 case INDEX_op_movcond_i64:
1902 tcg_out_movcond64(s, args[5], args[0], args[1],
1903 args[2], const_args[2], args[3]);
1904 break;
1905
1906 case INDEX_op_bswap64_i64:
1907 tcg_out_bswap64(s, args[0]);
1908 break;
1909 case INDEX_op_ext32u_i64:
1910 tcg_out_ext32u(s, args[0], args[1]);
1911 break;
1912 case INDEX_op_ext32s_i64:
1913 tcg_out_ext32s(s, args[0], args[1]);
1914 break;
1915 #endif
1916
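/* Only three deposit shapes are supported, matching what plain x86 moves can
   express directly: a byte store into bits 0..7, a byte store into bits 8..15
   (args[0] + 4 in the ModRM byte names the high-byte register %ah/%ch/%dh/%bh,
   hence the "Q" constraint), and a 16-bit store into bits 0..15 via the 0x66
   operand-size prefix (P_DATA16). */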
1917 OP_32_64(deposit):
1918 if (args[3] == 0 && args[4] == 8) {
1919 /* store into bits 0..7 of the destination */
1920 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
1921 args[2], args[0]);
1922 } else if (args[3] == 8 && args[4] == 8) {
1923 /* store into bits 8..15 of the destination */
1924 tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
1925 } else if (args[3] == 0 && args[4] == 16) {
1926 /* store into bits 0..15 of the destination */
1927 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
1928 } else {
1929 tcg_abort();
1930 }
1931 break;
1932
1933 default:
1934 tcg_abort();
1935 }
1936
1937 #undef OP_32_64
1938 }
1939
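/* The constraint letters used below are decoded by target_parse_constraint()
   earlier in this file.  Roughly: "r" is any general register, "q" a
   byte-addressable register, "Q" a register with an addressable second byte
   (%ah etc.), "a"/"d" are fixed to EAX/EDX, "c" to ECX, "L" is a register
   usable by the qemu_ld/st slow path, "i" is any immediate, "e" a 32-bit
   sign-extended immediate, "Z" a 32-bit unsigned immediate, and "0"/"1"
   require the same register as output operand 0/1. */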
1940 static const TCGTargetOpDef x86_op_defs[] = {
1941 { INDEX_op_exit_tb, { } },
1942 { INDEX_op_goto_tb, { } },
1943 { INDEX_op_call, { "ri" } },
1944 { INDEX_op_br, { } },
1945 { INDEX_op_mov_i32, { "r", "r" } },
1946 { INDEX_op_movi_i32, { "r" } },
1947 { INDEX_op_ld8u_i32, { "r", "r" } },
1948 { INDEX_op_ld8s_i32, { "r", "r" } },
1949 { INDEX_op_ld16u_i32, { "r", "r" } },
1950 { INDEX_op_ld16s_i32, { "r", "r" } },
1951 { INDEX_op_ld_i32, { "r", "r" } },
1952 { INDEX_op_st8_i32, { "qi", "r" } },
1953 { INDEX_op_st16_i32, { "ri", "r" } },
1954 { INDEX_op_st_i32, { "ri", "r" } },
1955
1956 { INDEX_op_add_i32, { "r", "r", "ri" } },
1957 { INDEX_op_sub_i32, { "r", "0", "ri" } },
1958 { INDEX_op_mul_i32, { "r", "0", "ri" } },
1959 { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
1960 { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
1961 { INDEX_op_and_i32, { "r", "0", "ri" } },
1962 { INDEX_op_or_i32, { "r", "0", "ri" } },
1963 { INDEX_op_xor_i32, { "r", "0", "ri" } },
1964
1965 { INDEX_op_shl_i32, { "r", "0", "ci" } },
1966 { INDEX_op_shr_i32, { "r", "0", "ci" } },
1967 { INDEX_op_sar_i32, { "r", "0", "ci" } },
1968 { INDEX_op_rotl_i32, { "r", "0", "ci" } },
1969 { INDEX_op_rotr_i32, { "r", "0", "ci" } },
1970
1971 { INDEX_op_brcond_i32, { "r", "ri" } },
1972
1973 { INDEX_op_bswap16_i32, { "r", "0" } },
1974 { INDEX_op_bswap32_i32, { "r", "0" } },
1975
1976 { INDEX_op_neg_i32, { "r", "0" } },
1977
1978 { INDEX_op_not_i32, { "r", "0" } },
1979
1980 { INDEX_op_ext8s_i32, { "r", "q" } },
1981 { INDEX_op_ext16s_i32, { "r", "r" } },
1982 { INDEX_op_ext8u_i32, { "r", "q" } },
1983 { INDEX_op_ext16u_i32, { "r", "r" } },
1984
1985 { INDEX_op_setcond_i32, { "q", "r", "ri" } },
1986
1987 { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
1988 #if TCG_TARGET_HAS_movcond_i32
1989 { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
1990 #endif
1991
1992 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
1993 { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
1994 { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
1995 { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
1996
1997 #if TCG_TARGET_REG_BITS == 32
1998 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
1999 { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
2000 #else
2001 { INDEX_op_mov_i64, { "r", "r" } },
2002 { INDEX_op_movi_i64, { "r" } },
2003 { INDEX_op_ld8u_i64, { "r", "r" } },
2004 { INDEX_op_ld8s_i64, { "r", "r" } },
2005 { INDEX_op_ld16u_i64, { "r", "r" } },
2006 { INDEX_op_ld16s_i64, { "r", "r" } },
2007 { INDEX_op_ld32u_i64, { "r", "r" } },
2008 { INDEX_op_ld32s_i64, { "r", "r" } },
2009 { INDEX_op_ld_i64, { "r", "r" } },
2010 { INDEX_op_st8_i64, { "ri", "r" } },
2011 { INDEX_op_st16_i64, { "ri", "r" } },
2012 { INDEX_op_st32_i64, { "ri", "r" } },
2013 { INDEX_op_st_i64, { "re", "r" } },
2014
2015 { INDEX_op_add_i64, { "r", "r", "re" } },
2016 { INDEX_op_mul_i64, { "r", "0", "re" } },
2017 { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
2018 { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
2019 { INDEX_op_sub_i64, { "r", "0", "re" } },
2020 { INDEX_op_and_i64, { "r", "0", "reZ" } },
2021 { INDEX_op_or_i64, { "r", "0", "re" } },
2022 { INDEX_op_xor_i64, { "r", "0", "re" } },
2023
2024 { INDEX_op_shl_i64, { "r", "0", "ci" } },
2025 { INDEX_op_shr_i64, { "r", "0", "ci" } },
2026 { INDEX_op_sar_i64, { "r", "0", "ci" } },
2027 { INDEX_op_rotl_i64, { "r", "0", "ci" } },
2028 { INDEX_op_rotr_i64, { "r", "0", "ci" } },
2029
2030 { INDEX_op_brcond_i64, { "r", "re" } },
2031 { INDEX_op_setcond_i64, { "r", "r", "re" } },
2032
2033 { INDEX_op_bswap16_i64, { "r", "0" } },
2034 { INDEX_op_bswap32_i64, { "r", "0" } },
2035 { INDEX_op_bswap64_i64, { "r", "0" } },
2036 { INDEX_op_neg_i64, { "r", "0" } },
2037 { INDEX_op_not_i64, { "r", "0" } },
2038
2039 { INDEX_op_ext8s_i64, { "r", "r" } },
2040 { INDEX_op_ext16s_i64, { "r", "r" } },
2041 { INDEX_op_ext32s_i64, { "r", "r" } },
2042 { INDEX_op_ext8u_i64, { "r", "r" } },
2043 { INDEX_op_ext16u_i64, { "r", "r" } },
2044 { INDEX_op_ext32u_i64, { "r", "r" } },
2045
2046 { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
2047 { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
2048
2049 { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
2050 { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
2051 { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
2052 { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
2053 #endif
2054
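/* The qemu_ld/st operand counts depend on how many host registers a guest
   address and a 64-bit data value need: one each on 64-bit hosts, an extra
   register for the 64-bit data halves on 32-bit hosts, and further extra
   registers when the guest address itself is 64 bits wide. */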
2055 #if TCG_TARGET_REG_BITS == 64
2056 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2057 { INDEX_op_qemu_st_i32, { "L", "L" } },
2058 { INDEX_op_qemu_ld_i64, { "r", "L" } },
2059 { INDEX_op_qemu_st_i64, { "L", "L" } },
2060 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2061 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2062 { INDEX_op_qemu_st_i32, { "L", "L" } },
2063 { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
2064 { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
2065 #else
2066 { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
2067 { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
2068 { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
2069 { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
2070 #endif
2071 { -1 },
2072 };
2073
2074 static int tcg_target_callee_save_regs[] = {
2075 #if TCG_TARGET_REG_BITS == 64
2076 TCG_REG_RBP,
2077 TCG_REG_RBX,
2078 #if defined(_WIN64)
2079 TCG_REG_RDI,
2080 TCG_REG_RSI,
2081 #endif
2082 TCG_REG_R12,
2083 TCG_REG_R13,
2084 TCG_REG_R14, /* Currently used for the global env. */
2085 TCG_REG_R15,
2086 #else
2087 TCG_REG_EBP, /* Currently used for the global env. */
2088 TCG_REG_EBX,
2089 TCG_REG_ESI,
2090 TCG_REG_EDI,
2091 #endif
2092 };
2093
2094 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2095 and tcg_register_jit. */
2096
2097 #define PUSH_SIZE \
2098 ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2099 * (TCG_TARGET_REG_BITS / 8))
2100
2101 #define FRAME_SIZE \
2102 ((PUSH_SIZE \
2103 + TCG_STATIC_CALL_ARGS_SIZE \
2104 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2105 + TCG_TARGET_STACK_ALIGN - 1) \
2106 & ~(TCG_TARGET_STACK_ALIGN - 1))
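/* As a worked example (assuming the usual values TCG_STATIC_CALL_ARGS_SIZE == 128,
   CPU_TEMP_BUF_NLONGS == 128 and TCG_TARGET_STACK_ALIGN == 16): on a non-Windows
   x86_64 host, 6 callee-saved registers plus the return address give
   PUSH_SIZE = 7 * 8 = 56 bytes, and FRAME_SIZE = (56 + 128 + 128 * 8 + 15) & ~15
   = 1216 bytes. */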
2107
2108 /* Generate global QEMU prologue and epilogue code */
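/* On an x86_64 SysV host the emitted code looks roughly like:
 *     push %rbp; push %rbx; push %r12; push %r13; push %r14; push %r15
 *     mov  %rdi, %r14          ; env -> TCG_AREG0
 *     sub  $stack_addend, %rsp
 *     jmp  *%rsi               ; enter the translation block
 *   tb_ret_addr:
 *     add  $stack_addend, %rsp
 *     pop  %r15 ... pop %rbp
 *     ret
 * The 32-bit variant instead fetches env and the TB pointer from the stack,
 * since the i386 calling convention passes arguments there. */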
2109 static void tcg_target_qemu_prologue(TCGContext *s)
2110 {
2111 int i, stack_addend;
2112
2113 /* TB prologue */
2114
2115 /* Reserve some stack space, also for TCG temps. */
2116 stack_addend = FRAME_SIZE - PUSH_SIZE;
2117 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2118 CPU_TEMP_BUF_NLONGS * sizeof(long));
2119
2120 /* Save all callee-saved registers. */
2121 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2122 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2123 }
2124
2125 #if TCG_TARGET_REG_BITS == 32
2126 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
2127 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
2128 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2129 /* jmp *tb. */
2130 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
2131 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
2132 + stack_addend);
2133 #else
2134 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2135 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2136 /* jmp *tb. */
2137 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
2138 #endif
2139
2140 /* TB epilogue */
2141 tb_ret_addr = s->code_ptr;
2142
2143 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
2144
2145 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2146 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2147 }
2148 tcg_out_opc(s, OPC_RET, 0, 0, 0);
2149
2150 #if !defined(CONFIG_SOFTMMU)
2151 /* Try to set up a segment register to point to GUEST_BASE. */
2152 if (GUEST_BASE) {
2153 setup_guest_base_seg();
2154 }
2155 #endif
2156 }
2157
2158 static void tcg_target_init(TCGContext *s)
2159 {
2160 /* For 32-bit hosts, the CPU almost certainly supports cmov, but we still
2161 have to check at runtime. When cmov is unavailable, movcond falls back
2162 to a small forward branch. */
2163 #ifndef have_cmov
2164 {
2165 unsigned a, b, c, d;
2166 have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
2167 }
2168 #endif
2169
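/* Register masks: 0xffff marks all 16 general registers as allocatable on
   64-bit hosts, 0xff the 8 general registers of a 32-bit host. */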
2170 if (TCG_TARGET_REG_BITS == 64) {
2171 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2172 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2173 } else {
2174 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2175 }
2176
2177 tcg_regset_clear(tcg_target_call_clobber_regs);
2178 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2179 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2180 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2181 if (TCG_TARGET_REG_BITS == 64) {
2182 #if !defined(_WIN64)
2183 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2184 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2185 #endif
2186 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2187 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2188 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2189 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2190 }
2191
2192 tcg_regset_clear(s->reserved_regs);
2193 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2194
2195 tcg_add_target_add_op_defs(x86_op_defs);
2196 }
2197
2198 typedef struct {
2199 DebugFrameCIE cie;
2200 DebugFrameFDEHeader fde;
2201 uint8_t fde_def_cfa[4];
2202 uint8_t fde_reg_ofs[14];
2203 } DebugFrame;
2204
2205 /* We expect a 2-byte uleb128-encoded value. */
2206 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
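/* With the example FRAME_SIZE of 1216 bytes above, the two uleb128 bytes in
   fde_def_cfa come out as (1216 & 0x7f) | 0x80 = 0xc0 and 1216 >> 7 = 0x09. */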
2207
2208 #if !defined(__ELF__)
2209 /* Host machine without ELF. */
2210 #elif TCG_TARGET_REG_BITS == 64
2211 #define ELF_HOST_MACHINE EM_X86_64
2212 static DebugFrame debug_frame = {
2213 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2214 .cie.id = -1,
2215 .cie.version = 1,
2216 .cie.code_align = 1,
2217 .cie.data_align = 0x78, /* sleb128 -8 */
2218 .cie.return_column = 16,
2219
2220 /* Total FDE size does not include the "len" member. */
2221 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2222
2223 .fde_def_cfa = {
2224 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2225 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2226 (FRAME_SIZE >> 7)
2227 },
2228 .fde_reg_ofs = {
2229 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2230 /* The following ordering must match tcg_target_callee_save_regs. */
2231 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2232 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2233 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2234 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2235 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2236 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2237 }
2238 };
2239 #else
2240 #define ELF_HOST_MACHINE EM_386
2241 static DebugFrame debug_frame = {
2242 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2243 .cie.id = -1,
2244 .cie.version = 1,
2245 .cie.code_align = 1,
2246 .cie.data_align = 0x7c, /* sleb128 -4 */
2247 .cie.return_column = 8,
2248
2249 /* Total FDE size does not include the "len" member. */
2250 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2251
2252 .fde_def_cfa = {
2253 12, 4, /* DW_CFA_def_cfa %esp, ... */
2254 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2255 (FRAME_SIZE >> 7)
2256 },
2257 .fde_reg_ofs = {
2258 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2259 /* The following ordering must match tcg_target_callee_save_regs. */
2260 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2261 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2262 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2263 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2264 }
2265 };
2266 #endif
2267
2268 #if defined(ELF_HOST_MACHINE)
2269 void tcg_register_jit(void *buf, size_t buf_size)
2270 {
2271 debug_frame.fde.func_start = (uintptr_t)buf;
2272 debug_frame.fde.func_len = buf_size;
2273
2274 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2275 }
2276 #endif