1 /*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "tcg-be-ldst.h"
26
27 #ifndef NDEBUG
28 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
29 #if TCG_TARGET_REG_BITS == 64
30 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
31 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
32 #else
33 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
34 #endif
35 };
36 #endif
37
38 static const int tcg_target_reg_alloc_order[] = {
39 #if TCG_TARGET_REG_BITS == 64
40 TCG_REG_RBP,
41 TCG_REG_RBX,
42 TCG_REG_R12,
43 TCG_REG_R13,
44 TCG_REG_R14,
45 TCG_REG_R15,
46 TCG_REG_R10,
47 TCG_REG_R11,
48 TCG_REG_R9,
49 TCG_REG_R8,
50 TCG_REG_RCX,
51 TCG_REG_RDX,
52 TCG_REG_RSI,
53 TCG_REG_RDI,
54 TCG_REG_RAX,
55 #else
56 TCG_REG_EBX,
57 TCG_REG_ESI,
58 TCG_REG_EDI,
59 TCG_REG_EBP,
60 TCG_REG_ECX,
61 TCG_REG_EDX,
62 TCG_REG_EAX,
63 #endif
64 };
65
66 static const int tcg_target_call_iarg_regs[] = {
67 #if TCG_TARGET_REG_BITS == 64
68 #if defined(_WIN64)
69 TCG_REG_RCX,
70 TCG_REG_RDX,
71 #else
72 TCG_REG_RDI,
73 TCG_REG_RSI,
74 TCG_REG_RDX,
75 TCG_REG_RCX,
76 #endif
77 TCG_REG_R8,
78 TCG_REG_R9,
79 #else
80 /* 32 bit mode uses stack based calling convention (GCC default). */
81 #endif
82 };
83
84 static const int tcg_target_call_oarg_regs[] = {
85 TCG_REG_EAX,
86 #if TCG_TARGET_REG_BITS == 32
87 TCG_REG_EDX
88 #endif
89 };
90
91 /* Registers used with L constraint, which are the first argument
92 registers on x86_64, and two random call clobbered registers on
93 i386. */
94 #if TCG_TARGET_REG_BITS == 64
95 # define TCG_REG_L0 tcg_target_call_iarg_regs[0]
96 # define TCG_REG_L1 tcg_target_call_iarg_regs[1]
97 #else
98 # define TCG_REG_L0 TCG_REG_EAX
99 # define TCG_REG_L1 TCG_REG_EDX
100 #endif
101
102 /* For 32-bit, we are going to attempt to determine at runtime whether cmov
103 is available. However, the host compiler must supply <cpuid.h>, as we're
104 not going to go so far as our own inline assembly. */
105 #if TCG_TARGET_REG_BITS == 64
106 # define have_cmov 1
107 #elif defined(CONFIG_CPUID_H)
108 #include <cpuid.h>
109 static bool have_cmov;
110 #else
111 # define have_cmov 0
112 #endif
113
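/* Code address that INDEX_op_exit_tb jumps back to in order to return to the
   execution loop; it is filled in when the prologue/epilogue is generated. */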
114 static uint8_t *tb_ret_addr;
115
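/* Resolve a relocation in already-emitted code.  Both relocation types are
   PC-relative; the addend recorded by tcg_out_reloc compensates for the
   distance between the displacement field and the end of the instruction. */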
116 static void patch_reloc(uint8_t *code_ptr, int type,
117 intptr_t value, intptr_t addend)
118 {
119 value += addend;
120 switch(type) {
121 case R_386_PC32:
122 value -= (uintptr_t)code_ptr;
123 if (value != (int32_t)value) {
124 tcg_abort();
125 }
126 *(uint32_t *)code_ptr = value;
127 break;
128 case R_386_PC8:
129 value -= (uintptr_t)code_ptr;
130 if (value != (int8_t)value) {
131 tcg_abort();
132 }
133 *(uint8_t *)code_ptr = value;
134 break;
135 default:
136 tcg_abort();
137 }
138 }
139
140 /* parse target specific constraints */
141 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
142 {
143 const char *ct_str;
144
145 ct_str = *pct_str;
146 switch(ct_str[0]) {
147 case 'a':
148 ct->ct |= TCG_CT_REG;
149 tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
150 break;
151 case 'b':
152 ct->ct |= TCG_CT_REG;
153 tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
154 break;
155 case 'c':
156 ct->ct |= TCG_CT_REG;
157 tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
158 break;
159 case 'd':
160 ct->ct |= TCG_CT_REG;
161 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
162 break;
163 case 'S':
164 ct->ct |= TCG_CT_REG;
165 tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
166 break;
167 case 'D':
168 ct->ct |= TCG_CT_REG;
169 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
170 break;
171 case 'q':
172 ct->ct |= TCG_CT_REG;
173 if (TCG_TARGET_REG_BITS == 64) {
174 tcg_regset_set32(ct->u.regs, 0, 0xffff);
175 } else {
176 tcg_regset_set32(ct->u.regs, 0, 0xf);
177 }
178 break;
179 case 'Q':
180 ct->ct |= TCG_CT_REG;
181 tcg_regset_set32(ct->u.regs, 0, 0xf);
182 break;
183 case 'r':
184 ct->ct |= TCG_CT_REG;
185 if (TCG_TARGET_REG_BITS == 64) {
186 tcg_regset_set32(ct->u.regs, 0, 0xffff);
187 } else {
188 tcg_regset_set32(ct->u.regs, 0, 0xff);
189 }
190 break;
191
192 /* qemu_ld/st address constraint */
193 case 'L':
194 ct->ct |= TCG_CT_REG;
195 if (TCG_TARGET_REG_BITS == 64) {
196 tcg_regset_set32(ct->u.regs, 0, 0xffff);
197 } else {
198 tcg_regset_set32(ct->u.regs, 0, 0xff);
199 }
200 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
201 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
202 break;
203
204 case 'e':
205 ct->ct |= TCG_CT_CONST_S32;
206 break;
207 case 'Z':
208 ct->ct |= TCG_CT_CONST_U32;
209 break;
210
211 default:
212 return -1;
213 }
214 ct_str++;
215 *pct_str = ct_str;
216 return 0;
217 }
218
219 /* test if a constant matches the constraint */
220 static inline int tcg_target_const_match(tcg_target_long val,
221 const TCGArgConstraint *arg_ct)
222 {
223 int ct = arg_ct->ct;
224 if (ct & TCG_CT_CONST) {
225 return 1;
226 }
227 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
228 return 1;
229 }
230 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
231 return 1;
232 }
233 return 0;
234 }
235
236 #if TCG_TARGET_REG_BITS == 64
237 # define LOWREGMASK(x) ((x) & 7)
238 #else
239 # define LOWREGMASK(x) (x)
240 #endif
241
242 #define P_EXT 0x100 /* 0x0f opcode prefix */
243 #define P_DATA16 0x200 /* 0x66 opcode prefix */
244 #if TCG_TARGET_REG_BITS == 64
245 # define P_ADDR32 0x400 /* 0x67 opcode prefix */
246 # define P_REXW 0x800 /* Set REX.W = 1 */
247 # define P_REXB_R 0x1000 /* REG field as byte register */
248 # define P_REXB_RM 0x2000 /* R/M field as byte register */
249 # define P_GS 0x4000 /* gs segment override */
250 #else
251 # define P_ADDR32 0
252 # define P_REXW 0
253 # define P_REXB_R 0
254 # define P_REXB_RM 0
255 # define P_GS 0
256 #endif
257
258 #define OPC_ARITH_EvIz (0x81)
259 #define OPC_ARITH_EvIb (0x83)
260 #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
261 #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
262 #define OPC_BSWAP (0xc8 | P_EXT)
263 #define OPC_CALL_Jz (0xe8)
264 #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
265 #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
266 #define OPC_DEC_r32 (0x48)
267 #define OPC_IMUL_GvEv (0xaf | P_EXT)
268 #define OPC_IMUL_GvEvIb (0x6b)
269 #define OPC_IMUL_GvEvIz (0x69)
270 #define OPC_INC_r32 (0x40)
271 #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
272 #define OPC_JCC_short (0x70) /* ... plus condition code */
273 #define OPC_JMP_long (0xe9)
274 #define OPC_JMP_short (0xeb)
275 #define OPC_LEA (0x8d)
276 #define OPC_MOVB_EvGv (0x88) /* stores, more or less */
277 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
278 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
279 #define OPC_MOVB_EvIz (0xc6)
280 #define OPC_MOVL_EvIz (0xc7)
281 #define OPC_MOVL_Iv (0xb8)
282 #define OPC_MOVSBL (0xbe | P_EXT)
283 #define OPC_MOVSWL (0xbf | P_EXT)
284 #define OPC_MOVSLQ (0x63 | P_REXW)
285 #define OPC_MOVZBL (0xb6 | P_EXT)
286 #define OPC_MOVZWL (0xb7 | P_EXT)
287 #define OPC_POP_r32 (0x58)
288 #define OPC_PUSH_r32 (0x50)
289 #define OPC_PUSH_Iv (0x68)
290 #define OPC_PUSH_Ib (0x6a)
291 #define OPC_RET (0xc3)
292 #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
293 #define OPC_SHIFT_1 (0xd1)
294 #define OPC_SHIFT_Ib (0xc1)
295 #define OPC_SHIFT_cl (0xd3)
296 #define OPC_TESTL (0x85)
297 #define OPC_XCHG_ax_r32 (0x90)
298
299 #define OPC_GRP3_Ev (0xf7)
300 #define OPC_GRP5 (0xff)
301
302 /* Group 1 opcode extensions for 0x80-0x83.
303 These are also used as modifiers for OPC_ARITH. */
304 #define ARITH_ADD 0
305 #define ARITH_OR 1
306 #define ARITH_ADC 2
307 #define ARITH_SBB 3
308 #define ARITH_AND 4
309 #define ARITH_SUB 5
310 #define ARITH_XOR 6
311 #define ARITH_CMP 7
312
313 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
314 #define SHIFT_ROL 0
315 #define SHIFT_ROR 1
316 #define SHIFT_SHL 4
317 #define SHIFT_SHR 5
318 #define SHIFT_SAR 7
319
320 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
321 #define EXT3_NOT 2
322 #define EXT3_NEG 3
323 #define EXT3_MUL 4
324 #define EXT3_IMUL 5
325 #define EXT3_DIV 6
326 #define EXT3_IDIV 7
327
328 /* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
329 #define EXT5_INC_Ev 0
330 #define EXT5_DEC_Ev 1
331 #define EXT5_CALLN_Ev 2
332 #define EXT5_JMPN_Ev 4
333
334 /* Condition codes to be added to OPC_JCC_{long,short}. */
335 #define JCC_JMP (-1)
336 #define JCC_JO 0x0
337 #define JCC_JNO 0x1
338 #define JCC_JB 0x2
339 #define JCC_JAE 0x3
340 #define JCC_JE 0x4
341 #define JCC_JNE 0x5
342 #define JCC_JBE 0x6
343 #define JCC_JA 0x7
344 #define JCC_JS 0x8
345 #define JCC_JNS 0x9
346 #define JCC_JP 0xa
347 #define JCC_JNP 0xb
348 #define JCC_JL 0xc
349 #define JCC_JGE 0xd
350 #define JCC_JLE 0xe
351 #define JCC_JG 0xf
352
353 static const uint8_t tcg_cond_to_jcc[] = {
354 [TCG_COND_EQ] = JCC_JE,
355 [TCG_COND_NE] = JCC_JNE,
356 [TCG_COND_LT] = JCC_JL,
357 [TCG_COND_GE] = JCC_JGE,
358 [TCG_COND_LE] = JCC_JLE,
359 [TCG_COND_GT] = JCC_JG,
360 [TCG_COND_LTU] = JCC_JB,
361 [TCG_COND_GEU] = JCC_JAE,
362 [TCG_COND_LEU] = JCC_JBE,
363 [TCG_COND_GTU] = JCC_JA,
364 };
365
366 #if TCG_TARGET_REG_BITS == 64
367 static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
368 {
369 int rex;
370
371 if (opc & P_GS) {
372 tcg_out8(s, 0x65);
373 }
374 if (opc & P_DATA16) {
375 /* We should never be asking for both 16 and 64-bit operation. */
376 assert((opc & P_REXW) == 0);
377 tcg_out8(s, 0x66);
378 }
379 if (opc & P_ADDR32) {
380 tcg_out8(s, 0x67);
381 }
382
383 rex = 0;
384 rex |= (opc & P_REXW) >> 8; /* REX.W */
385 rex |= (r & 8) >> 1; /* REX.R */
386 rex |= (x & 8) >> 2; /* REX.X */
387 rex |= (rm & 8) >> 3; /* REX.B */
388
389 /* P_REXB_{R,RM} indicates that the given register is the low byte.
390 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
391 as otherwise the encoding indicates %[abcd]h. Note that the values
392 that are ORed in merely indicate that the REX byte must be present;
393 those bits get discarded in output. */
394 rex |= opc & (r >= 4 ? P_REXB_R : 0);
395 rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
396
397 if (rex) {
398 tcg_out8(s, (uint8_t)(rex | 0x40));
399 }
400
401 if (opc & P_EXT) {
402 tcg_out8(s, 0x0f);
403 }
404 tcg_out8(s, opc);
405 }
406 #else
407 static void tcg_out_opc(TCGContext *s, int opc)
408 {
409 if (opc & P_DATA16) {
410 tcg_out8(s, 0x66);
411 }
412 if (opc & P_EXT) {
413 tcg_out8(s, 0x0f);
414 }
415 tcg_out8(s, opc);
416 }
417 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
418 the 32-bit compilation paths. This method works with all versions of gcc,
419 whereas relying on optimization may not be able to exclude them. */
420 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
421 #endif
422
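/* Emit OPC followed by a register-direct ModRM byte (mod = 3), with R in the
   reg field and RM in the r/m field. */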
423 static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
424 {
425 tcg_out_opc(s, opc, r, rm, 0);
426 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
427 }
428
429 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
430    A negative RM or INDEX value indicates that component is absent.  In 64-bit
431 mode for absolute addresses, ~RM is the size of the immediate operand
432 that will follow the instruction. */
433
434 static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
435 int index, int shift, intptr_t offset)
436 {
437 int mod, len;
438
439 if (index < 0 && rm < 0) {
440 if (TCG_TARGET_REG_BITS == 64) {
441 /* Try for a rip-relative addressing mode. This has replaced
442 the 32-bit-mode absolute addressing encoding. */
443 intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
444 intptr_t disp = offset - pc;
445 if (disp == (int32_t)disp) {
446 tcg_out_opc(s, opc, r, 0, 0);
447 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
448 tcg_out32(s, disp);
449 return;
450 }
451
452 /* Try for an absolute address encoding. This requires the
453 use of the MODRM+SIB encoding and is therefore larger than
454 rip-relative addressing. */
455 if (offset == (int32_t)offset) {
456 tcg_out_opc(s, opc, r, 0, 0);
457 tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
458 tcg_out8(s, (4 << 3) | 5);
459 tcg_out32(s, offset);
460 return;
461 }
462
463 /* ??? The memory isn't directly addressable. */
464 tcg_abort();
465 } else {
466 /* Absolute address. */
467 tcg_out_opc(s, opc, r, 0, 0);
468 tcg_out8(s, (r << 3) | 5);
469 tcg_out32(s, offset);
470 return;
471 }
472 }
473
474 /* Find the length of the immediate addend. Note that the encoding
475 that would be used for (%ebp) indicates absolute addressing. */
476 if (rm < 0) {
477 mod = 0, len = 4, rm = 5;
478 } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
479 mod = 0, len = 0;
480 } else if (offset == (int8_t)offset) {
481 mod = 0x40, len = 1;
482 } else {
483 mod = 0x80, len = 4;
484 }
485
486 /* Use a single byte MODRM format if possible. Note that the encoding
487 that would be used for %esp is the escape to the two byte form. */
488 if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
489 /* Single byte MODRM format. */
490 tcg_out_opc(s, opc, r, rm, 0);
491 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
492 } else {
493 /* Two byte MODRM+SIB format. */
494
495 /* Note that the encoding that would place %esp into the index
496 field indicates no index register. In 64-bit mode, the REX.X
497 bit counts, so %r12 can be used as the index. */
498 if (index < 0) {
499 index = 4;
500 } else {
501 assert(index != TCG_REG_ESP);
502 }
503
504 tcg_out_opc(s, opc, r, rm, index);
505 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
506 tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
507 }
508
509 if (len == 1) {
510 tcg_out8(s, offset);
511 } else if (len == 4) {
512 tcg_out32(s, offset);
513 }
514 }
515
516 /* A simplification of the above with no index or shift. */
517 static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
518 int rm, intptr_t offset)
519 {
520 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
521 }
522
523 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
524 static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
525 {
526 /* Propagate an opcode prefix, such as P_REXW. */
527 int ext = subop & ~0x7;
528 subop &= 0x7;
529
530 tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
531 }
532
533 static inline void tcg_out_mov(TCGContext *s, TCGType type,
534 TCGReg ret, TCGReg arg)
535 {
536 if (arg != ret) {
537 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
538 tcg_out_modrm(s, opc, ret, arg);
539 }
540 }
541
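/* Load the constant ARG into RET using the shortest available encoding:
   XOR for zero (note that this clobbers the flags), a 32-bit MOV for values
   that zero-extend, a REX.W MOV for values that sign-extend from 32 bits,
   a pc-relative LEA, and finally the full 10-byte immediate move. */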
542 static void tcg_out_movi(TCGContext *s, TCGType type,
543 TCGReg ret, tcg_target_long arg)
544 {
545 tcg_target_long diff;
546
547 if (arg == 0) {
548 tgen_arithr(s, ARITH_XOR, ret, ret);
549 return;
550 }
551 if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
552 tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
553 tcg_out32(s, arg);
554 return;
555 }
556 if (arg == (int32_t)arg) {
557 tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
558 tcg_out32(s, arg);
559 return;
560 }
561
562 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
563 diff = arg - ((uintptr_t)s->code_ptr + 7);
564 if (diff == (int32_t)diff) {
565 tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
566 tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
567 tcg_out32(s, diff);
568 return;
569 }
570
571 tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
572 tcg_out64(s, arg);
573 }
574
575 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
576 {
577 if (val == (int8_t)val) {
578 tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
579 tcg_out8(s, val);
580 } else if (val == (int32_t)val) {
581 tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
582 tcg_out32(s, val);
583 } else {
584 tcg_abort();
585 }
586 }
587
588 static inline void tcg_out_push(TCGContext *s, int reg)
589 {
590 tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
591 }
592
593 static inline void tcg_out_pop(TCGContext *s, int reg)
594 {
595 tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
596 }
597
598 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
599 TCGReg arg1, intptr_t arg2)
600 {
601 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
602 tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
603 }
604
605 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
606 TCGReg arg1, intptr_t arg2)
607 {
608 int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
609 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
610 }
611
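/* Store the 32-bit immediate VAL to BASE+OFS; for TCG_TYPE_I64 the REX.W
   form of MOV sign-extends the immediate to 64 bits. */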
612 static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
613 tcg_target_long ofs, tcg_target_long val)
614 {
615 int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
616 tcg_out_modrm_offset(s, opc, 0, base, ofs);
617 tcg_out32(s, val);
618 }
619
620 static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
621 {
622 /* Propagate an opcode prefix, such as P_DATA16. */
623 int ext = subopc & ~0x7;
624 subopc &= 0x7;
625
626 if (count == 1) {
627 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
628 } else {
629 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
630 tcg_out8(s, count);
631 }
632 }
633
634 static inline void tcg_out_bswap32(TCGContext *s, int reg)
635 {
636 tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
637 }
638
639 static inline void tcg_out_rolw_8(TCGContext *s, int reg)
640 {
641 tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
642 }
643
644 static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
645 {
646 /* movzbl */
647 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
648 tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
649 }
650
651 static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
652 {
653 /* movsbl */
654 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
655 tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
656 }
657
658 static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
659 {
660 /* movzwl */
661 tcg_out_modrm(s, OPC_MOVZWL, dest, src);
662 }
663
664 static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
665 {
666 /* movsw[lq] */
667 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
668 }
669
670 static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
671 {
672 /* 32-bit mov zero extends. */
673 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
674 }
675
676 static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
677 {
678 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
679 }
680
681 static inline void tcg_out_bswap64(TCGContext *s, int reg)
682 {
683 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
684 }
685
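/* Emit "r0 <op>= val" using the Group 1 immediate forms.  A non-zero CF
   means the caller needs the carry flag produced by the operation (add2/sub2),
   which rules out the INC/DEC shortcut since those leave CF untouched. */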
686 static void tgen_arithi(TCGContext *s, int c, int r0,
687 tcg_target_long val, int cf)
688 {
689 int rexw = 0;
690
691 if (TCG_TARGET_REG_BITS == 64) {
692 rexw = c & -8;
693 c &= 7;
694 }
695
696     /* ??? While INC is 2 bytes shorter than ADDL $1, it also induces
697        partial flags update stalls on Pentium 4 and is not recommended
698        by current Intel optimization manuals. */
699 if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
700 int is_inc = (c == ARITH_ADD) ^ (val < 0);
701 if (TCG_TARGET_REG_BITS == 64) {
702 /* The single-byte increment encodings are re-tasked as the
703 REX prefixes. Use the MODRM encoding. */
704 tcg_out_modrm(s, OPC_GRP5 + rexw,
705 (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
706 } else {
707 tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
708 }
709 return;
710 }
711
712 if (c == ARITH_AND) {
713 if (TCG_TARGET_REG_BITS == 64) {
714 if (val == 0xffffffffu) {
715 tcg_out_ext32u(s, r0, r0);
716 return;
717 }
718 if (val == (uint32_t)val) {
719 /* AND with no high bits set can use a 32-bit operation. */
720 rexw = 0;
721 }
722 }
723 if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
724 tcg_out_ext8u(s, r0, r0);
725 return;
726 }
727 if (val == 0xffffu) {
728 tcg_out_ext16u(s, r0, r0);
729 return;
730 }
731 }
732
733 if (val == (int8_t)val) {
734 tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
735 tcg_out8(s, val);
736 return;
737 }
738 if (rexw == 0 || val == (int32_t)val) {
739 tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
740 tcg_out32(s, val);
741 return;
742 }
743
744 tcg_abort();
745 }
746
747 static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
748 {
749 if (val != 0) {
750 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
751 }
752 }
753
754 /* Use SMALL != 0 to force a short forward branch. */
755 static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
756 {
757 int32_t val, val1;
758 TCGLabel *l = &s->labels[label_index];
759
760 if (l->has_value) {
761 val = l->u.value - (intptr_t)s->code_ptr;
762 val1 = val - 2;
763 if ((int8_t)val1 == val1) {
764 if (opc == -1) {
765 tcg_out8(s, OPC_JMP_short);
766 } else {
767 tcg_out8(s, OPC_JCC_short + opc);
768 }
769 tcg_out8(s, val1);
770 } else {
771 if (small) {
772 tcg_abort();
773 }
774 if (opc == -1) {
775 tcg_out8(s, OPC_JMP_long);
776 tcg_out32(s, val - 5);
777 } else {
778 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
779 tcg_out32(s, val - 6);
780 }
781 }
782 } else if (small) {
783 if (opc == -1) {
784 tcg_out8(s, OPC_JMP_short);
785 } else {
786 tcg_out8(s, OPC_JCC_short + opc);
787 }
788 tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
789 s->code_ptr += 1;
790 } else {
791 if (opc == -1) {
792 tcg_out8(s, OPC_JMP_long);
793 } else {
794 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
795 }
796 tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
797 s->code_ptr += 4;
798 }
799 }
800
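/* Set the flags for a following JCC/SETCC/CMOVCC by comparing ARG1 against
   ARG2 (register or immediate).  A comparison against constant zero uses the
   shorter TEST reg,reg encoding. */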
801 static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
802 int const_arg2, int rexw)
803 {
804 if (const_arg2) {
805 if (arg2 == 0) {
806 /* test r, r */
807 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
808 } else {
809 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
810 }
811 } else {
812 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
813 }
814 }
815
816 static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
817 TCGArg arg1, TCGArg arg2, int const_arg2,
818 int label_index, int small)
819 {
820 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
821 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
822 }
823
824 #if TCG_TARGET_REG_BITS == 64
825 static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
826 TCGArg arg1, TCGArg arg2, int const_arg2,
827 int label_index, int small)
828 {
829 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
830 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
831 }
832 #else
833 /* XXX: we implement it at the target level to avoid having to
834    handle temporaries that live across basic blocks */
835 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
836 const int *const_args, int small)
837 {
838 int label_next;
839 label_next = gen_new_label();
840 switch(args[4]) {
841 case TCG_COND_EQ:
842 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
843 label_next, 1);
844 tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
845 args[5], small);
846 break;
847 case TCG_COND_NE:
848 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
849 args[5], small);
850 tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
851 args[5], small);
852 break;
853 case TCG_COND_LT:
854 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
855 args[5], small);
856 tcg_out_jxx(s, JCC_JNE, label_next, 1);
857 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
858 args[5], small);
859 break;
860 case TCG_COND_LE:
861 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
862 args[5], small);
863 tcg_out_jxx(s, JCC_JNE, label_next, 1);
864 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
865 args[5], small);
866 break;
867 case TCG_COND_GT:
868 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
869 args[5], small);
870 tcg_out_jxx(s, JCC_JNE, label_next, 1);
871 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
872 args[5], small);
873 break;
874 case TCG_COND_GE:
875 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
876 args[5], small);
877 tcg_out_jxx(s, JCC_JNE, label_next, 1);
878 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
879 args[5], small);
880 break;
881 case TCG_COND_LTU:
882 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
883 args[5], small);
884 tcg_out_jxx(s, JCC_JNE, label_next, 1);
885 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
886 args[5], small);
887 break;
888 case TCG_COND_LEU:
889 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
890 args[5], small);
891 tcg_out_jxx(s, JCC_JNE, label_next, 1);
892 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
893 args[5], small);
894 break;
895 case TCG_COND_GTU:
896 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
897 args[5], small);
898 tcg_out_jxx(s, JCC_JNE, label_next, 1);
899 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
900 args[5], small);
901 break;
902 case TCG_COND_GEU:
903 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
904 args[5], small);
905 tcg_out_jxx(s, JCC_JNE, label_next, 1);
906 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
907 args[5], small);
908 break;
909 default:
910 tcg_abort();
911 }
912 tcg_out_label(s, label_next, s->code_ptr);
913 }
914 #endif
915
916 static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
917 TCGArg arg1, TCGArg arg2, int const_arg2)
918 {
919 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
920 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
921 tcg_out_ext8u(s, dest, dest);
922 }
923
924 #if TCG_TARGET_REG_BITS == 64
925 static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
926 TCGArg arg1, TCGArg arg2, int const_arg2)
927 {
928 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
929 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
930 tcg_out_ext8u(s, dest, dest);
931 }
932 #else
933 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
934 const int *const_args)
935 {
936 TCGArg new_args[6];
937 int label_true, label_over;
938
939 memcpy(new_args, args+1, 5*sizeof(TCGArg));
940
941 if (args[0] == args[1] || args[0] == args[2]
942 || (!const_args[3] && args[0] == args[3])
943 || (!const_args[4] && args[0] == args[4])) {
944 /* When the destination overlaps with one of the argument
945 registers, don't do anything tricky. */
946 label_true = gen_new_label();
947 label_over = gen_new_label();
948
949 new_args[5] = label_true;
950 tcg_out_brcond2(s, new_args, const_args+1, 1);
951
952 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
953 tcg_out_jxx(s, JCC_JMP, label_over, 1);
954 tcg_out_label(s, label_true, s->code_ptr);
955
956 tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
957 tcg_out_label(s, label_over, s->code_ptr);
958 } else {
959 /* When the destination does not overlap one of the arguments,
960 clear the destination first, jump if cond false, and emit an
961 increment in the true case. This results in smaller code. */
962
963 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
964
965 label_over = gen_new_label();
966 new_args[4] = tcg_invert_cond(new_args[4]);
967 new_args[5] = label_over;
968 tcg_out_brcond2(s, new_args, const_args+1, 1);
969
970 tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
971 tcg_out_label(s, label_over, s->code_ptr);
972 }
973 }
974 #endif
975
976 static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
977 TCGArg c1, TCGArg c2, int const_c2,
978 TCGArg v1)
979 {
980 tcg_out_cmp(s, c1, c2, const_c2, 0);
981 if (have_cmov) {
982 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
983 } else {
984 int over = gen_new_label();
985 tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
986 tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
987 tcg_out_label(s, over, s->code_ptr);
988 }
989 }
990
991 #if TCG_TARGET_REG_BITS == 64
992 static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
993 TCGArg c1, TCGArg c2, int const_c2,
994 TCGArg v1)
995 {
996 tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
997 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
998 }
999 #endif
1000
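/* Emit a call (CALL != 0) or a jump to DEST.  If the displacement fits in
   32 bits, use the direct form; otherwise (possible only on 64-bit hosts)
   load the destination into R10 and branch indirectly. */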
1001 static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest)
1002 {
1003 intptr_t disp = dest - (intptr_t)s->code_ptr - 5;
1004
1005 if (disp == (int32_t)disp) {
1006 tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
1007 tcg_out32(s, disp);
1008 } else {
1009 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
1010 tcg_out_modrm(s, OPC_GRP5,
1011 call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
1012 }
1013 }
1014
1015 static inline void tcg_out_calli(TCGContext *s, uintptr_t dest)
1016 {
1017 tcg_out_branch(s, 1, dest);
1018 }
1019
1020 static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
1021 {
1022 tcg_out_branch(s, 0, dest);
1023 }
1024
1025 #if defined(CONFIG_SOFTMMU)
1026 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1027 * int mmu_idx, uintptr_t ra)
1028 */
1029 static const void * const qemu_ld_helpers[4] = {
1030 helper_ret_ldub_mmu,
1031 helper_ret_lduw_mmu,
1032 helper_ret_ldul_mmu,
1033 helper_ret_ldq_mmu,
1034 };
1035
1036 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1037 * uintxx_t val, int mmu_idx, uintptr_t ra)
1038 */
1039 static const void * const qemu_st_helpers[4] = {
1040 helper_ret_stb_mmu,
1041 helper_ret_stw_mmu,
1042 helper_ret_stl_mmu,
1043 helper_ret_stq_mmu,
1044 };
1045
1046 static void add_qemu_ldst_label(TCGContext *s,
1047 int is_ld,
1048 int opc,
1049 int data_reg,
1050 int data_reg2,
1051 int addrlo_reg,
1052 int addrhi_reg,
1053 int mem_index,
1054 uint8_t *raddr,
1055 uint8_t **label_ptr);
1056
1057 /* Perform the TLB load and compare.
1058
1059 Inputs:
1060 ADDRLO_IDX contains the index into ARGS of the low part of the
1061    address; the high part of the address is at ADDRLO_IDX + 1.
1062
1063 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1064
1065 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1066 This should be offsetof addr_read or addr_write.
1067
1068 Outputs:
1069 LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
1070 positions of the displacements of forward jumps to the TLB miss case.
1071
1072 Second argument register is loaded with the low part of the address.
1073 In the TLB hit case, it has been adjusted as indicated by the TLB
1074 and so is a host address. In the TLB miss case, it continues to
1075 hold a guest address.
1076
1077 First argument register is clobbered. */
1078
1079 static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
1080 int mem_index, int s_bits,
1081 const TCGArg *args,
1082 uint8_t **label_ptr, int which)
1083 {
1084 const int addrlo = args[addrlo_idx];
1085 const int r0 = TCG_REG_L0;
1086 const int r1 = TCG_REG_L1;
1087 TCGType ttype = TCG_TYPE_I32;
1088 TCGType htype = TCG_TYPE_I32;
1089 int trexw = 0, hrexw = 0;
1090
1091 if (TCG_TARGET_REG_BITS == 64) {
1092 if (TARGET_LONG_BITS == 64) {
1093 ttype = TCG_TYPE_I64;
1094 trexw = P_REXW;
1095 }
1096 if (TCG_TYPE_PTR == TCG_TYPE_I64) {
1097 htype = TCG_TYPE_I64;
1098 hrexw = P_REXW;
1099 }
1100 }
1101
1102 tcg_out_mov(s, htype, r0, addrlo);
1103 tcg_out_mov(s, ttype, r1, addrlo);
1104
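    /* r0 becomes the TLB index, already scaled by the TLB entry size.  r1
       keeps the page-aligned address with the low s_bits alignment bits
       preserved, so that an unaligned access fails the compare below and
       takes the slow path. */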
1105 tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
1106 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1107
1108 tgen_arithi(s, ARITH_AND + trexw, r1,
1109 TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
1110 tgen_arithi(s, ARITH_AND + hrexw, r0,
1111 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1112
1113 tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
1114 offsetof(CPUArchState, tlb_table[mem_index][0])
1115 + which);
1116
1117 /* cmp 0(r0), r1 */
1118 tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
1119
1120 /* Prepare for both the fast path add of the tlb addend, and the slow
1121        path function argument setup.  There are two cases worth noting:
1122 For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
1123 before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
1124 copies the entire guest address for the slow path, while truncation
1125 for the 32-bit host happens with the fastpath ADDL below. */
1126 tcg_out_mov(s, ttype, r1, addrlo);
1127
1128 /* jne slow_path */
1129 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1130 label_ptr[0] = s->code_ptr;
1131 s->code_ptr += 4;
1132
1133 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1134 /* cmp 4(r0), addrhi */
1135 tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r0, 4);
1136
1137 /* jne slow_path */
1138 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1139 label_ptr[1] = s->code_ptr;
1140 s->code_ptr += 4;
1141 }
1142
1143 /* TLB Hit. */
1144
1145 /* add addend(r0), r1 */
1146 tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
1147 offsetof(CPUTLBEntry, addend) - which);
1148 }
1149 #elif defined(__x86_64__) && defined(__linux__)
1150 # include <asm/prctl.h>
1151 # include <sys/prctl.h>
1152
1153 int arch_prctl(int code, unsigned long addr);
1154
1155 static int guest_base_flags;
1156 static inline void setup_guest_base_seg(void)
1157 {
1158 if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
1159 guest_base_flags = P_GS;
1160 }
1161 }
1162 #else
1163 # define guest_base_flags 0
1164 static inline void setup_guest_base_seg(void) { }
1165 #endif /* SOFTMMU */
1166
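/* SIZEOP encodes the access: bits 1:0 hold log2 of the size in bytes, and
   bit 2 requests sign extension of the loaded value. */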
1167 static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
1168 int base, intptr_t ofs, int seg, int sizeop)
1169 {
1170 #ifdef TARGET_WORDS_BIGENDIAN
1171 const int bswap = 1;
1172 #else
1173 const int bswap = 0;
1174 #endif
1175 switch (sizeop) {
1176 case 0:
1177 tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
1178 break;
1179 case 0 | 4:
1180 tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
1181 break;
1182 case 1:
1183 tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
1184 if (bswap) {
1185 tcg_out_rolw_8(s, datalo);
1186 }
1187 break;
1188 case 1 | 4:
1189 if (bswap) {
1190 tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
1191 tcg_out_rolw_8(s, datalo);
1192 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1193 } else {
1194 tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
1195 datalo, base, ofs);
1196 }
1197 break;
1198 case 2:
1199 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
1200 if (bswap) {
1201 tcg_out_bswap32(s, datalo);
1202 }
1203 break;
1204 #if TCG_TARGET_REG_BITS == 64
1205 case 2 | 4:
1206 if (bswap) {
1207 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
1208 tcg_out_bswap32(s, datalo);
1209 tcg_out_ext32s(s, datalo, datalo);
1210 } else {
1211 tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
1212 }
1213 break;
1214 #endif
1215 case 3:
1216 if (TCG_TARGET_REG_BITS == 64) {
1217 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
1218 datalo, base, ofs);
1219 if (bswap) {
1220 tcg_out_bswap64(s, datalo);
1221 }
1222 } else {
1223 if (bswap) {
1224 int t = datalo;
1225 datalo = datahi;
1226 datahi = t;
1227 }
1228 if (base != datalo) {
1229 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1230 datalo, base, ofs);
1231 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1232 datahi, base, ofs + 4);
1233 } else {
1234 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1235 datahi, base, ofs + 4);
1236 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1237 datalo, base, ofs);
1238 }
1239 if (bswap) {
1240 tcg_out_bswap32(s, datalo);
1241 tcg_out_bswap32(s, datahi);
1242 }
1243 }
1244 break;
1245 default:
1246 tcg_abort();
1247 }
1248 }
1249
1250 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1251    EAX.  It will be useful once fixed-register globals are less
1252 common. */
1253 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
1254 int opc)
1255 {
1256 int data_reg, data_reg2 = 0;
1257 int addrlo_idx;
1258 #if defined(CONFIG_SOFTMMU)
1259 int mem_index, s_bits;
1260 uint8_t *label_ptr[2];
1261 #endif
1262
1263 data_reg = args[0];
1264 addrlo_idx = 1;
1265 if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
1266 data_reg2 = args[1];
1267 addrlo_idx = 2;
1268 }
1269
1270 #if defined(CONFIG_SOFTMMU)
1271 mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
1272 s_bits = opc & 3;
1273
1274 tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
1275 label_ptr, offsetof(CPUTLBEntry, addr_read));
1276
1277 /* TLB Hit. */
1278 tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc);
1279
1280 /* Record the current context of a load into ldst label */
1281 add_qemu_ldst_label(s,
1282 1,
1283 opc,
1284 data_reg,
1285 data_reg2,
1286 args[addrlo_idx],
1287 args[addrlo_idx + 1],
1288 mem_index,
1289 s->code_ptr,
1290 label_ptr);
1291 #else
1292 {
1293 int32_t offset = GUEST_BASE;
1294 int base = args[addrlo_idx];
1295 int seg = 0;
1296
1297 /* ??? We assume all operations have left us with register contents
1298 that are zero extended. So far this appears to be true. If we
1299 want to enforce this, we can either do an explicit zero-extension
1300 here, or (if GUEST_BASE == 0, or a segment register is in use)
1301 use the ADDR32 prefix. For now, do nothing. */
1302 if (GUEST_BASE && guest_base_flags) {
1303 seg = guest_base_flags;
1304 offset = 0;
1305 } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
1306 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
1307 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1308 base = TCG_REG_L1;
1309 offset = 0;
1310 }
1311
1312 tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, seg, opc);
1313 }
1314 #endif
1315 }
1316
1317 static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
1318 int base, intptr_t ofs, int seg,
1319 int sizeop)
1320 {
1321 #ifdef TARGET_WORDS_BIGENDIAN
1322 const int bswap = 1;
1323 #else
1324 const int bswap = 0;
1325 #endif
1326 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1327 we could perform the bswap twice to restore the original value
1328 instead of moving to the scratch. But as it is, the L constraint
1329 means that TCG_REG_L0 is definitely free here. */
1330 const int scratch = TCG_REG_L0;
1331
1332 switch (sizeop) {
1333 case 0:
1334 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
1335 datalo, base, ofs);
1336 break;
1337 case 1:
1338 if (bswap) {
1339 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1340 tcg_out_rolw_8(s, scratch);
1341 datalo = scratch;
1342 }
1343 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
1344 datalo, base, ofs);
1345 break;
1346 case 2:
1347 if (bswap) {
1348 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1349 tcg_out_bswap32(s, scratch);
1350 datalo = scratch;
1351 }
1352 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
1353 break;
1354 case 3:
1355 if (TCG_TARGET_REG_BITS == 64) {
1356 if (bswap) {
1357 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1358 tcg_out_bswap64(s, scratch);
1359 datalo = scratch;
1360 }
1361 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
1362 datalo, base, ofs);
1363 } else if (bswap) {
1364 tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
1365 tcg_out_bswap32(s, scratch);
1366 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
1367 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1368 tcg_out_bswap32(s, scratch);
1369 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
1370 } else {
1371 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
1372 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
1373 }
1374 break;
1375 default:
1376 tcg_abort();
1377 }
1378 }
1379
1380 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
1381 int opc)
1382 {
1383 int data_reg, data_reg2 = 0;
1384 int addrlo_idx;
1385 #if defined(CONFIG_SOFTMMU)
1386 int mem_index, s_bits;
1387 uint8_t *label_ptr[2];
1388 #endif
1389
1390 data_reg = args[0];
1391 addrlo_idx = 1;
1392 if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
1393 data_reg2 = args[1];
1394 addrlo_idx = 2;
1395 }
1396
1397 #if defined(CONFIG_SOFTMMU)
1398 mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
1399 s_bits = opc;
1400
1401 tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
1402 label_ptr, offsetof(CPUTLBEntry, addr_write));
1403
1404 /* TLB Hit. */
1405 tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc);
1406
1407 /* Record the current context of a store into ldst label */
1408 add_qemu_ldst_label(s,
1409 0,
1410 opc,
1411 data_reg,
1412 data_reg2,
1413 args[addrlo_idx],
1414 args[addrlo_idx + 1],
1415 mem_index,
1416 s->code_ptr,
1417 label_ptr);
1418 #else
1419 {
1420 int32_t offset = GUEST_BASE;
1421 int base = args[addrlo_idx];
1422 int seg = 0;
1423
1424 /* ??? We assume all operations have left us with register contents
1425 that are zero extended. So far this appears to be true. If we
1426 want to enforce this, we can either do an explicit zero-extension
1427 here, or (if GUEST_BASE == 0, or a segment register is in use)
1428 use the ADDR32 prefix. For now, do nothing. */
1429 if (GUEST_BASE && guest_base_flags) {
1430 seg = guest_base_flags;
1431 offset = 0;
1432 } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
1433 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
1434 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1435 base = TCG_REG_L1;
1436 offset = 0;
1437 }
1438
1439 tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, seg, opc);
1440 }
1441 #endif
1442 }
1443
1444 #if defined(CONFIG_SOFTMMU)
1445 /*
1446 * Record the context of a call to the out of line helper code for the slow path
1447 * for a load or store, so that we can later generate the correct helper code
1448 */
1449 static void add_qemu_ldst_label(TCGContext *s,
1450 int is_ld,
1451 int opc,
1452 int data_reg,
1453 int data_reg2,
1454 int addrlo_reg,
1455 int addrhi_reg,
1456 int mem_index,
1457 uint8_t *raddr,
1458 uint8_t **label_ptr)
1459 {
1460 TCGLabelQemuLdst *label = new_ldst_label(s);
1461
1462 label->is_ld = is_ld;
1463 label->opc = opc;
1464 label->datalo_reg = data_reg;
1465 label->datahi_reg = data_reg2;
1466 label->addrlo_reg = addrlo_reg;
1467 label->addrhi_reg = addrhi_reg;
1468 label->mem_index = mem_index;
1469 label->raddr = raddr;
1470 label->label_ptr[0] = label_ptr[0];
1471 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1472 label->label_ptr[1] = label_ptr[1];
1473 }
1474 }
1475
1476 /*
1477 * Generate code for the slow path for a load at the end of block
1478 */
1479 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1480 {
1481 int opc = l->opc;
1482 int s_bits = opc & 3;
1483 TCGReg data_reg;
1484 uint8_t **label_ptr = &l->label_ptr[0];
1485
1486 /* resolve label address */
1487 *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
1488 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1489 *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
1490 }
1491
1492 if (TCG_TARGET_REG_BITS == 32) {
1493 int ofs = 0;
1494
1495 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1496 ofs += 4;
1497
1498 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1499 ofs += 4;
1500
1501 if (TARGET_LONG_BITS == 64) {
1502 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1503 ofs += 4;
1504 }
1505
1506 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
1507 ofs += 4;
1508
1509 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
1510 } else {
1511 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1512 /* The second argument is already loaded with addrlo. */
1513 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
1514 l->mem_index);
1515 tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
1516 (uintptr_t)l->raddr);
1517 }
1518
1519 tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[s_bits]);
1520
1521 data_reg = l->datalo_reg;
1522 switch(opc) {
1523 case 0 | 4:
1524 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1525 break;
1526 case 1 | 4:
1527 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1528 break;
1529 #if TCG_TARGET_REG_BITS == 64
1530 case 2 | 4:
1531 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1532 break;
1533 #endif
1534 case 0:
1535 case 1:
1536 /* Note that the helpers have zero-extended to tcg_target_long. */
1537 case 2:
1538 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1539 break;
1540 case 3:
1541 if (TCG_TARGET_REG_BITS == 64) {
1542 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1543 } else if (data_reg == TCG_REG_EDX) {
1544 /* xchg %edx, %eax */
1545 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1546 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
1547 } else {
1548 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1549 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
1550 }
1551 break;
1552 default:
1553 tcg_abort();
1554 }
1555
1556     /* Jump to the code corresponding to next IR of qemu_ld */
1557 tcg_out_jmp(s, (uintptr_t)l->raddr);
1558 }
1559
1560 /*
1561 * Generate code for the slow path for a store at the end of block
1562 */
1563 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1564 {
1565 int opc = l->opc;
1566 int s_bits = opc & 3;
1567 uint8_t **label_ptr = &l->label_ptr[0];
1568 TCGReg retaddr;
1569
1570 /* resolve label address */
1571 *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
1572 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1573 *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
1574 }
1575
1576 if (TCG_TARGET_REG_BITS == 32) {
1577 int ofs = 0;
1578
1579 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1580 ofs += 4;
1581
1582 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1583 ofs += 4;
1584
1585 if (TARGET_LONG_BITS == 64) {
1586 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1587 ofs += 4;
1588 }
1589
1590 tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
1591 ofs += 4;
1592
1593 if (opc == 3) {
1594 tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
1595 ofs += 4;
1596 }
1597
1598 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
1599 ofs += 4;
1600
1601 retaddr = TCG_REG_EAX;
1602 tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
1603 tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
1604 } else {
1605 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1606 /* The second argument is already loaded with addrlo. */
1607 tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1608 tcg_target_call_iarg_regs[2], l->datalo_reg);
1609 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
1610 l->mem_index);
1611
1612 if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
1613 retaddr = tcg_target_call_iarg_regs[4];
1614 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1615 } else {
1616 retaddr = TCG_REG_RAX;
1617 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1618 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);
1619 }
1620 }
1621
1622 /* "Tail call" to the helper, with the return address back inline. */
1623 tcg_out_push(s, retaddr);
1624 tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[s_bits]);
1625 }
1626 #endif /* CONFIG_SOFTMMU */
1627
1628 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1629 const TCGArg *args, const int *const_args)
1630 {
1631 int c, rexw = 0;
1632
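/* OP_32_64(x) expands to the case labels for both the 32-bit and, on 64-bit
   hosts, the 64-bit variant of an opcode; the _i64 label sets REXW and falls
   through to the _i32 code. */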
1633 #if TCG_TARGET_REG_BITS == 64
1634 # define OP_32_64(x) \
1635 case glue(glue(INDEX_op_, x), _i64): \
1636 rexw = P_REXW; /* FALLTHRU */ \
1637 case glue(glue(INDEX_op_, x), _i32)
1638 #else
1639 # define OP_32_64(x) \
1640 case glue(glue(INDEX_op_, x), _i32)
1641 #endif
1642
1643 switch(opc) {
1644 case INDEX_op_exit_tb:
1645 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1646 tcg_out_jmp(s, (uintptr_t)tb_ret_addr);
1647 break;
1648 case INDEX_op_goto_tb:
1649 if (s->tb_jmp_offset) {
1650 /* direct jump method */
1651 tcg_out8(s, OPC_JMP_long); /* jmp im */
1652 s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1653 tcg_out32(s, 0);
1654 } else {
1655 /* indirect jump method */
1656 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1657 (intptr_t)(s->tb_next + args[0]));
1658 }
1659 s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1660 break;
1661 case INDEX_op_call:
1662 if (const_args[0]) {
1663 tcg_out_calli(s, args[0]);
1664 } else {
1665 /* call *reg */
1666 tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
1667 }
1668 break;
1669 case INDEX_op_br:
1670 tcg_out_jxx(s, JCC_JMP, args[0], 0);
1671 break;
1672 case INDEX_op_movi_i32:
1673 tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1674 break;
1675 OP_32_64(ld8u):
1676 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1677 tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1678 break;
1679 OP_32_64(ld8s):
1680 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1681 break;
1682 OP_32_64(ld16u):
1683 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1684 tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1685 break;
1686 OP_32_64(ld16s):
1687 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1688 break;
1689 #if TCG_TARGET_REG_BITS == 64
1690 case INDEX_op_ld32u_i64:
1691 #endif
1692 case INDEX_op_ld_i32:
1693 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1694 break;
1695
1696 OP_32_64(st8):
1697 if (const_args[0]) {
1698 tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
1699 0, args[1], args[2]);
1700 tcg_out8(s, args[0]);
1701 } else {
1702 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1703 args[0], args[1], args[2]);
1704 }
1705 break;
1706 OP_32_64(st16):
1707 if (const_args[0]) {
1708 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
1709 0, args[1], args[2]);
1710 tcg_out16(s, args[0]);
1711 } else {
1712 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1713 args[0], args[1], args[2]);
1714 }
1715 break;
1716 #if TCG_TARGET_REG_BITS == 64
1717 case INDEX_op_st32_i64:
1718 #endif
1719 case INDEX_op_st_i32:
1720 if (const_args[0]) {
1721 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
1722 tcg_out32(s, args[0]);
1723 } else {
1724 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1725 }
1726 break;
1727
1728 OP_32_64(add):
1729 /* For 3-operand addition, use LEA. */
1730 if (args[0] != args[1]) {
1731 TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1732
1733 if (const_args[2]) {
1734 c3 = a2, a2 = -1;
1735 } else if (a0 == a2) {
1736 /* Watch out for dest = src + dest, since we've removed
1737 the matching constraint on the add. */
1738 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1739 break;
1740 }
1741
1742 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1743 break;
1744 }
1745 c = ARITH_ADD;
1746 goto gen_arith;
1747 OP_32_64(sub):
1748 c = ARITH_SUB;
1749 goto gen_arith;
1750 OP_32_64(and):
1751 c = ARITH_AND;
1752 goto gen_arith;
1753 OP_32_64(or):
1754 c = ARITH_OR;
1755 goto gen_arith;
1756 OP_32_64(xor):
1757 c = ARITH_XOR;
1758 goto gen_arith;
1759 gen_arith:
1760 if (const_args[2]) {
1761 tgen_arithi(s, c + rexw, args[0], args[2], 0);
1762 } else {
1763 tgen_arithr(s, c + rexw, args[0], args[2]);
1764 }
1765 break;
1766
1767 OP_32_64(mul):
1768 if (const_args[2]) {
1769 int32_t val;
1770 val = args[2];
1771 if (val == (int8_t)val) {
1772 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1773 tcg_out8(s, val);
1774 } else {
1775 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1776 tcg_out32(s, val);
1777 }
1778 } else {
1779 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1780 }
1781 break;
1782
1783 OP_32_64(div2):
1784 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1785 break;
1786 OP_32_64(divu2):
1787 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1788 break;
1789
1790 OP_32_64(shl):
1791 c = SHIFT_SHL;
1792 goto gen_shift;
1793 OP_32_64(shr):
1794 c = SHIFT_SHR;
1795 goto gen_shift;
1796 OP_32_64(sar):
1797 c = SHIFT_SAR;
1798 goto gen_shift;
1799 OP_32_64(rotl):
1800 c = SHIFT_ROL;
1801 goto gen_shift;
1802 OP_32_64(rotr):
1803 c = SHIFT_ROR;
1804 goto gen_shift;
1805 gen_shift:
1806 if (const_args[2]) {
1807 tcg_out_shifti(s, c + rexw, args[0], args[2]);
1808 } else {
1809 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1810 }
1811 break;
1812
1813 case INDEX_op_brcond_i32:
1814 tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1815 args[3], 0);
1816 break;
1817 case INDEX_op_setcond_i32:
1818 tcg_out_setcond32(s, args[3], args[0], args[1],
1819 args[2], const_args[2]);
1820 break;
1821 case INDEX_op_movcond_i32:
1822 tcg_out_movcond32(s, args[5], args[0], args[1],
1823 args[2], const_args[2], args[3]);
1824 break;
1825
1826 OP_32_64(bswap16):
1827 tcg_out_rolw_8(s, args[0]);
1828 break;
1829 OP_32_64(bswap32):
1830 tcg_out_bswap32(s, args[0]);
1831 break;
1832
1833 OP_32_64(neg):
1834 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
1835 break;
1836 OP_32_64(not):
1837 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
1838 break;
1839
1840 OP_32_64(ext8s):
1841 tcg_out_ext8s(s, args[0], args[1], rexw);
1842 break;
1843 OP_32_64(ext16s):
1844 tcg_out_ext16s(s, args[0], args[1], rexw);
1845 break;
1846 OP_32_64(ext8u):
1847 tcg_out_ext8u(s, args[0], args[1]);
1848 break;
1849 OP_32_64(ext16u):
1850 tcg_out_ext16u(s, args[0], args[1]);
1851 break;
1852
1853 case INDEX_op_qemu_ld8u:
1854 tcg_out_qemu_ld(s, args, 0);
1855 break;
1856 case INDEX_op_qemu_ld8s:
1857 tcg_out_qemu_ld(s, args, 0 | 4);
1858 break;
1859 case INDEX_op_qemu_ld16u:
1860 tcg_out_qemu_ld(s, args, 1);
1861 break;
1862 case INDEX_op_qemu_ld16s:
1863 tcg_out_qemu_ld(s, args, 1 | 4);
1864 break;
1865 #if TCG_TARGET_REG_BITS == 64
1866 case INDEX_op_qemu_ld32u:
1867 #endif
1868 case INDEX_op_qemu_ld32:
1869 tcg_out_qemu_ld(s, args, 2);
1870 break;
1871 case INDEX_op_qemu_ld64:
1872 tcg_out_qemu_ld(s, args, 3);
1873 break;
1874
1875 case INDEX_op_qemu_st8:
1876 tcg_out_qemu_st(s, args, 0);
1877 break;
1878 case INDEX_op_qemu_st16:
1879 tcg_out_qemu_st(s, args, 1);
1880 break;
1881 case INDEX_op_qemu_st32:
1882 tcg_out_qemu_st(s, args, 2);
1883 break;
1884 case INDEX_op_qemu_st64:
1885 tcg_out_qemu_st(s, args, 3);
1886 break;
1887
1888 OP_32_64(mulu2):
1889 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
1890 break;
1891 OP_32_64(muls2):
1892 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
1893 break;
1894 OP_32_64(add2):
1895 if (const_args[4]) {
1896 tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
1897 } else {
1898 tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
1899 }
1900 if (const_args[5]) {
1901 tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
1902 } else {
1903 tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
1904 }
1905 break;
1906 OP_32_64(sub2):
1907 if (const_args[4]) {
1908 tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
1909 } else {
1910 tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
1911 }
1912 if (const_args[5]) {
1913 tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
1914 } else {
1915 tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
1916 }
1917 break;
1918
1919 #if TCG_TARGET_REG_BITS == 32
1920 case INDEX_op_brcond2_i32:
1921 tcg_out_brcond2(s, args, const_args, 0);
1922 break;
1923 case INDEX_op_setcond2_i32:
1924 tcg_out_setcond2(s, args, const_args);
1925 break;
1926 #else /* TCG_TARGET_REG_BITS == 64 */
1927 case INDEX_op_movi_i64:
1928 tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
1929 break;
1930 case INDEX_op_ld32s_i64:
1931 tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
1932 break;
1933 case INDEX_op_ld_i64:
1934 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1935 break;
1936 case INDEX_op_st_i64:
1937 if (const_args[0]) {
1938 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
1939 0, args[1], args[2]);
1940 tcg_out32(s, args[0]);
1941 } else {
1942 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1943 }
1944 break;
1945 case INDEX_op_qemu_ld32s:
1946 tcg_out_qemu_ld(s, args, 2 | 4);
1947 break;
1948
1949 case INDEX_op_brcond_i64:
1950 tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
1951 args[3], 0);
1952 break;
1953 case INDEX_op_setcond_i64:
1954 tcg_out_setcond64(s, args[3], args[0], args[1],
1955 args[2], const_args[2]);
1956 break;
1957 case INDEX_op_movcond_i64:
1958 tcg_out_movcond64(s, args[5], args[0], args[1],
1959 args[2], const_args[2], args[3]);
1960 break;
1961
1962 case INDEX_op_bswap64_i64:
1963 tcg_out_bswap64(s, args[0]);
1964 break;
1965 case INDEX_op_ext32u_i64:
1966 tcg_out_ext32u(s, args[0], args[1]);
1967 break;
1968 case INDEX_op_ext32s_i64:
1969 tcg_out_ext32s(s, args[0], args[1]);
1970 break;
1971 #endif
1972
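/* Only the deposit forms that map directly onto x86 partial-register stores
   are handled here: the low byte, the second byte (the AH/BH/CH/DH encodings,
   reached by adding 4 to the register number when no REX prefix is emitted),
   and the low 16 bits; any other field aborts below.  */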
1973 OP_32_64(deposit):
1974 if (args[3] == 0 && args[4] == 8) {
1975 /* load bits 0..7 */
1976 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
1977 args[2], args[0]);
1978 } else if (args[3] == 8 && args[4] == 8) {
1979 /* load bits 8..15 */
1980 tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
1981 } else if (args[3] == 0 && args[4] == 16) {
1982 /* load bits 0..15 */
1983 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
1984 } else {
1985 tcg_abort();
1986 }
1987 break;
1988
1989 default:
1990 tcg_abort();
1991 }
1992
1993 #undef OP_32_64
1994 }
1995
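/* A rough reminder of how the constraint strings below decode (they are
   parsed by target_parse_constraint earlier in this file): "r" is any
   register, "q" a byte-addressable register, "Q" one of EAX..EDX,
   "a"/"b"/"c"/"d" pin EAX/EBX/ECX/EDX, "L" is the qemu_ld/st constraint that
   avoids the softmmu helper argument registers, "i" is any immediate, "e" a
   sign-extended and "Z" a zero-extended 32-bit immediate, and a digit
   aliases the output operand with that index.  */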
1996 static const TCGTargetOpDef x86_op_defs[] = {
1997 { INDEX_op_exit_tb, { } },
1998 { INDEX_op_goto_tb, { } },
1999 { INDEX_op_call, { "ri" } },
2000 { INDEX_op_br, { } },
2001 { INDEX_op_mov_i32, { "r", "r" } },
2002 { INDEX_op_movi_i32, { "r" } },
2003 { INDEX_op_ld8u_i32, { "r", "r" } },
2004 { INDEX_op_ld8s_i32, { "r", "r" } },
2005 { INDEX_op_ld16u_i32, { "r", "r" } },
2006 { INDEX_op_ld16s_i32, { "r", "r" } },
2007 { INDEX_op_ld_i32, { "r", "r" } },
2008 { INDEX_op_st8_i32, { "qi", "r" } },
2009 { INDEX_op_st16_i32, { "ri", "r" } },
2010 { INDEX_op_st_i32, { "ri", "r" } },
2011
2012 { INDEX_op_add_i32, { "r", "r", "ri" } },
2013 { INDEX_op_sub_i32, { "r", "0", "ri" } },
2014 { INDEX_op_mul_i32, { "r", "0", "ri" } },
2015 { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
2016 { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
2017 { INDEX_op_and_i32, { "r", "0", "ri" } },
2018 { INDEX_op_or_i32, { "r", "0", "ri" } },
2019 { INDEX_op_xor_i32, { "r", "0", "ri" } },
2020
2021 { INDEX_op_shl_i32, { "r", "0", "ci" } },
2022 { INDEX_op_shr_i32, { "r", "0", "ci" } },
2023 { INDEX_op_sar_i32, { "r", "0", "ci" } },
2024 { INDEX_op_rotl_i32, { "r", "0", "ci" } },
2025 { INDEX_op_rotr_i32, { "r", "0", "ci" } },
2026
2027 { INDEX_op_brcond_i32, { "r", "ri" } },
2028
2029 { INDEX_op_bswap16_i32, { "r", "0" } },
2030 { INDEX_op_bswap32_i32, { "r", "0" } },
2031
2032 { INDEX_op_neg_i32, { "r", "0" } },
2033
2034 { INDEX_op_not_i32, { "r", "0" } },
2035
2036 { INDEX_op_ext8s_i32, { "r", "q" } },
2037 { INDEX_op_ext16s_i32, { "r", "r" } },
2038 { INDEX_op_ext8u_i32, { "r", "q" } },
2039 { INDEX_op_ext16u_i32, { "r", "r" } },
2040
2041 { INDEX_op_setcond_i32, { "q", "r", "ri" } },
2042
2043 { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
2044 #if TCG_TARGET_HAS_movcond_i32
2045 { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
2046 #endif
2047
2048 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
2049 { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
2050 { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2051 { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2052
2053 #if TCG_TARGET_REG_BITS == 32
2054 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
2055 { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
2056 #else
2057 { INDEX_op_mov_i64, { "r", "r" } },
2058 { INDEX_op_movi_i64, { "r" } },
2059 { INDEX_op_ld8u_i64, { "r", "r" } },
2060 { INDEX_op_ld8s_i64, { "r", "r" } },
2061 { INDEX_op_ld16u_i64, { "r", "r" } },
2062 { INDEX_op_ld16s_i64, { "r", "r" } },
2063 { INDEX_op_ld32u_i64, { "r", "r" } },
2064 { INDEX_op_ld32s_i64, { "r", "r" } },
2065 { INDEX_op_ld_i64, { "r", "r" } },
2066 { INDEX_op_st8_i64, { "ri", "r" } },
2067 { INDEX_op_st16_i64, { "ri", "r" } },
2068 { INDEX_op_st32_i64, { "ri", "r" } },
2069 { INDEX_op_st_i64, { "re", "r" } },
2070
2071 { INDEX_op_add_i64, { "r", "r", "re" } },
2072 { INDEX_op_mul_i64, { "r", "0", "re" } },
2073 { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
2074 { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
2075 { INDEX_op_sub_i64, { "r", "0", "re" } },
2076 { INDEX_op_and_i64, { "r", "0", "reZ" } },
2077 { INDEX_op_or_i64, { "r", "0", "re" } },
2078 { INDEX_op_xor_i64, { "r", "0", "re" } },
2079
2080 { INDEX_op_shl_i64, { "r", "0", "ci" } },
2081 { INDEX_op_shr_i64, { "r", "0", "ci" } },
2082 { INDEX_op_sar_i64, { "r", "0", "ci" } },
2083 { INDEX_op_rotl_i64, { "r", "0", "ci" } },
2084 { INDEX_op_rotr_i64, { "r", "0", "ci" } },
2085
2086 { INDEX_op_brcond_i64, { "r", "re" } },
2087 { INDEX_op_setcond_i64, { "r", "r", "re" } },
2088
2089 { INDEX_op_bswap16_i64, { "r", "0" } },
2090 { INDEX_op_bswap32_i64, { "r", "0" } },
2091 { INDEX_op_bswap64_i64, { "r", "0" } },
2092 { INDEX_op_neg_i64, { "r", "0" } },
2093 { INDEX_op_not_i64, { "r", "0" } },
2094
2095 { INDEX_op_ext8s_i64, { "r", "r" } },
2096 { INDEX_op_ext16s_i64, { "r", "r" } },
2097 { INDEX_op_ext32s_i64, { "r", "r" } },
2098 { INDEX_op_ext8u_i64, { "r", "r" } },
2099 { INDEX_op_ext16u_i64, { "r", "r" } },
2100 { INDEX_op_ext32u_i64, { "r", "r" } },
2101
2102 { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
2103 { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
2104
2105 { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
2106 { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
2107 { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
2108 { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
2109 #endif
2110
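/* The qemu_ld/st constraints come in three flavours: a 64-bit host (guest
   addresses always fit in one register), a 32-bit host whose guest addresses
   also fit in one register, and a 32-bit host with 64-bit guest addresses,
   where the address (and any 64-bit data value) occupies two registers,
   hence the extra "L" operands.  */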
2111 #if TCG_TARGET_REG_BITS == 64
2112 { INDEX_op_qemu_ld8u, { "r", "L" } },
2113 { INDEX_op_qemu_ld8s, { "r", "L" } },
2114 { INDEX_op_qemu_ld16u, { "r", "L" } },
2115 { INDEX_op_qemu_ld16s, { "r", "L" } },
2116 { INDEX_op_qemu_ld32, { "r", "L" } },
2117 { INDEX_op_qemu_ld32u, { "r", "L" } },
2118 { INDEX_op_qemu_ld32s, { "r", "L" } },
2119 { INDEX_op_qemu_ld64, { "r", "L" } },
2120
2121 { INDEX_op_qemu_st8, { "L", "L" } },
2122 { INDEX_op_qemu_st16, { "L", "L" } },
2123 { INDEX_op_qemu_st32, { "L", "L" } },
2124 { INDEX_op_qemu_st64, { "L", "L" } },
2125 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2126 { INDEX_op_qemu_ld8u, { "r", "L" } },
2127 { INDEX_op_qemu_ld8s, { "r", "L" } },
2128 { INDEX_op_qemu_ld16u, { "r", "L" } },
2129 { INDEX_op_qemu_ld16s, { "r", "L" } },
2130 { INDEX_op_qemu_ld32, { "r", "L" } },
2131 { INDEX_op_qemu_ld64, { "r", "r", "L" } },
2132
2133 { INDEX_op_qemu_st8, { "cb", "L" } },
2134 { INDEX_op_qemu_st16, { "L", "L" } },
2135 { INDEX_op_qemu_st32, { "L", "L" } },
2136 { INDEX_op_qemu_st64, { "L", "L", "L" } },
2137 #else
2138 { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
2139 { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
2140 { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
2141 { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
2142 { INDEX_op_qemu_ld32, { "r", "L", "L" } },
2143 { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
2144
2145 { INDEX_op_qemu_st8, { "cb", "L", "L" } },
2146 { INDEX_op_qemu_st16, { "L", "L", "L" } },
2147 { INDEX_op_qemu_st32, { "L", "L", "L" } },
2148 { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
2149 #endif
2150 { -1 },
2151 };
2152
2153 static const int tcg_target_callee_save_regs[] = {
2154 #if TCG_TARGET_REG_BITS == 64
2155 TCG_REG_RBP,
2156 TCG_REG_RBX,
2157 #if defined(_WIN64)
2158 TCG_REG_RDI,
2159 TCG_REG_RSI,
2160 #endif
2161 TCG_REG_R12,
2162 TCG_REG_R13,
2163 TCG_REG_R14, /* Currently used for the global env. */
2164 TCG_REG_R15,
2165 #else
2166 TCG_REG_EBP, /* Currently used for the global env. */
2167 TCG_REG_EBX,
2168 TCG_REG_ESI,
2169 TCG_REG_EDI,
2170 #endif
2171 };
2172
2173 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2174 and tcg_register_jit. */
2175
2176 #define PUSH_SIZE \
2177 ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2178 * (TCG_TARGET_REG_BITS / 8))
2179
2180 #define FRAME_SIZE \
2181 ((PUSH_SIZE \
2182 + TCG_STATIC_CALL_ARGS_SIZE \
2183 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2184 + TCG_TARGET_STACK_ALIGN - 1) \
2185 & ~(TCG_TARGET_STACK_ALIGN - 1))
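/* For illustration: on a 64-bit non-Windows host the callee-save list above
   holds 6 registers, so PUSH_SIZE is (1 + 6) * 8 = 56 bytes (the "+ 1"
   accounts for the return address pushed by the call that enters the
   prologue); FRAME_SIZE then adds the static call-argument area and the TCG
   temp buffer, whose sizes are defined elsewhere, and rounds the sum up to
   TCG_TARGET_STACK_ALIGN.  */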
2186
2187 /* Generate global QEMU prologue and epilogue code */
2188 static void tcg_target_qemu_prologue(TCGContext *s)
2189 {
2190 int i, stack_addend;
2191
2192 /* TB prologue */
2193
2194 /* Reserve stack space for the helper-call argument area and the TCG temp buffer. */
2195 stack_addend = FRAME_SIZE - PUSH_SIZE;
2196 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2197 CPU_TEMP_BUF_NLONGS * sizeof(long));
2198
2199 /* Save all callee saved registers. */
2200 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2201 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2202 }
2203
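/* Generated code is entered as a C function call (effectively
   tcg_qemu_tb_exec(env, tb_ptr)): fetch env into TCG_AREG0 and tail-jump to
   the translated block.  On 32-bit hosts both arguments live on the stack;
   on 64-bit hosts they arrive in the first two argument registers.  */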
2204 #if TCG_TARGET_REG_BITS == 32
2205 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
2206 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
2207 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2208 /* jmp *tb. */
2209 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
2210 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
2211 + stack_addend);
2212 #else
2213 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2214 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2215 /* jmp *tb. */
2216 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
2217 #endif
2218
2219 /* TB epilogue */
2220 tb_ret_addr = s->code_ptr;
2221
2222 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
2223
2224 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2225 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2226 }
2227 tcg_out_opc(s, OPC_RET, 0, 0, 0);
2228
2229 #if !defined(CONFIG_SOFTMMU)
2230 /* Try to set up a segment register to point to GUEST_BASE. */
2231 if (GUEST_BASE) {
2232 setup_guest_base_seg();
2233 }
2234 #endif
2235 }
2236
2237 static void tcg_target_init(TCGContext *s)
2238 {
2239 /* For 32-bit hosts, it is almost certain that the hardware supports cmov,
2240    but we still need to check at runtime.  If cmov turns out not to be
2241    available, a small forward branch is used instead.  */
2242 #ifndef have_cmov
2243 {
2244 unsigned a, b, c, d;
2245 have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
2246 }
2247 #endif
2248
2249 if (TCG_TARGET_REG_BITS == 64) {
2250 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2251 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2252 } else {
2253 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2254 }
2255
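/* The call-clobbered set mirrors the host calling convention: EAX, EDX and
   ECX are caller-saved everywhere; 64-bit hosts additionally clobber R8-R11,
   plus RDI and RSI outside of Win64 (where those two are callee-saved).  */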
2256 tcg_regset_clear(tcg_target_call_clobber_regs);
2257 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2258 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2259 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2260 if (TCG_TARGET_REG_BITS == 64) {
2261 #if !defined(_WIN64)
2262 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2263 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2264 #endif
2265 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2266 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2267 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2268 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2269 }
2270
2271 tcg_regset_clear(s->reserved_regs);
2272 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2273
2274 tcg_add_target_add_op_defs(x86_op_defs);
2275 }
2276
2277 typedef struct {
2278 DebugFrameCIE cie;
2279 DebugFrameFDEHeader fde;
2280 uint8_t fde_def_cfa[4];
2281 uint8_t fde_reg_ofs[14];
2282 } DebugFrame;
2283
2284 /* We expect FRAME_SIZE to fit in a 2-byte uleb128 encoding. */
2285 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2286
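/* Notes on the hand-assembled DWARF below: data_align is a single-byte
   sleb128, so 0x78 encodes -8 and 0x7c encodes -4; FRAME_SIZE is emitted as
   a two-byte uleb128 (the low seven bits with the continuation bit set,
   then the remaining bits); and each DW_CFA_offset operand is a register
   number OR-ed into 0x80, followed by the save-slot offset expressed as a
   multiple of data_align.  */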
2287 #if !defined(__ELF__)
2288 /* Host machine without ELF. */
2289 #elif TCG_TARGET_REG_BITS == 64
2290 #define ELF_HOST_MACHINE EM_X86_64
2291 static DebugFrame debug_frame = {
2292 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2293 .cie.id = -1,
2294 .cie.version = 1,
2295 .cie.code_align = 1,
2296 .cie.data_align = 0x78, /* sleb128 -8 */
2297 .cie.return_column = 16,
2298
2299 /* Total FDE size does not include the "len" member. */
2300 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2301
2302 .fde_def_cfa = {
2303 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2304 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2305 (FRAME_SIZE >> 7)
2306 },
2307 .fde_reg_ofs = {
2308 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2309 /* The following ordering must match tcg_target_callee_save_regs. */
2310 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2311 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2312 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2313 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2314 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2315 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2316 }
2317 };
2318 #else
2319 #define ELF_HOST_MACHINE EM_386
2320 static DebugFrame debug_frame = {
2321 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2322 .cie.id = -1,
2323 .cie.version = 1,
2324 .cie.code_align = 1,
2325 .cie.data_align = 0x7c, /* sleb128 -4 */
2326 .cie.return_column = 8,
2327
2328 /* Total FDE size does not include the "len" member. */
2329 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2330
2331 .fde_def_cfa = {
2332 12, 4, /* DW_CFA_def_cfa %esp, ... */
2333 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2334 (FRAME_SIZE >> 7)
2335 },
2336 .fde_reg_ofs = {
2337 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2338 /* The following ordering must match tcg_target_callee_save_regs. */
2339 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2340 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2341 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2342 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2343 }
2344 };
2345 #endif
2346
2347 #if defined(ELF_HOST_MACHINE)
2348 void tcg_register_jit(void *buf, size_t buf_size)
2349 {
2350 debug_frame.fde.func_start = (uintptr_t)buf;
2351 debug_frame.fde.func_len = buf_size;
2352
2353 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2354 }
2355 #endif