/* tcg/i386/tcg-target.c */
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
    TCG_REG_R8,
    TCG_REG_R9,
#else
    TCG_REG_EAX,
    TCG_REG_EDX,
    TCG_REG_ECX
#endif
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};

static uint8_t *tb_ret_addr;

static void patch_reloc(uint8_t *code_ptr, int type,
                        tcg_target_long value, tcg_target_long addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = value;
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = value;
        break;
    default:
        tcg_abort();
    }
}
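
/* Worked example (illustrative only): tcg_out_jxx below emits a forward
   "jne" with an R_386_PC8 reloc whose addend is -1 and whose code_ptr is
   the address of the one-byte displacement field.  When the label is
   finally placed, patch_reloc computes
       value = label_address + (-1) - code_ptr
   i.e. the distance measured from the end of the displacement byte,
   which is exactly what the CPU adds to the next instruction address.  */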

/* maximum number of registers used for input function arguments */
static inline int tcg_target_get_call_iarg_regs_count(int flags)
{
    if (TCG_TARGET_REG_BITS == 64) {
        return 6;
    }

    /* On 32-bit hosts, all helper arguments are passed on the stack.  */
    return 0;
}

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
#ifdef CONFIG_TCG_PASS_AREG0
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDX);
#endif
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
        }
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    return 0;
}

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT           0x100       /* 0x0f opcode prefix */
#define P_DATA16        0x200       /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       0x400       /* 0x67 opcode prefix */
# define P_REXW         0x800       /* Set REX.W = 1 */
# define P_REXB_R       0x1000      /* REG field as byte register */
# define P_REXB_RM      0x2000      /* R/M field as byte register */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
#endif

#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)      /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)      /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)      /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)      /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)      /* loads, more or less */
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM)  /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev    0
#define EXT5_DEC_Ev    1
#define EXT5_CALLN_Ev  2
#define EXT5_JMPN_Ev   4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[10] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};
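
/* For illustration: combined with OPC_SETCC (0x0f 0x90+cc), TCG_COND_LTU
   becomes "setb" (0x0f 0x92), and combined with OPC_JCC_long it becomes
   "jb" (0x0f 0x82) -- the unsigned comparisons test the carry flag.  */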

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;     /* REX.W */
    rex |= (r & 8) >> 1;            /* REX.R */
    rex |= (x & 8) >> 2;            /* REX.X */
    rex |= (rm & 8) >> 3;           /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
#endif
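
/* Encoding example for the P_REXB_* logic above (illustrative only):
   tcg_out_ext8u() emits OPC_MOVZBL + P_REXB_RM.  With src = %ebx (3) no
   REX byte is needed, giving "0f b6 c3" (movzbl %bl, %eax), but with
   src = %esi (6) an empty REX prefix is forced, giving "40 0f b6 c6"
   (movzbl %sil, %eax); without the 0x40 byte, rm = 6 would select %dh.  */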

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   Either RM or INDEX may be omitted by passing a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift,
                                     tcg_target_long offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
            tcg_target_long disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
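
/* A concrete encoding produced by the routine above (for illustration):
       tcg_out_modrm_sib_offset(s, OPC_MOVL_GvEv, TCG_REG_EAX, TCG_REG_EBX,
                                TCG_REG_ECX, 2, 0x10)
   selects mod = 0x40 (8-bit displacement) and the SIB form, yielding
       8b 44 8b 10        movl 0x10(%ebx,%ecx,4), %eax
   where ModRM 0x44 = mod | reg<<3 | 4 (the SIB escape) and
   SIB 0x8b = scale<<6 | index<<3 | base.  */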

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, tcg_target_long offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
    } else if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
    } else {
        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        /* Shift in two steps so that this still compiles cleanly when
           tcg_target_long is 32 bits wide (the branch is then dead).  */
        tcg_out32(s, arg >> 31 >> 1);
    }
}
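
/* The four movi strategies above, sketched on x86-64 with ret = %rax
   (illustrative encodings):
       arg == 0            ->  33 c0              xorl %eax, %eax
       32-bit unsigned     ->  b8 imm32           movl $imm, %eax
       32-bit signed       ->  48 c7 c0 imm32     movq $imm, %rax
       full 64 bits        ->  48 b8 imm64        movabsq $imm, %rax
   The 32-bit forms are preferred because they zero-extend implicitly.  */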

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}
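
/* Example of prefix propagation through tcg_out_shifti (illustration):
   SHIFT_ROL + P_DATA16 with count 8 on register 0 emits
       66 c1 c0 08        rolw $8, %ax
   i.e. the 0x66 operand-size prefix from P_DATA16 followed by the
   group-2 immediate form; this is how bswap16 is implemented below.  */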

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
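
/* The special cases above, made concrete (illustrative examples):
       ARITH_ADD, val 1, !cf      ->  "inc": one byte (0x40+r) on i386,
                                      the two-byte 0xff /0 form on x86-64
       ARITH_AND, val 0xffffffff  ->  a 32-bit mov (zero-extends on x86-64)
       ARITH_AND, val 0xff        ->  movzbl; val 0xffff -> movzwl
   Otherwise a sign-extendable 8-bit immediate uses the 0x83 /op ib form
   and the general case falls back to 0x81 /op iz.  */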

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        val = l->u.value - (tcg_target_long)s->code_ptr;
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle temporaries that live across basic blocks.  */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
{
    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        /* The displacement can only exceed 32 bits on x86-64, where %r10
           is a call-clobbered register not used for argument passing.  */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 0, dest);
}

#if defined(CONFIG_SOFTMMU)

#include "../../softmmu_defs.h"

#ifdef CONFIG_TCG_PASS_AREG0
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
   int mmu_idx) */
static const void *qemu_ld_helpers[4] = {
    helper_ldb_mmu,
    helper_ldw_mmu,
    helper_ldl_mmu,
    helper_ldq_mmu,
};

/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
   uintxx_t val, int mmu_idx) */
static const void *qemu_st_helpers[4] = {
    helper_stb_mmu,
    helper_stw_mmu,
    helper_stl_mmu,
    helper_stq_mmu,
};
#else
/* legacy helper signature: __ld_mmu(target_ulong addr, int
   mmu_idx) */
static void *qemu_ld_helpers[4] = {
    __ldb_mmu,
    __ldw_mmu,
    __ldl_mmu,
    __ldq_mmu,
};

/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
   int mmu_idx) */
static void *qemu_st_helpers[4] = {
    __stb_mmu,
    __stw_mmu,
    __stl_mmu,
    __stq_mmu,
};
#endif

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO_IDX contains the index into ARGS of the low part of the
   address; the high part of the address is at ADDRLO_IDX+1.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   First argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   Second argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                    int mem_index, int s_bits,
                                    const TCGArg *args,
                                    uint8_t **label_ptr, int which)
{
    const int addrlo = args[addrlo_idx];
    const int r0 = tcg_target_call_iarg_regs[0];
    const int r1 = tcg_target_call_iarg_regs[1];
    TCGType type = TCG_TYPE_I32;
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
        type = TCG_TYPE_I64;
        rexw = P_REXW;
    }

    tcg_out_mov(s, type, r1, addrlo);
    tcg_out_mov(s, type, r0, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + rexw, r0,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + rexw, r1,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r1), r0 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);

    tcg_out_mov(s, type, r0, addrlo);

    /* jne label1 */
    tcg_out8(s, OPC_JCC_short + JCC_JNE);
    label_ptr[0] = s->code_ptr;
    s->code_ptr++;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r1), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);

        /* jne label1 */
        tcg_out8(s, OPC_JCC_short + JCC_JNE);
        label_ptr[1] = s->code_ptr;
        s->code_ptr++;
    }

    /* TLB Hit.  */

    /* add addend(r1), r0 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
                         offsetof(CPUTLBEntry, addend) - which);
}
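
/* Sketch of the code emitted above (illustrative; assumes a 64-bit host
   and a 32-bit guest, so r0/r1 are %rdi/%rsi and TCG_AREG0 is %r14):
       movl  addrlo, %esi
       movl  addrlo, %edi
       shrl  $(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), %esi
       andl  $(TARGET_PAGE_MASK | ((1 << s_bits) - 1)), %edi
       andl  $((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), %esi
       leaq  tlb_table[mem_index][0].addr_read(%r14,%rsi), %rsi
       cmpl  (%rsi), %edi
       movl  addrlo, %edi
       jne   miss
       addq  addend(%rsi), %rdi     # %rdi now holds the host address
*/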
#endif

static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
        break;
    case 0 | 4:
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
        break;
    case 1:
        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case 1 | 4:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
        }
        break;
    case 2:
        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        if (bswap) {
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
        }
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
            } else {
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  It will be useful once fixed-register globals are less
   common.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
#if TCG_TARGET_REG_BITS == 64
    int arg_idx;
#else
    int stack_adjust;
#endif
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc & 3;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  */
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code to the end of the TB */
#if TCG_TARGET_REG_BITS == 32
    tcg_out_pushi(s, mem_index);
    stack_adjust = 4;
    if (TARGET_LONG_BITS == 64) {
        tcg_out_push(s, args[addrlo_idx + 1]);
        stack_adjust += 4;
    }
    tcg_out_push(s, args[addrlo_idx]);
    stack_adjust += 4;
#ifdef CONFIG_TCG_PASS_AREG0
    tcg_out_push(s, TCG_AREG0);
    stack_adjust += 4;
#endif
#else
    /* The first argument is already loaded with addrlo.  */
    arg_idx = 1;
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                 mem_index);
#ifdef CONFIG_TCG_PASS_AREG0
    /* XXX/FIXME: suboptimal */
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
                tcg_target_call_iarg_regs[2]);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
                tcg_target_call_iarg_regs[1]);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
                tcg_target_call_iarg_regs[0]);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
                TCG_AREG0);
#endif
#endif

    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);

#if TCG_TARGET_REG_BITS == 32
    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
    }
#endif

    switch(opc) {
    case 0 | 4:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 0:
        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
        break;
    case 1:
        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
        break;
    case 2:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}
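
/* Argument layout seen by tcg_out_qemu_ld/st, as implied by the index
   arithmetic above (for reference): on a 64-bit host args[] is
       { data, addr, mem_index }
   while a 32-bit host with a 64-bit guest address and a 64-bit access uses
       { datalo, datahi, addrlo, addrhi, mem_index }
   which is why addrlo_idx and the mem_index offset are computed
   dynamically rather than hard-coded.  */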

static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that the second argument reg is definitely free here.  */
    int scratch = tcg_target_call_iarg_regs[1];

    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
        break;
    case 1:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
        break;
    case 2:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
        break;
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    int stack_adjust;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code to the end of the TB */
#if TCG_TARGET_REG_BITS == 32
    tcg_out_pushi(s, mem_index);
    stack_adjust = 4;
    if (opc == 3) {
        tcg_out_push(s, data_reg2);
        stack_adjust += 4;
    }
    tcg_out_push(s, data_reg);
    stack_adjust += 4;
    if (TARGET_LONG_BITS == 64) {
        tcg_out_push(s, args[addrlo_idx + 1]);
        stack_adjust += 4;
    }
    tcg_out_push(s, args[addrlo_idx]);
    stack_adjust += 4;
#ifdef CONFIG_TCG_PASS_AREG0
    tcg_out_push(s, TCG_AREG0);
    stack_adjust += 4;
#endif
#else
    tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                TCG_REG_RSI, data_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
    stack_adjust = 0;
#ifdef CONFIG_TCG_PASS_AREG0
    /* XXX/FIXME: suboptimal */
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
                tcg_target_call_iarg_regs[2]);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
                tcg_target_call_iarg_regs[1]);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
                tcg_target_call_iarg_regs[0]);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
                TCG_AREG0);
#endif
#endif

    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);

    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
    }

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */ \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif

    switch(opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (tcg_target_long)(s->tb_next + args[0]));
        }
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        break;
    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_calli(s, args[0]);
        } else {
            /* call *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
        }
        break;
    case INDEX_op_jmp:
        if (const_args[0]) {
            tcg_out_jmp(s, args[0]);
        } else {
            /* jmp *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
        }
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
        break;
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                             args[0], args[1], args[2]);
        break;
    OP_32_64(st16):
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                             args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        goto gen_shift;
    OP_32_64(shr):
        c = SHIFT_SHR;
        goto gen_shift;
    OP_32_64(sar):
        c = SHIFT_SAR;
        goto gen_shift;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    case INDEX_op_qemu_ld8u:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld8s:
        tcg_out_qemu_ld(s, args, 0 | 4);
        break;
    case INDEX_op_qemu_ld16u:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_ld16s:
        tcg_out_qemu_ld(s, args, 1 | 4);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_qemu_ld32u:
#endif
    case INDEX_op_qemu_ld32:
        tcg_out_qemu_ld(s, args, 2);
        break;
    case INDEX_op_qemu_ld64:
        tcg_out_qemu_ld(s, args, 3);
        break;

    case INDEX_op_qemu_st8:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st16:
        tcg_out_qemu_st(s, args, 1);
        break;
    case INDEX_op_qemu_st32:
        tcg_out_qemu_st(s, args, 2);
        break;
    case INDEX_op_qemu_st64:
        tcg_out_qemu_st(s, args, 3);
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
    case INDEX_op_mulu2_i32:
        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
        break;
    case INDEX_op_add2_i32:
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
        }
        break;
    case INDEX_op_sub2_i32:
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
        }
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_qemu_ld32s:
        tcg_out_qemu_ld(s, args, 2 | 4);
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    OP_32_64(deposit):
        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
                          args[2], args[0]);
        } else if (args[3] == 8 && args[4] == 8) {
            /* load bits 8..15; args[0] + 4 encodes the corresponding
               high-byte register (%ah, %ch, %dh, %bh) */
            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
        } else if (args[3] == 0 && args[4] == 16) {
            /* load bits 0..15 */
            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
        } else {
            tcg_abort();
        }
        break;

    default:
        tcg_abort();
    }

#undef OP_32_64
}

static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_jmp, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "q", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "r", "r" } },
    { INDEX_op_st16_i64, { "r", "r" } },
    { INDEX_op_st32_i64, { "r", "r" } },
    { INDEX_op_st_i64, { "r", "r" } },

    { INDEX_op_add_i64, { "r", "0", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld32u, { "r", "L" } },
    { INDEX_op_qemu_ld32s, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "L" } },

    { INDEX_op_qemu_st8, { "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
#endif
    { -1 },
};
1975
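/* A rough legend for the constraint letters used above; the authoritative
   definitions are in target_parse_constraint:
     "r"          any integer register
     "q"          register with a directly addressable low byte
                  (%eax..%ebx on 32-bit hosts, any register on 64-bit hosts)
     "Q"          %eax..%ebx only, even on 64-bit hosts
     "a","c","d"  specifically %eax, %ecx, %edx
     "cb"         %ecx or %ebx
     "L"          register usable by the qemu_ld/st slow path, i.e.
                  excluding those needed for the helper call itself
     "0","1"      alias of the output operand with that index
     "i"          any immediate; "e" a sign-extended 32-bit immediate;
                  "Z" a zero-extended 32-bit immediate  */
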
static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14, /* Currently used for the global env. */
    TCG_REG_R15,
#else
    TCG_REG_EBP, /* Currently used for the global env. */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

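/* Worked example (the concrete values are illustrative; the real ones come
   from the configuration headers): on a 64-bit host with the six
   callee-saved registers above, PUSH_SIZE = (1 + 6) * 8 = 56 bytes, i.e.
   the return address plus the pushes.  Assuming TCG_STATIC_CALL_ARGS_SIZE
   is 128 and CPU_TEMP_BUF_NLONGS is 128, the unaligned total is
   56 + 128 + 128 * 8 = 1208, which rounds up to FRAME_SIZE = 1216 under
   16-byte stack alignment.  */
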
/* Generate global QEMU prologue and epilogue code.  */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, stack_addend;

    /* TB prologue */

    /* Reserve some stack space, also for TCG temps.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee-saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

#if TCG_TARGET_REG_BITS == 32
    /* On 32-bit hosts both arguments of tcg_qemu_tb_exec(env, tb_ptr)
       arrive on the stack; load them from above the registers we just
       pushed and the return address.  */
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
#else
    /* env arrives in the first argument register.  */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
#endif
    tcg_out_addi(s, TCG_REG_CALL_STACK, -stack_addend);

    /* jmp *tb: enter the translated code passed as the second argument.  */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);

    /* TB epilogue: translated blocks jump back here (see exit_tb).  */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);
}
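
/* On a 64-bit host the code emitted above corresponds roughly to the
   following sequence (a sketch; N abbreviates FRAME_SIZE - PUSH_SIZE,
   and %r14 stands in for TCG_AREG0):
       push %rbp; push %rbx; push %r12; push %r13; push %r14; push %r15
       mov  %rdi, %r14          ; first C argument: env
       sub  $N, %rsp            ; room for helper args and TCG temps
       jmp  *%rsi               ; second C argument: translated code entry
   tb_ret_addr:
       add  $N, %rsp
       pop  %r15; pop %r14; pop %r13; pop %r12; pop %rbx; pop %rbp
       ret  */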

static void tcg_target_init(TCGContext *s)
{
#if !defined(CONFIG_USER_ONLY)
    /* Fail safe: the qemu_ld/st TLB lookup code assumes this layout.  */
    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) {
        tcg_abort();
    }
#endif

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    tcg_add_target_add_op_defs(x86_op_defs);
}

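/* The structures below describe a minimal DWARF .debug_frame image: one
   CIE followed by one FDE covering the whole JIT code buffer.  It is
   handed to GDB through the JIT debugging interface (tcg_register_jit
   below) so the debugger can unwind through generated code.  */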
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    tcg_target_long func_start __attribute__((packed));
    tcg_target_long func_len __attribute__((packed));
    uint8_t def_cfa[4];
    uint8_t reg_ofs[14];
} DebugFrameFDE;

typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDE fde;
} DebugFrame;

#if !defined(__ELF__)
/* Host machine without ELF.  */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x78,             /* sleb128 -8 */
    .cie.return_column = 16,

    .fde.len = sizeof(DebugFrameFDE)-4, /* length after .len member */
    .fde.def_cfa = {
        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde.reg_ofs = {
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
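
/* Decoding notes for the byte values above: 12 is DW_CFA_def_cfa
   (opcode 0x0c), whose operands are the CFA register (7 is %rsp in the
   x86-64 DWARF numbering) and FRAME_SIZE as a uleb128.  The uleb128
   deliberately occupies two bytes: the low seven bits with the
   continuation bit set, then the remaining bits; e.g. a FRAME_SIZE of
   1216 (0x4c0) encodes as 0xc0 0x09.  Each .fde.reg_ofs pair is
   DW_CFA_offset (0x80 | regno) followed by the saved-slot offset as a
   uleb128, scaled by .cie.data_align (-8 here).  */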
#else
#define ELF_HOST_MACHINE EM_386
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x7c,             /* sleb128 -4 */
    .cie.return_column = 8,

    .fde.len = sizeof(DebugFrameFDE)-4, /* length after .len member */
    .fde.def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde.reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif

#if defined(ELF_HOST_MACHINE)
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* .fde.def_cfa reserves exactly two bytes for FRAME_SIZE as a
       uleb128, so the value must fit in 14 bits.  */
    assert(FRAME_SIZE >> 14 == 0);

    debug_frame.fde.func_start = (tcg_target_long) buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif