1 /*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #ifndef NDEBUG
26 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
27 #if TCG_TARGET_REG_BITS == 64
28 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
29 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
30 #else
31 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
32 #endif
33 };
34 #endif
35
36 static const int tcg_target_reg_alloc_order[] = {
37 #if TCG_TARGET_REG_BITS == 64
38 TCG_REG_RBP,
39 TCG_REG_RBX,
40 TCG_REG_R12,
41 TCG_REG_R13,
42 TCG_REG_R14,
43 TCG_REG_R15,
44 TCG_REG_R10,
45 TCG_REG_R11,
46 TCG_REG_R9,
47 TCG_REG_R8,
48 TCG_REG_RCX,
49 TCG_REG_RDX,
50 TCG_REG_RSI,
51 TCG_REG_RDI,
52 TCG_REG_RAX,
53 #else
54 TCG_REG_EBX,
55 TCG_REG_ESI,
56 TCG_REG_EDI,
57 TCG_REG_EBP,
58 TCG_REG_ECX,
59 TCG_REG_EDX,
60 TCG_REG_EAX,
61 #endif
62 };
63
64 static const int tcg_target_call_iarg_regs[] = {
65 #if TCG_TARGET_REG_BITS == 64
66 #if defined(_WIN64)
67 TCG_REG_RCX,
68 TCG_REG_RDX,
69 #else
70 TCG_REG_RDI,
71 TCG_REG_RSI,
72 TCG_REG_RDX,
73 TCG_REG_RCX,
74 #endif
75 TCG_REG_R8,
76 TCG_REG_R9,
77 #else
78 TCG_REG_EAX,
79 TCG_REG_EDX,
80 TCG_REG_ECX
81 #endif
82 };
83
84 static const int tcg_target_call_oarg_regs[] = {
85 TCG_REG_EAX,
86 #if TCG_TARGET_REG_BITS == 32
87 TCG_REG_EDX
88 #endif
89 };
90
91 static uint8_t *tb_ret_addr;
92
93 static void patch_reloc(uint8_t *code_ptr, int type,
94 tcg_target_long value, tcg_target_long addend)
95 {
96 value += addend;
97 switch(type) {
98 case R_386_PC32:
99 value -= (uintptr_t)code_ptr;
100 if (value != (int32_t)value) {
101 tcg_abort();
102 }
103 *(uint32_t *)code_ptr = value;
104 break;
105 case R_386_PC8:
106 value -= (uintptr_t)code_ptr;
107 if (value != (int8_t)value) {
108 tcg_abort();
109 }
110 *(uint8_t *)code_ptr = value;
111 break;
112 default:
113 tcg_abort();
114 }
115 }
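
/* A worked instance of the R_386_PC32 case above, with illustrative
   (made-up) addresses: suppose the 4-byte displacement field sits at
   code_ptr == 0x1000, the branch target is value == 0x1234, and
   addend == -4 because the hardware computes the jump relative to the
   end of the field.  The stored displacement is then
       0x1234 + (-4) - 0x1000 = 0x230.
   The value != (int32_t)value check only matters on 64-bit hosts,
   where a target more than +/-2GB away cannot be encoded. */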
116
117 /* maximum number of registers used for input function arguments */
118 static inline int tcg_target_get_call_iarg_regs_count(int flags)
119 {
120 if (TCG_TARGET_REG_BITS == 64) {
121 return 6;
122 }
123
124 return 0;
125 }
126
127 /* parse target specific constraints */
128 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
129 {
130 const char *ct_str;
131
132 ct_str = *pct_str;
133 switch(ct_str[0]) {
134 case 'a':
135 ct->ct |= TCG_CT_REG;
136 tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
137 break;
138 case 'b':
139 ct->ct |= TCG_CT_REG;
140 tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
141 break;
142 case 'c':
143 ct->ct |= TCG_CT_REG;
144 tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
145 break;
146 case 'd':
147 ct->ct |= TCG_CT_REG;
148 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
149 break;
150 case 'S':
151 ct->ct |= TCG_CT_REG;
152 tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
153 break;
154 case 'D':
155 ct->ct |= TCG_CT_REG;
156 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
157 break;
158 case 'q':
159 ct->ct |= TCG_CT_REG;
160 if (TCG_TARGET_REG_BITS == 64) {
161 tcg_regset_set32(ct->u.regs, 0, 0xffff);
162 } else {
163 tcg_regset_set32(ct->u.regs, 0, 0xf);
164 }
165 break;
166 case 'Q':
167 ct->ct |= TCG_CT_REG;
168 tcg_regset_set32(ct->u.regs, 0, 0xf);
169 break;
170 case 'r':
171 ct->ct |= TCG_CT_REG;
172 if (TCG_TARGET_REG_BITS == 64) {
173 tcg_regset_set32(ct->u.regs, 0, 0xffff);
174 } else {
175 tcg_regset_set32(ct->u.regs, 0, 0xff);
176 }
177 break;
178
179 /* qemu_ld/st address constraint */
180 case 'L':
181 ct->ct |= TCG_CT_REG;
182 if (TCG_TARGET_REG_BITS == 64) {
183 tcg_regset_set32(ct->u.regs, 0, 0xffff);
184 tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
185 tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
186 tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
187 } else {
188 tcg_regset_set32(ct->u.regs, 0, 0xff);
189 tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
190 tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
191 }
192 break;
193
194 case 'e':
195 ct->ct |= TCG_CT_CONST_S32;
196 break;
197 case 'Z':
198 ct->ct |= TCG_CT_CONST_U32;
199 break;
200
201 default:
202 return -1;
203 }
204 ct_str++;
205 *pct_str = ct_str;
206 return 0;
207 }
208
209 /* test if a constant matches the constraint */
210 static inline int tcg_target_const_match(tcg_target_long val,
211 const TCGArgConstraint *arg_ct)
212 {
213 int ct = arg_ct->ct;
214 if (ct & TCG_CT_CONST) {
215 return 1;
216 }
217 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
218 return 1;
219 }
220 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
221 return 1;
222 }
223 return 0;
224 }
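
/* Illustrative examples of the constant constraints (64-bit host):
   val = -1 (0xffffffffffffffff) matches TCG_CT_CONST_S32, since it
   sign-extends from 32 bits, but not TCG_CT_CONST_U32.  Conversely,
   val = 0xffffffff matches TCG_CT_CONST_U32 but not TCG_CT_CONST_S32,
   since (int32_t)0xffffffff == -1.  On a 32-bit host every value
   trivially matches both. */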
225
226 #if TCG_TARGET_REG_BITS == 64
227 # define LOWREGMASK(x) ((x) & 7)
228 #else
229 # define LOWREGMASK(x) (x)
230 #endif
231
232 #define P_EXT 0x100 /* 0x0f opcode prefix */
233 #define P_DATA16 0x200 /* 0x66 opcode prefix */
234 #if TCG_TARGET_REG_BITS == 64
235 # define P_ADDR32 0x400 /* 0x67 opcode prefix */
236 # define P_REXW 0x800 /* Set REX.W = 1 */
237 # define P_REXB_R 0x1000 /* REG field as byte register */
238 # define P_REXB_RM 0x2000 /* R/M field as byte register */
239 #else
240 # define P_ADDR32 0
241 # define P_REXW 0
242 # define P_REXB_R 0
243 # define P_REXB_RM 0
244 #endif
245
246 #define OPC_ARITH_EvIz (0x81)
247 #define OPC_ARITH_EvIb (0x83)
248 #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
249 #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
250 #define OPC_BSWAP (0xc8 | P_EXT)
251 #define OPC_CALL_Jz (0xe8)
252 #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
253 #define OPC_DEC_r32 (0x48)
254 #define OPC_IMUL_GvEv (0xaf | P_EXT)
255 #define OPC_IMUL_GvEvIb (0x6b)
256 #define OPC_IMUL_GvEvIz (0x69)
257 #define OPC_INC_r32 (0x40)
258 #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
259 #define OPC_JCC_short (0x70) /* ... plus condition code */
260 #define OPC_JMP_long (0xe9)
261 #define OPC_JMP_short (0xeb)
262 #define OPC_LEA (0x8d)
263 #define OPC_MOVB_EvGv (0x88) /* stores, more or less */
264 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
265 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
266 #define OPC_MOVL_EvIz (0xc7)
267 #define OPC_MOVL_Iv (0xb8)
268 #define OPC_MOVSBL (0xbe | P_EXT)
269 #define OPC_MOVSWL (0xbf | P_EXT)
270 #define OPC_MOVSLQ (0x63 | P_REXW)
271 #define OPC_MOVZBL (0xb6 | P_EXT)
272 #define OPC_MOVZWL (0xb7 | P_EXT)
273 #define OPC_POP_r32 (0x58)
274 #define OPC_PUSH_r32 (0x50)
275 #define OPC_PUSH_Iv (0x68)
276 #define OPC_PUSH_Ib (0x6a)
277 #define OPC_RET (0xc3)
278 #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
279 #define OPC_SHIFT_1 (0xd1)
280 #define OPC_SHIFT_Ib (0xc1)
281 #define OPC_SHIFT_cl (0xd3)
282 #define OPC_TESTL (0x85)
283 #define OPC_XCHG_ax_r32 (0x90)
284
285 #define OPC_GRP3_Ev (0xf7)
286 #define OPC_GRP5 (0xff)
287
288 /* Group 1 opcode extensions for 0x80-0x83.
289 These are also used as modifiers for OPC_ARITH. */
290 #define ARITH_ADD 0
291 #define ARITH_OR 1
292 #define ARITH_ADC 2
293 #define ARITH_SBB 3
294 #define ARITH_AND 4
295 #define ARITH_SUB 5
296 #define ARITH_XOR 6
297 #define ARITH_CMP 7
298
299 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
300 #define SHIFT_ROL 0
301 #define SHIFT_ROR 1
302 #define SHIFT_SHL 4
303 #define SHIFT_SHR 5
304 #define SHIFT_SAR 7
305
306 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
307 #define EXT3_NOT 2
308 #define EXT3_NEG 3
309 #define EXT3_MUL 4
310 #define EXT3_IMUL 5
311 #define EXT3_DIV 6
312 #define EXT3_IDIV 7
313
314 /* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
315 #define EXT5_INC_Ev 0
316 #define EXT5_DEC_Ev 1
317 #define EXT5_CALLN_Ev 2
318 #define EXT5_JMPN_Ev 4
319
320 /* Condition codes to be added to OPC_JCC_{long,short}. */
321 #define JCC_JMP (-1)
322 #define JCC_JO 0x0
323 #define JCC_JNO 0x1
324 #define JCC_JB 0x2
325 #define JCC_JAE 0x3
326 #define JCC_JE 0x4
327 #define JCC_JNE 0x5
328 #define JCC_JBE 0x6
329 #define JCC_JA 0x7
330 #define JCC_JS 0x8
331 #define JCC_JNS 0x9
332 #define JCC_JP 0xa
333 #define JCC_JNP 0xb
334 #define JCC_JL 0xc
335 #define JCC_JGE 0xd
336 #define JCC_JLE 0xe
337 #define JCC_JG 0xf
338
339 static const uint8_t tcg_cond_to_jcc[10] = {
340 [TCG_COND_EQ] = JCC_JE,
341 [TCG_COND_NE] = JCC_JNE,
342 [TCG_COND_LT] = JCC_JL,
343 [TCG_COND_GE] = JCC_JGE,
344 [TCG_COND_LE] = JCC_JLE,
345 [TCG_COND_GT] = JCC_JG,
346 [TCG_COND_LTU] = JCC_JB,
347 [TCG_COND_GEU] = JCC_JAE,
348 [TCG_COND_LEU] = JCC_JBE,
349 [TCG_COND_GTU] = JCC_JA,
350 };
351
352 #if TCG_TARGET_REG_BITS == 64
353 static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
354 {
355 int rex;
356
357 if (opc & P_DATA16) {
358 /* We should never be asking for both 16 and 64-bit operation. */
359 assert((opc & P_REXW) == 0);
360 tcg_out8(s, 0x66);
361 }
362 if (opc & P_ADDR32) {
363 tcg_out8(s, 0x67);
364 }
365
366 rex = 0;
367 rex |= (opc & P_REXW) >> 8; /* REX.W */
368 rex |= (r & 8) >> 1; /* REX.R */
369 rex |= (x & 8) >> 2; /* REX.X */
370 rex |= (rm & 8) >> 3; /* REX.B */
371
372 /* P_REXB_{R,RM} indicates that the given register is the low byte.
373 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
374 as otherwise the encoding indicates %[abcd]h. Note that the values
375 that are ORed in merely indicate that the REX byte must be present;
376 those bits get discarded in output. */
377 rex |= opc & (r >= 4 ? P_REXB_R : 0);
378 rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
379
380 if (rex) {
381 tcg_out8(s, (uint8_t)(rex | 0x40));
382 }
383
384 if (opc & P_EXT) {
385 tcg_out8(s, 0x0f);
386 }
387 tcg_out8(s, opc);
388 }
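
/* Worked example of the REX computation above (hypothetical operands):
   for r = %r9 (value 9), rm = %r12 (value 12), no REX.W and no index,
       rex = ((9 & 8) >> 1) | ((12 & 8) >> 3) = 0x04 | 0x01 = 0x05,
   so the emitted prefix byte is 0x40 | 0x05 = 0x45 (REX.R + REX.B).
   The P_REXB_{R,RM} bits lie above the low nibble, so ORing them in
   merely forces a REX prefix to be present without setting W/R/X/B. */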
389 #else
390 static void tcg_out_opc(TCGContext *s, int opc)
391 {
392 if (opc & P_DATA16) {
393 tcg_out8(s, 0x66);
394 }
395 if (opc & P_EXT) {
396 tcg_out8(s, 0x0f);
397 }
398 tcg_out8(s, opc);
399 }
400 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
401 the 32-bit compilation paths. This method works with all versions of gcc,
402 whereas relying on optimization to remove the dead arguments may not. */
403 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
404 #endif
405
406 static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
407 {
408 tcg_out_opc(s, opc, r, rm, 0);
409 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
410 }
411
412 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
413 We handle RM or INDEX missing with a negative value. In 64-bit
414 mode for absolute addresses, ~RM is the size of the immediate operand
415 that will follow the instruction. */
416
417 static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
418 int index, int shift,
419 tcg_target_long offset)
420 {
421 int mod, len;
422
423 if (index < 0 && rm < 0) {
424 if (TCG_TARGET_REG_BITS == 64) {
425 /* Try for a rip-relative addressing mode. This has replaced
426 the 32-bit-mode absolute addressing encoding. */
427 tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
428 tcg_target_long disp = offset - pc;
429 if (disp == (int32_t)disp) {
430 tcg_out_opc(s, opc, r, 0, 0);
431 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
432 tcg_out32(s, disp);
433 return;
434 }
435
436 /* Try for an absolute address encoding. This requires the
437 use of the MODRM+SIB encoding and is therefore larger than
438 rip-relative addressing. */
439 if (offset == (int32_t)offset) {
440 tcg_out_opc(s, opc, r, 0, 0);
441 tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
442 tcg_out8(s, (4 << 3) | 5);
443 tcg_out32(s, offset);
444 return;
445 }
446
447 /* ??? The memory isn't directly addressable. */
448 tcg_abort();
449 } else {
450 /* Absolute address. */
451 tcg_out_opc(s, opc, r, 0, 0);
452 tcg_out8(s, (r << 3) | 5);
453 tcg_out32(s, offset);
454 return;
455 }
456 }
457
458 /* Find the length of the immediate addend. Note that the encoding
459 that would be used for (%ebp) indicates absolute addressing. */
460 if (rm < 0) {
461 mod = 0, len = 4, rm = 5;
462 } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
463 mod = 0, len = 0;
464 } else if (offset == (int8_t)offset) {
465 mod = 0x40, len = 1;
466 } else {
467 mod = 0x80, len = 4;
468 }
469
470 /* Use a single byte MODRM format if possible. Note that the encoding
471 that would be used for %esp is the escape to the two byte form. */
472 if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
473 /* Single byte MODRM format. */
474 tcg_out_opc(s, opc, r, rm, 0);
475 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
476 } else {
477 /* Two byte MODRM+SIB format. */
478
479 /* Note that the encoding that would place %esp into the index
480 field indicates no index register. In 64-bit mode, the REX.X
481 bit counts, so %r12 can be used as the index. */
482 if (index < 0) {
483 index = 4;
484 } else {
485 assert(index != TCG_REG_ESP);
486 }
487
488 tcg_out_opc(s, opc, r, rm, index);
489 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
490 tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
491 }
492
493 if (len == 1) {
494 tcg_out8(s, offset);
495 } else if (len == 4) {
496 tcg_out32(s, offset);
497 }
498 }
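
/* Example encodings produced by the function above (illustrative): a
   zero-offset load through %ebp still needs a disp8, because the
   (%ebp) encoding with mod=0 means absolute addressing:
       mov (%ebp),%eax  ->  8b 45 00
   and a load through %esp always takes the SIB escape, because the
   %esp r/m encoding selects the two-byte form:
       mov (%esp),%eax  ->  8b 04 24 */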
499
500 /* A simplification of the above with no index or shift. */
501 static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
502 int rm, tcg_target_long offset)
503 {
504 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
505 }
506
507 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
508 static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
509 {
510 /* Propagate an opcode prefix, such as P_REXW. */
511 int ext = subop & ~0x7;
512 subop &= 0x7;
513
514 tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
515 }
516
517 static inline void tcg_out_mov(TCGContext *s, TCGType type,
518 TCGReg ret, TCGReg arg)
519 {
520 if (arg != ret) {
521 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
522 tcg_out_modrm(s, opc, ret, arg);
523 }
524 }
525
526 static void tcg_out_movi(TCGContext *s, TCGType type,
527 TCGReg ret, tcg_target_long arg)
528 {
529 if (arg == 0) {
530 tgen_arithr(s, ARITH_XOR, ret, ret);
531 return;
532 } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
533 tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
534 tcg_out32(s, arg);
535 } else if (arg == (int32_t)arg) {
536 tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
537 tcg_out32(s, arg);
538 } else {
539 tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
540 tcg_out32(s, arg);
541 tcg_out32(s, arg >> 31 >> 1);
542 }
543 }
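
/* The paths above, illustrated (64-bit host, values made up):
     arg == 0:           33 c0                 xor  %eax,%eax
     arg == 0x12345678:  b8 78 56 34 12        mov  $0x12345678,%eax
     arg == -1:          48 c7 c0 ff ff ff ff  mov  $-1,%rax (imm32, sign-extended)
     arg == 1 << 40:     48 b8 <8 bytes>       movabs $0x10000000000,%rax
   The 'arg >> 31 >> 1' double shift sidesteps an undefined shift by 32
   when tcg_target_long is only 32 bits wide and this last, unreachable
   path is nonetheless compiled. */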
544
545 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
546 {
547 if (val == (int8_t)val) {
548 tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
549 tcg_out8(s, val);
550 } else if (val == (int32_t)val) {
551 tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
552 tcg_out32(s, val);
553 } else {
554 tcg_abort();
555 }
556 }
557
558 static inline void tcg_out_push(TCGContext *s, int reg)
559 {
560 tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
561 }
562
563 static inline void tcg_out_pop(TCGContext *s, int reg)
564 {
565 tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
566 }
567
568 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
569 TCGReg arg1, tcg_target_long arg2)
570 {
571 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
572 tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
573 }
574
575 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
576 TCGReg arg1, tcg_target_long arg2)
577 {
578 int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
579 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
580 }
581
582 static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
583 {
584 /* Propagate an opcode prefix, such as P_DATA16. */
585 int ext = subopc & ~0x7;
586 subopc &= 0x7;
587
588 if (count == 1) {
589 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
590 } else {
591 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
592 tcg_out8(s, count);
593 }
594 }
595
596 static inline void tcg_out_bswap32(TCGContext *s, int reg)
597 {
598 tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
599 }
600
601 static inline void tcg_out_rolw_8(TCGContext *s, int reg)
602 {
603 tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
604 }
605
606 static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
607 {
608 /* movzbl */
609 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
610 tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
611 }
612
613 static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
614 {
615 /* movsbl */
616 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
617 tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
618 }
619
620 static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
621 {
622 /* movzwl */
623 tcg_out_modrm(s, OPC_MOVZWL, dest, src);
624 }
625
626 static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
627 {
628 /* movsw[lq] */
629 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
630 }
631
632 static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
633 {
634 /* 32-bit mov zero extends. */
635 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
636 }
637
638 static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
639 {
640 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
641 }
642
643 static inline void tcg_out_bswap64(TCGContext *s, int reg)
644 {
645 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
646 }
647
648 static void tgen_arithi(TCGContext *s, int c, int r0,
649 tcg_target_long val, int cf)
650 {
651 int rexw = 0;
652
653 if (TCG_TARGET_REG_BITS == 64) {
654 rexw = c & -8;
655 c &= 7;
656 }
657
658 /* ??? While INC is 2 bytes shorter than ADDL $1, it also induces
659 partial-flags-update stalls on Pentium4 and is not recommended
660 by current Intel optimization manuals. */
661 if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
662 int is_inc = (c == ARITH_ADD) ^ (val < 0);
663 if (TCG_TARGET_REG_BITS == 64) {
664 /* The single-byte increment encodings are re-tasked as the
665 REX prefixes. Use the MODRM encoding. */
666 tcg_out_modrm(s, OPC_GRP5 + rexw,
667 (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
668 } else {
669 tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
670 }
671 return;
672 }
673
674 if (c == ARITH_AND) {
675 if (TCG_TARGET_REG_BITS == 64) {
676 if (val == 0xffffffffu) {
677 tcg_out_ext32u(s, r0, r0);
678 return;
679 }
680 if (val == (uint32_t)val) {
681 /* AND with no high bits set can use a 32-bit operation. */
682 rexw = 0;
683 }
684 }
685 if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
686 tcg_out_ext8u(s, r0, r0);
687 return;
688 }
689 if (val == 0xffffu) {
690 tcg_out_ext16u(s, r0, r0);
691 return;
692 }
693 }
694
695 if (val == (int8_t)val) {
696 tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
697 tcg_out8(s, val);
698 return;
699 }
700 if (rexw == 0 || val == (int32_t)val) {
701 tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
702 tcg_out32(s, val);
703 return;
704 }
705
706 tcg_abort();
707 }
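
/* Illustrative special cases handled above: on a 64-bit host,
     tgen_arithi(s, ARITH_AND + P_REXW, reg, 0xffffffff, 0)
   becomes a plain 32-bit mov (tcg_out_ext32u), since writing a 32-bit
   register implicitly zeroes bits 63..32, and
     tgen_arithi(s, ARITH_AND, reg, 0xff, 0)
   becomes a movzbl, both shorter than AND with an imm32.  The CF flag
   suppresses the INC/DEC shortcut when the carry flag must survive,
   as for the low half of add2/sub2. */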
708
709 static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
710 {
711 if (val != 0) {
712 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
713 }
714 }
715
716 /* Use SMALL != 0 to force a short forward branch. */
717 static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
718 {
719 int32_t val, val1;
720 TCGLabel *l = &s->labels[label_index];
721
722 if (l->has_value) {
723 val = l->u.value - (tcg_target_long)s->code_ptr;
724 val1 = val - 2;
725 if ((int8_t)val1 == val1) {
726 if (opc == -1) {
727 tcg_out8(s, OPC_JMP_short);
728 } else {
729 tcg_out8(s, OPC_JCC_short + opc);
730 }
731 tcg_out8(s, val1);
732 } else {
733 if (small) {
734 tcg_abort();
735 }
736 if (opc == -1) {
737 tcg_out8(s, OPC_JMP_long);
738 tcg_out32(s, val - 5);
739 } else {
740 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
741 tcg_out32(s, val - 6);
742 }
743 }
744 } else if (small) {
745 if (opc == -1) {
746 tcg_out8(s, OPC_JMP_short);
747 } else {
748 tcg_out8(s, OPC_JCC_short + opc);
749 }
750 tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
751 s->code_ptr += 1;
752 } else {
753 if (opc == -1) {
754 tcg_out8(s, OPC_JMP_long);
755 } else {
756 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
757 }
758 tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
759 s->code_ptr += 4;
760 }
761 }
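
/* Branch length bookkeeping in the function above, spelled out: the
   displacement is relative to the end of the instruction, so for a
   resolved label the short forms subtract the 2-byte instruction
   length (val - 2), while the long forms subtract 5 bytes (e9 +
   disp32) or 6 bytes (0f 8x + disp32).  Unresolved labels get a
   relocation instead: R_386_PC8 with addend -1 for the 1-byte field,
   R_386_PC32 with addend -4 for the 4-byte field. */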
762
763 static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
764 int const_arg2, int rexw)
765 {
766 if (const_arg2) {
767 if (arg2 == 0) {
768 /* test r, r */
769 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
770 } else {
771 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
772 }
773 } else {
774 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
775 }
776 }
777
778 static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
779 TCGArg arg1, TCGArg arg2, int const_arg2,
780 int label_index, int small)
781 {
782 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
783 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
784 }
785
786 #if TCG_TARGET_REG_BITS == 64
787 static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
788 TCGArg arg1, TCGArg arg2, int const_arg2,
789 int label_index, int small)
790 {
791 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
792 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
793 }
794 #else
795 /* XXX: we implement this at the target level to avoid having to
796 handle cross-basic-block temporaries */
797 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
798 const int *const_args, int small)
799 {
800 int label_next;
801 label_next = gen_new_label();
802 switch(args[4]) {
803 case TCG_COND_EQ:
804 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
805 label_next, 1);
806 tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
807 args[5], small);
808 break;
809 case TCG_COND_NE:
810 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
811 args[5], small);
812 tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
813 args[5], small);
814 break;
815 case TCG_COND_LT:
816 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
817 args[5], small);
818 tcg_out_jxx(s, JCC_JNE, label_next, 1);
819 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
820 args[5], small);
821 break;
822 case TCG_COND_LE:
823 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
824 args[5], small);
825 tcg_out_jxx(s, JCC_JNE, label_next, 1);
826 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
827 args[5], small);
828 break;
829 case TCG_COND_GT:
830 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
831 args[5], small);
832 tcg_out_jxx(s, JCC_JNE, label_next, 1);
833 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
834 args[5], small);
835 break;
836 case TCG_COND_GE:
837 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
838 args[5], small);
839 tcg_out_jxx(s, JCC_JNE, label_next, 1);
840 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
841 args[5], small);
842 break;
843 case TCG_COND_LTU:
844 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
845 args[5], small);
846 tcg_out_jxx(s, JCC_JNE, label_next, 1);
847 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
848 args[5], small);
849 break;
850 case TCG_COND_LEU:
851 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
852 args[5], small);
853 tcg_out_jxx(s, JCC_JNE, label_next, 1);
854 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
855 args[5], small);
856 break;
857 case TCG_COND_GTU:
858 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
859 args[5], small);
860 tcg_out_jxx(s, JCC_JNE, label_next, 1);
861 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
862 args[5], small);
863 break;
864 case TCG_COND_GEU:
865 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
866 args[5], small);
867 tcg_out_jxx(s, JCC_JNE, label_next, 1);
868 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
869 args[5], small);
870 break;
871 default:
872 tcg_abort();
873 }
874 tcg_out_label(s, label_next, s->code_ptr);
875 }
876 #endif
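
/* How the double-word branches in tcg_out_brcond2 decompose, using
   signed LT as an example: (ah:al) < (bh:bl) exactly when ah < bh
   (signed), or ah == bh and al < bl (unsigned).  Hence TCG_COND_LT
   branches on LT of the high parts, skips everything if the high
   parts differ (JNE to label_next), and otherwise branches on LTU of
   the low parts. */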
877
878 static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
879 TCGArg arg1, TCGArg arg2, int const_arg2)
880 {
881 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
882 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
883 tcg_out_ext8u(s, dest, dest);
884 }
885
886 #if TCG_TARGET_REG_BITS == 64
887 static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
888 TCGArg arg1, TCGArg arg2, int const_arg2)
889 {
890 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
891 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
892 tcg_out_ext8u(s, dest, dest);
893 }
894 #else
895 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
896 const int *const_args)
897 {
898 TCGArg new_args[6];
899 int label_true, label_over;
900
901 memcpy(new_args, args+1, 5*sizeof(TCGArg));
902
903 if (args[0] == args[1] || args[0] == args[2]
904 || (!const_args[3] && args[0] == args[3])
905 || (!const_args[4] && args[0] == args[4])) {
906 /* When the destination overlaps with one of the argument
907 registers, don't do anything tricky. */
908 label_true = gen_new_label();
909 label_over = gen_new_label();
910
911 new_args[5] = label_true;
912 tcg_out_brcond2(s, new_args, const_args+1, 1);
913
914 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
915 tcg_out_jxx(s, JCC_JMP, label_over, 1);
916 tcg_out_label(s, label_true, s->code_ptr);
917
918 tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
919 tcg_out_label(s, label_over, s->code_ptr);
920 } else {
921 /* When the destination does not overlap one of the arguments,
922 clear the destination first, jump if cond false, and emit an
923 increment in the true case. This results in smaller code. */
924
925 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
926
927 label_over = gen_new_label();
928 new_args[4] = tcg_invert_cond(new_args[4]);
929 new_args[5] = label_over;
930 tcg_out_brcond2(s, new_args, const_args+1, 1);
931
932 tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
933 tcg_out_label(s, label_over, s->code_ptr);
934 }
935 }
936 #endif
937
938 static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
939 {
940 tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
941
942 if (disp == (int32_t)disp) {
943 tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
944 tcg_out32(s, disp);
945 } else {
946 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
947 tcg_out_modrm(s, OPC_GRP5,
948 call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
949 }
950 }
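
/* Illustrative: a call whose target lies within +/-2GB of the code
   buffer is emitted as the 5-byte e8 rel32 form.  Only on a 64-bit
   host can the displacement overflow, in which case the address is
   materialized in %r10 (call-clobbered and never an argument
   register in either calling convention) and called via ff /2. */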
951
952 static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
953 {
954 tcg_out_branch(s, 1, dest);
955 }
956
957 static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
958 {
959 tcg_out_branch(s, 0, dest);
960 }
961
962 #if defined(CONFIG_SOFTMMU)
963
964 #include "../../softmmu_defs.h"
965
966 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
967 int mmu_idx) */
968 static const void *qemu_ld_helpers[4] = {
969 helper_ldb_mmu,
970 helper_ldw_mmu,
971 helper_ldl_mmu,
972 helper_ldq_mmu,
973 };
974
975 /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
976 uintxx_t val, int mmu_idx) */
977 static const void *qemu_st_helpers[4] = {
978 helper_stb_mmu,
979 helper_stw_mmu,
980 helper_stl_mmu,
981 helper_stq_mmu,
982 };
983
984 /* Perform the TLB load and compare.
985
986 Inputs:
987 ADDRLO_IDX contains the index into ARGS of the low part of the
988 address; the high part of the address is at ADDRLO_IDX+1.
989
990 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
991
992 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
993 This should be offsetof addr_read or addr_write.
994
995 Outputs:
996 LABEL_PTRS is filled with the positions of the displacement bytes of the
997 1 (32-bit addresses) or 2 (64-bit addresses) forward jumps to the TLB miss case.
998
999 First argument register is loaded with the low part of the address.
1000 In the TLB hit case, it has been adjusted as indicated by the TLB
1001 and so is a host address. In the TLB miss case, it continues to
1002 hold a guest address.
1003
1004 Second argument register is clobbered. */
1005
1006 static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
1007 int mem_index, int s_bits,
1008 const TCGArg *args,
1009 uint8_t **label_ptr, int which)
1010 {
1011 const int addrlo = args[addrlo_idx];
1012 const int r0 = tcg_target_call_iarg_regs[0];
1013 const int r1 = tcg_target_call_iarg_regs[1];
1014 TCGType type = TCG_TYPE_I32;
1015 int rexw = 0;
1016
1017 if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
1018 type = TCG_TYPE_I64;
1019 rexw = P_REXW;
1020 }
1021
1022 tcg_out_mov(s, type, r1, addrlo);
1023 tcg_out_mov(s, type, r0, addrlo);
1024
1025 tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
1026 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1027
1028 tgen_arithi(s, ARITH_AND + rexw, r0,
1029 TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
1030 tgen_arithi(s, ARITH_AND + rexw, r1,
1031 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1032
1033 tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
1034 offsetof(CPUArchState, tlb_table[mem_index][0])
1035 + which);
1036
1037 /* cmp 0(r1), r0 */
1038 tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);
1039
1040 tcg_out_mov(s, type, r0, addrlo);
1041
1042 /* jne label1 */
1043 tcg_out8(s, OPC_JCC_short + JCC_JNE);
1044 label_ptr[0] = s->code_ptr;
1045 s->code_ptr++;
1046
1047 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1048 /* cmp 4(r1), addrhi */
1049 tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
1050
1051 /* jne label1 */
1052 tcg_out8(s, OPC_JCC_short + JCC_JNE);
1053 label_ptr[1] = s->code_ptr;
1054 s->code_ptr++;
1055 }
1056
1057 /* TLB Hit. */
1058
1059 /* add addend(r1), r0 */
1060 tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
1061 offsetof(CPUTLBEntry, addend) - which);
1062 }
1063 #endif
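
/* The TLB lookup above, traced with illustrative parameters
   (TARGET_PAGE_BITS = 12, CPU_TLB_ENTRY_BITS = 4, CPU_TLB_SIZE = 256;
   the real values depend on the target and host):
     r1 = (addr >> (12 - 4)) & (255 << 4)
   is 16 * tlb_index, a byte offset into tlb_table that the LEA folds
   into a pointer to the matching CPUTLBEntry field, and
     r0 = addr & (TARGET_PAGE_MASK | ((1 << s_bits) - 1))
   keeps the page number plus any low bits that would make the access
   misaligned, so the compare against the stored page address fails
   and the slow path also handles unaligned accesses. */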
1064
1065 static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
1066 int base, tcg_target_long ofs, int sizeop)
1067 {
1068 #ifdef TARGET_WORDS_BIGENDIAN
1069 const int bswap = 1;
1070 #else
1071 const int bswap = 0;
1072 #endif
1073 switch (sizeop) {
1074 case 0:
1075 tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
1076 break;
1077 case 0 | 4:
1078 tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
1079 break;
1080 case 1:
1081 tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
1082 if (bswap) {
1083 tcg_out_rolw_8(s, datalo);
1084 }
1085 break;
1086 case 1 | 4:
1087 if (bswap) {
1088 tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
1089 tcg_out_rolw_8(s, datalo);
1090 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1091 } else {
1092 tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
1093 }
1094 break;
1095 case 2:
1096 tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1097 if (bswap) {
1098 tcg_out_bswap32(s, datalo);
1099 }
1100 break;
1101 #if TCG_TARGET_REG_BITS == 64
1102 case 2 | 4:
1103 if (bswap) {
1104 tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1105 tcg_out_bswap32(s, datalo);
1106 tcg_out_ext32s(s, datalo, datalo);
1107 } else {
1108 tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
1109 }
1110 break;
1111 #endif
1112 case 3:
1113 if (TCG_TARGET_REG_BITS == 64) {
1114 tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
1115 if (bswap) {
1116 tcg_out_bswap64(s, datalo);
1117 }
1118 } else {
1119 if (bswap) {
1120 int t = datalo;
1121 datalo = datahi;
1122 datahi = t;
1123 }
1124 if (base != datalo) {
1125 tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1126 tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
1127 } else {
1128 tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
1129 tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
1130 }
1131 if (bswap) {
1132 tcg_out_bswap32(s, datalo);
1133 tcg_out_bswap32(s, datahi);
1134 }
1135 }
1136 break;
1137 default:
1138 tcg_abort();
1139 }
1140 }
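
/* The SIZEOP encoding used by the load/store paths above: bits 1:0
   are the log2 of the access size (0 = byte ... 3 = quad), and bit 2
   requests sign extension, so e.g. 1|4 is a sign-extending 16-bit
   load.  Stores have no sign-extension variant and pass only the
   size. */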
1141
1142 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1143 EAX. That would be useful once fixed-register globals are less
1144 common. */
1145 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
1146 int opc)
1147 {
1148 int data_reg, data_reg2 = 0;
1149 int addrlo_idx;
1150 #if defined(CONFIG_SOFTMMU)
1151 int mem_index, s_bits;
1152 #if TCG_TARGET_REG_BITS == 64
1153 int arg_idx;
1154 #else
1155 int stack_adjust;
1156 #endif
1157 uint8_t *label_ptr[3];
1158 #endif
1159
1160 data_reg = args[0];
1161 addrlo_idx = 1;
1162 if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
1163 data_reg2 = args[1];
1164 addrlo_idx = 2;
1165 }
1166
1167 #if defined(CONFIG_SOFTMMU)
1168 mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
1169 s_bits = opc & 3;
1170
1171 tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
1172 label_ptr, offsetof(CPUTLBEntry, addr_read));
1173
1174 /* TLB Hit. */
1175 tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
1176 tcg_target_call_iarg_regs[0], 0, opc);
1177
1178 /* jmp label2 */
1179 tcg_out8(s, OPC_JMP_short);
1180 label_ptr[2] = s->code_ptr;
1181 s->code_ptr++;
1182
1183 /* TLB Miss. */
1184
1185 /* label1: */
1186 *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
1187 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1188 *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
1189 }
1190
1191 /* XXX: move this code to the end of the TB */
1192 #if TCG_TARGET_REG_BITS == 32
1193 tcg_out_pushi(s, mem_index);
1194 stack_adjust = 4;
1195 if (TARGET_LONG_BITS == 64) {
1196 tcg_out_push(s, args[addrlo_idx + 1]);
1197 stack_adjust += 4;
1198 }
1199 tcg_out_push(s, args[addrlo_idx]);
1200 stack_adjust += 4;
1201 tcg_out_push(s, TCG_AREG0);
1202 stack_adjust += 4;
1203 #else
1204 /* The first argument is already loaded with addrlo. */
1205 arg_idx = 1;
1206 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
1207 mem_index);
1208 /* XXX/FIXME: suboptimal */
1209 tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
1210 tcg_target_call_iarg_regs[2]);
1211 tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
1212 tcg_target_call_iarg_regs[1]);
1213 tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
1214 tcg_target_call_iarg_regs[0]);
1215 tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
1216 TCG_AREG0);
1217 #endif
1218
1219 tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
1220
1221 #if TCG_TARGET_REG_BITS == 32
1222 if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
1223 /* Pop and discard. This is 2 bytes smaller than the add. */
1224 tcg_out_pop(s, TCG_REG_ECX);
1225 } else if (stack_adjust != 0) {
1226 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
1227 }
1228 #endif
1229
1230 switch(opc) {
1231 case 0 | 4:
1232 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1233 break;
1234 case 1 | 4:
1235 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1236 break;
1237 case 0:
1238 tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
1239 break;
1240 case 1:
1241 tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
1242 break;
1243 case 2:
1244 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1245 break;
1246 #if TCG_TARGET_REG_BITS == 64
1247 case 2 | 4:
1248 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1249 break;
1250 #endif
1251 case 3:
1252 if (TCG_TARGET_REG_BITS == 64) {
1253 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1254 } else if (data_reg == TCG_REG_EDX) {
1255 /* xchg %edx, %eax */
1256 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1257 tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
1258 } else {
1259 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1260 tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
1261 }
1262 break;
1263 default:
1264 tcg_abort();
1265 }
1266
1267 /* label2: */
1268 *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
1269 #else
1270 {
1271 int32_t offset = GUEST_BASE;
1272 int base = args[addrlo_idx];
1273
1274 if (TCG_TARGET_REG_BITS == 64) {
1275 /* ??? We assume all operations have left us with register
1276 contents that are zero extended. So far this appears to
1277 be true. If we want to enforce this, we can either do
1278 an explicit zero-extension here, or (if GUEST_BASE == 0)
1279 use the ADDR32 prefix. For now, do nothing. */
1280
1281 if (offset != GUEST_BASE) {
1282 tcg_out_movi(s, TCG_TYPE_I64,
1283 tcg_target_call_iarg_regs[0], GUEST_BASE);
1284 tgen_arithr(s, ARITH_ADD + P_REXW,
1285 tcg_target_call_iarg_regs[0], base);
1286 base = tcg_target_call_iarg_regs[0];
1287 offset = 0;
1288 }
1289 }
1290
1291 tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
1292 }
1293 #endif
1294 }
1295
1296 static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
1297 int base, tcg_target_long ofs, int sizeop)
1298 {
1299 #ifdef TARGET_WORDS_BIGENDIAN
1300 const int bswap = 1;
1301 #else
1302 const int bswap = 0;
1303 #endif
1304 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1305 we could perform the bswap twice to restore the original value
1306 instead of moving to the scratch. But as it is, the L constraint
1307 means that the second argument reg is definitely free here. */
1308 int scratch = tcg_target_call_iarg_regs[1];
1309
1310 switch (sizeop) {
1311 case 0:
1312 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
1313 break;
1314 case 1:
1315 if (bswap) {
1316 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1317 tcg_out_rolw_8(s, scratch);
1318 datalo = scratch;
1319 }
1320 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
1321 break;
1322 case 2:
1323 if (bswap) {
1324 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1325 tcg_out_bswap32(s, scratch);
1326 datalo = scratch;
1327 }
1328 tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
1329 break;
1330 case 3:
1331 if (TCG_TARGET_REG_BITS == 64) {
1332 if (bswap) {
1333 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1334 tcg_out_bswap64(s, scratch);
1335 datalo = scratch;
1336 }
1337 tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
1338 } else if (bswap) {
1339 tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
1340 tcg_out_bswap32(s, scratch);
1341 tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
1342 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1343 tcg_out_bswap32(s, scratch);
1344 tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
1345 } else {
1346 tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
1347 tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
1348 }
1349 break;
1350 default:
1351 tcg_abort();
1352 }
1353 }
1354
1355 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
1356 int opc)
1357 {
1358 int data_reg, data_reg2 = 0;
1359 int addrlo_idx;
1360 #if defined(CONFIG_SOFTMMU)
1361 int mem_index, s_bits;
1362 int stack_adjust;
1363 uint8_t *label_ptr[3];
1364 #endif
1365
1366 data_reg = args[0];
1367 addrlo_idx = 1;
1368 if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
1369 data_reg2 = args[1];
1370 addrlo_idx = 2;
1371 }
1372
1373 #if defined(CONFIG_SOFTMMU)
1374 mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
1375 s_bits = opc;
1376
1377 tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
1378 label_ptr, offsetof(CPUTLBEntry, addr_write));
1379
1380 /* TLB Hit. */
1381 tcg_out_qemu_st_direct(s, data_reg, data_reg2,
1382 tcg_target_call_iarg_regs[0], 0, opc);
1383
1384 /* jmp label2 */
1385 tcg_out8(s, OPC_JMP_short);
1386 label_ptr[2] = s->code_ptr;
1387 s->code_ptr++;
1388
1389 /* TLB Miss. */
1390
1391 /* label1: */
1392 *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
1393 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1394 *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
1395 }
1396
1397 /* XXX: move this code to the end of the TB */
1398 #if TCG_TARGET_REG_BITS == 32
1399 tcg_out_pushi(s, mem_index);
1400 stack_adjust = 4;
1401 if (opc == 3) {
1402 tcg_out_push(s, data_reg2);
1403 stack_adjust += 4;
1404 }
1405 tcg_out_push(s, data_reg);
1406 stack_adjust += 4;
1407 if (TARGET_LONG_BITS == 64) {
1408 tcg_out_push(s, args[addrlo_idx + 1]);
1409 stack_adjust += 4;
1410 }
1411 tcg_out_push(s, args[addrlo_idx]);
1412 stack_adjust += 4;
1413 tcg_out_push(s, TCG_AREG0);
1414 stack_adjust += 4;
1415 #else
1416 tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1417 tcg_target_call_iarg_regs[1], data_reg);
1418 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
1419 stack_adjust = 0;
1420 /* XXX/FIXME: suboptimal */
1421 tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
1422 tcg_target_call_iarg_regs[2]);
1423 tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
1424 tcg_target_call_iarg_regs[1]);
1425 tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
1426 tcg_target_call_iarg_regs[0]);
1427 tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
1428 TCG_AREG0);
1429 #endif
1430
1431 tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
1432
1433 if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
1434 /* Pop and discard. This is 2 bytes smaller than the add. */
1435 tcg_out_pop(s, TCG_REG_ECX);
1436 } else if (stack_adjust != 0) {
1437 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
1438 }
1439
1440 /* label2: */
1441 *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
1442 #else
1443 {
1444 int32_t offset = GUEST_BASE;
1445 int base = args[addrlo_idx];
1446
1447 if (TCG_TARGET_REG_BITS == 64) {
1448 /* ??? We assume all operations have left us with register
1449 contents that are zero extended. So far this appears to
1450 be true. If we want to enforce this, we can either do
1451 an explicit zero-extension here, or (if GUEST_BASE == 0)
1452 use the ADDR32 prefix. For now, do nothing. */
1453
1454 if (offset != GUEST_BASE) {
1455 tcg_out_movi(s, TCG_TYPE_I64,
1456 tcg_target_call_iarg_regs[0], GUEST_BASE);
1457 tgen_arithr(s, ARITH_ADD + P_REXW,
1458 tcg_target_call_iarg_regs[0], base);
1459 base = tcg_target_call_iarg_regs[0];
1460 offset = 0;
1461 }
1462 }
1463
1464 tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
1465 }
1466 #endif
1467 }
1468
1469 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1470 const TCGArg *args, const int *const_args)
1471 {
1472 int c, rexw = 0;
1473
1474 #if TCG_TARGET_REG_BITS == 64
1475 # define OP_32_64(x) \
1476 case glue(glue(INDEX_op_, x), _i64): \
1477 rexw = P_REXW; /* FALLTHRU */ \
1478 case glue(glue(INDEX_op_, x), _i32)
1479 #else
1480 # define OP_32_64(x) \
1481 case glue(glue(INDEX_op_, x), _i32)
1482 #endif
1483
1484 switch(opc) {
1485 case INDEX_op_exit_tb:
1486 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1487 tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
1488 break;
1489 case INDEX_op_goto_tb:
1490 if (s->tb_jmp_offset) {
1491 /* direct jump method */
1492 tcg_out8(s, OPC_JMP_long); /* jmp im */
1493 s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1494 tcg_out32(s, 0);
1495 } else {
1496 /* indirect jump method */
1497 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1498 (tcg_target_long)(s->tb_next + args[0]));
1499 }
1500 s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1501 break;
1502 case INDEX_op_call:
1503 if (const_args[0]) {
1504 tcg_out_calli(s, args[0]);
1505 } else {
1506 /* call *reg */
1507 tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
1508 }
1509 break;
1510 case INDEX_op_jmp:
1511 if (const_args[0]) {
1512 tcg_out_jmp(s, args[0]);
1513 } else {
1514 /* jmp *reg */
1515 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
1516 }
1517 break;
1518 case INDEX_op_br:
1519 tcg_out_jxx(s, JCC_JMP, args[0], 0);
1520 break;
1521 case INDEX_op_movi_i32:
1522 tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1523 break;
1524 OP_32_64(ld8u):
1525 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1526 tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1527 break;
1528 OP_32_64(ld8s):
1529 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1530 break;
1531 OP_32_64(ld16u):
1532 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1533 tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1534 break;
1535 OP_32_64(ld16s):
1536 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1537 break;
1538 #if TCG_TARGET_REG_BITS == 64
1539 case INDEX_op_ld32u_i64:
1540 #endif
1541 case INDEX_op_ld_i32:
1542 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1543 break;
1544
1545 OP_32_64(st8):
1546 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1547 args[0], args[1], args[2]);
1548 break;
1549 OP_32_64(st16):
1550 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1551 args[0], args[1], args[2]);
1552 break;
1553 #if TCG_TARGET_REG_BITS == 64
1554 case INDEX_op_st32_i64:
1555 #endif
1556 case INDEX_op_st_i32:
1557 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1558 break;
1559
1560 OP_32_64(add):
1561 /* For 3-operand addition, use LEA. */
1562 if (args[0] != args[1]) {
1563 TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1564
1565 if (const_args[2]) {
1566 c3 = a2, a2 = -1;
1567 } else if (a0 == a2) {
1568 /* Watch out for dest = src + dest, since we've removed
1569 the matching constraint on the add. */
1570 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1571 break;
1572 }
1573
1574 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1575 break;
1576 }
1577 c = ARITH_ADD;
1578 goto gen_arith;
1579 OP_32_64(sub):
1580 c = ARITH_SUB;
1581 goto gen_arith;
1582 OP_32_64(and):
1583 c = ARITH_AND;
1584 goto gen_arith;
1585 OP_32_64(or):
1586 c = ARITH_OR;
1587 goto gen_arith;
1588 OP_32_64(xor):
1589 c = ARITH_XOR;
1590 goto gen_arith;
1591 gen_arith:
1592 if (const_args[2]) {
1593 tgen_arithi(s, c + rexw, args[0], args[2], 0);
1594 } else {
1595 tgen_arithr(s, c + rexw, args[0], args[2]);
1596 }
1597 break;
1598
1599 OP_32_64(mul):
1600 if (const_args[2]) {
1601 int32_t val;
1602 val = args[2];
1603 if (val == (int8_t)val) {
1604 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1605 tcg_out8(s, val);
1606 } else {
1607 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1608 tcg_out32(s, val);
1609 }
1610 } else {
1611 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1612 }
1613 break;
1614
1615 OP_32_64(div2):
1616 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1617 break;
1618 OP_32_64(divu2):
1619 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1620 break;
1621
1622 OP_32_64(shl):
1623 c = SHIFT_SHL;
1624 goto gen_shift;
1625 OP_32_64(shr):
1626 c = SHIFT_SHR;
1627 goto gen_shift;
1628 OP_32_64(sar):
1629 c = SHIFT_SAR;
1630 goto gen_shift;
1631 OP_32_64(rotl):
1632 c = SHIFT_ROL;
1633 goto gen_shift;
1634 OP_32_64(rotr):
1635 c = SHIFT_ROR;
1636 goto gen_shift;
1637 gen_shift:
1638 if (const_args[2]) {
1639 tcg_out_shifti(s, c + rexw, args[0], args[2]);
1640 } else {
1641 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1642 }
1643 break;
1644
1645 case INDEX_op_brcond_i32:
1646 tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1647 args[3], 0);
1648 break;
1649 case INDEX_op_setcond_i32:
1650 tcg_out_setcond32(s, args[3], args[0], args[1],
1651 args[2], const_args[2]);
1652 break;
1653
1654 OP_32_64(bswap16):
1655 tcg_out_rolw_8(s, args[0]);
1656 break;
1657 OP_32_64(bswap32):
1658 tcg_out_bswap32(s, args[0]);
1659 break;
1660
1661 OP_32_64(neg):
1662 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
1663 break;
1664 OP_32_64(not):
1665 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
1666 break;
1667
1668 OP_32_64(ext8s):
1669 tcg_out_ext8s(s, args[0], args[1], rexw);
1670 break;
1671 OP_32_64(ext16s):
1672 tcg_out_ext16s(s, args[0], args[1], rexw);
1673 break;
1674 OP_32_64(ext8u):
1675 tcg_out_ext8u(s, args[0], args[1]);
1676 break;
1677 OP_32_64(ext16u):
1678 tcg_out_ext16u(s, args[0], args[1]);
1679 break;
1680
1681 case INDEX_op_qemu_ld8u:
1682 tcg_out_qemu_ld(s, args, 0);
1683 break;
1684 case INDEX_op_qemu_ld8s:
1685 tcg_out_qemu_ld(s, args, 0 | 4);
1686 break;
1687 case INDEX_op_qemu_ld16u:
1688 tcg_out_qemu_ld(s, args, 1);
1689 break;
1690 case INDEX_op_qemu_ld16s:
1691 tcg_out_qemu_ld(s, args, 1 | 4);
1692 break;
1693 #if TCG_TARGET_REG_BITS == 64
1694 case INDEX_op_qemu_ld32u:
1695 #endif
1696 case INDEX_op_qemu_ld32:
1697 tcg_out_qemu_ld(s, args, 2);
1698 break;
1699 case INDEX_op_qemu_ld64:
1700 tcg_out_qemu_ld(s, args, 3);
1701 break;
1702
1703 case INDEX_op_qemu_st8:
1704 tcg_out_qemu_st(s, args, 0);
1705 break;
1706 case INDEX_op_qemu_st16:
1707 tcg_out_qemu_st(s, args, 1);
1708 break;
1709 case INDEX_op_qemu_st32:
1710 tcg_out_qemu_st(s, args, 2);
1711 break;
1712 case INDEX_op_qemu_st64:
1713 tcg_out_qemu_st(s, args, 3);
1714 break;
1715
1716 #if TCG_TARGET_REG_BITS == 32
1717 case INDEX_op_brcond2_i32:
1718 tcg_out_brcond2(s, args, const_args, 0);
1719 break;
1720 case INDEX_op_setcond2_i32:
1721 tcg_out_setcond2(s, args, const_args);
1722 break;
1723 case INDEX_op_mulu2_i32:
1724 tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
1725 break;
1726 case INDEX_op_add2_i32:
1727 if (const_args[4]) {
1728 tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
1729 } else {
1730 tgen_arithr(s, ARITH_ADD, args[0], args[4]);
1731 }
1732 if (const_args[5]) {
1733 tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
1734 } else {
1735 tgen_arithr(s, ARITH_ADC, args[1], args[5]);
1736 }
1737 break;
1738 case INDEX_op_sub2_i32:
1739 if (const_args[4]) {
1740 tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
1741 } else {
1742 tgen_arithr(s, ARITH_SUB, args[0], args[4]);
1743 }
1744 if (const_args[5]) {
1745 tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
1746 } else {
1747 tgen_arithr(s, ARITH_SBB, args[1], args[5]);
1748 }
1749 break;
1750 #else /* TCG_TARGET_REG_BITS == 64 */
1751 case INDEX_op_movi_i64:
1752 tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
1753 break;
1754 case INDEX_op_ld32s_i64:
1755 tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
1756 break;
1757 case INDEX_op_ld_i64:
1758 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1759 break;
1760 case INDEX_op_st_i64:
1761 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1762 break;
1763 case INDEX_op_qemu_ld32s:
1764 tcg_out_qemu_ld(s, args, 2 | 4);
1765 break;
1766
1767 case INDEX_op_brcond_i64:
1768 tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
1769 args[3], 0);
1770 break;
1771 case INDEX_op_setcond_i64:
1772 tcg_out_setcond64(s, args[3], args[0], args[1],
1773 args[2], const_args[2]);
1774 break;
1775
1776 case INDEX_op_bswap64_i64:
1777 tcg_out_bswap64(s, args[0]);
1778 break;
1779 case INDEX_op_ext32u_i64:
1780 tcg_out_ext32u(s, args[0], args[1]);
1781 break;
1782 case INDEX_op_ext32s_i64:
1783 tcg_out_ext32s(s, args[0], args[1]);
1784 break;
1785 #endif
1786
1787 OP_32_64(deposit):
1788 if (args[3] == 0 && args[4] == 8) {
1789 /* load bits 0..7 */
1790 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
1791 args[2], args[0]);
1792 } else if (args[3] == 8 && args[4] == 8) {
1793 /* load bits 8..15; reg + 4 in ModRM selects the high-byte
register (%ah, %ch, %dh, %bh), since args[0] is constrained
to the first four registers */
1794 tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
1795 } else if (args[3] == 0 && args[4] == 16) {
1796 /* load bits 0..15 */
1797 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
1798 } else {
1799 tcg_abort();
1800 }
1801 break;
1802
1803 default:
1804 tcg_abort();
1805 }
1806
1807 #undef OP_32_64
1808 }
1809
1810 static const TCGTargetOpDef x86_op_defs[] = {
1811 { INDEX_op_exit_tb, { } },
1812 { INDEX_op_goto_tb, { } },
1813 { INDEX_op_call, { "ri" } },
1814 { INDEX_op_jmp, { "ri" } },
1815 { INDEX_op_br, { } },
1816 { INDEX_op_mov_i32, { "r", "r" } },
1817 { INDEX_op_movi_i32, { "r" } },
1818 { INDEX_op_ld8u_i32, { "r", "r" } },
1819 { INDEX_op_ld8s_i32, { "r", "r" } },
1820 { INDEX_op_ld16u_i32, { "r", "r" } },
1821 { INDEX_op_ld16s_i32, { "r", "r" } },
1822 { INDEX_op_ld_i32, { "r", "r" } },
1823 { INDEX_op_st8_i32, { "q", "r" } },
1824 { INDEX_op_st16_i32, { "r", "r" } },
1825 { INDEX_op_st_i32, { "r", "r" } },
1826
    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "r", "r" } },
    { INDEX_op_st16_i64, { "r", "r" } },
    { INDEX_op_st32_i64, { "r", "r" } },
    { INDEX_op_st_i64, { "r", "r" } },

    { INDEX_op_add_i64, { "r", "0", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld32u, { "r", "L" } },
    { INDEX_op_qemu_ld32s, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "L" } },

    { INDEX_op_qemu_st8, { "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
#endif
    { -1 },
};
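
/*
 * A rough key to the constraint letters above (target_parse_constraint
 * is authoritative): "r" is any register; "q" a byte-addressable
 * register (%eax..%ebx on i386); "a", "c", "d" pin an operand to %eax,
 * %ecx, %edx; "0"/"1" tie an input to the same-numbered output; "i"
 * accepts any immediate, "e" a sign-extended and "Z" a zero-extended
 * 32-bit immediate; "L" is a register usable by the qemu_ld/st slow
 * path; and "cb" (%ecx or %ebx) keeps qemu_st8 data in a byte register
 * that the softmmu slow path does not need for itself.  The three
 * qemu_ld/st blocks cover a 64-bit host, a 32-bit host whose guest
 * addresses fit in one register, and a 32-bit host where a 64-bit
 * guest address occupies two registers (hence the extra "L" operands).
 */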

static const int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
#if defined(_WIN64)
    TCG_REG_RDI,
    TCG_REG_RSI,
#endif
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14, /* Currently used for the global env. */
    TCG_REG_R15,
#else
    TCG_REG_EBP, /* Currently used for the global env. */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};
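
/* Note: the push order above must stay in sync with the .fde.reg_ofs
   tables in the debug_frame definitions at the end of this file. */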

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit. */

#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, stack_addend;

    /* TB prologue */

    /* Reserve some stack space, also for TCG temps. */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee saved registers. */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

#if TCG_TARGET_REG_BITS == 32
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
#else
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
#endif
    tcg_out_addi(s, TCG_REG_CALL_STACK, -stack_addend);

    /* jmp *tb. */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);
}
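
/*
 * Sketch of what the above emits on a 64-bit System V host (illustrative
 * only; the exact encodings come from the tcg_out_* helpers).  The TB
 * dispatcher calls this code as f(env, tb_ptr), so env arrives in %rdi
 * and tb_ptr in %rsi; on i386 both are instead loaded from the stack,
 * just above the pushed registers and the return address.
 *
 *     push %rbp; push %rbx; push %r12..%r15    # callee-saved registers
 *     mov  %rdi, %r14                          # env -> TCG_AREG0
 *     sub  $stack_addend, %rsp                 # FRAME_SIZE - PUSH_SIZE
 *     jmp  *%rsi                               # enter the translated block
 * tb_ret_addr:
 *     add  $stack_addend, %rsp
 *     pop  %r15..%r12; pop %rbx; pop %rbp
 *     ret                                      # back to the dispatcher
 */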

static void tcg_target_init(TCGContext *s)
{
#if !defined(CONFIG_USER_ONLY)
    /* fail safe */
    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) {
        tcg_abort();
    }
#endif

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
#if !defined(_WIN64)
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
#endif
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    tcg_add_target_add_op_defs(x86_op_defs);
}
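
/*
 * The clobber set above mirrors the host C ABIs: %eax/%edx/%ecx are
 * caller-saved everywhere, and 64-bit hosts add %r8-%r11 plus, outside
 * Win64, %rdi/%rsi.  Everything else survives helper calls, which is why
 * the callee-saved registers head the allocation order.  The CPUTLBEntry
 * size check is a fail-safe for the qemu_ld/st fast path, which computes
 * TLB entry addresses with a shift by CPU_TLB_ENTRY_BITS.
 */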

typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    tcg_target_long func_start __attribute__((packed));
    tcg_target_long func_len __attribute__((packed));
    uint8_t def_cfa[4];
    uint8_t reg_ofs[14];
} DebugFrameFDE;

typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDE fde;
} DebugFrame;
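
/*
 * These structs lay out a minimal .debug_frame section: one CIE followed
 * by one FDE covering the whole code buffer, in the format described by
 * the DWARF specification.  tcg_register_jit_int hands the blob to the
 * GDB JIT interface so a debugger can unwind through generated code.
 */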

#if !defined(__ELF__)
/* Host machine without ELF. */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x78,             /* sleb128 -8 */
    .cie.return_column = 16,

    .fde.len = sizeof(DebugFrameFDE)-4, /* length after .len member */
    .fde.def_cfa = {
        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde.reg_ofs = {
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* The following ordering must match tcg_target_callee_save_regs. */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
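
/*
 * Decoding one entry above as a sanity check: 0x86 is DW_CFA_offset | 6,
 * and register 6 is %rbp in the x86-64 DWARF numbering; the operand 2 is
 * a factored offset, scaled by data_align (0x78 is the sleb128 encoding
 * of -8), so %rbp is recorded at CFA - 16, i.e. the first push after the
 * return address -- matching the prologue above.
 */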
#else
#define ELF_HOST_MACHINE EM_386
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x7c,             /* sleb128 -4 */
    .cie.return_column = 8,

    .fde.len = sizeof(DebugFrameFDE)-4, /* length after .len member */
    .fde.def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde.reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs. */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif

#if defined(ELF_HOST_MACHINE)
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* We're expecting a 2-byte uleb128 encoded value. */
    assert(FRAME_SIZE >> 14 == 0);

    debug_frame.fde.func_start = (tcg_target_long) buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif
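
/*
 * Why 14 bits: .fde.def_cfa reserves exactly two bytes for the
 * uleb128-encoded FRAME_SIZE (the low 7 bits with the continuation bit
 * set, then the next 7 bits), and a two-byte uleb128 can only represent
 * values below 1 << 14.  The assert keeps that static encoding honest
 * should the frame layout ever grow.
 */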