/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    TCG_REG_EAX,
    TCG_REG_EDX,
    TCG_REG_ECX
#endif
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};

static uint8_t *tb_ret_addr;

static void patch_reloc(uint8_t *code_ptr, int type,
                        tcg_target_long value, tcg_target_long addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = value;
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = value;
        break;
    default:
        tcg_abort();
    }
}
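
/* Note: both reloc types compute a displacement relative to the start of
   the patched field, while the CPU measures branch displacements from the
   end of the instruction.  The caller compensates by passing a negative
   addend equal to the field size (-1 for a rel8 field, -4 for a rel32
   field; see tcg_out_jxx below). */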

/* maximum number of registers used for input function arguments */
static inline int tcg_target_get_call_iarg_regs_count(int flags)
{
    if (TCG_TARGET_REG_BITS == 64) {
        return 6;
    }

    return 0;
}

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
#ifdef CONFIG_TCG_PASS_AREG0
            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
#endif
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
        }
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    return 0;
}
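
/* Illustrative examples for the constraint letters parsed above: on
   x86-64, 'e' (TCG_CT_CONST_S32) accepts any constant that fits in a
   sign-extended 32-bit immediate, so -1 matches but 0x80000000 does
   not; 'Z' (TCG_CT_CONST_U32) accepts any constant that fits
   zero-extended, so 0x80000000 matches but -1 does not. */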

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT           0x100   /* 0x0f opcode prefix */
#define P_DATA16        0x200   /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       0x400   /* 0x67 opcode prefix */
# define P_REXW         0x800   /* Set REX.W = 1 */
# define P_REXB_R       0x1000  /* REG field as byte register */
# define P_REXB_RM      0x2000  /* R/M field as byte register */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
#endif

#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev   0
#define EXT5_DEC_Ev   1
#define EXT5_CALLN_Ev 2
#define EXT5_JMPN_Ev  4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[10] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;         /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This macro works with all versions of gcc,
   whereas relying on optimization to eliminate the unused arguments might
   not.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
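
/* Illustrative example (showing typical usage, not bytes emitted at this
   point): tcg_out_modrm(s, OPC_MOVL_GvEv, TCG_REG_EAX, TCG_REG_ECX)
   produces 8b c1, i.e. "movl %ecx, %eax"; with P_REXW added and %r9 as
   the source it becomes 49 8b c1, i.e. "movq %r9, %rax". */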

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM or INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift,
                                     tcg_target_long offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
            tcg_target_long disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, tcg_target_long offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}
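
/* Illustrative byte sequences for the cases above (assumed typical
   calls, shown for clarity): tcg_out_modrm_offset(s, OPC_MOVL_GvEv,
   TCG_REG_EAX, TCG_REG_EBX, 8) emits 8b 43 08, "movl 8(%ebx), %eax"
   (mod=01, disp8), while the same load based on %esp must take the SIB
   escape and emits 8b 44 24 08, "movl 8(%esp), %eax". */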

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
    } else if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
    } else {
        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        tcg_out32(s, arg >> 31 >> 1);
    }
}
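
/* The four cases above, with illustrative encodings: zero becomes
   "xorl %reg, %reg" (33 c0 for %eax); a zero-extendable constant uses
   the short b8+r imm32 form without REX.W, relying on the implicit
   zero-extension of 32-bit writes; a sign-extendable constant uses
   REX.W c7 /0 imm32 (48 c7 c0 ff ff ff ff for "movq $-1, %rax");
   anything else needs the full 10-byte movabs.  The "arg >> 31 >> 1"
   is a portable way to shift right by 32 when tcg_target_long may be
   only 32 bits wide. */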

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}
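
/* For instance (illustrative): tcg_out_shifti(s, SHIFT_SHL, TCG_REG_EAX, 4)
   emits c1 e0 04, "shll $4, %eax", while a count of 1 selects the
   shorter d1 e0 form.  The shift subopcode lands in the ModRM reg
   field, which is why tcg_out_modrm is passed subopc where a register
   normally goes. */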

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
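
/* A few illustrative cases of the special-casing above: adding 1 to
   %rax when flags are dead emits 48 ff c0, "incq %rax" (via the GRP5
   form, since the one-byte inc encodings became REX prefixes in 64-bit
   mode); AND with 0xff on a byte-addressable register becomes a movzbl
   of the register onto itself; and AND with 0xffffffff on x86-64 is
   just a 32-bit self-move, exploiting implicit zero-extension. */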

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        val = l->u.value - (tcg_target_long)s->code_ptr;
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}
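
/* The adjustments val - 2, val - 5 and val - 6 above are the lengths of
   the instructions being emitted: a short jcc/jmp is 2 bytes (e.g.
   75 xx for "jne"), a long jmp is 5 (e9 + rel32) and a long jcc is 6
   (0f 8x + rel32), and the CPU measures the displacement from the end
   of the instruction.  The -1 and -4 reloc addends play the same role
   for the not-yet-resolved label case. */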

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle temporaries that live across basic blocks */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
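
/* Illustrative encoding: setcond32 with TCG_COND_EQ and %eax as both
   operand and destination yields "cmpl ...; sete %al; movzbl %al, %eax"
   (0f 94 c0 / 0f b6 c0 for the last two instructions).  SETCC only
   writes the low byte, hence the mandatory zero-extension. */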

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
{
    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 0, dest);
}

#if defined(CONFIG_SOFTMMU)

#include "../../softmmu_defs.h"

#ifdef CONFIG_TCG_PASS_AREG0
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
   int mmu_idx) */
static const void *qemu_ld_helpers[4] = {
    helper_ldb_mmu,
    helper_ldw_mmu,
    helper_ldl_mmu,
    helper_ldq_mmu,
};

/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
   uintxx_t val, int mmu_idx) */
static const void *qemu_st_helpers[4] = {
    helper_stb_mmu,
    helper_stw_mmu,
    helper_stl_mmu,
    helper_stq_mmu,
};
#else
/* legacy helper signature: __ld_mmu(target_ulong addr, int
   mmu_idx) */
static void *qemu_ld_helpers[4] = {
    __ldb_mmu,
    __ldw_mmu,
    __ldl_mmu,
    __ldq_mmu,
};

/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
   int mmu_idx) */
static void *qemu_st_helpers[4] = {
    __stb_mmu,
    __stw_mmu,
    __stl_mmu,
    __stq_mmu,
};
#endif

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO_IDX contains the index into ARGS of the low part of the
   address; the high part of the address is at ADDRLO_IDX+1.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   First argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   Second argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                    int mem_index, int s_bits,
                                    const TCGArg *args,
                                    uint8_t **label_ptr, int which)
{
    const int addrlo = args[addrlo_idx];
    const int r0 = tcg_target_call_iarg_regs[0];
    const int r1 = tcg_target_call_iarg_regs[1];
    TCGType type = TCG_TYPE_I32;
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
        type = TCG_TYPE_I64;
        rexw = P_REXW;
    }

    tcg_out_mov(s, type, r1, addrlo);
    tcg_out_mov(s, type, r0, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + rexw, r0,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + rexw, r1,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r1), r0 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);

    tcg_out_mov(s, type, r0, addrlo);

    /* jne label1 */
    tcg_out8(s, OPC_JCC_short + JCC_JNE);
    label_ptr[0] = s->code_ptr;
    s->code_ptr++;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r1), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);

        /* jne label1 */
        tcg_out8(s, OPC_JCC_short + JCC_JNE);
        label_ptr[1] = s->code_ptr;
        s->code_ptr++;
    }

    /* TLB Hit.  */

    /* add addend(r1), r0 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
                         offsetof(CPUTLBEntry, addend) - which);
}
#endif
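
/* For reference, an illustrative pseudo-assembly sketch of what
   tcg_out_tlb_load emits for a 64-bit guest on a 64-bit host, with
   r0/r1 the first two call argument registers:

       mov   addrlo, r1
       mov   addrlo, r0
       shr   $(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), r1
       and   $(TARGET_PAGE_MASK | ((1 << s_bits) - 1)), r0
       and   $((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), r1
       lea   tlb_table[mem_index] + which(env, r1), r1
       cmp   (r1), r0
       mov   addrlo, r0
       jne   miss
       add   addend - which(r1), r0     # r0 now holds the host address

   Folding the low (1 << s_bits) - 1 bits into the page mask makes an
   unaligned access fail the compare and take the slow path. */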

static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
        break;
    case 0 | 4:
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
        break;
    case 1:
        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case 1 | 4:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
        }
        break;
    case 2:
        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        if (bswap) {
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
        }
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
            } else {
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  It will be useful once fixed-register globals are less
   common. */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
#if TCG_TARGET_REG_BITS == 64
    int arg_idx;
#else
    int stack_adjust;
#endif
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc & 3;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  */
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move this code to the end of the TB */
#if TCG_TARGET_REG_BITS == 32
    tcg_out_pushi(s, mem_index);
    stack_adjust = 4;
    if (TARGET_LONG_BITS == 64) {
        tcg_out_push(s, args[addrlo_idx + 1]);
        stack_adjust += 4;
    }
    tcg_out_push(s, args[addrlo_idx]);
    stack_adjust += 4;
#ifdef CONFIG_TCG_PASS_AREG0
    tcg_out_push(s, TCG_AREG0);
    stack_adjust += 4;
#endif
#else
    /* The first argument is already loaded with addrlo.  */
    arg_idx = 1;
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                 mem_index);
#ifdef CONFIG_TCG_PASS_AREG0
    /* XXX/FIXME: suboptimal */
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
                tcg_target_call_iarg_regs[2]);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
                tcg_target_call_iarg_regs[1]);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
                tcg_target_call_iarg_regs[0]);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
                TCG_AREG0);
#endif
#endif

    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);

#if TCG_TARGET_REG_BITS == 32
    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
    }
#endif

    switch(opc) {
    case 0 | 4:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 0:
        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
        break;
    case 1:
        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
        break;
    case 2:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64,
                             tcg_target_call_iarg_regs[0], GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW,
                            tcg_target_call_iarg_regs[0], base);
                base = tcg_target_call_iarg_regs[0];
                offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}

static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that the second argument reg is definitely free here.  */
    int scratch = tcg_target_call_iarg_regs[1];

    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
        break;
    case 1:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
        break;
    case 2:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
        break;
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    int stack_adjust;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move this code to the end of the TB */
#if TCG_TARGET_REG_BITS == 32
    tcg_out_pushi(s, mem_index);
    stack_adjust = 4;
    if (opc == 3) {
        tcg_out_push(s, data_reg2);
        stack_adjust += 4;
    }
    tcg_out_push(s, data_reg);
    stack_adjust += 4;
    if (TARGET_LONG_BITS == 64) {
        tcg_out_push(s, args[addrlo_idx + 1]);
        stack_adjust += 4;
    }
    tcg_out_push(s, args[addrlo_idx]);
    stack_adjust += 4;
#ifdef CONFIG_TCG_PASS_AREG0
    tcg_out_push(s, TCG_AREG0);
    stack_adjust += 4;
#endif
#else
    tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                tcg_target_call_iarg_regs[1], data_reg);
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
    stack_adjust = 0;
#ifdef CONFIG_TCG_PASS_AREG0
    /* XXX/FIXME: suboptimal */
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
                tcg_target_call_iarg_regs[2]);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
                tcg_target_call_iarg_regs[1]);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
                tcg_target_call_iarg_regs[0]);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
                TCG_AREG0);
#endif
#endif

    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);

    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
    }

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64,
                             tcg_target_call_iarg_regs[0], GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW,
                            tcg_target_call_iarg_regs[0], base);
                base = tcg_target_call_iarg_regs[0];
                offset = 0;
            }
        }

        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */ \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif

    switch(opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (tcg_target_long)(s->tb_next + args[0]));
        }
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        break;
    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_calli(s, args[0]);
        } else {
            /* call *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
        }
        break;
    case INDEX_op_jmp:
        if (const_args[0]) {
            tcg_out_jmp(s, args[0]);
        } else {
            /* jmp *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
        }
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
        break;
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                             args[0], args[1], args[2]);
        break;
    OP_32_64(st16):
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                             args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        goto gen_shift;
    OP_32_64(shr):
        c = SHIFT_SHR;
        goto gen_shift;
    OP_32_64(sar):
        c = SHIFT_SAR;
        goto gen_shift;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    case INDEX_op_qemu_ld8u:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld8s:
        tcg_out_qemu_ld(s, args, 0 | 4);
        break;
    case INDEX_op_qemu_ld16u:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_ld16s:
        tcg_out_qemu_ld(s, args, 1 | 4);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_qemu_ld32u:
#endif
    case INDEX_op_qemu_ld32:
        tcg_out_qemu_ld(s, args, 2);
        break;
    case INDEX_op_qemu_ld64:
        tcg_out_qemu_ld(s, args, 3);
        break;

    case INDEX_op_qemu_st8:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st16:
        tcg_out_qemu_st(s, args, 1);
        break;
    case INDEX_op_qemu_st32:
        tcg_out_qemu_st(s, args, 2);
        break;
    case INDEX_op_qemu_st64:
        tcg_out_qemu_st(s, args, 3);
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
    case INDEX_op_mulu2_i32:
        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
        break;
    case INDEX_op_add2_i32:
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
        }
        break;
    case INDEX_op_sub2_i32:
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
        }
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_qemu_ld32s:
        tcg_out_qemu_ld(s, args, 2 | 4);
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    OP_32_64(deposit):
        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
                          args[2], args[0]);
        } else if (args[3] == 8 && args[4] == 8) {
            /* load bits 8..15 */
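            /* args[0] + 4 turns a register code 0..3 into the matching
               high-byte register code 4..7 (%ah, %ch, %dh, %bh); this is
               valid because the "Q" constraint limits args[0] to those
               four registers and no REX prefix is emitted here. */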
1824 tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
1825 } else if (args[3] == 0 && args[4] == 16) {
1826 /* load bits 0..15 */
1827 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
1828 } else {
1829 tcg_abort();
1830 }
1831 break;
1832
1833 default:
1834 tcg_abort();
1835 }
1836
1837 #undef OP_32_64
1838 }

static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_jmp, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "q", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "r", "r" } },
    { INDEX_op_st16_i64, { "r", "r" } },
    { INDEX_op_st32_i64, { "r", "r" } },
    { INDEX_op_st_i64, { "r", "r" } },

    { INDEX_op_add_i64, { "r", "0", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld32u, { "r", "L" } },
    { INDEX_op_qemu_ld32s, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "L" } },

    { INDEX_op_qemu_st8, { "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
#endif
    { -1 },
};

static const int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
#if defined(_WIN64)
    TCG_REG_RDI,
    TCG_REG_RSI,
#endif
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14, /* Currently used for the global env.  */
    TCG_REG_R15,
#else
    TCG_REG_EBP, /* Currently used for the global env.  */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};
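
/* These match the registers the host ABI requires a callee to preserve
   (SysV AMD64: %rbp, %rbx, %r12-%r15; Win64 additionally %rdi and %rsi;
   i386: %ebp, %ebx, %esi, %edi).  Saving them once in the prologue lets
   the generated code treat every host register as its own.  */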

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

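/*
 * Worked example (a sketch, assuming the usual tcg.h values
 * TCG_STATIC_CALL_ARGS_SIZE == 128, CPU_TEMP_BUF_NLONGS == 128 and
 * TCG_TARGET_STACK_ALIGN == 16): a SysV AMD64 host pushes 6 callee-saved
 * registers, so counting the return address
 *     PUSH_SIZE  = 7 * 8 = 56, and
 *     FRAME_SIZE = (56 + 128 + 128 * 8 + 15) & ~15 = 1216.
 */
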
/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, stack_addend;

    /* TB prologue */

    /* Reserve some stack space, also for TCG temps.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

#if TCG_TARGET_REG_BITS == 32
    /* The env and tb arguments live on the stack: skip the registers
       just pushed plus the return address to reach them.  */
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
#else
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
#endif
    tcg_out_addi(s, TCG_REG_CALL_STACK, -stack_addend);

    /* Jump to the TB, whose entry point is now in the second argument
       register.  */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);
}
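
/*
 * For reference, the code emitted above for a SysV AMD64 host looks
 * roughly like this (a sketch; 0x488 = 1160 is FRAME_SIZE - PUSH_SIZE
 * from the worked example above):
 *
 *     push %rbp; push %rbx; push %r12; push %r13; push %r14; push %r15
 *     mov  %rdi, %r14         # first argument: env -> TCG_AREG0
 *     sub  $0x488, %rsp       # stack_addend
 *     jmp  *%rsi              # second argument: the TB entry point
 *   tb_ret_addr:
 *     add  $0x488, %rsp
 *     pop  %r15 ... pop %rbp  # reverse order of the pushes
 *     ret
 */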

static void tcg_target_init(TCGContext *s)
{
#if !defined(CONFIG_USER_ONLY)
    /* Fail safe: the softmmu TLB lookup code assumes that the entry
       size is exactly this power of two.  */
    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) {
        tcg_abort();
    }
#endif

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
#if !defined(_WIN64)
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
#endif
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    tcg_add_target_add_op_defs(x86_op_defs);
}
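
/* To recap the sets above: 0xffff (0xff on 32-bit hosts) offers all 16
   (or 8) general registers to the allocator; the call-clobber set
   mirrors the caller-saved registers of the host calling convention;
   and the stack pointer is reserved so it is never allocated.  */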

typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    tcg_target_long func_start __attribute__((packed));
    tcg_target_long func_len __attribute__((packed));
    uint8_t def_cfa[4];
    uint8_t reg_ofs[14];
} DebugFrameFDE;

typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDE fde;
} DebugFrame;

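/*
 * The CFI below is stored pre-encoded: data_align holds a raw sleb128
 * byte (0x78 decodes to -8, 0x7c to -4), and each reg_ofs pair is a
 * DW_CFA_offset opcode (0x80 | dwarf-regnum) followed by a uleb128
 * factored offset, so e.g. { 0x86, 2 } says "%rbp is saved at
 * CFA + 2 * data_align = CFA - 16".
 */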
#if !defined(__ELF__)
/* Host machine without ELF. */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x78,             /* sleb128 -8 */
    .cie.return_column = 16,

    .fde.len = sizeof(DebugFrameFDE)-4, /* length after .len member */
    .fde.def_cfa = {
        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde.reg_ofs = {
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
#else
#define ELF_HOST_MACHINE EM_386
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x7c,             /* sleb128 -4 */
    .cie.return_column = 8,

    .fde.len = sizeof(DebugFrameFDE)-4, /* length after .len member */
    .fde.def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde.reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif

#if defined(ELF_HOST_MACHINE)
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* The uleb128 in .fde.def_cfa above holds exactly two bytes,
       i.e. 14 bits, so FRAME_SIZE must fit.  */
    assert(FRAME_SIZE >> 14 == 0);

    debug_frame.fde.func_start = (tcg_target_long) buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif