/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32-bit mode uses a stack-based calling convention (the GCC default). */
#endif
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};

/* Registers used with L constraint, which are the first argument
   registers on x86_64, and two arbitrary call-clobbered registers on
   i386. */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
# define TCG_REG_L2 tcg_target_call_iarg_regs[2]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

static uint8_t *tb_ret_addr;

static void patch_reloc(uint8_t *code_ptr, int type,
                        tcg_target_long value, tcg_target_long addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = value;
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = value;
        break;
    default:
        tcg_abort();
    }
}
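
/* Worked example (illustrative): tcg_out_jxx below records a PC32 reloc
   with addend -4 against the 4-byte displacement field of a long branch.
   If that field sits at 0x1000 and the label later resolves to 0x1234,
   patch_reloc stores 0x1234 + (-4) - 0x1000 = 0x230: the target minus
   the end of the displacement field, which is exactly the rel32 the CPU
   adds to the address of the next instruction. */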

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
#if TCG_TARGET_REG_BITS == 64
        tcg_regset_set32(ct->u.regs, 0, 0xffff);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L2);
#else
        tcg_regset_set32(ct->u.regs, 0, 0xff);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
#endif
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    return 0;
}
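
/* For example, on x86_64 the constant -1 satisfies the 'e' (signed
   32-bit) constraint but not 'Z': it equals its own 32-bit sign
   extension but not its zero extension.  Conversely, 0x80000000
   satisfies 'Z' but not 'e'.  This mirrors which instruction forms can
   encode the value as an immediate. */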

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT       0x100       /* 0x0f opcode prefix */
#define P_DATA16    0x200       /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32   0x400       /* 0x67 opcode prefix */
# define P_REXW     0x800       /* Set REX.W = 1 */
# define P_REXB_R   0x1000      /* REG field as byte register */
# define P_REXB_RM  0x2000      /* R/M field as byte register */
#else
# define P_ADDR32   0
# define P_REXW     0
# define P_REXB_R   0
# define P_REXB_RM  0
#endif

#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM)  /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH. */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3. */
#define EXT3_NOT  2
#define EXT3_NEG  3
#define EXT3_MUL  4
#define EXT3_IMUL 5
#define EXT3_DIV  6
#define EXT3_IDIV 7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5. */
#define EXT5_INC_Ev   0
#define EXT5_DEC_Ev   1
#define EXT5_CALLN_Ev 2
#define EXT5_JMPN_Ev  4

/* Condition codes to be added to OPC_JCC_{long,short}. */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;     /* REX.W */
    rex |= (r & 8) >> 1;            /* REX.R */
    rex |= (x & 8) >> 2;            /* REX.X */
    rex |= (rm & 8) >> 3;           /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
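
/* Worked example (not specific to any one caller): OPC_ADD_GvEv with
   P_REXW, r = TCG_REG_RAX (0) and rm = TCG_REG_R8 (8) yields
   rex = 8 (REX.W) | 1 (REX.B), so this emits 0x49 0x03; with the ModRM
   byte 0xc0 from tcg_out_modrm the result is "addq %r8, %rax".
   REX.R/X/B supply the fourth bit of the reg, index and r/m fields
   that the one-byte ModRM and SIB encodings cannot hold on their own. */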
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
#endif

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM and INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift,
                                     tcg_target_long offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
            tcg_target_long disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
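
/* Worked example (illustrative): opc = OPC_MOVL_GvEv, r = %eax,
   rm = %ebx, index = %esi, shift = 2, offset = 0x10 selects mod = 0x40
   (disp8) and the two-byte form, emitting 0x8b 0x44 0xb3 0x10, i.e.
   "movl 0x10(%ebx,%esi,4), %eax": ModRM 0x44 combines mod, the reg
   field and the SIB escape (rm = 4); SIB 0xb3 = (2 << 6) | (6 << 3) | 3. */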

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, tcg_target_long offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
    } else if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
    } else {
        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        tcg_out32(s, arg >> 31 >> 1);
    }
}
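
/* The four cases above emit, in order: "xor r,r" (which also clears the
   high half on x86_64), "movl $imm32, r" (B8+r, zero-extending),
   "movq $simm32, r" (REX.W C7 /0, sign-extending), and the full
   ten-byte "movabsq $imm64, r".  The "arg >> 31 >> 1" split avoids an
   undefined shift by 32 when tcg_target_long is only 32 bits wide. */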

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
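
/* For instance, "andl $0xff, %reg" is rewritten above as movzbl, which
   at 3 bytes beats the 6-byte imm32 AND form (0xff does not fit the
   sign-extended imm8 form), and a 64-bit AND whose mask fits in 32 bits
   drops REX.W entirely, since 32-bit operations zero-extend anyway. */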

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        val = l->u.value - (tcg_target_long)s->code_ptr;
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}
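
/* The small constants above are instruction lengths: val is the
   distance from the first byte of the branch, a short jump is 2 bytes
   (opcode + rel8, hence val - 2), "jmp rel32" is 5 bytes, and
   "jcc rel32" is 6 because of the 0x0f escape (hence val - 5 and
   val - 6); displacements are relative to the following instruction. */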

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross-basic-block temporaries */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
}
#endif

static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
{
    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}
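
/* The rel32 form always fits on i386, so the movabs-to-%r10 plus
   indirect call/jump is in practice the 64-bit fallback for
   displacements beyond +/- 2GB.  %r10 is a safe scratch here: it is
   call-clobbered and not an argument register in either the SysV or
   the Win64 ABI. */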

static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 0, dest);
}

#if defined(CONFIG_SOFTMMU)

#include "../../softmmu_defs.h"

/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
   int mmu_idx) */
static const void *qemu_ld_helpers[4] = {
    helper_ldb_mmu,
    helper_ldw_mmu,
    helper_ldl_mmu,
    helper_ldq_mmu,
};

/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
   uintxx_t val, int mmu_idx) */
static const void *qemu_st_helpers[4] = {
    helper_stb_mmu,
    helper_stw_mmu,
    helper_stl_mmu,
    helper_stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO_IDX contains the index into ARGS of the low part of the
   address; the high part of the address is at ADDRLO_IDX+1.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   First argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   Second argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                    int mem_index, int s_bits,
                                    const TCGArg *args,
                                    uint8_t **label_ptr, int which)
{
    const int addrlo = args[addrlo_idx];
    const int r0 = TCG_REG_L0;
    const int r1 = TCG_REG_L1;
    TCGType type = TCG_TYPE_I32;
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
        type = TCG_TYPE_I64;
        rexw = P_REXW;
    }

    tcg_out_mov(s, type, r1, addrlo);
    tcg_out_mov(s, type, r0, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + rexw, r0,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + rexw, r1,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r1), r0 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);

    tcg_out_mov(s, type, r0, addrlo);

    /* jne label1 */
    tcg_out8(s, OPC_JCC_short + JCC_JNE);
    label_ptr[0] = s->code_ptr;
    s->code_ptr++;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r1), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);

        /* jne label1 */
        tcg_out8(s, OPC_JCC_short + JCC_JNE);
        label_ptr[1] = s->code_ptr;
        s->code_ptr++;
    }

    /* TLB Hit.  */

    /* add addend(r1), r0 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
                         offsetof(CPUTLBEntry, addend) - which);
}
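
/* Worked example, assuming the common values TARGET_PAGE_BITS = 12,
   CPU_TLB_SIZE = 256 and 16-byte TLB entries (CPU_TLB_ENTRY_BITS = 4):
   r1 becomes (addr >> 8) & 0xff0, the byte offset of this page's entry
   within tlb_table[mem_index], which the LEA then adds to env plus the
   table offset.  Meanwhile r0 keeps addr's page number plus the low
   s_bits, so an access that is not size-aligned cannot compare equal
   to the page-aligned tag and falls through to the slow path. */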
#endif

static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
        break;
    case 0 | 4:
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
        break;
    case 1:
        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case 1 | 4:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
        }
        break;
    case 2:
        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        if (bswap) {
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
        }
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
            } else {
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  That will become useful once fixed-register globals are less
   common. */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
#if TCG_TARGET_REG_BITS == 64
    int arg_idx;
#else
    int stack_adjust;
#endif
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc & 3;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  */
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code at the end of the TB */
#if TCG_TARGET_REG_BITS == 32
    tcg_out_pushi(s, mem_index);
    stack_adjust = 4;
    if (TARGET_LONG_BITS == 64) {
        tcg_out_push(s, args[addrlo_idx + 1]);
        stack_adjust += 4;
    }
    tcg_out_push(s, args[addrlo_idx]);
    stack_adjust += 4;
    tcg_out_push(s, TCG_AREG0);
    stack_adjust += 4;
#else
    /* The first argument is already loaded with addrlo.  */
    arg_idx = 1;
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                 mem_index);
    /* XXX/FIXME: suboptimal */
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
#endif

    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);

#if TCG_TARGET_REG_BITS == 32
    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
    }
#endif

    switch(opc) {
    case 0 | 4:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 0:
        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
        break;
    case 1:
        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
        break;
    case 2:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
                base = TCG_REG_L0;
                offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}

static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that TCG_REG_L1 is definitely free here.  */
    const int scratch = TCG_REG_L1;

    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
        break;
    case 1:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
        break;
    case 2:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
        break;
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    int stack_adjust;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code at the end of the TB */
#if TCG_TARGET_REG_BITS == 32
    tcg_out_pushi(s, mem_index);
    stack_adjust = 4;
    if (opc == 3) {
        tcg_out_push(s, data_reg2);
        stack_adjust += 4;
    }
    tcg_out_push(s, data_reg);
    stack_adjust += 4;
    if (TARGET_LONG_BITS == 64) {
        tcg_out_push(s, args[addrlo_idx + 1]);
        stack_adjust += 4;
    }
    tcg_out_push(s, args[addrlo_idx]);
    stack_adjust += 4;
    tcg_out_push(s, TCG_AREG0);
    stack_adjust += 4;
#else
    tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                TCG_REG_L1, data_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_L2, mem_index);
    stack_adjust = 0;
    /* XXX/FIXME: suboptimal */
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
#endif

    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);

    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
    }

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
                base = TCG_REG_L0;
                offset = 0;
            }
        }

        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */ \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif

    switch(opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (tcg_target_long)(s->tb_next + args[0]));
        }
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        break;
    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_calli(s, args[0]);
        } else {
            /* call *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
        }
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
        break;
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
                                 0, args[1], args[2]);
            tcg_out8(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                                 args[0], args[1], args[2]);
        }
        break;
    OP_32_64(st16):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
                                 0, args[1], args[2]);
            tcg_out16(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                                 args[0], args[1], args[2]);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        }
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        goto gen_shift;
    OP_32_64(shr):
        c = SHIFT_SHR;
        goto gen_shift;
    OP_32_64(sar):
        c = SHIFT_SAR;
        goto gen_shift;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i32:
        tcg_out_movcond32(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    case INDEX_op_qemu_ld8u:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld8s:
        tcg_out_qemu_ld(s, args, 0 | 4);
        break;
    case INDEX_op_qemu_ld16u:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_ld16s:
        tcg_out_qemu_ld(s, args, 1 | 4);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_qemu_ld32u:
#endif
    case INDEX_op_qemu_ld32:
        tcg_out_qemu_ld(s, args, 2);
        break;
    case INDEX_op_qemu_ld64:
        tcg_out_qemu_ld(s, args, 3);
        break;

    case INDEX_op_qemu_st8:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st16:
        tcg_out_qemu_st(s, args, 1);
        break;
    case INDEX_op_qemu_st32:
        tcg_out_qemu_st(s, args, 2);
        break;
    case INDEX_op_qemu_st64:
        tcg_out_qemu_st(s, args, 3);
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
    case INDEX_op_mulu2_i32:
        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
        break;
    case INDEX_op_add2_i32:
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
        }
        break;
    case INDEX_op_sub2_i32:
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
        }
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
                                 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        }
        break;
    case INDEX_op_qemu_ld32s:
        tcg_out_qemu_ld(s, args, 2 | 4);
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i64:
        tcg_out_movcond64(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    OP_32_64(deposit):
        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
                          args[2], args[0]);
        } else if (args[3] == 8 && args[4] == 8) {
            /* load bits 8..15 */
            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
        } else if (args[3] == 0 && args[4] == 16) {
            /* load bits 0..15 */
            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
        } else {
            tcg_abort();
        }
        break;

    default:
        tcg_abort();
    }

#undef OP_32_64
}
1838
1839 static const TCGTargetOpDef x86_op_defs[] = {
1840 { INDEX_op_exit_tb, { } },
1841 { INDEX_op_goto_tb, { } },
1842 { INDEX_op_call, { "ri" } },
1843 { INDEX_op_br, { } },
1844 { INDEX_op_mov_i32, { "r", "r" } },
1845 { INDEX_op_movi_i32, { "r" } },
1846 { INDEX_op_ld8u_i32, { "r", "r" } },
1847 { INDEX_op_ld8s_i32, { "r", "r" } },
1848 { INDEX_op_ld16u_i32, { "r", "r" } },
1849 { INDEX_op_ld16s_i32, { "r", "r" } },
1850 { INDEX_op_ld_i32, { "r", "r" } },
1851 { INDEX_op_st8_i32, { "qi", "r" } },
1852 { INDEX_op_st16_i32, { "ri", "r" } },
1853 { INDEX_op_st_i32, { "ri", "r" } },
1854
1855 { INDEX_op_add_i32, { "r", "r", "ri" } },
1856 { INDEX_op_sub_i32, { "r", "0", "ri" } },
1857 { INDEX_op_mul_i32, { "r", "0", "ri" } },
1858 { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
1859 { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
1860 { INDEX_op_and_i32, { "r", "0", "ri" } },
1861 { INDEX_op_or_i32, { "r", "0", "ri" } },
1862 { INDEX_op_xor_i32, { "r", "0", "ri" } },
1863
1864 { INDEX_op_shl_i32, { "r", "0", "ci" } },
1865 { INDEX_op_shr_i32, { "r", "0", "ci" } },
1866 { INDEX_op_sar_i32, { "r", "0", "ci" } },
1867 { INDEX_op_rotl_i32, { "r", "0", "ci" } },
1868 { INDEX_op_rotr_i32, { "r", "0", "ci" } },
1869
1870 { INDEX_op_brcond_i32, { "r", "ri" } },
1871
1872 { INDEX_op_bswap16_i32, { "r", "0" } },
1873 { INDEX_op_bswap32_i32, { "r", "0" } },
1874
1875 { INDEX_op_neg_i32, { "r", "0" } },
1876
1877 { INDEX_op_not_i32, { "r", "0" } },
1878
1879 { INDEX_op_ext8s_i32, { "r", "q" } },
1880 { INDEX_op_ext16s_i32, { "r", "r" } },
1881 { INDEX_op_ext8u_i32, { "r", "q" } },
1882 { INDEX_op_ext16u_i32, { "r", "r" } },
1883
1884 { INDEX_op_setcond_i32, { "q", "r", "ri" } },
1885
1886 { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
1887 #if TCG_TARGET_HAS_movcond_i32
1888 { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
1889 #endif
1890
1891 #if TCG_TARGET_REG_BITS == 32
1892 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
1893 { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
1894 { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
1895 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
1896 { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
1897 #else
1898 { INDEX_op_mov_i64, { "r", "r" } },
1899 { INDEX_op_movi_i64, { "r" } },
1900 { INDEX_op_ld8u_i64, { "r", "r" } },
1901 { INDEX_op_ld8s_i64, { "r", "r" } },
1902 { INDEX_op_ld16u_i64, { "r", "r" } },
1903 { INDEX_op_ld16s_i64, { "r", "r" } },
1904 { INDEX_op_ld32u_i64, { "r", "r" } },
1905 { INDEX_op_ld32s_i64, { "r", "r" } },
1906 { INDEX_op_ld_i64, { "r", "r" } },
1907 { INDEX_op_st8_i64, { "ri", "r" } },
1908 { INDEX_op_st16_i64, { "ri", "r" } },
1909 { INDEX_op_st32_i64, { "ri", "r" } },
1910 { INDEX_op_st_i64, { "re", "r" } },
1911
1912 { INDEX_op_add_i64, { "r", "0", "re" } },
1913 { INDEX_op_mul_i64, { "r", "0", "re" } },
1914 { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
1915 { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
1916 { INDEX_op_sub_i64, { "r", "0", "re" } },
1917 { INDEX_op_and_i64, { "r", "0", "reZ" } },
1918 { INDEX_op_or_i64, { "r", "0", "re" } },
1919 { INDEX_op_xor_i64, { "r", "0", "re" } },
1920
1921 { INDEX_op_shl_i64, { "r", "0", "ci" } },
1922 { INDEX_op_shr_i64, { "r", "0", "ci" } },
1923 { INDEX_op_sar_i64, { "r", "0", "ci" } },
1924 { INDEX_op_rotl_i64, { "r", "0", "ci" } },
1925 { INDEX_op_rotr_i64, { "r", "0", "ci" } },
1926
1927 { INDEX_op_brcond_i64, { "r", "re" } },
1928 { INDEX_op_setcond_i64, { "r", "r", "re" } },
1929
1930 { INDEX_op_bswap16_i64, { "r", "0" } },
1931 { INDEX_op_bswap32_i64, { "r", "0" } },
1932 { INDEX_op_bswap64_i64, { "r", "0" } },
1933 { INDEX_op_neg_i64, { "r", "0" } },
1934 { INDEX_op_not_i64, { "r", "0" } },
1935
1936 { INDEX_op_ext8s_i64, { "r", "r" } },
1937 { INDEX_op_ext16s_i64, { "r", "r" } },
1938 { INDEX_op_ext32s_i64, { "r", "r" } },
1939 { INDEX_op_ext8u_i64, { "r", "r" } },
1940 { INDEX_op_ext16u_i64, { "r", "r" } },
1941 { INDEX_op_ext32u_i64, { "r", "r" } },
1942
1943 { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
1944 { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
1945 #endif
1946
1947 #if TCG_TARGET_REG_BITS == 64
1948 { INDEX_op_qemu_ld8u, { "r", "L" } },
1949 { INDEX_op_qemu_ld8s, { "r", "L" } },
1950 { INDEX_op_qemu_ld16u, { "r", "L" } },
1951 { INDEX_op_qemu_ld16s, { "r", "L" } },
1952 { INDEX_op_qemu_ld32, { "r", "L" } },
1953 { INDEX_op_qemu_ld32u, { "r", "L" } },
1954 { INDEX_op_qemu_ld32s, { "r", "L" } },
1955 { INDEX_op_qemu_ld64, { "r", "L" } },
1956
1957 { INDEX_op_qemu_st8, { "L", "L" } },
1958 { INDEX_op_qemu_st16, { "L", "L" } },
1959 { INDEX_op_qemu_st32, { "L", "L" } },
1960 { INDEX_op_qemu_st64, { "L", "L" } },
1961 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
1962 { INDEX_op_qemu_ld8u, { "r", "L" } },
1963 { INDEX_op_qemu_ld8s, { "r", "L" } },
1964 { INDEX_op_qemu_ld16u, { "r", "L" } },
1965 { INDEX_op_qemu_ld16s, { "r", "L" } },
1966 { INDEX_op_qemu_ld32, { "r", "L" } },
1967 { INDEX_op_qemu_ld64, { "r", "r", "L" } },
1968
1969 { INDEX_op_qemu_st8, { "cb", "L" } },
1970 { INDEX_op_qemu_st16, { "L", "L" } },
1971 { INDEX_op_qemu_st32, { "L", "L" } },
1972 { INDEX_op_qemu_st64, { "L", "L", "L" } },
1973 #else
1974 { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
1975 { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
1976 { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
1977 { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
1978 { INDEX_op_qemu_ld32, { "r", "L", "L" } },
1979 { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
1980
1981 { INDEX_op_qemu_st8, { "cb", "L", "L" } },
1982 { INDEX_op_qemu_st16, { "L", "L", "L" } },
1983 { INDEX_op_qemu_st32, { "L", "L", "L" } },
1984 { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
1985 #endif
1986     { -1 }, /* end-of-list sentinel */
1987 };
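
/* A key to the constraint letters used above (parsed by
   target_parse_constraint earlier in this file): "r" is any register;
   "q" is a byte-addressable register (only %eax/%ebx/%ecx/%edx on i386,
   any register on x86_64 thanks to REX); "Q" is a register with an
   addressable second byte such as %ah, as needed by deposit; "a"/"b"/
   "c"/"d" pin the operand to %eax/%ebx/%ecx/%edx, so "cb" for qemu_st8
   means %ecx or %ebx -- the byte registers not reserved by "L" -- and
   "ci" is a shift count in %cl or a constant; "L" is a register usable
   by the qemu_ld/st slow path, i.e. excluding TCG_REG_L0/L1(/L2);
   "0"/"1" alias the operand to output 0/1; "i" accepts any immediate,
   "e" a 32-bit sign-extended immediate, "Z" a 32-bit zero-extended
   immediate.  The three qemu_ld/st blocks differ only in operand count:
   a 32-bit host needs two registers for a 64-bit guest address and two
   more for 64-bit data.  */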
1988
1989 static int tcg_target_callee_save_regs[] = {
1990 #if TCG_TARGET_REG_BITS == 64
1991 TCG_REG_RBP,
1992 TCG_REG_RBX,
1993 #if defined(_WIN64)
1994 TCG_REG_RDI,
1995 TCG_REG_RSI,
1996 #endif
1997 TCG_REG_R12,
1998 TCG_REG_R13,
1999 TCG_REG_R14, /* Currently used for the global env. */
2000 TCG_REG_R15,
2001 #else
2002 TCG_REG_EBP, /* Currently used for the global env. */
2003 TCG_REG_EBX,
2004 TCG_REG_ESI,
2005 TCG_REG_EDI,
2006 #endif
2007 };
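
/* %rdi/%rsi appear above only for _WIN64: the Windows x64 calling
   convention treats them as callee-saved, while the SysV ABI used on
   other x86_64 hosts treats them as argument registers.  */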
2008
2009 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2010 and tcg_register_jit. */
2011
2012 #define PUSH_SIZE \
2013 ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2014 * (TCG_TARGET_REG_BITS / 8))
2015
2016 #define FRAME_SIZE \
2017 ((PUSH_SIZE \
2018 + TCG_STATIC_CALL_ARGS_SIZE \
2019 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2020 + TCG_TARGET_STACK_ALIGN - 1) \
2021 & ~(TCG_TARGET_STACK_ALIGN - 1))
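
/* Worked example, assuming the usual values TCG_STATIC_CALL_ARGS_SIZE
   = 128, CPU_TEMP_BUF_NLONGS = 128 and TCG_TARGET_STACK_ALIGN = 16: a
   SysV x86_64 host saves 6 registers, so PUSH_SIZE = (1 + 6) * 8 = 56,
   the "1 +" accounting for the return address already on the stack.
   Then FRAME_SIZE = (56 + 128 + 128 * 8 + 15) & ~15 = 1216, keeping
   the frame 16-byte aligned as the ABI requires at call sites.  */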
2022
2023 /* Generate global QEMU prologue and epilogue code */
2024 static void tcg_target_qemu_prologue(TCGContext *s)
2025 {
2026 int i, stack_addend;
2027
2028 /* TB prologue */
2029
2030 /* Reserve some stack space, also for TCG temps. */
2031 stack_addend = FRAME_SIZE - PUSH_SIZE;
2032 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2033 CPU_TEMP_BUF_NLONGS * sizeof(long));
2034
2035 /* Save all callee saved registers. */
2036 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2037 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2038 }
2039
2040 #if TCG_TARGET_REG_BITS == 32
2041 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
2042 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
2043 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2044 /* jmp *tb. */
2045 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
2046 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
2047 + stack_addend);
2048 #else
2049 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2050 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2051 /* jmp *tb. */
2052 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
2053 #endif
2054
2055 /* TB epilogue */
2056 tb_ret_addr = s->code_ptr;
2057
2058 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
2059
2060 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2061 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2062 }
2063 tcg_out_opc(s, OPC_RET, 0, 0, 0);
2064 }
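
/* On a SysV x86_64 host the code generated above is, roughly:
 *
 *     push %rbp; push %rbx; push %r12; push %r13; push %r14; push %r15
 *     mov  %rdi, %r14            # env argument -> TCG_AREG0
 *     sub  $stack_addend, %rsp
 *     jmp  *%rsi                 # enter the translation block
 * tb_ret_addr:
 *     add  $stack_addend, %rsp
 *     pop  %r15; ...; pop  %rbp
 *     ret
 *
 * A TB returns by jumping to tb_ret_addr, so the epilogue runs with the
 * exact stack layout the prologue established.  */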
2065
2066 static void tcg_target_init(TCGContext *s)
2067 {
2068 #if !defined(CONFIG_USER_ONLY)
2069     /* Fail safe: the softmmu TLB fast path computes entry addresses by shifting, so sizeof(CPUTLBEntry) must equal 1 << CPU_TLB_ENTRY_BITS. */
2070 if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
2071 tcg_abort();
2072 #endif
2073
2074 if (TCG_TARGET_REG_BITS == 64) {
2075 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2076 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2077 } else {
2078 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2079 }
2080
2081 tcg_regset_clear(tcg_target_call_clobber_regs);
2082 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2083 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2084 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2085 if (TCG_TARGET_REG_BITS == 64) {
2086 #if !defined(_WIN64)
2087 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2088 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2089 #endif
2090 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2091 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2092 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2093 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2094 }
2095
2096 tcg_regset_clear(s->reserved_regs);
2097 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2098
2099 tcg_add_target_add_op_defs(x86_op_defs);
2100 }
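
/* tcg_regset_set32(set, 0, mask) interprets mask as a bitmap over
   registers 0..31, so 0xffff covers all sixteen x86_64 registers and
   0xff the eight i386 ones.  %eax/%edx/%ecx are call-clobbered in every
   supported ABI; RDI/RSI are added only outside _WIN64, where they are
   argument registers rather than callee-saved.  */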
2101
2102 typedef struct {
2103 uint32_t len __attribute__((aligned((sizeof(void *)))));
2104 uint32_t id;
2105 uint8_t version;
2106 char augmentation[1];
2107 uint8_t code_align;
2108 uint8_t data_align;
2109 uint8_t return_column;
2110 } DebugFrameCIE;
2111
2112 typedef struct {
2113 uint32_t len __attribute__((aligned((sizeof(void *)))));
2114 uint32_t cie_offset;
2115 tcg_target_long func_start __attribute__((packed));
2116 tcg_target_long func_len __attribute__((packed));
2117 uint8_t def_cfa[4];
2118 uint8_t reg_ofs[14];
2119 } DebugFrameFDE;
2120
2121 typedef struct {
2122 DebugFrameCIE cie;
2123 DebugFrameFDE fde;
2124 } DebugFrame;
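
/* The def_cfa and reg_ofs byte arrays below are hand-assembled DWARF
   call frame instructions: 0x0c (DW_CFA_def_cfa) takes a register
   number and a uleb128 offset, while 0x80 | regno (DW_CFA_offset)
   takes a uleb128 factor multiplied by .cie.data_align (-8 or -4
   here).  E.g. "0x86, 2" on x86_64 reads: DWARF reg 6 (%rbp) was saved
   at CFA-16.  The two-byte uleb128 holding FRAME_SIZE is built exactly
   as this illustrative helper would build it (not part of the
   backend):  */
#if 0
static inline void uleb128_2(uint8_t out[2], uint32_t value)
{
    out[0] = (value & 0x7f) | 0x80;  /* low 7 bits, continuation bit set */
    out[1] = value >> 7;             /* remaining bits; must be < 0x80 */
}
#endif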
2125
2126 #if !defined(__ELF__)
2127 /* Host machine without ELF. */
2128 #elif TCG_TARGET_REG_BITS == 64
2129 #define ELF_HOST_MACHINE EM_X86_64
2130 static DebugFrame debug_frame = {
2131 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2132 .cie.id = -1,
2133 .cie.version = 1,
2134 .cie.code_align = 1,
2135 .cie.data_align = 0x78, /* sleb128 -8 */
2136 .cie.return_column = 16,
2137
2138 .fde.len = sizeof(DebugFrameFDE)-4, /* length after .len member */
2139 .fde.def_cfa = {
2140 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2141 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2142 (FRAME_SIZE >> 7)
2143 },
2144 .fde.reg_ofs = {
2145 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2146 /* The following ordering must match tcg_target_callee_save_regs. */
2147 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2148 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2149 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2150 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2151 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2152 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2153 }
2154 };
2155 #else
2156 #define ELF_HOST_MACHINE EM_386
2157 static DebugFrame debug_frame = {
2158 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2159 .cie.id = -1,
2160 .cie.version = 1,
2161 .cie.code_align = 1,
2162 .cie.data_align = 0x7c, /* sleb128 -4 */
2163 .cie.return_column = 8,
2164
2165 .fde.len = sizeof(DebugFrameFDE)-4, /* length after .len member */
2166 .fde.def_cfa = {
2167 12, 4, /* DW_CFA_def_cfa %esp, ... */
2168 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2169 (FRAME_SIZE >> 7)
2170 },
2171 .fde.reg_ofs = {
2172 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2173 /* The following ordering must match tcg_target_callee_save_regs. */
2174 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2175 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2176 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2177 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2178 }
2179 };
2180 #endif
2181
2182 #if defined(ELF_HOST_MACHINE)
2183 void tcg_register_jit(void *buf, size_t buf_size)
2184 {
2185     /* We're expecting a 2 byte uleb128 encoded value; two bytes carry
            2 * 7 = 14 payload bits, hence the check below.  */
2186 assert(FRAME_SIZE >> 14 == 0);
2187
2188 debug_frame.fde.func_start = (tcg_target_long) buf;
2189 debug_frame.fde.func_len = buf_size;
2190
2191 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2192 }
2193 #endif
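
/* tcg_register_jit_int (in tcg.c) wraps the frame description above in
   a minimal in-memory ELF image and registers it through the GDB JIT
   interface, so a debugger attached to QEMU can unwind through
   generated code.  */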