/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "tcg-be-ldst.h"

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32 bit mode uses stack based calling convention (GCC default). */
#endif
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};

/* Constants we accept.  */
#define TCG_CT_CONST_S32 0x100
#define TCG_CT_CONST_U32 0x200
#define TCG_CT_CONST_I32 0x400

/* Registers used with L constraint, which are the first argument
   registers on x86_64, and two random call clobbered registers on
   i386. */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/* The host compiler should supply <cpuid.h> to enable runtime features
   detection, as we're not going to go so far as our own inline assembly.
   If not available, default values will be assumed.  */
#if defined(CONFIG_CPUID_H)
#include <cpuid.h>
#endif

/* For 32-bit, we are going to attempt to determine at runtime whether cmov
   is available.  */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H) && defined(bit_CMOV)
static bool have_cmov;
#else
# define have_cmov 0
#endif

/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
   going to attempt to determine at runtime whether movbe is available.  */
#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
static bool have_movbe;
#else
# define have_movbe 0
#endif

/* We need this symbol in tcg-target.h, and we can't properly conditionalize
   it there.  Therefore we always define the variable.  */
bool have_bmi1;

#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
static bool have_bmi2;
#else
# define have_bmi2 0
#endif

static tcg_insn_unit *tb_ret_addr;

static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        tcg_patch32(code_ptr, value);
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        tcg_patch8(code_ptr, value);
        break;
    default:
        tcg_abort();
    }
}

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
    case_c:
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
    case_r:
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;
    case 'C':
        /* With SHRX et al, we need not use ECX as shift count register. */
        if (have_bmi2) {
            goto case_r;
        } else {
            goto case_c;
        }

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;
    case 'I':
        ct->ct |= TCG_CT_CONST_I32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
        return 1;
    }
    return 0;
}

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT           0x100           /* 0x0f opcode prefix */
#define P_EXT38         0x200           /* 0x0f 0x38 opcode prefix */
#define P_DATA16        0x400           /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       0x800           /* 0x67 opcode prefix */
# define P_REXW         0x1000          /* Set REX.W = 1 */
# define P_REXB_R       0x2000          /* REG field as byte register */
# define P_REXB_RM      0x4000          /* R/M field as byte register */
# define P_GS           0x8000          /* gs segment override */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
# define P_GS           0
#endif
#define P_SIMDF3        0x10000         /* 0xf3 opcode prefix */
#define P_SIMDF2        0x20000         /* 0xf2 opcode prefix */

#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN        (0xf2 | P_EXT38)
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVBE_GyMy  (0xf0 | P_EXT38)
#define OPC_MOVBE_MyGy  (0xf1 | P_EXT38)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3)
#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev    0
#define EXT5_DEC_Ev    1
#define EXT5_CALLN_Ev  2
#define EXT5_JMPN_Ev   4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);
    }
    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        tcg_debug_assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }

    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
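/* Example of the two helpers above: on x86_64,
   tcg_out_modrm(s, OPC_MOVL_GvEv + P_REXW, TCG_REG_RAX, TCG_REG_R12)
   emits 49 8b c4 -- a REX prefix with REX.W and REX.B set (0x49),
   the 0x8b opcode, and a ModRM byte with mod=11, reg=%rax, r/m=%r12,
   i.e. "movq %r12, %rax".  */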
static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
    int tmp;

    if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
        /* Three byte VEX prefix.  */
        tcg_out8(s, 0xc4);

        /* VEX.m-mmmm */
        if (opc & P_EXT38) {
            tmp = 2;
        } else if (opc & P_EXT) {
            tmp = 1;
        } else {
            tcg_abort();
        }
        tmp |= 0x40;                       /* VEX.X */
        tmp |= (r & 8 ? 0 : 0x80);         /* VEX.R */
        tmp |= (rm & 8 ? 0 : 0x20);        /* VEX.B */
        tcg_out8(s, tmp);

        tmp = (opc & P_REXW ? 0x80 : 0);   /* VEX.W */
    } else {
        /* Two byte VEX prefix.  */
        tcg_out8(s, 0xc5);

        tmp = (r & 8 ? 0 : 0x80);          /* VEX.R */
    }
    /* VEX.pp */
    if (opc & P_DATA16) {
        tmp |= 1;                          /* 0x66 */
    } else if (opc & P_SIMDF3) {
        tmp |= 2;                          /* 0xf3 */
    } else if (opc & P_SIMDF2) {
        tmp |= 3;                          /* 0xf2 */
    }
    tmp |= (~v & 15) << 3;                 /* VEX.vvvv */
    tcg_out8(s, tmp);
    tcg_out8(s, opc);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM and INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            tcg_debug_assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
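/* Example: tcg_out_modrm_sib_offset(s, OPC_MOVL_GvEv, TCG_REG_EAX,
   TCG_REG_EBX, TCG_REG_ESI, 2, 0x10) takes the two-byte MODRM+SIB path
   and emits 8b 44 b3 10, i.e. "movl 0x10(%ebx,%esi,4), %eax": mod=01 for
   the 8-bit displacement, r/m=100 escaping to the SIB byte, and SIB
   scale=4, index=%esi, base=%ebx.  */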
/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}
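/* A summary of the cases above, for orientation: tcg_out_movi(s,
   TCG_TYPE_I32, TCG_REG_EAX, 0) emits 33 c0 (xorl %eax,%eax); a value
   that fits in 32 bits unsigned uses the 5-byte b8+r/imm32 form; a
   negative value that fits in 32 bits signed uses the 7-byte REX.W c7
   sign-extending form; everything else falls back to the pc-relative
   lea or the 10-byte movabs.  */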
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    /* Given the strength of x86 memory ordering, we only need care for
       store-load ordering.  Experimentally, "lock orl $0,0(%esp)" is
       faster than "mfence", so don't bother with the sse insn.  */
    if (a0 & TCG_MO_ST_LD) {
        tcg_out8(s, 0xf0);
        tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0);
        tcg_out8(s, 0);
    }
}
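/* The barrier above assembles to the five bytes f0 83 0c 24 00: the lock
   prefix, the 0x83 group-1 opcode, ModRM and SIB bytes selecting (%esp),
   and the $0 immediate.  */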
static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs)
{
    int rexw = 0;
    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
        if (val != (int32_t)val) {
            return false;
        }
        rexw = P_REXW;
    }
    tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
    tcg_out32(s, val);
    return true;
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
{
    int32_t val, val1;

    if (l->has_value) {
        val = tcg_pcrel_diff(s, l->u.value_ptr);
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
        s->code_ptr += 4;
    }
}

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross basic blocks temporaries */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    TCGLabel *label_next = gen_new_label();
    TCGLabel *label_this = arg_label(args[5]);

    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_this, small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    TCGLabel *label_true, *label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_arg(label_true);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_arg(label_over);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    if (have_cmov) {
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
    } else {
        TCGLabel *over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);
    }
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
}
#endif

static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
{
    intptr_t disp = tcg_pcrel_diff(s, dest) - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 0, dest);
}

static void tcg_out_nopn(TCGContext *s, int n)
{
    int i;
    /* Emit 1 or 2 operand size prefixes for the standard one byte nop,
     * "xchg %eax,%eax", forming "xchg %ax,%ax".  All cores accept the
     * duplicate prefix, and all of the interesting recent cores can
     * decode and discard the duplicates in a single cycle.
     */
    tcg_debug_assert(n >= 1);
    for (i = 1; i < n; ++i) {
        tcg_out8(s, 0x66);
    }
    tcg_out8(s, 0x90);
}
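/* E.g. tcg_out_nopn(s, 1) emits 90, and tcg_out_nopn(s, 3) emits 66 66 90. */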
#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO and ADDRHI contain the low and high part of the address.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   Second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   First argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp opc,
                                    tcg_insn_unit **label_ptr, int which)
{
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType tlbtype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0, tlbrexw = 0;
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1 << a_bits) - 1;
    unsigned s_mask = (1 << s_bits) - 1;
    target_ulong tlb_mask;

    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
            trexw = P_REXW;
        }
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            hrexw = P_REXW;
            if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
                tlbtype = TCG_TYPE_I64;
                tlbrexw = P_REXW;
            }
        }
    }

    tcg_out_mov(s, tlbtype, r0, addrlo);
    /* If the required alignment is at least as large as the access, simply
       copy the address and mask.  For lesser alignments, check that we don't
       cross pages for the complete access.  */
    if (a_bits >= s_bits) {
        tcg_out_mov(s, ttype, r1, addrlo);
    } else {
        tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
    }
    tlb_mask = TARGET_PAGE_MASK | a_mask;

    tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);
    tgen_arithi(s, ARITH_AND + tlbrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r0), r1 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);

    /* Prepare for both the fast path add of the tlb addend, and the slow
       path function argument setup.  There are two cases worth note:
       For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
       before the fastpath ADDQ below.  For 64-bit guest and x32 host, MOVQ
       copies the entire guest address for the slow path, while truncation
       for the 32-bit host happens with the fastpath ADDL below.  */
    tcg_out_mov(s, ttype, r1, addrlo);

    /* jne slow_path */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);

        /* jne slow_path */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB Hit.  */

    /* add addend(r0), r1 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
}
RH
1291
1292/*
1293 * Record the context of a call to the out of line helper code for the slow path
1294 * for a load or store, so that we can later generate the correct helper code
1295 */
3972ef6f 1296static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
7352ee54
RH
1297 TCGReg datalo, TCGReg datahi,
1298 TCGReg addrlo, TCGReg addrhi,
3972ef6f 1299 tcg_insn_unit *raddr,
f6bff89d 1300 tcg_insn_unit **label_ptr)
7352ee54
RH
1301{
1302 TCGLabelQemuLdst *label = new_ldst_label(s);
1303
1304 label->is_ld = is_ld;
3972ef6f 1305 label->oi = oi;
7352ee54
RH
1306 label->datalo_reg = datalo;
1307 label->datahi_reg = datahi;
1308 label->addrlo_reg = addrlo;
1309 label->addrhi_reg = addrhi;
7352ee54
RH
1310 label->raddr = raddr;
1311 label->label_ptr[0] = label_ptr[0];
1312 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1313 label->label_ptr[1] = label_ptr[1];
1314 }
1315}
1316
1317/*
1318 * Generate code for the slow path for a load at the end of block
1319 */
1320static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1321{
3972ef6f
RH
1322 TCGMemOpIdx oi = l->oi;
1323 TCGMemOp opc = get_memop(oi);
7352ee54 1324 TCGReg data_reg;
f6bff89d 1325 tcg_insn_unit **label_ptr = &l->label_ptr[0];
7352ee54
RH
1326
1327 /* resolve label address */
5c53bb81 1328 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
7352ee54 1329 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
5c53bb81 1330 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
7352ee54
RH
1331 }
1332
1333 if (TCG_TARGET_REG_BITS == 32) {
1334 int ofs = 0;
1335
1336 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1337 ofs += 4;
1338
1339 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1340 ofs += 4;
1341
1342 if (TARGET_LONG_BITS == 64) {
1343 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1344 ofs += 4;
1345 }
1346
59d7c14e 1347 tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
7352ee54
RH
1348 ofs += 4;
1349
59d7c14e 1350 tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
7352ee54
RH
1351 } else {
1352 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1353 /* The second argument is already loaded with addrlo. */
3972ef6f 1354 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
7352ee54
RH
1355 tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
1356 (uintptr_t)l->raddr);
1357 }
1358
2b7ec66f 1359 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
7352ee54
RH
1360
1361 data_reg = l->datalo_reg;
1362 switch (opc & MO_SSIZE) {
1363 case MO_SB:
1364 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1365 break;
1366 case MO_SW:
1367 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1368 break;
1369#if TCG_TARGET_REG_BITS == 64
1370 case MO_SL:
1371 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1372 break;
1373#endif
1374 case MO_UB:
1375 case MO_UW:
1376 /* Note that the helpers have zero-extended to tcg_target_long. */
1377 case MO_UL:
1378 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1379 break;
1380 case MO_Q:
1381 if (TCG_TARGET_REG_BITS == 64) {
1382 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1383 } else if (data_reg == TCG_REG_EDX) {
1384 /* xchg %edx, %eax */
1385 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1386 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
1387 } else {
1388 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1389 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
1390 }
1391 break;
1392 default:
1393 tcg_abort();
1394 }
1395
1396 /* Jump to the code corresponding to next IR of qemu_st */
f6bff89d 1397 tcg_out_jmp(s, l->raddr);
7352ee54
RH
1398}
1399
1400/*
1401 * Generate code for the slow path for a store at the end of block
1402 */
1403static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1404{
3972ef6f
RH
1405 TCGMemOpIdx oi = l->oi;
1406 TCGMemOp opc = get_memop(oi);
7352ee54 1407 TCGMemOp s_bits = opc & MO_SIZE;
f6bff89d 1408 tcg_insn_unit **label_ptr = &l->label_ptr[0];
7352ee54
RH
1409 TCGReg retaddr;
1410
1411 /* resolve label address */
5c53bb81 1412 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
7352ee54 1413 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
5c53bb81 1414 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
7352ee54
RH
1415 }
1416
1417 if (TCG_TARGET_REG_BITS == 32) {
1418 int ofs = 0;
1419
1420 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1421 ofs += 4;
1422
1423 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1424 ofs += 4;
1425
1426 if (TARGET_LONG_BITS == 64) {
1427 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1428 ofs += 4;
1429 }
1430
1431 tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
1432 ofs += 4;
1433
1434 if (s_bits == MO_64) {
1435 tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
1436 ofs += 4;
1437 }
1438
59d7c14e 1439 tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
7352ee54
RH
1440 ofs += 4;
1441
1442 retaddr = TCG_REG_EAX;
3972ef6f
RH
1443 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1444 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
7352ee54
RH
1445 } else {
1446 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1447 /* The second argument is already loaded with addrlo. */
1448 tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1449 tcg_target_call_iarg_regs[2], l->datalo_reg);
3972ef6f 1450 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
7352ee54
RH
1451
1452 if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
1453 retaddr = tcg_target_call_iarg_regs[4];
1454 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1455 } else {
1456 retaddr = TCG_REG_RAX;
1457 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
0b919667
RH
1458 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
1459 TCG_TARGET_CALL_STACK_OFFSET);
7352ee54
RH
1460 }
1461 }
1462
1463 /* "Tail call" to the helper, with the return address back inline. */
1464 tcg_out_push(s, retaddr);
2b7ec66f 1465 tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
7352ee54 1466}
44b37ace
RH
1467#elif defined(__x86_64__) && defined(__linux__)
1468# include <asm/prctl.h>
1469# include <sys/prctl.h>
1470
1471int arch_prctl(int code, unsigned long addr);
1472
1473static int guest_base_flags;
1474static inline void setup_guest_base_seg(void)
1475{
b76f21a7 1476 if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
44b37ace
RH
1477 guest_base_flags = P_GS;
1478 }
1479}
1480#else
1481# define guest_base_flags 0
1482static inline void setup_guest_base_seg(void) { }
1483#endif /* SOFTMMU */
c896fe29 1484
37c5d0d5 1485static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
ee8ba9e4
RH
1486 TCGReg base, int index, intptr_t ofs,
1487 int seg, TCGMemOp memop)
be5a4eb7 1488{
085bb5bb
AJ
1489 const TCGMemOp real_bswap = memop & MO_BSWAP;
1490 TCGMemOp bswap = real_bswap;
1491 int movop = OPC_MOVL_GvEv;
1492
1493 if (have_movbe && real_bswap) {
1494 bswap = 0;
1495 movop = OPC_MOVBE_GyMy;
1496 }
37c5d0d5
RH
1497
1498 switch (memop & MO_SSIZE) {
1499 case MO_UB:
ee8ba9e4
RH
1500 tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
1501 base, index, 0, ofs);
be5a4eb7 1502 break;
37c5d0d5 1503 case MO_SB:
ee8ba9e4
RH
1504 tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
1505 base, index, 0, ofs);
be5a4eb7 1506 break;
37c5d0d5 1507 case MO_UW:
ee8ba9e4
RH
1508 tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
1509 base, index, 0, ofs);
085bb5bb 1510 if (real_bswap) {
be5a4eb7
RH
1511 tcg_out_rolw_8(s, datalo);
1512 }
1513 break;
37c5d0d5 1514 case MO_SW:
085bb5bb
AJ
1515 if (real_bswap) {
1516 if (have_movbe) {
ee8ba9e4
RH
1517 tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
1518 datalo, base, index, 0, ofs);
085bb5bb 1519 } else {
ee8ba9e4
RH
1520 tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
1521 base, index, 0, ofs);
085bb5bb
AJ
1522 tcg_out_rolw_8(s, datalo);
1523 }
5d8a4f8f
RH
1524 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1525 } else {
ee8ba9e4
RH
1526 tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
1527 datalo, base, index, 0, ofs);
be5a4eb7
RH
1528 }
1529 break;
37c5d0d5 1530 case MO_UL:
ee8ba9e4 1531 tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
be5a4eb7
RH
1532 if (bswap) {
1533 tcg_out_bswap32(s, datalo);
1534 }
1535 break;
5d8a4f8f 1536#if TCG_TARGET_REG_BITS == 64
37c5d0d5 1537 case MO_SL:
085bb5bb 1538 if (real_bswap) {
ee8ba9e4
RH
1539 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1540 base, index, 0, ofs);
085bb5bb
AJ
1541 if (bswap) {
1542 tcg_out_bswap32(s, datalo);
1543 }
5d8a4f8f 1544 tcg_out_ext32s(s, datalo, datalo);
be5a4eb7 1545 } else {
ee8ba9e4
RH
1546 tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
1547 base, index, 0, ofs);
be5a4eb7 1548 }
5d8a4f8f
RH
1549 break;
1550#endif
37c5d0d5 1551 case MO_Q:
5d8a4f8f 1552 if (TCG_TARGET_REG_BITS == 64) {
ee8ba9e4
RH
1553 tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
1554 base, index, 0, ofs);
5d8a4f8f
RH
1555 if (bswap) {
1556 tcg_out_bswap64(s, datalo);
1557 }
1558 } else {
085bb5bb 1559 if (real_bswap) {
5d8a4f8f
RH
1560 int t = datalo;
1561 datalo = datahi;
1562 datahi = t;
1563 }
1564 if (base != datalo) {
ee8ba9e4
RH
1565 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1566 base, index, 0, ofs);
1567 tcg_out_modrm_sib_offset(s, movop + seg, datahi,
1568 base, index, 0, ofs + 4);
5d8a4f8f 1569 } else {
ee8ba9e4
RH
1570 tcg_out_modrm_sib_offset(s, movop + seg, datahi,
1571 base, index, 0, ofs + 4);
1572 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1573 base, index, 0, ofs);
5d8a4f8f
RH
1574 }
1575 if (bswap) {
1576 tcg_out_bswap32(s, datalo);
1577 tcg_out_bswap32(s, datahi);
1578 }
be5a4eb7
RH
1579 }
1580 break;
1581 default:
1582 tcg_abort();
1583 }
1584}
379f6698 1585
c896fe29
FB
1586/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1587 EAX. It will be useful once fixed registers globals are less
1588 common. */
8221a267 1589static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
c896fe29 1590{
7352ee54 1591 TCGReg datalo, datahi, addrlo;
8221a267 1592 TCGReg addrhi __attribute__((unused));
59227d5d 1593 TCGMemOpIdx oi;
8221a267 1594 TCGMemOp opc;
c896fe29 1595#if defined(CONFIG_SOFTMMU)
37c5d0d5 1596 int mem_index;
f6bff89d 1597 tcg_insn_unit *label_ptr[2];
c896fe29
FB
1598#endif
1599
7352ee54 1600 datalo = *args++;
8221a267 1601 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
7352ee54 1602 addrlo = *args++;
8221a267 1603 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
59227d5d
RH
1604 oi = *args++;
1605 opc = get_memop(oi);
c896fe29
FB
1606
1607#if defined(CONFIG_SOFTMMU)
59227d5d 1608 mem_index = get_mmuidx(oi);
1a6dc1e4 1609
8cc580f6 1610 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
8516a044 1611 label_ptr, offsetof(CPUTLBEntry, addr_read));
1a6dc1e4
RH
1612
1613 /* TLB Hit. */
ee8ba9e4 1614 tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
c896fe29 1615
b76f0d8c 1616 /* Record the current context of a load into ldst label */
3972ef6f
RH
1617 add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1618 s->code_ptr, label_ptr);
c896fe29 1619#else
5d8a4f8f 1620 {
b76f21a7 1621 int32_t offset = guest_base;
7352ee54 1622 TCGReg base = addrlo;
ee8ba9e4 1623 int index = -1;
44b37ace
RH
1624 int seg = 0;
1625
ee8ba9e4
RH
1626 /* For a 32-bit guest, the high 32 bits may contain garbage.
1627 We can do this with the ADDR32 prefix if we're not using
1628 a guest base, or when using segmentation. Otherwise we
1629 need to zero-extend manually. */
b76f21a7 1630 if (guest_base == 0 || guest_base_flags) {
44b37ace
RH
1631 seg = guest_base_flags;
1632 offset = 0;
ee8ba9e4
RH
1633 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1634 seg |= P_ADDR32;
1635 }
1636 } else if (TCG_TARGET_REG_BITS == 64) {
1637 if (TARGET_LONG_BITS == 32) {
1638 tcg_out_ext32u(s, TCG_REG_L0, base);
1639 base = TCG_REG_L0;
1640 }
b76f21a7
LV
1641 if (offset != guest_base) {
1642 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
ee8ba9e4
RH
1643 index = TCG_REG_L1;
1644 offset = 0;
1645 }
5d8a4f8f
RH
1646 }

        tcg_out_qemu_ld_direct(s, datalo, datahi,
                               base, index, offset, seg, opc);
    }
#endif
}

static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
                                   TCGMemOp memop)
{
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that TCG_REG_L0 is definitely free here.  */
    const TCGReg scratch = TCG_REG_L0;
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_EvGv;

    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_MyGy;
    }
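    /* MOVBE folds the byte swap into the store itself; when it is
       available, the explicit swaps (and hence the scratch register)
       in the per-size cases below are skipped. */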

    switch (memop & MO_SIZE) {
    case MO_8:
        /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
           Use the scratch register if necessary.  */
        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
                             datalo, base, ofs);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
        break;
    case MO_32:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
        break;
    case MO_64:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
        } else {
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
            tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    tcg_insn_unit *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    mem_index = get_mmuidx(oi);

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a store into ldst label */
    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else
    {
        int32_t offset = guest_base;
        TCGReg base = addrlo;
        int seg = 0;

        /* See comment in tcg_out_qemu_ld re zero-extension of addrlo.  */
        if (guest_base == 0 || guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
                seg |= P_ADDR32;
            }
        } else if (TCG_TARGET_REG_BITS == 64) {
            /* ??? Note that we can't use the same SIB addressing scheme
               as for loads, since we require L0 free for bswap.  */
            if (offset != guest_base) {
                if (TARGET_LONG_BITS == 32) {
                    tcg_out_ext32u(s, TCG_REG_L0, base);
                    base = TCG_REG_L0;
                }
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
                base = TCG_REG_L1;
                offset = 0;
            } else if (TARGET_LONG_BITS == 32) {
                tcg_out_ext32u(s, TCG_REG_L1, base);
                base = TCG_REG_L1;
            }
        }

        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
    }
#endif
}

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, vexop, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */ \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif
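/* For example, on a 64-bit host OP_32_64(add) expands to
       case INDEX_op_add_i64: rexw = P_REXW; ...
       case INDEX_op_add_i32:
   so a single case body handles both operand widths, with rexw
   selecting the REX.W prefix for the 64-bit form. */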

    switch(opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_insn_offset) {
            /* direct jump method */
            int gap;
            /* jump displacement must be aligned for atomic patching;
             * see if we need to add extra nops before jump
             */
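            /* s->code_ptr + 1 is where the 32-bit displacement will live
               once the one-byte JMP opcode is emitted; gap == 1 means
               that slot is already 4-byte aligned and needs no padding. */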
            gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4));
            if (gap != 1) {
                tcg_out_nopn(s, gap - 1);
            }
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (intptr_t)(s->tb_jmp_target_addr + args[0]));
        }
        s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
                                 0, args[1], args[2]);
            tcg_out8(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                                 args[0], args[1], args[2]);
        }
        break;
    OP_32_64(st16):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
                                 0, args[1], args[2]);
            tcg_out16(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                                 args[0], args[1], args[2]);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        }
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
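        /* E.g. add_i32 dest, a1, a2 becomes "lea (%a1, %a2), %dest", and
           with a constant a2 it becomes "lea c3(%a1), %dest"; in both
           forms the inputs are left intact. */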
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(andc):
        if (const_args[2]) {
            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
                        args[0], args[1]);
            tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
        } else {
            tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        vexop = OPC_SHLX;
        goto gen_shift_maybe_vex;
    OP_32_64(shr):
        c = SHIFT_SHR;
        vexop = OPC_SHRX;
        goto gen_shift_maybe_vex;
    OP_32_64(sar):
        c = SHIFT_SAR;
        vexop = OPC_SARX;
        goto gen_shift_maybe_vex;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift_maybe_vex:
        if (have_bmi2 && !const_args[2]) {
            tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
            break;
        }
        /* FALLTHRU */
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         arg_label(args[3]), 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i32:
        tcg_out_movcond32(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);
        break;

    OP_32_64(mulu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
        break;
    OP_32_64(muls2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
        break;
    OP_32_64(add2):
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
        }
        break;
    OP_32_64(sub2):
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
        }
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
                                 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        }
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         arg_label(args[3]), 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i64:
        tcg_out_movcond64(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    OP_32_64(deposit):
        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
                          args[2], args[0]);
        } else if (args[3] == 8 && args[4] == 8) {
            /* load bits 8..15 */
            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
        } else if (args[3] == 0 && args[4] == 16) {
            /* load bits 0..15 */
            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
        } else {
            tcg_abort();
        }
        break;
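        /* The args[0] + 4 above selects the high-byte register: without
           a REX prefix, register encodings 4..7 name %ah/%ch/%dh/%bh,
           which is also why the "Q" constraint limits these operands to
           %eax..%ebx. */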

    case INDEX_op_mb:
        tcg_out_mb(s, args[0]);
        break;
    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_movi_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        tcg_abort();
    }

#undef OP_32_64
}

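/* A rough legend for the constraint letters used below, as parsed
   earlier in this file: "r" any register; "q" a byte-addressable
   register; "Q" a register with an addressable second byte (%ah etc.);
   "a"/"c"/"d" fixed %eax/%ecx/%edx; "C" the shift count, i.e. %ecx
   unless BMI2 SHLX/SHRX/SARX allow any register; "L" a qemu_ld/st
   operand that must avoid the scratch regs L0/L1; "i" any immediate;
   "e" a sign-extended and "Z" a zero-extended 32-bit immediate;
   "I" an immediate valid for a 32-bit operation; "0"/"1" match the
   like-numbered output operand. */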
static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_br, { } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "qi", "r" } },
    { INDEX_op_st16_i32, { "ri", "r" } },
    { INDEX_op_st_i32, { "ri", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },
    { INDEX_op_andc_i32, { "r", "r", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "Ci" } },
    { INDEX_op_shr_i32, { "r", "0", "Ci" } },
    { INDEX_op_sar_i32, { "r", "0", "Ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },

    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },

    { INDEX_op_mb, { } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "ri", "r" } },
    { INDEX_op_st16_i64, { "ri", "r" } },
    { INDEX_op_st32_i64, { "ri", "r" } },
    { INDEX_op_st_i64, { "re", "r" } },

    { INDEX_op_add_i64, { "r", "r", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },
    { INDEX_op_andc_i64, { "r", "r", "rI" } },

    { INDEX_op_shl_i64, { "r", "0", "Ci" } },
    { INDEX_op_shr_i64, { "r", "0", "Ci" } },
    { INDEX_op_sar_i64, { "r", "0", "Ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { INDEX_op_ext_i32_i64, { "r", "r" } },
    { INDEX_op_extu_i32_i64, { "r", "r" } },

    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },

    { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
#endif
    { -1 },
};

static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
#if defined(_WIN64)
    TCG_REG_RDI,
    TCG_REG_RSI,
#endif
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14, /* Currently used for the global env. */
    TCG_REG_R15,
#else
    TCG_REG_EBP, /* Currently used for the global env. */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
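/* Worked example, assuming a 64-bit ELF host (six callee-saved
   registers) and the usual TCG_STATIC_CALL_ARGS_SIZE == 128 and
   CPU_TEMP_BUF_NLONGS == 128: PUSH_SIZE = (1 + 6) * 8 = 56 bytes (the
   +1 covers the return address), and FRAME_SIZE rounds 56 + 128 + 1024
   up to 1216 under 16-byte stack alignment. */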

/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, stack_addend;

    /* TB prologue */

    /* Reserve some stack space, also for TCG temps.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

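    /* On i386, incoming arguments live on the caller's stack: after the
       pushes above, arg 1 (env) sits at (N + 1) * 4 from %esp, where N
       is the number of callee-saved registers; once stack_addend has
       been subtracted as well, arg 2 (the tb pointer) is one word
       higher, at (N + 2) * 4 + stack_addend. */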
#if TCG_TARGET_REG_BITS == 32
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
                         + stack_addend);
#else
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
#endif

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);

#if !defined(CONFIG_SOFTMMU)
    /* Try to set up a segment register to point to guest_base.  */
    if (guest_base) {
        setup_guest_base_seg();
    }
#endif
}

static void tcg_target_init(TCGContext *s)
{
#ifdef CONFIG_CPUID_H
    unsigned a, b, c, d;
    int max = __get_cpuid_max(0, 0);

    if (max >= 1) {
        __cpuid(1, a, b, c, d);
#ifndef have_cmov
        /* For 32-bit, 99% certainty that we're running on hardware that
           supports cmov, but we still need to check.  In case cmov is not
           available, we'll use a small forward branch.  */
        have_cmov = (d & bit_CMOV) != 0;
#endif
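        /* have_cmov (and the similar flags below) may instead be a
           compile-time macro, e.g. constant 1 when the build itself
           targets x86-64 where CMOV is architectural; the #ifndef then
           skips the runtime probe entirely. */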
#ifndef have_movbe
        /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
           need to probe for it.  */
        have_movbe = (c & bit_MOVBE) != 0;
#endif
    }

    if (max >= 7) {
        /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs.  */
        __cpuid_count(7, 0, a, b, c, d);
#ifdef bit_BMI
        have_bmi1 = (b & bit_BMI) != 0;
#endif
#ifndef have_bmi2
        have_bmi2 = (b & bit_BMI2) != 0;
#endif
    }
#endif

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
#if !defined(_WIN64)
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
#endif
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    tcg_add_target_add_op_defs(x86_op_defs);
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[14];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
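/* Each uleb128 byte carries 7 value bits plus a continuation flag, so
   two bytes cover values below 1 << 14; fde_def_cfa below always emits
   FRAME_SIZE as exactly two uleb128 bytes, hence the assertion. */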

#if !defined(__ELF__)
    /* Host machine without ELF.  */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = 16,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
#else
#define ELF_HOST_MACHINE EM_386
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
    .h.cie.return_column = 8,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif

#if defined(ELF_HOST_MACHINE)
void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif