/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "tcg-be-ldst.h"

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32 bit mode uses stack based calling convention (GCC default). */
#endif
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};

/* Constants we accept. */
#define TCG_CT_CONST_S32 0x100
#define TCG_CT_CONST_U32 0x200
#define TCG_CT_CONST_I32 0x400

/* Registers used with L constraint, which are the first argument
   registers on x86_64, and two random call clobbered registers on
   i386. */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/* The host compiler should supply <cpuid.h> to enable runtime features
   detection, as we're not going to go so far as our own inline assembly.
   If not available, default values will be assumed. */
#if defined(CONFIG_CPUID_H)
#include <cpuid.h>
#endif

/* For 32-bit, we are going to attempt to determine at runtime whether cmov
   is available. */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H) && defined(bit_CMOV)
static bool have_cmov;
#else
# define have_cmov 0
#endif

/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
   going to attempt to determine at runtime whether movbe is available. */
#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
static bool have_movbe;
#else
# define have_movbe 0
#endif

/* We need this symbol in tcg-target.h, and we can't properly conditionalize
   it there.  Therefore we always define the variable. */
bool have_bmi1;

#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
static bool have_bmi2;
#else
# define have_bmi2 0
#endif

static tcg_insn_unit *tb_ret_addr;

static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        tcg_patch32(code_ptr, value);
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        tcg_patch8(code_ptr, value);
        break;
    default:
        tcg_abort();
    }
}

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
    case_c:
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
    case_r:
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;
    case 'C':
        /* With SHRX et al, we need not use ECX as shift count register. */
        if (have_bmi2) {
            goto case_r;
        } else {
            goto case_c;
        }

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;
    case 'I':
        ct->ct |= TCG_CT_CONST_I32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
        return 1;
    }
    return 0;
}

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT           0x100           /* 0x0f opcode prefix */
#define P_EXT38         0x200           /* 0x0f 0x38 opcode prefix */
#define P_DATA16        0x400           /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       0x800           /* 0x67 opcode prefix */
# define P_REXW         0x1000          /* Set REX.W = 1 */
# define P_REXB_R       0x2000          /* REG field as byte register */
# define P_REXB_RM      0x4000          /* R/M field as byte register */
# define P_GS           0x8000          /* gs segment override */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
# define P_GS           0
#endif
#define P_SIMDF3        0x10000         /* 0xf3 opcode prefix */
#define P_SIMDF2        0x20000         /* 0xf2 opcode prefix */

#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN        (0xf2 | P_EXT38)
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVBE_GyMy  (0xf0 | P_EXT38)
#define OPC_MOVBE_MyGy  (0xf1 | P_EXT38)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3)
#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH. */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3. */
#define EXT3_NOT  2
#define EXT3_NEG  3
#define EXT3_MUL  4
#define EXT3_IMUL 5
#define EXT3_DIV  6
#define EXT3_IDIV 7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5. */
#define EXT5_INC_Ev   0
#define EXT5_DEC_Ev   1
#define EXT5_CALLN_Ev 2
#define EXT5_JMPN_Ev  4

/* Condition codes to be added to OPC_JCC_{long,short}. */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);
    }
    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation. */
        tcg_debug_assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output. */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }

    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them. */
#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
#endif
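
/* Worked example of the prefix logic above (operands chosen purely for
   illustration): tcg_out_modrm(s, OPC_MOVL_GvEv + P_REXW, TCG_REG_RAX,
   TCG_REG_R12) computes rex = 0x08 (REX.W) | 0x01 (REX.B, since r12 has
   bit 3 set), so the bytes emitted are 0x49 0x8b followed by the ModRM
   byte 0xc4, i.e. "mov %r12, %rax".  A P_EXT opcode such as OPC_MOVZBL
   (0xb6 | P_EXT) additionally gets the 0x0f escape byte first. */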

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}

static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
    int tmp;

    if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
        /* Three byte VEX prefix. */
        tcg_out8(s, 0xc4);

        /* VEX.m-mmmm */
        if (opc & P_EXT38) {
            tmp = 2;
        } else if (opc & P_EXT) {
            tmp = 1;
        } else {
            tcg_abort();
        }
        tmp |= 0x40;                       /* VEX.X */
        tmp |= (r & 8 ? 0 : 0x80);         /* VEX.R */
        tmp |= (rm & 8 ? 0 : 0x20);        /* VEX.B */
        tcg_out8(s, tmp);

        tmp = (opc & P_REXW ? 0x80 : 0);   /* VEX.W */
    } else {
        /* Two byte VEX prefix. */
        tcg_out8(s, 0xc5);

        tmp = (r & 8 ? 0 : 0x80);          /* VEX.R */
    }
    /* VEX.pp */
    if (opc & P_DATA16) {
        tmp |= 1;                          /* 0x66 */
    } else if (opc & P_SIMDF3) {
        tmp |= 2;                          /* 0xf3 */
    } else if (opc & P_SIMDF2) {
        tmp |= 3;                          /* 0xf2 */
    }
    tmp |= (~v & 15) << 3;                 /* VEX.vvvv */
    tcg_out8(s, tmp);
    tcg_out8(s, opc);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
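
/* For illustration (arbitrary operands, not taken from a real call site):
   a 32-bit "andn %ecx, %ebx, %eax" goes through the three byte VEX path
   because OPC_ANDN carries P_EXT38, giving 0xc4, then 0xe2 (R/X/B clear,
   m-mmmm = 2), then 0x60 (W = 0, vvvv = ~3, pp = 0), then the 0xf2
   opcode and the ModRM byte 0xc1. */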

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM and INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction. */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding. */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing. */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable. */
            tcg_abort();
        } else {
            /* Absolute address. */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing. */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form. */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format. */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format. */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index. */
        if (index < 0) {
            index = 4;
        } else {
            tcg_debug_assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
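
/* Example (illustrative): tcg_out_modrm_offset(s, OPC_MOVL_GvEv,
   TCG_REG_EAX, TCG_REG_ESP, 12) cannot use the single byte form, since
   rm = %esp is the SIB escape.  It emits mod = 0x40 (disp8) with ModRM
   0x44, SIB 0x24 (no index, base = %esp) and the displacement 0x0c:
   8b 44 24 0c, i.e. "mov 12(%esp), %eax". */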

/* A simplification of the above with no index or shift. */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi. */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW. */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    /* Try a 7 byte pc-relative lea before the 10 byte movq. */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}
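
/* The cases above, roughly smallest encoding first: xor (2-3 bytes) for
   zero, mov $imm32 (5-6 bytes) for values that zero extend, the sign
   extended REX.W c7 form (7 bytes), a 7 byte rip-relative lea when the
   constant is within +/-2GB of the code buffer, and only then the full
   10 byte movabs. */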

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs)
{
    int rexw = 0;
    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
        if (val != (int32_t)val) {
            return false;
        }
        rexw = P_REXW;
    }
    tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
    tcg_out32(s, val);
    return true;
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16. */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends. */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals. */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding. */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation. */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
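
/* Examples of the special cases above (illustrative): on i386,
   "add $1, %ebx" shrinks to the one byte inc (0x43); AND with 0xff
   becomes a movzbl-style zero extension; and on x86_64, AND with
   0xffffffff degenerates to a plain 32-bit mov, which already clears
   the high half of the register. */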

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

/* Use SMALL != 0 to force a short forward branch. */
static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
{
    int32_t val, val1;

    if (l->has_value) {
        val = tcg_pcrel_diff(s, l->u.value_ptr);
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
        s->code_ptr += 4;
    }
}
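
/* Branch sizes used above: the short forms are 2 bytes (opcode + rel8,
   hence the "val - 2" bias), JMP rel32 is 5 bytes, and the two byte
   0x0f Jcc rel32 form is 6, matching the "val - 5" and "val - 6"
   adjustments. */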

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross basic blocks temporaries */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    TCGLabel *label_next = gen_new_label();
    TCGLabel *label_this = arg_label(args[5]);

    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_this, small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
#endif
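
/* The double-word compares above follow the usual decomposition: e.g. a
   signed LT first compares the high parts signed, then, only when they
   are equal, falls through to an *unsigned* compare of the low parts;
   EQ and NE simply test both halves. */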

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    TCGLabel *label_true, *label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky. */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_arg(label_true);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code. */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_arg(label_over);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    if (have_cmov) {
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
    } else {
        TCGLabel *over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);
    }
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
}
#endif

static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
{
    intptr_t disp = tcg_pcrel_diff(s, dest) - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 0, dest);
}

static void tcg_out_nopn(TCGContext *s, int n)
{
    int i;
    /* Emit 1 or 2 operand size prefixes for the standard one byte nop,
     * "xchg %eax,%eax", forming "xchg %ax,%ax".  All cores accept the
     * duplicate prefix, and all of the interesting recent cores can
     * decode and discard the duplicates in a single cycle.
     */
    tcg_debug_assert(n >= 1);
    for (i = 1; i < n; ++i) {
        tcg_out8(s, 0x66);
    }
    tcg_out8(s, 0x90);
}
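
/* E.g. tcg_out_nopn(s, 3) emits 66 66 90: operand size prefixes ahead
   of the canonical one byte nop, without changing its effect. */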

#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO and ADDRHI contain the low and high part of the address.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   Second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   First argument register is clobbered. */

static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp opc,
                                    tcg_insn_unit **label_ptr, int which)
{
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType tlbtype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0, tlbrexw = 0;
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1 << a_bits) - 1;
    unsigned s_mask = (1 << s_bits) - 1;
    target_ulong tlb_mask;

    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
            trexw = P_REXW;
        }
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            hrexw = P_REXW;
            if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
                tlbtype = TCG_TYPE_I64;
                tlbrexw = P_REXW;
            }
        }
    }

    tcg_out_mov(s, tlbtype, r0, addrlo);
    /* If the required alignment is at least as large as the access, simply
       copy the address and mask.  For lesser alignments, check that we don't
       cross pages for the complete access. */
    if (a_bits >= s_bits) {
        tcg_out_mov(s, ttype, r1, addrlo);
    } else {
        tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
    }
    tlb_mask = TARGET_PAGE_MASK | a_mask;

    tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);
    tgen_arithi(s, ARITH_AND + tlbrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r0), r1 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);

    /* Prepare for both the fast path add of the tlb addend, and the slow
       path function argument setup.  There are two cases worth note:
       For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
       before the fastpath ADDQ below.  For 64-bit guest and x32 host, MOVQ
       copies the entire guest address for the slow path, while truncation
       for the 32-bit host happens with the fastpath ADDL below. */
    tcg_out_mov(s, ttype, r1, addrlo);

    /* jne slow_path */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);

        /* jne slow_path */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB Hit. */

    /* add addend(r0), r1 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
}
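
/* Worked example of the alignment check above (illustrative numbers):
   for a 4-byte load that only needs 2-byte alignment, a_mask = 1 and
   s_mask = 3, so r1 = addr + 2 and tlb_mask keeps the low address bit.
   A misaligned address leaves that bit set and fails the compare, while
   an access straddling a page boundary is biased by the +2 into the
   next page and also fails; both cases fall into the slow path. */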

/*
 * Record the context of a call to the out of line helper code for the slow path
 * for a load or store, so that we can later generate the correct helper code
 */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGReg datalo, TCGReg datahi,
                                TCGReg addrlo, TCGReg addrhi,
                                tcg_insn_unit *raddr,
                                tcg_insn_unit **label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr[0];
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];
    }
}

/*
 * Generate code for the slow path for a load at the end of block
 */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGReg data_reg;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];

    /* resolve label address */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo. */
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);
    }

    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);

    data_reg = l->datalo_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case MO_SW:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case MO_UB:
    case MO_UW:
        /* Note that the helpers have zero-extended to tcg_target_long. */
    case MO_UL:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* Jump to the code corresponding to next IR of qemu_ld */
    tcg_out_jmp(s, l->raddr);
}

/*
 * Generate code for the slow path for a store at the end of block
 */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp s_bits = opc & MO_SIZE;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];
    TCGReg retaddr;

    /* resolve label address */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (s_bits == MO_64) {
            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
        ofs += 4;

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo. */
        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);

        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        } else {
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
                       TCG_TARGET_CALL_STACK_OFFSET);
        }
    }

    /* "Tail call" to the helper, with the return address back inline. */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
}
#elif defined(__x86_64__) && defined(__linux__)
# include <asm/prctl.h>
# include <sys/prctl.h>

int arch_prctl(int code, unsigned long addr);

static int guest_base_flags;
static inline void setup_guest_base_seg(void)
{
    if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
        guest_base_flags = P_GS;
    }
}
#else
# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
#endif /* SOFTMMU */
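
/* Thus on x86_64 Linux user-only builds, guest memory accesses can be
   issued %gs-relative, with ARCH_SET_GS pointing the segment base at
   guest_base; this avoids materializing the base in a register.
   Everywhere else guest_base_flags stays 0 and any base is added
   explicitly. */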

static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, int index, intptr_t ofs,
                                   int seg, TCGMemOp memop)
{
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_GvEv;

    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_GyMy;
    }

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_SB:
        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_UW:
        tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                 base, index, 0, ofs);
        if (real_bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case MO_SW:
        if (real_bswap) {
            if (have_movbe) {
                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
                                         datalo, base, index, 0, ofs);
            } else {
                tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_rolw_8(s, datalo);
            }
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
                                     datalo, base, index, 0, ofs);
        }
        break;
    case MO_UL:
        tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        if (real_bswap) {
            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap32(s, datalo);
            }
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
                                     base, index, 0, ofs);
        }
        break;
#endif
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
            } else {
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}
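
/* When MOVBE is available (have_movbe), a byte-swapping load above is a
   single instruction, 0f 38 f0 /r; otherwise the fallback is a plain
   load followed by bswap, or rolw $8 for 16-bit data. */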
379f6698 1573
c896fe29
FB
1574/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1575 EAX. It will be useful once fixed registers globals are less
1576 common. */
8221a267 1577static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
c896fe29 1578{
7352ee54 1579 TCGReg datalo, datahi, addrlo;
8221a267 1580 TCGReg addrhi __attribute__((unused));
59227d5d 1581 TCGMemOpIdx oi;
8221a267 1582 TCGMemOp opc;
c896fe29 1583#if defined(CONFIG_SOFTMMU)
37c5d0d5 1584 int mem_index;
f6bff89d 1585 tcg_insn_unit *label_ptr[2];
c896fe29
FB
1586#endif
1587
7352ee54 1588 datalo = *args++;
8221a267 1589 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
7352ee54 1590 addrlo = *args++;
8221a267 1591 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
59227d5d
RH
1592 oi = *args++;
1593 opc = get_memop(oi);
c896fe29
FB
1594
1595#if defined(CONFIG_SOFTMMU)
59227d5d 1596 mem_index = get_mmuidx(oi);
1a6dc1e4 1597
8cc580f6 1598 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
8516a044 1599 label_ptr, offsetof(CPUTLBEntry, addr_read));
1a6dc1e4
RH
1600
1601 /* TLB Hit. */
ee8ba9e4 1602 tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
c896fe29 1603
b76f0d8c 1604 /* Record the current context of a load into ldst label */
3972ef6f
RH
1605 add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1606 s->code_ptr, label_ptr);
c896fe29 1607#else
5d8a4f8f 1608 {
b76f21a7 1609 int32_t offset = guest_base;
7352ee54 1610 TCGReg base = addrlo;
ee8ba9e4 1611 int index = -1;
44b37ace
RH
1612 int seg = 0;
1613
ee8ba9e4
RH
1614 /* For a 32-bit guest, the high 32 bits may contain garbage.
1615 We can do this with the ADDR32 prefix if we're not using
1616 a guest base, or when using segmentation. Otherwise we
1617 need to zero-extend manually. */
b76f21a7 1618 if (guest_base == 0 || guest_base_flags) {
44b37ace
RH
1619 seg = guest_base_flags;
1620 offset = 0;
ee8ba9e4
RH
1621 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1622 seg |= P_ADDR32;
1623 }
1624 } else if (TCG_TARGET_REG_BITS == 64) {
1625 if (TARGET_LONG_BITS == 32) {
1626 tcg_out_ext32u(s, TCG_REG_L0, base);
1627 base = TCG_REG_L0;
1628 }
b76f21a7
LV
1629 if (offset != guest_base) {
1630 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
ee8ba9e4
RH
1631 index = TCG_REG_L1;
1632 offset = 0;
1633 }
5d8a4f8f
RH
1634 }
1635
ee8ba9e4
RH
1636 tcg_out_qemu_ld_direct(s, datalo, datahi,
1637 base, index, offset, seg, opc);
5d8a4f8f 1638 }
c896fe29 1639#endif
be5a4eb7 1640}
c896fe29 1641
37c5d0d5
RH
1642static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1643 TCGReg base, intptr_t ofs, int seg,
1644 TCGMemOp memop)
be5a4eb7 1645{
be5a4eb7
RH
1646 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1647 we could perform the bswap twice to restore the original value
1648 instead of moving to the scratch. But as it is, the L constraint
166792f7 1649 means that TCG_REG_L0 is definitely free here. */
37c5d0d5 1650 const TCGReg scratch = TCG_REG_L0;
085bb5bb
AJ
1651 const TCGMemOp real_bswap = memop & MO_BSWAP;
1652 TCGMemOp bswap = real_bswap;
1653 int movop = OPC_MOVL_EvGv;
1654
1655 if (have_movbe && real_bswap) {
1656 bswap = 0;
1657 movop = OPC_MOVBE_MyGy;
1658 }

    switch (memop & MO_SIZE) {
    case MO_8:
        /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
           Use the scratch register if necessary.  */
        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
                             datalo, base, ofs);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
        break;
    case MO_32:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
        break;
    case MO_64:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs + 4);
        } else {
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
            tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    tcg_insn_unit *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    mem_index = get_mmuidx(oi);

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a store into the ldst label.  */
    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else
    {
        int32_t offset = guest_base;
        TCGReg base = addrlo;
        int seg = 0;

        /* See comment in tcg_out_qemu_ld re zero-extension of addrlo.  */
        if (guest_base == 0 || guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
                seg |= P_ADDR32;
            }
        } else if (TCG_TARGET_REG_BITS == 64) {
            /* ??? Note that we can't use the same SIB addressing scheme
               as for loads, since we require L0 free for bswap.  */
            if (offset != guest_base) {
                if (TARGET_LONG_BITS == 32) {
                    tcg_out_ext32u(s, TCG_REG_L0, base);
                    base = TCG_REG_L0;
                }
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
                base = TCG_REG_L1;
                offset = 0;
            } else if (TARGET_LONG_BITS == 32) {
                tcg_out_ext32u(s, TCG_REG_L1, base);
                base = TCG_REG_L1;
            }
        }

        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
    }
#endif
}

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, vexop, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */ \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif
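
/* For example, OP_32_64(add) expands on a 64-bit host to
       case INDEX_op_add_i64: rexw = P_REXW;  (fall through)
       case INDEX_op_add_i32:
   so a single switch arm handles both operand widths, with REX.W
   upgrading the encoding to 64 bits.  */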

    switch (opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_insn_offset) {
            /* Direct jump method.  */
            int gap;
            /* The jump displacement must be aligned for atomic patching;
             * see if we need to add extra nops before the jump.
             */
            gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4));
            if (gap != 1) {
                tcg_out_nopn(s, gap - 1);
            }
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
            tcg_out32(s, 0);
        } else {
            /* Indirect jump method.  */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (intptr_t)(s->tb_jmp_target_addr + args[0]));
        }
        s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
        break;
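
        /* With the direct method, the 32-bit displacement written by
           tcg_out32(s, 0) above is patched in place later, when TBs
           are chained; keeping the displacement 4-byte aligned makes
           that patch a single atomic store.  */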
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
                                 0, args[1], args[2]);
            tcg_out8(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                                 args[0], args[1], args[2]);
        }
        break;
    OP_32_64(st16):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
                                 0, args[1], args[2]);
            tcg_out16(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                                 args[0], args[1], args[2]);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        }
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
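
        /* E.g. "add_i32 d, s1, s2" with three distinct registers becomes
           a single "lea (%s1,%s2), %d" rather than a mov/add pair, and a
           constant addend folds into the LEA displacement (c3 above).  */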
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(andc):
        if (const_args[2]) {
            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
                        args[0], args[1]);
            tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
        } else {
            tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        vexop = OPC_SHLX;
        goto gen_shift_maybe_vex;
    OP_32_64(shr):
        c = SHIFT_SHR;
        vexop = OPC_SHRX;
        goto gen_shift_maybe_vex;
    OP_32_64(sar):
        c = SHIFT_SAR;
        vexop = OPC_SARX;
        goto gen_shift_maybe_vex;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift_maybe_vex:
        if (have_bmi2 && !const_args[2]) {
            tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
            break;
        }
        /* FALLTHRU */
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;
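
        /* The BMI2 SHLX/SHRX/SARX forms are non-destructive and take
           the shift count in an arbitrary register, so when BMI2 is
           present a variable shift avoids both the fixed CL register
           and the extra mov it would otherwise require.  */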

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         arg_label(args[3]), 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i32:
        tcg_out_movcond32(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);
        break;

    OP_32_64(mulu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
        break;
    OP_32_64(muls2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
        break;
    OP_32_64(add2):
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
        }
        break;
    OP_32_64(sub2):
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
        }
        break;
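
        /* The trailing "1" passed to tgen_arithi marks the carry flag
           as live: the low-half ADD/SUB must actually produce CF for
           the following ADC/SBB, which rules out transformations such
           as INC/DEC that leave CF untouched.  */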

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
                                 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        }
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         arg_label(args[3]), 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i64:
        tcg_out_movcond64(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    OP_32_64(deposit):
        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
                          args[2], args[0]);
        } else if (args[3] == 8 && args[4] == 8) {
            /* load bits 8..15 */
            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
        } else if (args[3] == 0 && args[4] == 16) {
            /* load bits 0..15 */
            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
        } else {
            tcg_abort();
        }
        break;
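
        /* These three (ofs, len) pairs correspond exactly to the x86
           sub-registers: an 8-bit write at bit 0 targets the %al-style
           byte register, an 8-bit write at bit 8 targets the %ah-style
           high-byte register (encoded as register number + 4), and a
           16-bit write at bit 0 targets the %ax-style word register
           via the 0x66 operand-size prefix (P_DATA16).  */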

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_movi_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        tcg_abort();
    }

#undef OP_32_64
}

static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_br, { } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "qi", "r" } },
    { INDEX_op_st16_i32, { "ri", "r" } },
    { INDEX_op_st_i32, { "ri", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },
    { INDEX_op_andc_i32, { "r", "r", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "Ci" } },
    { INDEX_op_shr_i32, { "r", "0", "Ci" } },
    { INDEX_op_sar_i32, { "r", "0", "Ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },

    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "ri", "r" } },
    { INDEX_op_st16_i64, { "ri", "r" } },
    { INDEX_op_st32_i64, { "ri", "r" } },
    { INDEX_op_st_i64, { "re", "r" } },

    { INDEX_op_add_i64, { "r", "r", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },
    { INDEX_op_andc_i64, { "r", "r", "rI" } },

    { INDEX_op_shl_i64, { "r", "0", "Ci" } },
    { INDEX_op_shr_i64, { "r", "0", "Ci" } },
    { INDEX_op_sar_i64, { "r", "0", "Ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { INDEX_op_ext_i32_i64, { "r", "r" } },
    { INDEX_op_extu_i32_i64, { "r", "r" } },

    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },

    { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
#endif
    { -1 },
};
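
/* A rough key to the constraint letters above, as this backend defines
   them: "r" is any register; "q" a register usable as an 8-bit operand
   and "Q" one with an addressable second byte (EAX/EBX/ECX/EDX, as
   deposit requires); "a", "c", "d" pin EAX, ECX, EDX; "L" is the
   restricted set that survives the softmmu helper call; "0"/"1" tie an
   input to the matching output; "i" is any immediate, with "e" and "Z"
   the sign- and zero-extended 32-bit constants (TCG_CT_CONST_S32 /
   TCG_CT_CONST_U32) and "C" the shift-count constraint (ECX, relaxed
   when the BMI2 shifts are available).  */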

static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
#if defined(_WIN64)
    TCG_REG_RDI,
    TCG_REG_RSI,
#endif
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14, /* Currently used for the global env.  */
    TCG_REG_R15,
#else
    TCG_REG_EBP, /* Currently used for the global env.  */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

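/* A worked example (the constants are assumptions, not guaranteed by
   this file): on x86_64 non-Windows there are 6 callee-saved registers,
   so PUSH_SIZE = (1 + 6) * 8 = 56 bytes including the return address.
   Assuming TCG_STATIC_CALL_ARGS_SIZE == 128, CPU_TEMP_BUF_NLONGS == 128
   and a 16-byte TCG_TARGET_STACK_ALIGN, FRAME_SIZE rounds
   56 + 128 + 1024 = 1208 up to 1216.  */
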
/* Generate global QEMU prologue and epilogue code.  */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, stack_addend;

    /* TB prologue */

    /* Reserve some stack space, also for TCG temps.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

#if TCG_TARGET_REG_BITS == 32
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
                         + stack_addend);
#else
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
#endif
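
    /* On 32-bit hosts the two arguments (env, tb pointer) arrive on
       the stack: after pushing N callee-saved registers the return
       address sits at N*4(%esp), env at (N+1)*4 and tb at (N+2)*4,
       which is what the offsets above reproduce (plus stack_addend
       once ESP has been lowered).  On 64-bit hosts they arrive in the
       first two argument registers instead.  */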

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);

#if !defined(CONFIG_SOFTMMU)
    /* Try to set up a segment register to point to guest_base.  */
    if (guest_base) {
        setup_guest_base_seg();
    }
#endif
}

static void tcg_target_init(TCGContext *s)
{
#ifdef CONFIG_CPUID_H
    unsigned a, b, c, d;
    int max = __get_cpuid_max(0, 0);

    if (max >= 1) {
        __cpuid(1, a, b, c, d);
#ifndef have_cmov
        /* For 32-bit, 99% certainty that we're running on hardware that
           supports cmov, but we still need to check.  In case cmov is not
           available, we'll use a small forward branch.  */
        have_cmov = (d & bit_CMOV) != 0;
#endif
#ifndef have_movbe
        /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
           need to probe for it.  */
        have_movbe = (c & bit_MOVBE) != 0;
#endif
    }

    if (max >= 7) {
        /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs.  */
        __cpuid_count(7, 0, a, b, c, d);
#ifdef bit_BMI
        have_bmi1 = (b & bit_BMI) != 0;
#endif
#ifndef have_bmi2
        have_bmi2 = (b & bit_BMI2) != 0;
#endif
    }
#endif
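
    /* The #ifndef guards above allow have_cmov and friends to be
       predefined as compile-time constants when the baseline ISA
       already guarantees the feature (CMOV, for instance, is
       architectural on x86-64), in which case the runtime CPUID
       probe compiles away.  */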

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
#if !defined(_WIN64)
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
#endif
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    tcg_add_target_add_op_defs(x86_op_defs);
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[14];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
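
/* A uleb128 value uses 7 bits per byte, with the high bit set on every
   byte except the last; the two-byte sequence written below as
   "(FRAME_SIZE & 0x7f) | 0x80, (FRAME_SIZE >> 7)" is exactly that
   encoding, and covers precisely the range the build-time assertion
   above permits (values below 1 << 14).  */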
2431
c170cb66
SW
2432#if !defined(__ELF__)
2433 /* Host machine without ELF. */
2434#elif TCG_TARGET_REG_BITS == 64
813da627 2435#define ELF_HOST_MACHINE EM_X86_64
e9a9a5b6
RH
2436static const DebugFrame debug_frame = {
2437 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2438 .h.cie.id = -1,
2439 .h.cie.version = 1,
2440 .h.cie.code_align = 1,
2441 .h.cie.data_align = 0x78, /* sleb128 -8 */
2442 .h.cie.return_column = 16,
813da627 2443
497a22eb 2444 /* Total FDE size does not include the "len" member. */
e9a9a5b6 2445 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
497a22eb
RH
2446
2447 .fde_def_cfa = {
813da627
RH
2448 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2449 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2450 (FRAME_SIZE >> 7)
2451 },
497a22eb 2452 .fde_reg_ofs = {
813da627
RH
2453 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2454 /* The following ordering must match tcg_target_callee_save_regs. */
2455 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2456 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2457 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2458 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2459 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2460 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2461 }
2462};
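
/* In the DW_CFA_offset entries the first byte is 0x80 | regnum (DWARF
   register numbering) and the second a uleb128 factored offset that is
   multiplied by data_align; e.g. "0x86, 2" above records %rbp (DWARF
   register 6) saved at CFA - 16.  */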
#else
#define ELF_HOST_MACHINE EM_386
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
    .h.cie.return_column = 8,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif

#if defined(ELF_HOST_MACHINE)
void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif