]> git.proxmox.com Git - mirror_qemu.git/blame - tcg/i386/tcg-target.inc.c
block/curl: Check protocol prefix
[mirror_qemu.git] / tcg / i386 / tcg-target.inc.c
CommitLineData
c896fe29
FB
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
d4a9eb1f 24
9ecefc84
RH
25#include "tcg-be-ldst.h"
26
#ifdef CONFIG_DEBUG_TCG
/* Printable names for the host registers; this table only exists in
   CONFIG_DEBUG_TCG builds.  It holds sixteen names on 64-bit hosts and
   eight on 32-bit hosts.
   NOTE(review): presumably indexed by host register number -- confirm. */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx",
    "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11",
    "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx",
    "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif
c896fe29 37
d4a9eb1f 38static const int tcg_target_reg_alloc_order[] = {
5d8a4f8f
RH
39#if TCG_TARGET_REG_BITS == 64
40 TCG_REG_RBP,
41 TCG_REG_RBX,
42 TCG_REG_R12,
43 TCG_REG_R13,
44 TCG_REG_R14,
45 TCG_REG_R15,
46 TCG_REG_R10,
47 TCG_REG_R11,
48 TCG_REG_R9,
49 TCG_REG_R8,
50 TCG_REG_RCX,
51 TCG_REG_RDX,
52 TCG_REG_RSI,
53 TCG_REG_RDI,
54 TCG_REG_RAX,
55#else
c896fe29
FB
56 TCG_REG_EBX,
57 TCG_REG_ESI,
58 TCG_REG_EDI,
59 TCG_REG_EBP,
6648e296
RH
60 TCG_REG_ECX,
61 TCG_REG_EDX,
62 TCG_REG_EAX,
5d8a4f8f 63#endif
c896fe29
FB
64};
65
5d8a4f8f
RH
/* Registers that carry integer call arguments.  On 64-bit hosts the list
   depends on whether _WIN64 is defined; both variants end with R8 and R9.
   On 32-bit hosts the list is empty. */
static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
# if defined(_WIN64)
    TCG_REG_RCX, TCG_REG_RDX,
# else
    TCG_REG_RDI, TCG_REG_RSI, TCG_REG_RDX, TCG_REG_RCX,
# endif
    TCG_REG_R8, TCG_REG_R9,
#else
    /* 32 bit mode uses stack based calling convention (GCC default). */
#endif
};
83
68af23af 84static const int tcg_target_call_oarg_regs[] = {
5d8a4f8f 85 TCG_REG_EAX,
68af23af 86#if TCG_TARGET_REG_BITS == 32
5d8a4f8f 87 TCG_REG_EDX
68af23af 88#endif
5d8a4f8f 89};
c896fe29 90
a1b29c9a
RH
/* Backend-specific constant constraint flags (one bit each). */
#define TCG_CT_CONST_S32 (1 << 8)   /* value equals its int32_t truncation */
#define TCG_CT_CONST_U32 (1 << 9)   /* value equals its uint32_t truncation */
#define TCG_CT_CONST_I32 (1 << 10)  /* ~value equals its int32_t truncation */
#define TCG_CT_CONST_WSZ (1 << 11)  /* value is the operand width, 32 or 64 */
a1b29c9a 96
b18212c6
SW
/* Scratch registers excluded by the 'L' constraint.  32-bit hosts use
   EAX and EDX; 64-bit hosts use the first two call argument registers. */
#if TCG_TARGET_REG_BITS != 64
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#else
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#endif
107
085bb5bb
AJ
108/* The host compiler should supply <cpuid.h> to enable runtime features
109 detection, as we're not going to go so far as our own inline assembly.
110 If not available, default values will be assumed. */
111#if defined(CONFIG_CPUID_H)
112#include <cpuid.h>
113#endif
114
/* CPU feature flags.  A flag is a real variable only when <cpuid.h> is
   usable (CONFIG_CPUID_H) and provides the matching bit_* macro; otherwise
   it is a compile-time constant. */

/* cmov: constant 1 on 64-bit hosts; on 32-bit hosts a variable when cpuid
   support is present, else constant 0. */
#if TCG_TARGET_REG_BITS != 64 && defined(CONFIG_CPUID_H) && defined(bit_CMOV)
static bool have_cmov;
#elif TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#else
# define have_cmov 0
#endif

/* movbe: bit_MOVBE was added to cpuid.h in GCC version 4.6. */
#if !defined(CONFIG_CPUID_H) || !defined(bit_MOVBE)
# define have_movbe 0
#else
static bool have_movbe;
#endif

/* We need these symbols in tcg-target.h, and we can't properly
   conditionalize them there, so the variables are always defined. */
bool have_bmi1;
bool have_popcnt;

#if !defined(CONFIG_CPUID_H) || !defined(bit_BMI2)
# define have_bmi2 0
#else
static bool have_bmi2;
#endif

#if !defined(CONFIG_CPUID_H) || !defined(bit_LZCNT)
# define have_lzcnt 0
#else
static bool have_lzcnt;
#endif
6399ab33 148
f6bff89d 149static tcg_insn_unit *tb_ret_addr;
b03cce8e 150
f6bff89d 151static void patch_reloc(tcg_insn_unit *code_ptr, int type,
2ba7fae2 152 intptr_t value, intptr_t addend)
c896fe29 153{
f54b3f92 154 value += addend;
c896fe29 155 switch(type) {
c896fe29 156 case R_386_PC32:
5d8a4f8f
RH
157 value -= (uintptr_t)code_ptr;
158 if (value != (int32_t)value) {
159 tcg_abort();
160 }
5c53bb81 161 tcg_patch32(code_ptr, value);
c896fe29 162 break;
f75b56c1 163 case R_386_PC8:
5d8a4f8f 164 value -= (uintptr_t)code_ptr;
f75b56c1
RH
165 if (value != (int8_t)value) {
166 tcg_abort();
167 }
5c53bb81 168 tcg_patch8(code_ptr, value);
f75b56c1 169 break;
c896fe29
FB
170 default:
171 tcg_abort();
172 }
173}
174
c896fe29 175/* parse target specific constraints */
069ea736
RH
176static const char *target_parse_constraint(TCGArgConstraint *ct,
177 const char *ct_str, TCGType type)
c896fe29 178{
069ea736 179 switch(*ct_str++) {
c896fe29
FB
180 case 'a':
181 ct->ct |= TCG_CT_REG;
182 tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
183 break;
184 case 'b':
185 ct->ct |= TCG_CT_REG;
186 tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
187 break;
188 case 'c':
189 ct->ct |= TCG_CT_REG;
190 tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
191 break;
192 case 'd':
193 ct->ct |= TCG_CT_REG;
194 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
195 break;
196 case 'S':
197 ct->ct |= TCG_CT_REG;
198 tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
199 break;
200 case 'D':
201 ct->ct |= TCG_CT_REG;
202 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
203 break;
204 case 'q':
205 ct->ct |= TCG_CT_REG;
5d8a4f8f
RH
206 if (TCG_TARGET_REG_BITS == 64) {
207 tcg_regset_set32(ct->u.regs, 0, 0xffff);
208 } else {
209 tcg_regset_set32(ct->u.regs, 0, 0xf);
210 }
c896fe29 211 break;
a4773324
JK
212 case 'Q':
213 ct->ct |= TCG_CT_REG;
214 tcg_regset_set32(ct->u.regs, 0, 0xf);
215 break;
c896fe29
FB
216 case 'r':
217 ct->ct |= TCG_CT_REG;
5d8a4f8f
RH
218 if (TCG_TARGET_REG_BITS == 64) {
219 tcg_regset_set32(ct->u.regs, 0, 0xffff);
220 } else {
221 tcg_regset_set32(ct->u.regs, 0, 0xff);
222 }
c896fe29 223 break;
bbf25f90
RH
224 case 'W':
225 /* With TZCNT/LZCNT, we can have operand-size as an input. */
226 ct->ct |= TCG_CT_CONST_WSZ;
227 break;
c896fe29
FB
228
229 /* qemu_ld/st address constraint */
230 case 'L':
231 ct->ct |= TCG_CT_REG;
401c227b 232 if (TCG_TARGET_REG_BITS == 64) {
5d8a4f8f 233 tcg_regset_set32(ct->u.regs, 0, 0xffff);
401c227b 234 } else {
5d8a4f8f 235 tcg_regset_set32(ct->u.regs, 0, 0xff);
401c227b 236 }
17b91491
AJ
237 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
238 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
5d8a4f8f
RH
239 break;
240
241 case 'e':
cd26449a 242 ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_S32);
5d8a4f8f
RH
243 break;
244 case 'Z':
cd26449a 245 ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_U32);
c896fe29 246 break;
9d2eec20 247 case 'I':
cd26449a 248 ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_I32);
9d2eec20 249 break;
5d8a4f8f 250
c896fe29 251 default:
069ea736 252 return NULL;
c896fe29 253 }
069ea736 254 return ct_str;
c896fe29
FB
255}
256
257/* test if a constant matches the constraint */
f6c6afc1 258static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
c896fe29
FB
259 const TCGArgConstraint *arg_ct)
260{
5d8a4f8f
RH
261 int ct = arg_ct->ct;
262 if (ct & TCG_CT_CONST) {
c896fe29 263 return 1;
5d8a4f8f
RH
264 }
265 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
266 return 1;
267 }
268 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
269 return 1;
270 }
9d2eec20
RH
271 if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
272 return 1;
273 }
bbf25f90
RH
274 if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
275 return 1;
276 }
5d8a4f8f 277 return 0;
c896fe29
FB
278}
279
5d8a4f8f
RH
/* Low three bits of a register number.  On 32-bit hosts the value is used
   unchanged. */
#if TCG_TARGET_REG_BITS != 64
# define LOWREGMASK(x) (x)
#else
# define LOWREGMASK(x) ((x) & 7)
#endif
285
/* Prefix request flags, OR'ed into an opcode value above its low eight
   bits.  The 64-bit-only flags collapse to 0 on 32-bit hosts. */
#define P_EXT           (1 << 8)    /* 0x0f opcode prefix */
#define P_EXT38         (1 << 9)    /* 0x0f 0x38 opcode prefix */
#define P_DATA16        (1 << 10)   /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       (1 << 11)   /* 0x67 opcode prefix */
# define P_REXW         (1 << 12)   /* Set REX.W = 1 */
# define P_REXB_R       (1 << 13)   /* REG field as byte register */
# define P_REXB_RM      (1 << 14)   /* R/M field as byte register */
# define P_GS           (1 << 15)   /* gs segment override */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
# define P_GS           0
#endif
#define P_SIMDF3        (1 << 16)   /* 0xf3 opcode prefix */
#define P_SIMDF2        (1 << 17)   /* 0xf2 opcode prefix */
fcb5dac1 304
a369a702
RH
/* Opcode values: the opcode byte in the low eight bits, optionally
   combined with P_* prefix flags. */

/* Integer arithmetic and logic. */
#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_ANDN        (0xf2 | P_EXT38)
#define OPC_DEC_r32     (0x48)
#define OPC_INC_r32     (0x40)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_TESTL       (0x85)
#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Bit scanning, counting and byte swapping. */
#define OPC_BSF         (0xbc | P_EXT)
#define OPC_BSR         (0xbd | P_EXT)
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_LZCNT       (0xbd | P_EXT | P_SIMDF3)
#define OPC_POPCNT      (0xb8 | P_EXT | P_SIMDF3)
#define OPC_TZCNT       (0xbc | P_EXT | P_SIMDF3)

/* Shifts and rotates. */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3)
#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)

/* Data movement. */
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVBE_GyMy  (0xf0 | P_EXT38)
#define OPC_MOVBE_MyGy  (0xf1 | P_EXT38)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_XCHG_ax_r32 (0x90)

/* Stack. */
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)

/* Control flow and condition-code consumers. */
#define OPC_CALL_Jz     (0xe8)
#define OPC_RET         (0xc3)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
359
/* Group 1 opcode extensions for 0x80-0x83.
   These double as the operation selector OR'ed into OPC_ARITH_GvEv. */
#define ARITH_ADD   0
#define ARITH_OR    1
#define ARITH_ADC   2
#define ARITH_SBB   3
#define ARITH_AND   4
#define ARITH_SUB   5
#define ARITH_XOR   6
#define ARITH_CMP   7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
#define SHIFT_ROL   0
#define SHIFT_ROR   1
#define SHIFT_SHL   4
#define SHIFT_SHR   5
#define SHIFT_SAR   7

/* Group 3 opcode extensions for 0xf6, 0xf7.  Use with OPC_GRP3_Ev. */
#define EXT3_NOT    2
#define EXT3_NEG    3
#define EXT3_MUL    4
#define EXT3_IMUL   5
#define EXT3_DIV    6
#define EXT3_IDIV   7

/* Group 5 opcode extensions for 0xff.  Use with OPC_GRP5. */
#define EXT5_INC_Ev     0
#define EXT5_DEC_Ev     1
#define EXT5_CALLN_Ev   2
#define EXT5_JMPN_Ev    4

/* Condition codes added to OPC_JCC_{long,short}.  JCC_JMP is not a
   hardware condition; it marks an unconditional jump. */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf
410
0aed257f 411static const uint8_t tcg_cond_to_jcc[] = {
c896fe29
FB
412 [TCG_COND_EQ] = JCC_JE,
413 [TCG_COND_NE] = JCC_JNE,
414 [TCG_COND_LT] = JCC_JL,
415 [TCG_COND_GE] = JCC_JGE,
416 [TCG_COND_LE] = JCC_JLE,
417 [TCG_COND_GT] = JCC_JG,
418 [TCG_COND_LTU] = JCC_JB,
419 [TCG_COND_GEU] = JCC_JAE,
420 [TCG_COND_LEU] = JCC_JBE,
421 [TCG_COND_GTU] = JCC_JA,
422};
423
5d8a4f8f
RH
424#if TCG_TARGET_REG_BITS == 64
425static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
426{
427 int rex;
428
44b37ace
RH
429 if (opc & P_GS) {
430 tcg_out8(s, 0x65);
431 }
5d8a4f8f
RH
432 if (opc & P_DATA16) {
433 /* We should never be asking for both 16 and 64-bit operation. */
eabb7b91 434 tcg_debug_assert((opc & P_REXW) == 0);
5d8a4f8f
RH
435 tcg_out8(s, 0x66);
436 }
437 if (opc & P_ADDR32) {
438 tcg_out8(s, 0x67);
439 }
bbf25f90
RH
440 if (opc & P_SIMDF3) {
441 tcg_out8(s, 0xf3);
442 } else if (opc & P_SIMDF2) {
443 tcg_out8(s, 0xf2);
444 }
5d8a4f8f
RH
445
446 rex = 0;
c9d78213 447 rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
ecc7e843
RH
448 rex |= (r & 8) >> 1; /* REX.R */
449 rex |= (x & 8) >> 2; /* REX.X */
450 rex |= (rm & 8) >> 3; /* REX.B */
5d8a4f8f
RH
451
452 /* P_REXB_{R,RM} indicates that the given register is the low byte.
453 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
454 as otherwise the encoding indicates %[abcd]h. Note that the values
455 that are ORed in merely indicate that the REX byte must be present;
456 those bits get discarded in output. */
457 rex |= opc & (r >= 4 ? P_REXB_R : 0);
458 rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
459
460 if (rex) {
461 tcg_out8(s, (uint8_t)(rex | 0x40));
462 }
463
2a113775 464 if (opc & (P_EXT | P_EXT38)) {
5d8a4f8f 465 tcg_out8(s, 0x0f);
2a113775
AJ
466 if (opc & P_EXT38) {
467 tcg_out8(s, 0x38);
468 }
5d8a4f8f 469 }
2a113775 470
5d8a4f8f
RH
471 tcg_out8(s, opc);
472}
473#else
474static void tcg_out_opc(TCGContext *s, int opc)
c896fe29 475{
96b4cf38
RH
476 if (opc & P_DATA16) {
477 tcg_out8(s, 0x66);
478 }
bbf25f90
RH
479 if (opc & P_SIMDF3) {
480 tcg_out8(s, 0xf3);
481 } else if (opc & P_SIMDF2) {
482 tcg_out8(s, 0xf2);
483 }
2a113775 484 if (opc & (P_EXT | P_EXT38)) {
c896fe29 485 tcg_out8(s, 0x0f);
2a113775
AJ
486 if (opc & P_EXT38) {
487 tcg_out8(s, 0x38);
488 }
96b4cf38 489 }
c896fe29
FB
490 tcg_out8(s, opc);
491}
5d8a4f8f
RH
492/* Discard the register arguments to tcg_out_opc early, so as not to penalize
493 the 32-bit compilation paths. This method works with all versions of gcc,
494 whereas relying on optimization may not be able to exclude them. */
495#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
496#endif
c896fe29 497
5d8a4f8f 498static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
c896fe29 499{
5d8a4f8f
RH
500 tcg_out_opc(s, opc, r, rm, 0);
501 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
c896fe29
FB
502}
503
ecc7e843
RH
504static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
505{
506 int tmp;
507
508 if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
509 /* Three byte VEX prefix. */
510 tcg_out8(s, 0xc4);
511
512 /* VEX.m-mmmm */
513 if (opc & P_EXT38) {
514 tmp = 2;
515 } else if (opc & P_EXT) {
516 tmp = 1;
517 } else {
518 tcg_abort();
519 }
520 tmp |= 0x40; /* VEX.X */
521 tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */
522 tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */
523 tcg_out8(s, tmp);
524
525 tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */
526 } else {
527 /* Two byte VEX prefix. */
528 tcg_out8(s, 0xc5);
529
530 tmp = (r & 8 ? 0 : 0x80); /* VEX.R */
531 }
6399ab33
RH
532 /* VEX.pp */
533 if (opc & P_DATA16) {
534 tmp |= 1; /* 0x66 */
535 } else if (opc & P_SIMDF3) {
536 tmp |= 2; /* 0xf3 */
537 } else if (opc & P_SIMDF2) {
538 tmp |= 3; /* 0xf2 */
539 }
ecc7e843
RH
540 tmp |= (~v & 15) << 3; /* VEX.vvvv */
541 tcg_out8(s, tmp);
542 tcg_out8(s, opc);
543 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
544}
545
34a6d0b7 546/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
5d8a4f8f
RH
547 We handle either RM and INDEX missing with a negative value. In 64-bit
548 mode for absolute addresses, ~RM is the size of the immediate operand
549 that will follow the instruction. */
34a6d0b7
RH
550
551static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
357e3d8a 552 int index, int shift, intptr_t offset)
c896fe29 553{
34a6d0b7
RH
554 int mod, len;
555
5d8a4f8f
RH
556 if (index < 0 && rm < 0) {
557 if (TCG_TARGET_REG_BITS == 64) {
558 /* Try for a rip-relative addressing mode. This has replaced
559 the 32-bit-mode absolute addressing encoding. */
357e3d8a
RH
560 intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
561 intptr_t disp = offset - pc;
5d8a4f8f
RH
562 if (disp == (int32_t)disp) {
563 tcg_out_opc(s, opc, r, 0, 0);
564 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
565 tcg_out32(s, disp);
566 return;
567 }
34a6d0b7 568
5d8a4f8f
RH
569 /* Try for an absolute address encoding. This requires the
570 use of the MODRM+SIB encoding and is therefore larger than
571 rip-relative addressing. */
572 if (offset == (int32_t)offset) {
573 tcg_out_opc(s, opc, r, 0, 0);
574 tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
575 tcg_out8(s, (4 << 3) | 5);
576 tcg_out32(s, offset);
577 return;
578 }
579
580 /* ??? The memory isn't directly addressable. */
581 tcg_abort();
582 } else {
583 /* Absolute address. */
584 tcg_out_opc(s, opc, r, 0, 0);
585 tcg_out8(s, (r << 3) | 5);
586 tcg_out32(s, offset);
587 return;
588 }
589 }
34a6d0b7
RH
590
591 /* Find the length of the immediate addend. Note that the encoding
592 that would be used for (%ebp) indicates absolute addressing. */
5d8a4f8f 593 if (rm < 0) {
34a6d0b7 594 mod = 0, len = 4, rm = 5;
5d8a4f8f 595 } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
34a6d0b7
RH
596 mod = 0, len = 0;
597 } else if (offset == (int8_t)offset) {
598 mod = 0x40, len = 1;
c896fe29 599 } else {
34a6d0b7
RH
600 mod = 0x80, len = 4;
601 }
602
603 /* Use a single byte MODRM format if possible. Note that the encoding
604 that would be used for %esp is the escape to the two byte form. */
5d8a4f8f 605 if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
34a6d0b7 606 /* Single byte MODRM format. */
5d8a4f8f
RH
607 tcg_out_opc(s, opc, r, rm, 0);
608 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
34a6d0b7
RH
609 } else {
610 /* Two byte MODRM+SIB format. */
611
612 /* Note that the encoding that would place %esp into the index
5d8a4f8f
RH
613 field indicates no index register. In 64-bit mode, the REX.X
614 bit counts, so %r12 can be used as the index. */
615 if (index < 0) {
34a6d0b7 616 index = 4;
c896fe29 617 } else {
eabb7b91 618 tcg_debug_assert(index != TCG_REG_ESP);
c896fe29 619 }
34a6d0b7 620
5d8a4f8f
RH
621 tcg_out_opc(s, opc, r, rm, index);
622 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
623 tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
34a6d0b7
RH
624 }
625
626 if (len == 1) {
627 tcg_out8(s, offset);
628 } else if (len == 4) {
c896fe29
FB
629 tcg_out32(s, offset);
630 }
631}
632
5d8a4f8f
RH
633/* A simplification of the above with no index or shift. */
634static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
357e3d8a 635 int rm, intptr_t offset)
34a6d0b7
RH
636{
637 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
638}
639
81570a70
RH
640/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
641static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
642{
5d8a4f8f
RH
643 /* Propagate an opcode prefix, such as P_REXW. */
644 int ext = subop & ~0x7;
645 subop &= 0x7;
646
647 tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
81570a70
RH
648}
649
2a534aff
RH
650static inline void tcg_out_mov(TCGContext *s, TCGType type,
651 TCGReg ret, TCGReg arg)
c896fe29 652{
af266089 653 if (arg != ret) {
5d8a4f8f
RH
654 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
655 tcg_out_modrm(s, opc, ret, arg);
af266089 656 }
c896fe29
FB
657}
658
5d8a4f8f 659static void tcg_out_movi(TCGContext *s, TCGType type,
2a534aff 660 TCGReg ret, tcg_target_long arg)
c896fe29 661{
8023ccda
RH
662 tcg_target_long diff;
663
c896fe29 664 if (arg == 0) {
81570a70 665 tgen_arithr(s, ARITH_XOR, ret, ret);
5d8a4f8f 666 return;
8023ccda
RH
667 }
668 if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
5d8a4f8f
RH
669 tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
670 tcg_out32(s, arg);
8023ccda
RH
671 return;
672 }
673 if (arg == (int32_t)arg) {
5d8a4f8f
RH
674 tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
675 tcg_out32(s, arg);
8023ccda 676 return;
c896fe29 677 }
8023ccda
RH
678
679 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
357e3d8a 680 diff = arg - ((uintptr_t)s->code_ptr + 7);
8023ccda
RH
681 if (diff == (int32_t)diff) {
682 tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
683 tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
684 tcg_out32(s, diff);
685 return;
686 }
687
688 tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
689 tcg_out64(s, arg);
c896fe29
FB
690}
691
6858614e
RH
692static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
693{
694 if (val == (int8_t)val) {
5d8a4f8f 695 tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
6858614e 696 tcg_out8(s, val);
5d8a4f8f
RH
697 } else if (val == (int32_t)val) {
698 tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
6858614e 699 tcg_out32(s, val);
5d8a4f8f
RH
700 } else {
701 tcg_abort();
6858614e
RH
702 }
703}
704
a7d00d4e
PK
705static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
706{
707 /* Given the strength of x86 memory ordering, we only need care for
708 store-load ordering. Experimentally, "lock orl $0,0(%esp)" is
709 faster than "mfence", so don't bother with the sse insn. */
710 if (a0 & TCG_MO_ST_LD) {
711 tcg_out8(s, 0xf0);
712 tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0);
713 tcg_out8(s, 0);
714 }
715}
716
6858614e
RH
717static inline void tcg_out_push(TCGContext *s, int reg)
718{
5d8a4f8f 719 tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
6858614e
RH
720}
721
722static inline void tcg_out_pop(TCGContext *s, int reg)
723{
5d8a4f8f 724 tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
6858614e
RH
725}
726
2a534aff 727static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
a05b5b9b 728 TCGReg arg1, intptr_t arg2)
c896fe29 729{
5d8a4f8f
RH
730 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
731 tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
c896fe29
FB
732}
733
2a534aff 734static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
a05b5b9b 735 TCGReg arg1, intptr_t arg2)
c896fe29 736{
5d8a4f8f
RH
737 int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
738 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
c896fe29
FB
739}
740
59d7c14e
RH
741static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
742 TCGReg base, intptr_t ofs)
c6f29ff0 743{
59d7c14e
RH
744 int rexw = 0;
745 if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
746 if (val != (int32_t)val) {
747 return false;
748 }
749 rexw = P_REXW;
750 }
751 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
c6f29ff0 752 tcg_out32(s, val);
59d7c14e 753 return true;
c6f29ff0
RH
754}
755
f53dba01
RH
756static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
757{
96b4cf38
RH
758 /* Propagate an opcode prefix, such as P_DATA16. */
759 int ext = subopc & ~0x7;
760 subopc &= 0x7;
761
f53dba01 762 if (count == 1) {
5d8a4f8f 763 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
f53dba01 764 } else {
5d8a4f8f 765 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
f53dba01
RH
766 tcg_out8(s, count);
767 }
768}
769
fcb5dac1
RH
770static inline void tcg_out_bswap32(TCGContext *s, int reg)
771{
5d8a4f8f 772 tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
fcb5dac1
RH
773}
774
775static inline void tcg_out_rolw_8(TCGContext *s, int reg)
776{
5d8a4f8f 777 tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
fcb5dac1
RH
778}
779
55e082a7
RH
780static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
781{
782 /* movzbl */
eabb7b91 783 tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
5d8a4f8f 784 tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
55e082a7
RH
785}
786
5d8a4f8f 787static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
6817c355
RH
788{
789 /* movsbl */
eabb7b91 790 tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
5d8a4f8f 791 tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
6817c355
RH
792}
793
55e082a7
RH
794static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
795{
796 /* movzwl */
797 tcg_out_modrm(s, OPC_MOVZWL, dest, src);
798}
799
5d8a4f8f 800static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
6817c355 801{
5d8a4f8f
RH
802 /* movsw[lq] */
803 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
6817c355
RH
804}
805
5d8a4f8f 806static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
c896fe29 807{
5d8a4f8f
RH
808 /* 32-bit mov zero extends. */
809 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
810}
811
812static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
813{
814 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
815}
816
817static inline void tcg_out_bswap64(TCGContext *s, int reg)
818{
819 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
820}
821
822static void tgen_arithi(TCGContext *s, int c, int r0,
823 tcg_target_long val, int cf)
824{
825 int rexw = 0;
826
827 if (TCG_TARGET_REG_BITS == 64) {
828 rexw = c & -8;
829 c &= 7;
830 }
831
81570a70
RH
832 /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
833 partial flags update stalls on Pentium4 and are not recommended
834 by current Intel optimization manuals. */
835 if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
447d681e 836 int is_inc = (c == ARITH_ADD) ^ (val < 0);
5d8a4f8f
RH
837 if (TCG_TARGET_REG_BITS == 64) {
838 /* The single-byte increment encodings are re-tasked as the
839 REX prefixes. Use the MODRM encoding. */
840 tcg_out_modrm(s, OPC_GRP5 + rexw,
841 (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
842 } else {
843 tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
844 }
845 return;
846 }
847
848 if (c == ARITH_AND) {
849 if (TCG_TARGET_REG_BITS == 64) {
850 if (val == 0xffffffffu) {
851 tcg_out_ext32u(s, r0, r0);
852 return;
853 }
854 if (val == (uint32_t)val) {
855 /* AND with no high bits set can use a 32-bit operation. */
856 rexw = 0;
857 }
858 }
dc397ca3 859 if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
5d8a4f8f
RH
860 tcg_out_ext8u(s, r0, r0);
861 return;
862 }
863 if (val == 0xffffu) {
864 tcg_out_ext16u(s, r0, r0);
865 return;
866 }
867 }
868
869 if (val == (int8_t)val) {
870 tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
c896fe29 871 tcg_out8(s, val);
5d8a4f8f
RH
872 return;
873 }
874 if (rexw == 0 || val == (int32_t)val) {
875 tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
c896fe29 876 tcg_out32(s, val);
5d8a4f8f 877 return;
c896fe29 878 }
5d8a4f8f
RH
879
880 tcg_abort();
c896fe29
FB
881}
882
3e9a474e 883static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
c896fe29 884{
5d8a4f8f
RH
885 if (val != 0) {
886 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
887 }
c896fe29
FB
888}
889
f75b56c1 890/* Use SMALL != 0 to force a short forward branch. */
bec16311 891static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
c896fe29
FB
892{
893 int32_t val, val1;
78686523 894
c896fe29 895 if (l->has_value) {
f6bff89d 896 val = tcg_pcrel_diff(s, l->u.value_ptr);
c896fe29
FB
897 val1 = val - 2;
898 if ((int8_t)val1 == val1) {
f75b56c1 899 if (opc == -1) {
da441cff 900 tcg_out8(s, OPC_JMP_short);
f75b56c1 901 } else {
da441cff 902 tcg_out8(s, OPC_JCC_short + opc);
f75b56c1 903 }
c896fe29
FB
904 tcg_out8(s, val1);
905 } else {
f75b56c1
RH
906 if (small) {
907 tcg_abort();
908 }
c896fe29 909 if (opc == -1) {
da441cff 910 tcg_out8(s, OPC_JMP_long);
c896fe29
FB
911 tcg_out32(s, val - 5);
912 } else {
5d8a4f8f 913 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
c896fe29
FB
914 tcg_out32(s, val - 6);
915 }
916 }
f75b56c1
RH
917 } else if (small) {
918 if (opc == -1) {
da441cff 919 tcg_out8(s, OPC_JMP_short);
f75b56c1 920 } else {
da441cff 921 tcg_out8(s, OPC_JCC_short + opc);
f75b56c1 922 }
bec16311 923 tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
f75b56c1 924 s->code_ptr += 1;
c896fe29
FB
925 } else {
926 if (opc == -1) {
da441cff 927 tcg_out8(s, OPC_JMP_long);
c896fe29 928 } else {
5d8a4f8f 929 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
c896fe29 930 }
bec16311 931 tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
623e265c 932 s->code_ptr += 4;
c896fe29
FB
933 }
934}
935
1d2699ae 936static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
5d8a4f8f 937 int const_arg2, int rexw)
c896fe29 938{
c896fe29
FB
939 if (const_arg2) {
940 if (arg2 == 0) {
c896fe29 941 /* test r, r */
5d8a4f8f 942 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
c896fe29 943 } else {
5d8a4f8f 944 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
c896fe29
FB
945 }
946 } else {
5d8a4f8f 947 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
c896fe29 948 }
1d2699ae
RH
949}
950
5d8a4f8f
RH
951static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
952 TCGArg arg1, TCGArg arg2, int const_arg2,
bec16311 953 TCGLabel *label, int small)
1d2699ae 954{
5d8a4f8f 955 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
bec16311 956 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
c896fe29
FB
957}
958
5d8a4f8f
RH
959#if TCG_TARGET_REG_BITS == 64
960static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
961 TCGArg arg1, TCGArg arg2, int const_arg2,
bec16311 962 TCGLabel *label, int small)
5d8a4f8f
RH
963{
964 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
bec16311 965 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
5d8a4f8f
RH
966}
967#else
c896fe29
FB
968/* XXX: we implement it at the target level to avoid having to
969 handle cross basic blocks temporaries */
f75b56c1
RH
970static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
971 const int *const_args, int small)
c896fe29 972{
bec16311
RH
973 TCGLabel *label_next = gen_new_label();
974 TCGLabel *label_this = arg_label(args[5]);
42a268c2 975
c896fe29
FB
976 switch(args[4]) {
977 case TCG_COND_EQ:
5d8a4f8f
RH
978 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
979 label_next, 1);
980 tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
bec16311 981 label_this, small);
c896fe29
FB
982 break;
983 case TCG_COND_NE:
5d8a4f8f 984 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
bec16311 985 label_this, small);
5d8a4f8f 986 tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
bec16311 987 label_this, small);
c896fe29
FB
988 break;
989 case TCG_COND_LT:
5d8a4f8f 990 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
bec16311 991 label_this, small);
f75b56c1 992 tcg_out_jxx(s, JCC_JNE, label_next, 1);
5d8a4f8f 993 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
bec16311 994 label_this, small);
c896fe29
FB
995 break;
996 case TCG_COND_LE:
5d8a4f8f 997 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
bec16311 998 label_this, small);
f75b56c1 999 tcg_out_jxx(s, JCC_JNE, label_next, 1);
5d8a4f8f 1000 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
bec16311 1001 label_this, small);
c896fe29
FB
1002 break;
1003 case TCG_COND_GT:
5d8a4f8f 1004 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
bec16311 1005 label_this, small);
f75b56c1 1006 tcg_out_jxx(s, JCC_JNE, label_next, 1);
5d8a4f8f 1007 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
bec16311 1008 label_this, small);
c896fe29
FB
1009 break;
1010 case TCG_COND_GE:
5d8a4f8f 1011 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
bec16311 1012 label_this, small);
f75b56c1 1013 tcg_out_jxx(s, JCC_JNE, label_next, 1);
5d8a4f8f 1014 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
bec16311 1015 label_this, small);
c896fe29
FB
1016 break;
1017 case TCG_COND_LTU:
5d8a4f8f 1018 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
bec16311 1019 label_this, small);
f75b56c1 1020 tcg_out_jxx(s, JCC_JNE, label_next, 1);
5d8a4f8f 1021 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
bec16311 1022 label_this, small);
c896fe29
FB
1023 break;
1024 case TCG_COND_LEU:
5d8a4f8f 1025 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
bec16311 1026 label_this, small);
f75b56c1 1027 tcg_out_jxx(s, JCC_JNE, label_next, 1);
5d8a4f8f 1028 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
bec16311 1029 label_this, small);
c896fe29
FB
1030 break;
1031 case TCG_COND_GTU:
5d8a4f8f 1032 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
bec16311 1033 label_this, small);
f75b56c1 1034 tcg_out_jxx(s, JCC_JNE, label_next, 1);
5d8a4f8f 1035 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
bec16311 1036 label_this, small);
c896fe29
FB
1037 break;
1038 case TCG_COND_GEU:
5d8a4f8f 1039 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
bec16311 1040 label_this, small);
f75b56c1 1041 tcg_out_jxx(s, JCC_JNE, label_next, 1);
5d8a4f8f 1042 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
bec16311 1043 label_this, small);
c896fe29
FB
1044 break;
1045 default:
1046 tcg_abort();
1047 }
9d6fca70 1048 tcg_out_label(s, label_next, s->code_ptr);
c896fe29 1049}
5d8a4f8f 1050#endif
c896fe29 1051
5d8a4f8f
RH
1052static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
1053 TCGArg arg1, TCGArg arg2, int const_arg2)
1d2699ae 1054{
5d8a4f8f 1055 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
32a8ffb9 1056 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
a369a702 1057 tcg_out_ext8u(s, dest, dest);
1d2699ae
RH
1058}
1059
5d8a4f8f
RH
1060#if TCG_TARGET_REG_BITS == 64
1061static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
1062 TCGArg arg1, TCGArg arg2, int const_arg2)
1063{
1064 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
1065 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
1066 tcg_out_ext8u(s, dest, dest);
1067}
1068#else
1d2699ae
RH
1069static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1070 const int *const_args)
1071{
1072 TCGArg new_args[6];
bec16311 1073 TCGLabel *label_true, *label_over;
1d2699ae
RH
1074
1075 memcpy(new_args, args+1, 5*sizeof(TCGArg));
1076
1077 if (args[0] == args[1] || args[0] == args[2]
1078 || (!const_args[3] && args[0] == args[3])
1079 || (!const_args[4] && args[0] == args[4])) {
1080 /* When the destination overlaps with one of the argument
1081 registers, don't do anything tricky. */
bec16311
RH
1082 label_true = gen_new_label();
1083 label_over = gen_new_label();
1d2699ae 1084
bec16311 1085 new_args[5] = label_arg(label_true);
1d2699ae
RH
1086 tcg_out_brcond2(s, new_args, const_args+1, 1);
1087
1088 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1089 tcg_out_jxx(s, JCC_JMP, label_over, 1);
9d6fca70 1090 tcg_out_label(s, label_true, s->code_ptr);
1d2699ae
RH
1091
1092 tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
9d6fca70 1093 tcg_out_label(s, label_over, s->code_ptr);
1d2699ae
RH
1094 } else {
1095 /* When the destination does not overlap one of the arguments,
1096 clear the destination first, jump if cond false, and emit an
1097 increment in the true case. This results in smaller code. */
1098
1099 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1100
bec16311 1101 label_over = gen_new_label();
1d2699ae 1102 new_args[4] = tcg_invert_cond(new_args[4]);
bec16311 1103 new_args[5] = label_arg(label_over);
1d2699ae
RH
1104 tcg_out_brcond2(s, new_args, const_args+1, 1);
1105
1106 tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
9d6fca70 1107 tcg_out_label(s, label_over, s->code_ptr);
1d2699ae
RH
1108 }
1109}
5d8a4f8f
RH
1110#endif
1111
bbf25f90
RH
1112static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw,
1113 TCGReg dest, TCGReg v1)
d0a16297 1114{
76a347e1 1115 if (have_cmov) {
bbf25f90 1116 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1);
76a347e1 1117 } else {
bec16311 1118 TCGLabel *over = gen_new_label();
76a347e1
RH
1119 tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
1120 tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
1121 tcg_out_label(s, over, s->code_ptr);
1122 }
d0a16297
RH
1123}
1124
bbf25f90
RH
1125static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest,
1126 TCGReg c1, TCGArg c2, int const_c2,
1127 TCGReg v1)
1128{
1129 tcg_out_cmp(s, c1, c2, const_c2, 0);
1130 tcg_out_cmov(s, cond, 0, dest, v1);
1131}
1132
d0a16297 1133#if TCG_TARGET_REG_BITS == 64
bbf25f90
RH
1134static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest,
1135 TCGReg c1, TCGArg c2, int const_c2,
1136 TCGReg v1)
d0a16297
RH
1137{
1138 tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
bbf25f90 1139 tcg_out_cmov(s, cond, P_REXW, dest, v1);
d0a16297
RH
1140}
1141#endif
1142
bbf25f90
RH
1143static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
1144 TCGArg arg2, bool const_a2)
1145{
39f099ec 1146 if (have_bmi1) {
bbf25f90 1147 tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1);
39f099ec
RH
1148 if (const_a2) {
1149 tcg_debug_assert(arg2 == (rexw ? 64 : 32));
1150 } else {
1151 tcg_debug_assert(dest != arg2);
1152 tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
1153 }
bbf25f90 1154 } else {
9bf38308 1155 tcg_debug_assert(dest != arg2);
bbf25f90 1156 tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1);
9bf38308 1157 tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
bbf25f90
RH
1158 }
1159}
1160
1161static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
1162 TCGArg arg2, bool const_a2)
1163{
1164 if (have_lzcnt) {
1165 tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1);
1166 if (const_a2) {
1167 tcg_debug_assert(arg2 == (rexw ? 64 : 32));
1168 } else {
1169 tcg_debug_assert(dest != arg2);
1170 tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
1171 }
1172 } else {
9bf38308
RH
1173 tcg_debug_assert(!const_a2);
1174 tcg_debug_assert(dest != arg1);
1175 tcg_debug_assert(dest != arg2);
bbf25f90 1176
9bf38308 1177 /* Recall that the output of BSR is the index not the count. */
bbf25f90 1178 tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1);
9bf38308
RH
1179 tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0);
1180
1181 /* Since we have destroyed the flags from BSR, we have to re-test. */
1182 tcg_out_cmp(s, arg1, 0, 1, rexw);
1183 tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
bbf25f90
RH
1184 }
1185}
1186
f6bff89d 1187static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
5d8a4f8f 1188{
f6bff89d 1189 intptr_t disp = tcg_pcrel_diff(s, dest) - 5;
5d8a4f8f
RH
1190
1191 if (disp == (int32_t)disp) {
1192 tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
1193 tcg_out32(s, disp);
1194 } else {
f6bff89d 1195 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest);
5d8a4f8f
RH
1196 tcg_out_modrm(s, OPC_GRP5,
1197 call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
1198 }
1199}
1200
6bf3e997 1201static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
5d8a4f8f
RH
1202{
1203 tcg_out_branch(s, 1, dest);
1204}
1d2699ae 1205
f6bff89d 1206static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
aadb21a4 1207{
5d8a4f8f 1208 tcg_out_branch(s, 0, dest);
aadb21a4
RH
1209}
1210
0d07abf0
SF
1211static void tcg_out_nopn(TCGContext *s, int n)
1212{
1213 int i;
1214 /* Emit 1 or 2 operand size prefixes for the standard one byte nop,
1215 * "xchg %eax,%eax", forming "xchg %ax,%ax". All cores accept the
1216 * duplicate prefix, and all of the interesting recent cores can
1217 * decode and discard the duplicates in a single cycle.
1218 */
1219 tcg_debug_assert(n >= 1);
1220 for (i = 1; i < n; ++i) {
1221 tcg_out8(s, 0x66);
1222 }
1223 tcg_out8(s, 0x90);
1224}
1225
c896fe29 1226#if defined(CONFIG_SOFTMMU)
401c227b
RH
1227/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1228 * int mmu_idx, uintptr_t ra)
1229 */
f6bff89d 1230static void * const qemu_ld_helpers[16] = {
8221a267
RH
1231 [MO_UB] = helper_ret_ldub_mmu,
1232 [MO_LEUW] = helper_le_lduw_mmu,
1233 [MO_LEUL] = helper_le_ldul_mmu,
1234 [MO_LEQ] = helper_le_ldq_mmu,
1235 [MO_BEUW] = helper_be_lduw_mmu,
1236 [MO_BEUL] = helper_be_ldul_mmu,
1237 [MO_BEQ] = helper_be_ldq_mmu,
e141ab52
BS
1238};
1239
401c227b
RH
1240/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1241 * uintxx_t val, int mmu_idx, uintptr_t ra)
1242 */
f6bff89d 1243static void * const qemu_st_helpers[16] = {
8221a267
RH
1244 [MO_UB] = helper_ret_stb_mmu,
1245 [MO_LEUW] = helper_le_stw_mmu,
1246 [MO_LEUL] = helper_le_stl_mmu,
1247 [MO_LEQ] = helper_le_stq_mmu,
1248 [MO_BEUW] = helper_be_stw_mmu,
1249 [MO_BEUL] = helper_be_stl_mmu,
1250 [MO_BEQ] = helper_be_stq_mmu,
e141ab52 1251};
8516a044
RH
1252
1253/* Perform the TLB load and compare.
1254
1255 Inputs:
7352ee54 1256 ADDRLO and ADDRHI contain the low and high part of the address.
8516a044
RH
1257
1258 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1259
1260 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1261 This should be offsetof addr_read or addr_write.
1262
1263 Outputs:
1264 LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
1265 positions of the displacements of forward jumps to the TLB miss case.
1266
166792f7 1267 Second argument register is loaded with the low part of the address.
5d8a4f8f
RH
1268 In the TLB hit case, it has been adjusted as indicated by the TLB
1269 and so is a host address. In the TLB miss case, it continues to
1270 hold a guest address.
8516a044 1271
166792f7 1272 First argument register is clobbered. */
8516a044 1273
7352ee54 1274static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
8cc580f6 1275 int mem_index, TCGMemOp opc,
f6bff89d 1276 tcg_insn_unit **label_ptr, int which)
8516a044 1277{
7352ee54
RH
1278 const TCGReg r0 = TCG_REG_L0;
1279 const TCGReg r1 = TCG_REG_L1;
d5dad3be 1280 TCGType ttype = TCG_TYPE_I32;
08b0b23b
AJ
1281 TCGType tlbtype = TCG_TYPE_I32;
1282 int trexw = 0, hrexw = 0, tlbrexw = 0;
85aa8081
RH
1283 unsigned a_bits = get_alignment_bits(opc);
1284 unsigned s_bits = opc & MO_SIZE;
1285 unsigned a_mask = (1 << a_bits) - 1;
1286 unsigned s_mask = (1 << s_bits) - 1;
1f00b27f 1287 target_ulong tlb_mask;
5d8a4f8f 1288
d5dad3be
RH
1289 if (TCG_TARGET_REG_BITS == 64) {
1290 if (TARGET_LONG_BITS == 64) {
1291 ttype = TCG_TYPE_I64;
1292 trexw = P_REXW;
1293 }
1294 if (TCG_TYPE_PTR == TCG_TYPE_I64) {
d5dad3be 1295 hrexw = P_REXW;
08b0b23b
AJ
1296 if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
1297 tlbtype = TCG_TYPE_I64;
1298 tlbrexw = P_REXW;
1299 }
d5dad3be 1300 }
5d8a4f8f 1301 }
8516a044 1302
08b0b23b 1303 tcg_out_mov(s, tlbtype, r0, addrlo);
85aa8081
RH
1304 /* If the required alignment is at least as large as the access, simply
1305 copy the address and mask. For lesser alignments, check that we don't
1306 cross pages for the complete access. */
1307 if (a_bits >= s_bits) {
8cc580f6
AJ
1308 tcg_out_mov(s, ttype, r1, addrlo);
1309 } else {
85aa8081 1310 tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
8cc580f6 1311 }
ebb90a00 1312 tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
8516a044 1313
08b0b23b 1314 tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
5d8a4f8f 1315 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
8516a044 1316
1f00b27f 1317 tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);
08b0b23b 1318 tgen_arithi(s, ARITH_AND + tlbrexw, r0,
5d8a4f8f 1319 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
8516a044 1320
d5dad3be 1321 tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
9349b4f9 1322 offsetof(CPUArchState, tlb_table[mem_index][0])
8516a044
RH
1323 + which);
1324
166792f7 1325 /* cmp 0(r0), r1 */
d5dad3be 1326 tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
8516a044 1327
d5dad3be
RH
1328 /* Prepare for both the fast path add of the tlb addend, and the slow
1329 path function argument setup. There are two cases worth note:
1330 For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
1331 before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
1332 copies the entire guest address for the slow path, while truncation
1333 for the 32-bit host happens with the fastpath ADDL below. */
1334 tcg_out_mov(s, ttype, r1, addrlo);
8516a044 1335
b76f0d8c
YL
1336 /* jne slow_path */
1337 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
8516a044 1338 label_ptr[0] = s->code_ptr;
b76f0d8c 1339 s->code_ptr += 4;
8516a044 1340
5d8a4f8f 1341 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
166792f7 1342 /* cmp 4(r0), addrhi */
7352ee54 1343 tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);
8516a044 1344
b76f0d8c
YL
1345 /* jne slow_path */
1346 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
8516a044 1347 label_ptr[1] = s->code_ptr;
b76f0d8c 1348 s->code_ptr += 4;
8516a044
RH
1349 }
1350
1351 /* TLB Hit. */
1352
166792f7 1353 /* add addend(r0), r1 */
d5dad3be 1354 tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
8516a044
RH
1355 offsetof(CPUTLBEntry, addend) - which);
1356}
7352ee54
RH
1357
1358/*
1359 * Record the context of a call to the out of line helper code for the slow path
1360 * for a load or store, so that we can later generate the correct helper code
1361 */
3972ef6f 1362static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
7352ee54
RH
1363 TCGReg datalo, TCGReg datahi,
1364 TCGReg addrlo, TCGReg addrhi,
3972ef6f 1365 tcg_insn_unit *raddr,
f6bff89d 1366 tcg_insn_unit **label_ptr)
7352ee54
RH
1367{
1368 TCGLabelQemuLdst *label = new_ldst_label(s);
1369
1370 label->is_ld = is_ld;
3972ef6f 1371 label->oi = oi;
7352ee54
RH
1372 label->datalo_reg = datalo;
1373 label->datahi_reg = datahi;
1374 label->addrlo_reg = addrlo;
1375 label->addrhi_reg = addrhi;
7352ee54
RH
1376 label->raddr = raddr;
1377 label->label_ptr[0] = label_ptr[0];
1378 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1379 label->label_ptr[1] = label_ptr[1];
1380 }
1381}
1382
1383/*
1384 * Generate code for the slow path for a load at the end of block
1385 */
1386static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1387{
3972ef6f
RH
1388 TCGMemOpIdx oi = l->oi;
1389 TCGMemOp opc = get_memop(oi);
7352ee54 1390 TCGReg data_reg;
f6bff89d 1391 tcg_insn_unit **label_ptr = &l->label_ptr[0];
7352ee54
RH
1392
1393 /* resolve label address */
5c53bb81 1394 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
7352ee54 1395 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
5c53bb81 1396 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
7352ee54
RH
1397 }
1398
1399 if (TCG_TARGET_REG_BITS == 32) {
1400 int ofs = 0;
1401
1402 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1403 ofs += 4;
1404
1405 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1406 ofs += 4;
1407
1408 if (TARGET_LONG_BITS == 64) {
1409 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1410 ofs += 4;
1411 }
1412
59d7c14e 1413 tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
7352ee54
RH
1414 ofs += 4;
1415
59d7c14e 1416 tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
7352ee54
RH
1417 } else {
1418 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1419 /* The second argument is already loaded with addrlo. */
3972ef6f 1420 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
7352ee54
RH
1421 tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
1422 (uintptr_t)l->raddr);
1423 }
1424
2b7ec66f 1425 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
7352ee54
RH
1426
1427 data_reg = l->datalo_reg;
1428 switch (opc & MO_SSIZE) {
1429 case MO_SB:
1430 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1431 break;
1432 case MO_SW:
1433 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1434 break;
1435#if TCG_TARGET_REG_BITS == 64
1436 case MO_SL:
1437 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1438 break;
1439#endif
1440 case MO_UB:
1441 case MO_UW:
1442 /* Note that the helpers have zero-extended to tcg_target_long. */
1443 case MO_UL:
1444 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1445 break;
1446 case MO_Q:
1447 if (TCG_TARGET_REG_BITS == 64) {
1448 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1449 } else if (data_reg == TCG_REG_EDX) {
1450 /* xchg %edx, %eax */
1451 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1452 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
1453 } else {
1454 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1455 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
1456 }
1457 break;
1458 default:
1459 tcg_abort();
1460 }
1461
1462 /* Jump to the code corresponding to next IR of qemu_st */
f6bff89d 1463 tcg_out_jmp(s, l->raddr);
7352ee54
RH
1464}
1465
1466/*
1467 * Generate code for the slow path for a store at the end of block
1468 */
1469static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1470{
3972ef6f
RH
1471 TCGMemOpIdx oi = l->oi;
1472 TCGMemOp opc = get_memop(oi);
7352ee54 1473 TCGMemOp s_bits = opc & MO_SIZE;
f6bff89d 1474 tcg_insn_unit **label_ptr = &l->label_ptr[0];
7352ee54
RH
1475 TCGReg retaddr;
1476
1477 /* resolve label address */
5c53bb81 1478 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
7352ee54 1479 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
5c53bb81 1480 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
7352ee54
RH
1481 }
1482
1483 if (TCG_TARGET_REG_BITS == 32) {
1484 int ofs = 0;
1485
1486 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1487 ofs += 4;
1488
1489 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1490 ofs += 4;
1491
1492 if (TARGET_LONG_BITS == 64) {
1493 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1494 ofs += 4;
1495 }
1496
1497 tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
1498 ofs += 4;
1499
1500 if (s_bits == MO_64) {
1501 tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
1502 ofs += 4;
1503 }
1504
59d7c14e 1505 tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
7352ee54
RH
1506 ofs += 4;
1507
1508 retaddr = TCG_REG_EAX;
3972ef6f
RH
1509 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1510 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
7352ee54
RH
1511 } else {
1512 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1513 /* The second argument is already loaded with addrlo. */
1514 tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1515 tcg_target_call_iarg_regs[2], l->datalo_reg);
3972ef6f 1516 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
7352ee54
RH
1517
1518 if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
1519 retaddr = tcg_target_call_iarg_regs[4];
1520 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1521 } else {
1522 retaddr = TCG_REG_RAX;
1523 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
0b919667
RH
1524 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
1525 TCG_TARGET_CALL_STACK_OFFSET);
7352ee54
RH
1526 }
1527 }
1528
1529 /* "Tail call" to the helper, with the return address back inline. */
1530 tcg_out_push(s, retaddr);
2b7ec66f 1531 tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
7352ee54 1532}
44b37ace
RH
1533#elif defined(__x86_64__) && defined(__linux__)
1534# include <asm/prctl.h>
1535# include <sys/prctl.h>
1536
1537int arch_prctl(int code, unsigned long addr);
1538
1539static int guest_base_flags;
1540static inline void setup_guest_base_seg(void)
1541{
b76f21a7 1542 if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
44b37ace
RH
1543 guest_base_flags = P_GS;
1544 }
1545}
1546#else
1547# define guest_base_flags 0
/* Nothing to set up in this configuration: guest_base_flags is the
   constant 0 here.  */
static inline void setup_guest_base_seg(void)
{
}
1549#endif /* SOFTMMU */
c896fe29 1550
37c5d0d5 1551static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
ee8ba9e4
RH
1552 TCGReg base, int index, intptr_t ofs,
1553 int seg, TCGMemOp memop)
be5a4eb7 1554{
085bb5bb
AJ
1555 const TCGMemOp real_bswap = memop & MO_BSWAP;
1556 TCGMemOp bswap = real_bswap;
1557 int movop = OPC_MOVL_GvEv;
1558
1559 if (have_movbe && real_bswap) {
1560 bswap = 0;
1561 movop = OPC_MOVBE_GyMy;
1562 }
37c5d0d5
RH
1563
1564 switch (memop & MO_SSIZE) {
1565 case MO_UB:
ee8ba9e4
RH
1566 tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
1567 base, index, 0, ofs);
be5a4eb7 1568 break;
37c5d0d5 1569 case MO_SB:
ee8ba9e4
RH
1570 tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
1571 base, index, 0, ofs);
be5a4eb7 1572 break;
37c5d0d5 1573 case MO_UW:
ee8ba9e4
RH
1574 tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
1575 base, index, 0, ofs);
085bb5bb 1576 if (real_bswap) {
be5a4eb7
RH
1577 tcg_out_rolw_8(s, datalo);
1578 }
1579 break;
37c5d0d5 1580 case MO_SW:
085bb5bb
AJ
1581 if (real_bswap) {
1582 if (have_movbe) {
ee8ba9e4
RH
1583 tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
1584 datalo, base, index, 0, ofs);
085bb5bb 1585 } else {
ee8ba9e4
RH
1586 tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
1587 base, index, 0, ofs);
085bb5bb
AJ
1588 tcg_out_rolw_8(s, datalo);
1589 }
5d8a4f8f
RH
1590 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1591 } else {
ee8ba9e4
RH
1592 tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
1593 datalo, base, index, 0, ofs);
be5a4eb7
RH
1594 }
1595 break;
37c5d0d5 1596 case MO_UL:
ee8ba9e4 1597 tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
be5a4eb7
RH
1598 if (bswap) {
1599 tcg_out_bswap32(s, datalo);
1600 }
1601 break;
5d8a4f8f 1602#if TCG_TARGET_REG_BITS == 64
37c5d0d5 1603 case MO_SL:
085bb5bb 1604 if (real_bswap) {
ee8ba9e4
RH
1605 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1606 base, index, 0, ofs);
085bb5bb
AJ
1607 if (bswap) {
1608 tcg_out_bswap32(s, datalo);
1609 }
5d8a4f8f 1610 tcg_out_ext32s(s, datalo, datalo);
be5a4eb7 1611 } else {
ee8ba9e4
RH
1612 tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
1613 base, index, 0, ofs);
be5a4eb7 1614 }
5d8a4f8f
RH
1615 break;
1616#endif
37c5d0d5 1617 case MO_Q:
5d8a4f8f 1618 if (TCG_TARGET_REG_BITS == 64) {
ee8ba9e4
RH
1619 tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
1620 base, index, 0, ofs);
5d8a4f8f
RH
1621 if (bswap) {
1622 tcg_out_bswap64(s, datalo);
1623 }
1624 } else {
085bb5bb 1625 if (real_bswap) {
5d8a4f8f
RH
1626 int t = datalo;
1627 datalo = datahi;
1628 datahi = t;
1629 }
1630 if (base != datalo) {
ee8ba9e4
RH
1631 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1632 base, index, 0, ofs);
1633 tcg_out_modrm_sib_offset(s, movop + seg, datahi,
1634 base, index, 0, ofs + 4);
5d8a4f8f 1635 } else {
ee8ba9e4
RH
1636 tcg_out_modrm_sib_offset(s, movop + seg, datahi,
1637 base, index, 0, ofs + 4);
1638 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1639 base, index, 0, ofs);
5d8a4f8f
RH
1640 }
1641 if (bswap) {
1642 tcg_out_bswap32(s, datalo);
1643 tcg_out_bswap32(s, datahi);
1644 }
be5a4eb7
RH
1645 }
1646 break;
1647 default:
1648 tcg_abort();
1649 }
1650}
379f6698 1651
c896fe29
FB
1652/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1653 EAX. It will be useful once fixed registers globals are less
1654 common. */
8221a267 1655static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
c896fe29 1656{
7352ee54 1657 TCGReg datalo, datahi, addrlo;
8221a267 1658 TCGReg addrhi __attribute__((unused));
59227d5d 1659 TCGMemOpIdx oi;
8221a267 1660 TCGMemOp opc;
c896fe29 1661#if defined(CONFIG_SOFTMMU)
37c5d0d5 1662 int mem_index;
f6bff89d 1663 tcg_insn_unit *label_ptr[2];
c896fe29
FB
1664#endif
1665
7352ee54 1666 datalo = *args++;
8221a267 1667 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
7352ee54 1668 addrlo = *args++;
8221a267 1669 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
59227d5d
RH
1670 oi = *args++;
1671 opc = get_memop(oi);
c896fe29
FB
1672
1673#if defined(CONFIG_SOFTMMU)
59227d5d 1674 mem_index = get_mmuidx(oi);
1a6dc1e4 1675
8cc580f6 1676 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
8516a044 1677 label_ptr, offsetof(CPUTLBEntry, addr_read));
1a6dc1e4
RH
1678
1679 /* TLB Hit. */
ee8ba9e4 1680 tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
c896fe29 1681
b76f0d8c 1682 /* Record the current context of a load into ldst label */
3972ef6f
RH
1683 add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1684 s->code_ptr, label_ptr);
c896fe29 1685#else
5d8a4f8f 1686 {
b76f21a7 1687 int32_t offset = guest_base;
7352ee54 1688 TCGReg base = addrlo;
ee8ba9e4 1689 int index = -1;
44b37ace
RH
1690 int seg = 0;
1691
ee8ba9e4
RH
1692 /* For a 32-bit guest, the high 32 bits may contain garbage.
1693 We can do this with the ADDR32 prefix if we're not using
1694 a guest base, or when using segmentation. Otherwise we
1695 need to zero-extend manually. */
b76f21a7 1696 if (guest_base == 0 || guest_base_flags) {
44b37ace
RH
1697 seg = guest_base_flags;
1698 offset = 0;
ee8ba9e4
RH
1699 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1700 seg |= P_ADDR32;
1701 }
1702 } else if (TCG_TARGET_REG_BITS == 64) {
1703 if (TARGET_LONG_BITS == 32) {
1704 tcg_out_ext32u(s, TCG_REG_L0, base);
1705 base = TCG_REG_L0;
1706 }
b76f21a7
LV
1707 if (offset != guest_base) {
1708 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
ee8ba9e4
RH
1709 index = TCG_REG_L1;
1710 offset = 0;
1711 }
5d8a4f8f
RH
1712 }
1713
ee8ba9e4
RH
1714 tcg_out_qemu_ld_direct(s, datalo, datahi,
1715 base, index, offset, seg, opc);
5d8a4f8f 1716 }
c896fe29 1717#endif
be5a4eb7 1718}
c896fe29 1719
37c5d0d5
RH
1720static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1721 TCGReg base, intptr_t ofs, int seg,
1722 TCGMemOp memop)
be5a4eb7 1723{
be5a4eb7
RH
1724 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1725 we could perform the bswap twice to restore the original value
1726 instead of moving to the scratch. But as it is, the L constraint
166792f7 1727 means that TCG_REG_L0 is definitely free here. */
37c5d0d5 1728 const TCGReg scratch = TCG_REG_L0;
085bb5bb
AJ
1729 const TCGMemOp real_bswap = memop & MO_BSWAP;
1730 TCGMemOp bswap = real_bswap;
1731 int movop = OPC_MOVL_EvGv;
1732
1733 if (have_movbe && real_bswap) {
1734 bswap = 0;
1735 movop = OPC_MOVBE_MyGy;
1736 }
be5a4eb7 1737
37c5d0d5
RH
1738 switch (memop & MO_SIZE) {
1739 case MO_8:
8589467f 1740 /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
b3e2bc50
RH
1741 Use the scratch register if necessary. */
1742 if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
1743 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1744 datalo = scratch;
1745 }
44b37ace
RH
1746 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
1747 datalo, base, ofs);
c896fe29 1748 break;
37c5d0d5 1749 case MO_16:
c896fe29 1750 if (bswap) {
3b6dac34 1751 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
be5a4eb7
RH
1752 tcg_out_rolw_8(s, scratch);
1753 datalo = scratch;
c896fe29 1754 }
085bb5bb 1755 tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
c896fe29 1756 break;
37c5d0d5 1757 case MO_32:
c896fe29 1758 if (bswap) {
3b6dac34 1759 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
be5a4eb7
RH
1760 tcg_out_bswap32(s, scratch);
1761 datalo = scratch;
c896fe29 1762 }
085bb5bb 1763 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
c896fe29 1764 break;
37c5d0d5 1765 case MO_64:
5d8a4f8f
RH
1766 if (TCG_TARGET_REG_BITS == 64) {
1767 if (bswap) {
1768 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1769 tcg_out_bswap64(s, scratch);
1770 datalo = scratch;
1771 }
085bb5bb 1772 tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
5d8a4f8f 1773 } else if (bswap) {
3b6dac34 1774 tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
be5a4eb7 1775 tcg_out_bswap32(s, scratch);
44b37ace 1776 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
3b6dac34 1777 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
be5a4eb7 1778 tcg_out_bswap32(s, scratch);
44b37ace 1779 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
c896fe29 1780 } else {
085bb5bb
AJ
1781 if (real_bswap) {
1782 int t = datalo;
1783 datalo = datahi;
1784 datahi = t;
1785 }
1786 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1787 tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
c896fe29
FB
1788 }
1789 break;
1790 default:
1791 tcg_abort();
1792 }
c896fe29
FB
1793}
1794
8221a267 1795static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
c896fe29 1796{
7352ee54 1797 TCGReg datalo, datahi, addrlo;
8221a267 1798 TCGReg addrhi __attribute__((unused));
59227d5d 1799 TCGMemOpIdx oi;
8221a267 1800 TCGMemOp opc;
c896fe29 1801#if defined(CONFIG_SOFTMMU)
37c5d0d5 1802 int mem_index;
f6bff89d 1803 tcg_insn_unit *label_ptr[2];
c896fe29
FB
1804#endif
1805
7352ee54 1806 datalo = *args++;
8221a267 1807 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
7352ee54 1808 addrlo = *args++;
8221a267 1809 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
59227d5d
RH
1810 oi = *args++;
1811 opc = get_memop(oi);
c896fe29
FB
1812
1813#if defined(CONFIG_SOFTMMU)
59227d5d 1814 mem_index = get_mmuidx(oi);
1a6dc1e4 1815
8cc580f6 1816 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
8516a044 1817 label_ptr, offsetof(CPUTLBEntry, addr_write));
1a6dc1e4
RH
1818
1819 /* TLB Hit. */
7352ee54 1820 tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
c896fe29 1821
b76f0d8c 1822 /* Record the current context of a store into ldst label */
3972ef6f
RH
1823 add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
1824 s->code_ptr, label_ptr);
b76f0d8c
YL
1825#else
1826 {
b76f21a7 1827 int32_t offset = guest_base;
7352ee54 1828 TCGReg base = addrlo;
b76f0d8c
YL
1829 int seg = 0;
1830
ee8ba9e4 1831 /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */
b76f21a7 1832 if (guest_base == 0 || guest_base_flags) {
b76f0d8c
YL
1833 seg = guest_base_flags;
1834 offset = 0;
ee8ba9e4
RH
1835 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1836 seg |= P_ADDR32;
1837 }
1838 } else if (TCG_TARGET_REG_BITS == 64) {
1839 /* ??? Note that we can't use the same SIB addressing scheme
1840 as for loads, since we require L0 free for bswap. */
b76f21a7 1841 if (offset != guest_base) {
ee8ba9e4
RH
1842 if (TARGET_LONG_BITS == 32) {
1843 tcg_out_ext32u(s, TCG_REG_L0, base);
1844 base = TCG_REG_L0;
1845 }
b76f21a7 1846 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
ee8ba9e4
RH
1847 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1848 base = TCG_REG_L1;
1849 offset = 0;
1850 } else if (TARGET_LONG_BITS == 32) {
1851 tcg_out_ext32u(s, TCG_REG_L1, base);
1852 base = TCG_REG_L1;
1853 }
b76f0d8c
YL
1854 }
1855
7352ee54 1856 tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
b76f0d8c 1857 }
b76f0d8c 1858#endif
b76f0d8c 1859}
c896fe29 1860
a9751609 1861static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
c896fe29
FB
1862 const TCGArg *args, const int *const_args)
1863{
42d5b514
RH
1864 TCGArg a0, a1, a2;
1865 int c, const_a2, vexop, rexw = 0;
5d8a4f8f
RH
1866
1867#if TCG_TARGET_REG_BITS == 64
1868# define OP_32_64(x) \
1869 case glue(glue(INDEX_op_, x), _i64): \
1870 rexw = P_REXW; /* FALLTHRU */ \
1871 case glue(glue(INDEX_op_, x), _i32)
1872#else
1873# define OP_32_64(x) \
1874 case glue(glue(INDEX_op_, x), _i32)
1875#endif
78686523 1876
42d5b514
RH
1877 /* Hoist the loads of the most common arguments. */
1878 a0 = args[0];
1879 a1 = args[1];
1880 a2 = args[2];
1881 const_a2 = const_args[2];
1882
1883 switch (opc) {
c896fe29 1884 case INDEX_op_exit_tb:
42d5b514 1885 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
f6bff89d 1886 tcg_out_jmp(s, tb_ret_addr);
c896fe29
FB
1887 break;
1888 case INDEX_op_goto_tb:
f309101c 1889 if (s->tb_jmp_insn_offset) {
c896fe29 1890 /* direct jump method */
0d07abf0
SF
1891 int gap;
1892 /* jump displacement must be aligned for atomic patching;
1893 * see if we need to add extra nops before jump
1894 */
1895 gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4));
1896 if (gap != 1) {
1897 tcg_out_nopn(s, gap - 1);
1898 }
da441cff 1899 tcg_out8(s, OPC_JMP_long); /* jmp im */
42d5b514 1900 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
c896fe29
FB
1901 tcg_out32(s, 0);
1902 } else {
1903 /* indirect jump method */
9363dedb 1904 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
42d5b514 1905 (intptr_t)(s->tb_jmp_target_addr + a0));
c896fe29 1906 }
42d5b514 1907 s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
c896fe29 1908 break;
c896fe29 1909 case INDEX_op_br:
42d5b514 1910 tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
c896fe29 1911 break;
5d8a4f8f
RH
1912 OP_32_64(ld8u):
1913 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
42d5b514 1914 tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2);
c896fe29 1915 break;
5d8a4f8f 1916 OP_32_64(ld8s):
42d5b514 1917 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2);
c896fe29 1918 break;
5d8a4f8f
RH
1919 OP_32_64(ld16u):
1920 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
42d5b514 1921 tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2);
c896fe29 1922 break;
5d8a4f8f 1923 OP_32_64(ld16s):
42d5b514 1924 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2);
c896fe29 1925 break;
5d8a4f8f
RH
1926#if TCG_TARGET_REG_BITS == 64
1927 case INDEX_op_ld32u_i64:
1928#endif
c896fe29 1929 case INDEX_op_ld_i32:
42d5b514 1930 tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2);
c896fe29 1931 break;
5d8a4f8f
RH
1932
1933 OP_32_64(st8):
5c2d2a9e 1934 if (const_args[0]) {
42d5b514
RH
1935 tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2);
1936 tcg_out8(s, a0);
5c2d2a9e 1937 } else {
42d5b514 1938 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2);
5c2d2a9e 1939 }
c896fe29 1940 break;
5d8a4f8f 1941 OP_32_64(st16):
5c2d2a9e 1942 if (const_args[0]) {
42d5b514
RH
1943 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2);
1944 tcg_out16(s, a0);
5c2d2a9e 1945 } else {
42d5b514 1946 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2);
5c2d2a9e 1947 }
c896fe29 1948 break;
5d8a4f8f
RH
1949#if TCG_TARGET_REG_BITS == 64
1950 case INDEX_op_st32_i64:
1951#endif
c896fe29 1952 case INDEX_op_st_i32:
5c2d2a9e 1953 if (const_args[0]) {
42d5b514
RH
1954 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2);
1955 tcg_out32(s, a0);
5c2d2a9e 1956 } else {
42d5b514 1957 tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2);
5c2d2a9e 1958 }
c896fe29 1959 break;
5d8a4f8f
RH
1960
1961 OP_32_64(add):
5d1e4e85 1962 /* For 3-operand addition, use LEA. */
42d5b514
RH
1963 if (a0 != a1) {
1964 TCGArg c3 = 0;
1965 if (const_a2) {
5d1e4e85
RH
1966 c3 = a2, a2 = -1;
1967 } else if (a0 == a2) {
1968 /* Watch out for dest = src + dest, since we've removed
1969 the matching constraint on the add. */
5d8a4f8f 1970 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
5d1e4e85
RH
1971 break;
1972 }
1973
5d8a4f8f 1974 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
5d1e4e85
RH
1975 break;
1976 }
1977 c = ARITH_ADD;
1978 goto gen_arith;
5d8a4f8f 1979 OP_32_64(sub):
c896fe29
FB
1980 c = ARITH_SUB;
1981 goto gen_arith;
5d8a4f8f 1982 OP_32_64(and):
c896fe29
FB
1983 c = ARITH_AND;
1984 goto gen_arith;
5d8a4f8f 1985 OP_32_64(or):
c896fe29
FB
1986 c = ARITH_OR;
1987 goto gen_arith;
5d8a4f8f 1988 OP_32_64(xor):
c896fe29
FB
1989 c = ARITH_XOR;
1990 goto gen_arith;
c896fe29 1991 gen_arith:
42d5b514
RH
1992 if (const_a2) {
1993 tgen_arithi(s, c + rexw, a0, a2, 0);
c896fe29 1994 } else {
42d5b514 1995 tgen_arithr(s, c + rexw, a0, a2);
c896fe29
FB
1996 }
1997 break;
5d8a4f8f 1998
9d2eec20 1999 OP_32_64(andc):
42d5b514
RH
2000 if (const_a2) {
2001 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
2002 tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0);
9d2eec20 2003 } else {
42d5b514 2004 tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1);
9d2eec20
RH
2005 }
2006 break;
2007
5d8a4f8f 2008 OP_32_64(mul):
42d5b514 2009 if (const_a2) {
c896fe29 2010 int32_t val;
42d5b514 2011 val = a2;
c896fe29 2012 if (val == (int8_t)val) {
42d5b514 2013 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0);
c896fe29
FB
2014 tcg_out8(s, val);
2015 } else {
42d5b514 2016 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0);
c896fe29
FB
2017 tcg_out32(s, val);
2018 }
2019 } else {
42d5b514 2020 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2);
c896fe29
FB
2021 }
2022 break;
5d8a4f8f
RH
2023
2024 OP_32_64(div2):
2025 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
c896fe29 2026 break;
5d8a4f8f
RH
2027 OP_32_64(divu2):
2028 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
c896fe29 2029 break;
5d8a4f8f
RH
2030
2031 OP_32_64(shl):
6a5aed4b
RH
2032 /* For small constant 3-operand shift, use LEA. */
2033 if (const_a2 && a0 != a1 && (a2 - 1) < 3) {
2034 if (a2 - 1 == 0) {
2035 /* shl $1,a1,a0 -> lea (a1,a1),a0 */
2036 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0);
2037 } else {
2038 /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */
2039 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0);
2040 }
2041 break;
2042 }
c896fe29 2043 c = SHIFT_SHL;
6399ab33
RH
2044 vexop = OPC_SHLX;
2045 goto gen_shift_maybe_vex;
5d8a4f8f 2046 OP_32_64(shr):
c896fe29 2047 c = SHIFT_SHR;
6399ab33
RH
2048 vexop = OPC_SHRX;
2049 goto gen_shift_maybe_vex;
5d8a4f8f 2050 OP_32_64(sar):
c896fe29 2051 c = SHIFT_SAR;
6399ab33
RH
2052 vexop = OPC_SARX;
2053 goto gen_shift_maybe_vex;
5d8a4f8f 2054 OP_32_64(rotl):
9619376c 2055 c = SHIFT_ROL;
5d8a4f8f
RH
2056 goto gen_shift;
2057 OP_32_64(rotr):
9619376c 2058 c = SHIFT_ROR;
5d8a4f8f 2059 goto gen_shift;
6399ab33 2060 gen_shift_maybe_vex:
6a5aed4b
RH
2061 if (have_bmi2) {
2062 if (!const_a2) {
2063 tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1);
2064 break;
2065 }
2066 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
6399ab33
RH
2067 }
2068 /* FALLTHRU */
5d8a4f8f 2069 gen_shift:
42d5b514
RH
2070 if (const_a2) {
2071 tcg_out_shifti(s, c + rexw, a0, a2);
81570a70 2072 } else {
42d5b514 2073 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0);
81570a70 2074 }
c896fe29 2075 break;
5d8a4f8f 2076
bbf25f90
RH
2077 OP_32_64(ctz):
2078 tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]);
2079 break;
2080 OP_32_64(clz):
2081 tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]);
2082 break;
993508e4
RH
2083 OP_32_64(ctpop):
2084 tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1);
2085 break;
bbf25f90 2086
c896fe29 2087 case INDEX_op_brcond_i32:
42d5b514 2088 tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
c896fe29 2089 break;
5d8a4f8f 2090 case INDEX_op_setcond_i32:
42d5b514 2091 tcg_out_setcond32(s, args[3], a0, a1, a2, const_a2);
c896fe29 2092 break;
d0a16297 2093 case INDEX_op_movcond_i32:
42d5b514 2094 tcg_out_movcond32(s, args[5], a0, a1, a2, const_a2, args[3]);
d0a16297 2095 break;
c896fe29 2096
5d8a4f8f 2097 OP_32_64(bswap16):
42d5b514 2098 tcg_out_rolw_8(s, a0);
5d40cd63 2099 break;
5d8a4f8f 2100 OP_32_64(bswap32):
42d5b514 2101 tcg_out_bswap32(s, a0);
9619376c
AJ
2102 break;
2103
5d8a4f8f 2104 OP_32_64(neg):
42d5b514 2105 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0);
9619376c 2106 break;
5d8a4f8f 2107 OP_32_64(not):
42d5b514 2108 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
9619376c
AJ
2109 break;
2110
5d8a4f8f 2111 OP_32_64(ext8s):
42d5b514 2112 tcg_out_ext8s(s, a0, a1, rexw);
9619376c 2113 break;
5d8a4f8f 2114 OP_32_64(ext16s):
42d5b514 2115 tcg_out_ext16s(s, a0, a1, rexw);
9619376c 2116 break;
5d8a4f8f 2117 OP_32_64(ext8u):
42d5b514 2118 tcg_out_ext8u(s, a0, a1);
5f0ce17f 2119 break;
5d8a4f8f 2120 OP_32_64(ext16u):
42d5b514 2121 tcg_out_ext16u(s, a0, a1);
5f0ce17f 2122 break;
9619376c 2123
8221a267
RH
2124 case INDEX_op_qemu_ld_i32:
2125 tcg_out_qemu_ld(s, args, 0);
c896fe29 2126 break;
8221a267
RH
2127 case INDEX_op_qemu_ld_i64:
2128 tcg_out_qemu_ld(s, args, 1);
c896fe29 2129 break;
8221a267
RH
2130 case INDEX_op_qemu_st_i32:
2131 tcg_out_qemu_st(s, args, 0);
c896fe29 2132 break;
8221a267
RH
2133 case INDEX_op_qemu_st_i64:
2134 tcg_out_qemu_st(s, args, 1);
c896fe29
FB
2135 break;
2136
624988a5
RH
2137 OP_32_64(mulu2):
2138 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
5d8a4f8f 2139 break;
624988a5
RH
2140 OP_32_64(muls2):
2141 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
2142 break;
2143 OP_32_64(add2):
5d8a4f8f 2144 if (const_args[4]) {
42d5b514 2145 tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1);
5d8a4f8f 2146 } else {
42d5b514 2147 tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]);
5d8a4f8f
RH
2148 }
2149 if (const_args[5]) {
42d5b514 2150 tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1);
5d8a4f8f 2151 } else {
42d5b514 2152 tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]);
5d8a4f8f
RH
2153 }
2154 break;
624988a5 2155 OP_32_64(sub2):
5d8a4f8f 2156 if (const_args[4]) {
42d5b514 2157 tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1);
5d8a4f8f 2158 } else {
42d5b514 2159 tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]);
5d8a4f8f
RH
2160 }
2161 if (const_args[5]) {
42d5b514 2162 tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1);
5d8a4f8f 2163 } else {
42d5b514 2164 tgen_arithr(s, ARITH_SBB + rexw, a1, args[5]);
5d8a4f8f
RH
2165 }
2166 break;
bbc863bf
RH
2167
2168#if TCG_TARGET_REG_BITS == 32
2169 case INDEX_op_brcond2_i32:
2170 tcg_out_brcond2(s, args, const_args, 0);
2171 break;
2172 case INDEX_op_setcond2_i32:
2173 tcg_out_setcond2(s, args, const_args);
2174 break;
5d8a4f8f 2175#else /* TCG_TARGET_REG_BITS == 64 */
5d8a4f8f 2176 case INDEX_op_ld32s_i64:
42d5b514 2177 tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2);
5d8a4f8f
RH
2178 break;
2179 case INDEX_op_ld_i64:
42d5b514 2180 tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2);
5d8a4f8f
RH
2181 break;
2182 case INDEX_op_st_i64:
5c2d2a9e 2183 if (const_args[0]) {
42d5b514
RH
2184 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2);
2185 tcg_out32(s, a0);
5c2d2a9e 2186 } else {
42d5b514 2187 tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2);
5c2d2a9e 2188 }
5d8a4f8f 2189 break;
5d8a4f8f
RH
2190
2191 case INDEX_op_brcond_i64:
42d5b514 2192 tcg_out_brcond64(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
5d8a4f8f
RH
2193 break;
2194 case INDEX_op_setcond_i64:
42d5b514 2195 tcg_out_setcond64(s, args[3], a0, a1, a2, const_a2);
5d8a4f8f 2196 break;
d0a16297 2197 case INDEX_op_movcond_i64:
42d5b514 2198 tcg_out_movcond64(s, args[5], a0, a1, a2, const_a2, args[3]);
d0a16297 2199 break;
5d8a4f8f
RH
2200
2201 case INDEX_op_bswap64_i64:
42d5b514 2202 tcg_out_bswap64(s, a0);
5d8a4f8f 2203 break;
4f2331e5 2204 case INDEX_op_extu_i32_i64:
5d8a4f8f 2205 case INDEX_op_ext32u_i64:
42d5b514 2206 tcg_out_ext32u(s, a0, a1);
5d8a4f8f 2207 break;
4f2331e5 2208 case INDEX_op_ext_i32_i64:
5d8a4f8f 2209 case INDEX_op_ext32s_i64:
42d5b514 2210 tcg_out_ext32s(s, a0, a1);
5d8a4f8f
RH
2211 break;
2212#endif
2213
a4773324
JK
2214 OP_32_64(deposit):
2215 if (args[3] == 0 && args[4] == 8) {
2216 /* load bits 0..7 */
42d5b514 2217 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
a4773324
JK
2218 } else if (args[3] == 8 && args[4] == 8) {
2219 /* load bits 8..15 */
42d5b514 2220 tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
a4773324
JK
2221 } else if (args[3] == 0 && args[4] == 16) {
2222 /* load bits 0..15 */
42d5b514 2223 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0);
a4773324
JK
2224 } else {
2225 tcg_abort();
2226 }
2227 break;
2228
78fdbfb9 2229 case INDEX_op_extract_i64:
42d5b514 2230 if (a2 + args[3] == 32) {
78fdbfb9 2231 /* This is a 32-bit zero-extending right shift. */
42d5b514
RH
2232 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2233 tcg_out_shifti(s, SHIFT_SHR, a0, a2);
78fdbfb9
RH
2234 break;
2235 }
2236 /* FALLTHRU */
2237 case INDEX_op_extract_i32:
2238 /* On the off-chance that we can use the high-byte registers.
2239 Otherwise we emit the same ext16 + shift pattern that we
2240 would have gotten from the normal tcg-op.c expansion. */
42d5b514
RH
2241 tcg_debug_assert(a2 == 8 && args[3] == 8);
2242 if (a1 < 4 && a0 < 8) {
2243 tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
78fdbfb9 2244 } else {
42d5b514
RH
2245 tcg_out_ext16u(s, a0, a1);
2246 tcg_out_shifti(s, SHIFT_SHR, a0, 8);
78fdbfb9
RH
2247 }
2248 break;
2249
2250 case INDEX_op_sextract_i32:
2251 /* We don't implement sextract_i64, as we cannot sign-extend to
2252 64-bits without using the REX prefix that explicitly excludes
2253 access to the high-byte registers. */
42d5b514
RH
2254 tcg_debug_assert(a2 == 8 && args[3] == 8);
2255 if (a1 < 4 && a0 < 8) {
2256 tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
78fdbfb9 2257 } else {
42d5b514
RH
2258 tcg_out_ext16s(s, a0, a1, 0);
2259 tcg_out_shifti(s, SHIFT_SAR, a0, 8);
78fdbfb9
RH
2260 }
2261 break;
2262
a7d00d4e 2263 case INDEX_op_mb:
42d5b514 2264 tcg_out_mb(s, a0);
a7d00d4e 2265 break;
96d0ee7f
RH
2266 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2267 case INDEX_op_mov_i64:
2268 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2269 case INDEX_op_movi_i64:
2270 case INDEX_op_call: /* Always emitted via tcg_out_call. */
c896fe29
FB
2271 default:
2272 tcg_abort();
2273 }
5d8a4f8f
RH
2274
2275#undef OP_32_64
c896fe29
FB
2276}
2277
cd26449a
RH
2278static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2279{
2280 static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } };
2281 static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } };
2282 static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } };
2283 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2284 static const TCGTargetOpDef r_q = { .args_ct_str = { "r", "q" } };
2285 static const TCGTargetOpDef r_re = { .args_ct_str = { "r", "re" } };
2286 static const TCGTargetOpDef r_0 = { .args_ct_str = { "r", "0" } };
6a5aed4b 2287 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
cd26449a
RH
2288 static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } };
2289 static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } };
cd26449a
RH
2290 static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
2291 static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
2292 static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
2293 static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
2294 static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
2295 static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
2296 static const TCGTargetOpDef r_r_L_L
2297 = { .args_ct_str = { "r", "r", "L", "L" } };
2298 static const TCGTargetOpDef L_L_L_L
2299 = { .args_ct_str = { "L", "L", "L", "L" } };
2300
2301 switch (op) {
2302 case INDEX_op_ld8u_i32:
2303 case INDEX_op_ld8u_i64:
2304 case INDEX_op_ld8s_i32:
2305 case INDEX_op_ld8s_i64:
2306 case INDEX_op_ld16u_i32:
2307 case INDEX_op_ld16u_i64:
2308 case INDEX_op_ld16s_i32:
2309 case INDEX_op_ld16s_i64:
2310 case INDEX_op_ld_i32:
2311 case INDEX_op_ld32u_i64:
2312 case INDEX_op_ld32s_i64:
2313 case INDEX_op_ld_i64:
2314 return &r_r;
a4773324 2315
cd26449a
RH
2316 case INDEX_op_st8_i32:
2317 case INDEX_op_st8_i64:
2318 return &qi_r;
2319 case INDEX_op_st16_i32:
2320 case INDEX_op_st16_i64:
2321 case INDEX_op_st_i32:
2322 case INDEX_op_st32_i64:
2323 return &ri_r;
2324 case INDEX_op_st_i64:
2325 return &re_r;
2326
2327 case INDEX_op_add_i32:
2328 case INDEX_op_add_i64:
2329 return &r_r_re;
2330 case INDEX_op_sub_i32:
2331 case INDEX_op_sub_i64:
2332 case INDEX_op_mul_i32:
2333 case INDEX_op_mul_i64:
2334 case INDEX_op_or_i32:
2335 case INDEX_op_or_i64:
2336 case INDEX_op_xor_i32:
2337 case INDEX_op_xor_i64:
2338 return &r_0_re;
2339
2340 case INDEX_op_and_i32:
2341 case INDEX_op_and_i64:
2342 {
2343 static const TCGTargetOpDef and
2344 = { .args_ct_str = { "r", "0", "reZ" } };
2345 return &and;
2346 }
2347 break;
2348 case INDEX_op_andc_i32:
2349 case INDEX_op_andc_i64:
2350 {
2351 static const TCGTargetOpDef andc
2352 = { .args_ct_str = { "r", "r", "rI" } };
2353 return &andc;
2354 }
2355 break;
bbc863bf 2356
cd26449a
RH
2357 case INDEX_op_shl_i32:
2358 case INDEX_op_shl_i64:
2359 case INDEX_op_shr_i32:
2360 case INDEX_op_shr_i64:
2361 case INDEX_op_sar_i32:
2362 case INDEX_op_sar_i64:
6a5aed4b 2363 return have_bmi2 ? &r_r_ri : &r_0_ci;
cd26449a
RH
2364 case INDEX_op_rotl_i32:
2365 case INDEX_op_rotl_i64:
2366 case INDEX_op_rotr_i32:
2367 case INDEX_op_rotr_i64:
2368 return &r_0_ci;
a7d00d4e 2369
cd26449a
RH
2370 case INDEX_op_brcond_i32:
2371 case INDEX_op_brcond_i64:
2372 return &r_re;
1d2699ae 2373
cd26449a
RH
2374 case INDEX_op_bswap16_i32:
2375 case INDEX_op_bswap16_i64:
2376 case INDEX_op_bswap32_i32:
2377 case INDEX_op_bswap32_i64:
2378 case INDEX_op_bswap64_i64:
2379 case INDEX_op_neg_i32:
2380 case INDEX_op_neg_i64:
2381 case INDEX_op_not_i32:
2382 case INDEX_op_not_i64:
2383 return &r_0;
2384
2385 case INDEX_op_ext8s_i32:
2386 case INDEX_op_ext8s_i64:
2387 case INDEX_op_ext8u_i32:
2388 case INDEX_op_ext8u_i64:
2389 return &r_q;
2390 case INDEX_op_ext16s_i32:
2391 case INDEX_op_ext16s_i64:
2392 case INDEX_op_ext16u_i32:
2393 case INDEX_op_ext16u_i64:
2394 case INDEX_op_ext32s_i64:
2395 case INDEX_op_ext32u_i64:
2396 case INDEX_op_ext_i32_i64:
2397 case INDEX_op_extu_i32_i64:
2398 case INDEX_op_extract_i32:
2399 case INDEX_op_extract_i64:
2400 case INDEX_op_sextract_i32:
993508e4
RH
2401 case INDEX_op_ctpop_i32:
2402 case INDEX_op_ctpop_i64:
cd26449a
RH
2403 return &r_r;
2404
2405 case INDEX_op_deposit_i32:
2406 case INDEX_op_deposit_i64:
2407 {
2408 static const TCGTargetOpDef dep
2409 = { .args_ct_str = { "Q", "0", "Q" } };
2410 return &dep;
2411 }
2412 case INDEX_op_setcond_i32:
2413 case INDEX_op_setcond_i64:
2414 {
2415 static const TCGTargetOpDef setc
2416 = { .args_ct_str = { "q", "r", "re" } };
2417 return &setc;
2418 }
2419 case INDEX_op_movcond_i32:
2420 case INDEX_op_movcond_i64:
2421 {
2422 static const TCGTargetOpDef movc
2423 = { .args_ct_str = { "r", "r", "re", "r", "0" } };
2424 return &movc;
2425 }
2426 case INDEX_op_div2_i32:
2427 case INDEX_op_div2_i64:
2428 case INDEX_op_divu2_i32:
2429 case INDEX_op_divu2_i64:
2430 {
2431 static const TCGTargetOpDef div2
2432 = { .args_ct_str = { "a", "d", "0", "1", "r" } };
2433 return &div2;
2434 }
2435 case INDEX_op_mulu2_i32:
2436 case INDEX_op_mulu2_i64:
2437 case INDEX_op_muls2_i32:
2438 case INDEX_op_muls2_i64:
2439 {
2440 static const TCGTargetOpDef mul2
2441 = { .args_ct_str = { "a", "d", "a", "r" } };
2442 return &mul2;
2443 }
2444 case INDEX_op_add2_i32:
2445 case INDEX_op_add2_i64:
2446 case INDEX_op_sub2_i32:
2447 case INDEX_op_sub2_i64:
2448 {
2449 static const TCGTargetOpDef arith2
2450 = { .args_ct_str = { "r", "r", "0", "1", "re", "re" } };
2451 return &arith2;
2452 }
bbf25f90
RH
2453 case INDEX_op_ctz_i32:
2454 case INDEX_op_ctz_i64:
2455 {
2456 static const TCGTargetOpDef ctz[2] = {
9bf38308 2457 { .args_ct_str = { "&r", "r", "r" } },
bbf25f90
RH
2458 { .args_ct_str = { "&r", "r", "rW" } },
2459 };
2460 return &ctz[have_bmi1];
2461 }
2462 case INDEX_op_clz_i32:
2463 case INDEX_op_clz_i64:
2464 {
2465 static const TCGTargetOpDef clz[2] = {
9bf38308 2466 { .args_ct_str = { "&r", "r", "r" } },
bbf25f90
RH
2467 { .args_ct_str = { "&r", "r", "rW" } },
2468 };
2469 return &clz[have_lzcnt];
2470 }
c896fe29 2471
cd26449a
RH
2472 case INDEX_op_qemu_ld_i32:
2473 return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
2474 case INDEX_op_qemu_st_i32:
2475 return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
2476 case INDEX_op_qemu_ld_i64:
2477 return (TCG_TARGET_REG_BITS == 64 ? &r_L
2478 : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
2479 : &r_r_L_L);
2480 case INDEX_op_qemu_st_i64:
2481 return (TCG_TARGET_REG_BITS == 64 ? &L_L
2482 : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L_L
2483 : &L_L_L_L);
f69d277e 2484
cd26449a
RH
2485 case INDEX_op_brcond2_i32:
2486 {
2487 static const TCGTargetOpDef b2
2488 = { .args_ct_str = { "r", "r", "ri", "ri" } };
2489 return &b2;
2490 }
2491 case INDEX_op_setcond2_i32:
2492 {
2493 static const TCGTargetOpDef s2
2494 = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
2495 return &s2;
f69d277e 2496 }
cd26449a
RH
2497
2498 default:
2499 break;
f69d277e
RH
2500 }
2501 return NULL;
2502}
2503
b03cce8e 2504static int tcg_target_callee_save_regs[] = {
5d8a4f8f
RH
2505#if TCG_TARGET_REG_BITS == 64
2506 TCG_REG_RBP,
2507 TCG_REG_RBX,
8d918718
SW
2508#if defined(_WIN64)
2509 TCG_REG_RDI,
2510 TCG_REG_RSI,
2511#endif
5d8a4f8f
RH
2512 TCG_REG_R12,
2513 TCG_REG_R13,
cea5f9a2 2514 TCG_REG_R14, /* Currently used for the global env. */
5d8a4f8f
RH
2515 TCG_REG_R15,
2516#else
cea5f9a2 2517 TCG_REG_EBP, /* Currently used for the global env. */
b03cce8e
FB
2518 TCG_REG_EBX,
2519 TCG_REG_ESI,
2520 TCG_REG_EDI,
5d8a4f8f 2521#endif
b03cce8e
FB
2522};
2523
813da627
RH
/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

/* Bytes occupied by the pushed callee-saved registers plus one extra
   register-sized slot (presumably the return address, which the debug
   frame records in the first slot below the CFA).  */
#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

/* PUSH_SIZE plus the static call-argument area and the TCG temp buffer,
   rounded up to a multiple of TCG_TARGET_STACK_ALIGN.  */
#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
2537
b03cce8e 2538/* Generate global QEMU prologue and epilogue code */
e4d58b41 2539static void tcg_target_qemu_prologue(TCGContext *s)
b03cce8e 2540{
813da627 2541 int i, stack_addend;
78686523 2542
b03cce8e 2543 /* TB prologue */
5d8a4f8f 2544
ac0275dc 2545 /* Reserve some stack space, also for TCG temps. */
813da627 2546 stack_addend = FRAME_SIZE - PUSH_SIZE;
ac0275dc
BS
2547 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2548 CPU_TEMP_BUF_NLONGS * sizeof(long));
2549
2550 /* Save all callee saved registers. */
2551 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2552 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2553 }
2554
6a18ae2d
BS
2555#if TCG_TARGET_REG_BITS == 32
2556 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
2557 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
b18212c6
SW
2558 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2559 /* jmp *tb. */
2560 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
2561 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
2562 + stack_addend);
6a18ae2d 2563#else
cea5f9a2 2564 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
6a18ae2d 2565 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
5d8a4f8f 2566 /* jmp *tb. */
cea5f9a2 2567 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
b18212c6 2568#endif
78686523 2569
b03cce8e
FB
2570 /* TB epilogue */
2571 tb_ret_addr = s->code_ptr;
5d8a4f8f 2572
e83c80f7 2573 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
5d8a4f8f
RH
2574
2575 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
b03cce8e
FB
2576 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2577 }
5d8a4f8f 2578 tcg_out_opc(s, OPC_RET, 0, 0, 0);
44b37ace
RH
2579
2580#if !defined(CONFIG_SOFTMMU)
b76f21a7
LV
2581 /* Try to set up a segment register to point to guest_base. */
2582 if (guest_base) {
44b37ace
RH
2583 setup_guest_base_seg();
2584 }
2585#endif
b03cce8e
FB
2586}
2587
e4d58b41 2588static void tcg_target_init(TCGContext *s)
c896fe29 2589{
774d566c 2590#ifdef CONFIG_CPUID_H
9d2eec20
RH
2591 unsigned a, b, c, d;
2592 int max = __get_cpuid_max(0, 0);
085bb5bb 2593
9d2eec20
RH
2594 if (max >= 1) {
2595 __cpuid(1, a, b, c, d);
2596#ifndef have_cmov
085bb5bb
AJ
2597 /* For 32-bit, 99% certainty that we're running on hardware that
2598 supports cmov, but we still need to check. In case cmov is not
2599 available, we'll use a small forward branch. */
9d2eec20
RH
2600 have_cmov = (d & bit_CMOV) != 0;
2601#endif
2602#ifndef have_movbe
085bb5bb
AJ
2603 /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
2604 need to probe for it. */
9d2eec20 2605 have_movbe = (c & bit_MOVBE) != 0;
993508e4
RH
2606#endif
2607#ifdef bit_POPCNT
2608 have_popcnt = (c & bit_POPCNT) != 0;
9d2eec20 2609#endif
76a347e1 2610 }
9d2eec20
RH
2611
2612 if (max >= 7) {
2613 /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
2614 __cpuid_count(7, 0, a, b, c, d);
2615#ifdef bit_BMI
2616 have_bmi1 = (b & bit_BMI) != 0;
6399ab33
RH
2617#endif
2618#ifndef have_bmi2
2619 have_bmi2 = (b & bit_BMI2) != 0;
76a347e1 2620#endif
9d2eec20 2621 }
774d566c 2622#endif
76a347e1 2623
bbf25f90
RH
2624#ifndef have_lzcnt
2625 max = __get_cpuid_max(0x8000000, 0);
2626 if (max >= 1) {
2627 __cpuid(0x80000001, a, b, c, d);
2628 /* LZCNT was introduced with AMD Barcelona and Intel Haswell CPUs. */
2629 have_lzcnt = (c & bit_LZCNT) != 0;
2630 }
2631#endif
2632
5d8a4f8f
RH
2633 if (TCG_TARGET_REG_BITS == 64) {
2634 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2635 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2636 } else {
2637 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2638 }
4ab50ccf
RH
2639
2640 tcg_regset_clear(tcg_target_call_clobber_regs);
2641 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2642 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2643 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
5d8a4f8f 2644 if (TCG_TARGET_REG_BITS == 64) {
8d918718 2645#if !defined(_WIN64)
5d8a4f8f
RH
2646 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2647 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
8d918718 2648#endif
5d8a4f8f
RH
2649 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2650 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2651 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2652 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2653 }
4ab50ccf 2654
c896fe29 2655 tcg_regset_clear(s->reserved_regs);
e83c80f7 2656 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
c896fe29 2657}
813da627 2658
813da627 2659typedef struct {
e9a9a5b6 2660 DebugFrameHeader h;
497a22eb
RH
2661 uint8_t fde_def_cfa[4];
2662 uint8_t fde_reg_ofs[14];
813da627
RH
2663} DebugFrame;
2664
b5cc476d
RH
2665/* We're expecting a 2 byte uleb128 encoded value. */
2666QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2667
c170cb66
SW
2668#if !defined(__ELF__)
2669 /* Host machine without ELF. */
2670#elif TCG_TARGET_REG_BITS == 64
813da627 2671#define ELF_HOST_MACHINE EM_X86_64
e9a9a5b6
RH
2672static const DebugFrame debug_frame = {
2673 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2674 .h.cie.id = -1,
2675 .h.cie.version = 1,
2676 .h.cie.code_align = 1,
2677 .h.cie.data_align = 0x78, /* sleb128 -8 */
2678 .h.cie.return_column = 16,
813da627 2679
497a22eb 2680 /* Total FDE size does not include the "len" member. */
e9a9a5b6 2681 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
497a22eb
RH
2682
2683 .fde_def_cfa = {
813da627
RH
2684 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2685 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2686 (FRAME_SIZE >> 7)
2687 },
497a22eb 2688 .fde_reg_ofs = {
813da627
RH
2689 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2690 /* The following ordering must match tcg_target_callee_save_regs. */
2691 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2692 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2693 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2694 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2695 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2696 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2697 }
2698};
2699#else
2700#define ELF_HOST_MACHINE EM_386
e9a9a5b6
RH
2701static const DebugFrame debug_frame = {
2702 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2703 .h.cie.id = -1,
2704 .h.cie.version = 1,
2705 .h.cie.code_align = 1,
2706 .h.cie.data_align = 0x7c, /* sleb128 -4 */
2707 .h.cie.return_column = 8,
813da627 2708
497a22eb 2709 /* Total FDE size does not include the "len" member. */
e9a9a5b6 2710 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
497a22eb
RH
2711
2712 .fde_def_cfa = {
813da627
RH
2713 12, 4, /* DW_CFA_def_cfa %esp, ... */
2714 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2715 (FRAME_SIZE >> 7)
2716 },
497a22eb 2717 .fde_reg_ofs = {
813da627
RH
2718 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2719 /* The following ordering must match tcg_target_callee_save_regs. */
2720 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2721 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2722 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2723 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2724 }
2725};
2726#endif
2727
#if defined(ELF_HOST_MACHINE)
/*
 * Register the code buffer @buf of @buf_size bytes together with this
 * backend's static unwind table.  Only built when a debug_frame table
 * was defined for the host.
 */
void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif