/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed. If we know the values match, it
   makes things much cleaner. */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X16 reserved as temporary */
    /* X17 reserved as temporary */
    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped. */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP0 TCG_REG_X16
#define TCG_REG_TMP1 TCG_REG_X17
#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31

#ifndef CONFIG_SOFTMMU
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}
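
/*
 * The offset is counted in 32-bit instruction units (tcg_insn_unit
 * pointer arithmetic), so the signed 26-bit field gives B/BL a reach of
 * +/-2^25 insns, i.e. +/-128MB. E.g. a target two instructions ahead
 * deposits the value 2 into bits [25:0].
 */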

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/* Match a constant valid for addition (12-bit, optionally shifted). */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations. */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field. Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses. */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
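
/*
 * Worked example: val = 0x0ff0 (form 0..01..10..0) has val & -val = 0x0010;
 * adding it carries across the run of ones, leaving 0x1000, which has a
 * single bit set, so is_limm accepts it. For val = 0x0f0f the sum is
 * 0x0f10, which still has multiple bits set, so it is rejected.
 */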

/* Return true if v16 is a valid 16-bit shifted immediate. */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate. */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate. */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate. */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate. */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR. */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
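
/*
 * For example, v32 = 0x00ab00cd: clearing the byte at bits [23:16] leaves
 * 0x000000cd, which is_shimm32 accepts (cmode 0x0, imm8 0xcd), so the
 * function returns i = 4. The caller emits MOVI with that (cmode, imm8)
 * and then ORRs back extract32(v32, 16, 8) = 0xab using cmode 0x4.
 */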

/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn. */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,     /* store */
    LDST_LD = 1,     /* load */
    LDST_LD_S_X = 2, /* load and sign-extend into Xt */
    LDST_LD_S_W = 3, /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function. Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described. */
typedef enum {
    /* Compare and branch (immediate). */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate). */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate). */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register). */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure. */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store exclusive. */
    I3306_LDXP      = 0xc8600000,
    I3306_STXP      = 0xc8200000,

    /* Load/store register. Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13. */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions. */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions. */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions. */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction. */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions. */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions. */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions. */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract extended register instructions. */
    I3501_ADD       = 0x0b200000,

    /* Add/subtract shifted register instructions (without a shift). */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift). */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions. */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions. */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions. */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions. */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions. */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift). */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift). */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP       = 0x0e000400,
    I3605_INS       = 0x4e001c00,
    I3605_UMOV      = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same. */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc. */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions. */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format. */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
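
/*
 * For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
 * tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm), so pairing an
 * opcode with the wrong emitter function fails to compile.
 */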

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
                              TCGReg rt, TCGReg rt2, TCGReg rn)
{
    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12; /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate). Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function. */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}
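
/*
 * E.g. MOVZ x0, #0x1234, LSL #16 is emitted with shift = 16;
 * shift << (21 - 4) is (shift / 16) << 21, i.e. hw = 1 in the 2-bit
 * field at bits [22:21].
 */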

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
                                     TCGType sf, TCGReg rd, TCGReg rn,
                                     TCGReg rm, int opt, int imm3)
{
    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
              imm3 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount. */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift. Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input. Therefore
       we can handle both register sets with one function. */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP). */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM. See the comment above about
   only supporting simplified logical immediates. */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
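
/*
 * Worked example: limm = 0x0000000000ff0000 has h = clz64(limm) = 40 and
 * l = ctz64(limm) = 16, giving r = 64 - 16 = 48 and c = 48 - 40 - 1 = 7.
 * IMMS = 7 selects a run of 8 ones and IMMR = 48 rotates it right into
 * bits [23:16], reproducing the original value.
 */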

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first. */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second. This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications. For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions. Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool. Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP0;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value. For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0. */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially. */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates. For the benefit of 32-bit quantities,
       use the sign-extended value. That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC. This happens
       often when loading pointers to QEMU's own data structures. */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN? */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool. */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
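
/*
 * For example, value = 0xffff0000ffffffff has 48 bits set, so the MOVN
 * path wins: ivalue = 0x0000ffff00000000 gives s0 = 32 and t1 = 0, and a
 * single MOVN rd, #0xffff, LSL #32 materializes the constant. Anything
 * needing three or more MOVZ/MOVK insns goes to the constant pool instead.
 */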

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

/* Define something more legible for general use. */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding. */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset. */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
}
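
/*
 * E.g. an 8-byte load at offset 0x1008 (lgsize 3) is aligned and scales
 * to 0x201 <= 0xfff, so it takes the scaled uimm12 form; offset -8 uses
 * the unscaled [-256, 256) form; a large unaligned offset falls back to
 * materializing the offset in TMP0 and using the register-offset form.
 */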

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
}
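
/*
 * E.g. a 32-bit left shift by 8 becomes UBFM with IMMR = (32 - 8) & 31 = 24
 * and IMMS = (31 - 8) & 31 = 23, which is exactly the encoding the ARM ARM
 * defines for the LSL #8 alias.
 */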

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}
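
/*
 * E.g. depositing an 8-bit field at lsb = 16 of a 32-bit value emits BFM
 * (alias BFI) with IMMR = (32 - 16) & 31 = 16 and IMMS = 8 - 1 = 7,
 * inserting bits [7:0] of rn into bits [23:16] of rd.
 */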

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases. */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        /* Choose X9 as a call-clobbered non-LR temporary. */
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
    }
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP0;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction. However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR. Don't try
               anything more elaborate here than loading another zero. */
            al = TCG_REG_TMP0;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
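
/*
 * DMB_LD and DMB_ST each set one bit of the DMB CRm option field:
 * DMB_ISH | DMB_LD | DMB_ST yields CRm = 0xb ("dmb ish"), while
 * DMB_ISH | DMB_ST alone yields CRm = 0xa ("dmb ishst"), the cheaper
 * store-store barrier used for TCG_MO_ST_ST.
 */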

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP0;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
    }
}
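
/*
 * E.g. ctz is computed as clz(rbit(x)). When the fallback value b equals
 * the operand width, plain CLZ already returns that for a zero input and
 * no select is needed; otherwise CSEL (or CSINV for b == -1) picks between
 * the CLZ result in TMP0 and b, based on the a0 != 0 comparison.
 */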
1593
7f65be51
RH
1594typedef struct {
1595 TCGReg base;
1596 TCGReg index;
1597 TCGType index_ext;
64741d99 1598 TCGAtomAlign aa;
7f65be51
RH
1599} HostAddress;
1600
7b880107
RH
1601bool tcg_target_has_memory_bswap(MemOp memop)
1602{
1603 return false;
1604}
1605
6e96422b 1606static const TCGLdstHelperParam ldst_helper_param = {
d67bcbdd 1607 .ntmp = 1, .tmp = { TCG_REG_TMP0 }
6e96422b
RH
1608};
1609
aeee05f5 1610static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
c6d8ed24 1611{
6e96422b 1612 MemOp opc = get_memop(lb->oi);
929f8b55 1613
ffba3eb3 1614 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
aeee05f5
RH
1615 return false;
1616 }
017a86f7 1617
6e96422b 1618 tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
cee44b03 1619 tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
6e96422b 1620 tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
8587c30c 1621 tcg_out_goto(s, lb->raddr);
aeee05f5 1622 return true;
c6d8ed24
JK
1623}
1624
aeee05f5 1625static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
c6d8ed24 1626{
6e96422b 1627 MemOp opc = get_memop(lb->oi);
929f8b55 1628
ffba3eb3 1629 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
aeee05f5
RH
1630 return false;
1631 }
c6d8ed24 1632
6e96422b 1633 tcg_out_st_helper_args(s, lb, &ldst_helper_param);
cee44b03 1634 tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
8587c30c 1635 tcg_out_goto(s, lb->raddr);
aeee05f5 1636 return true;
c6d8ed24 1637}
65b23204 1638
d0a9bb5e
RH
1639/* We expect to use a 7-bit scaled negative offset from ENV. */
1640#define MIN_TLB_MASK_TABLE_OFS -512
1641
1e612dd6
RH
1642/*
1643 * For softmmu, perform the TLB load and compare.
1644 * For useronly, perform any required alignment tests.
1645 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1646 * is required and fill in @h with the host address for the fast path.
1647 */
1648static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1649 TCGReg addr_reg, MemOpIdx oi,
1650 bool is_ld)
c6d8ed24 1651{
60c452a1 1652 TCGType addr_type = s->addr_type;
1e612dd6
RH
1653 TCGLabelQemuLdst *ldst = NULL;
1654 MemOp opc = get_memop(oi);
929124ec 1655 MemOp s_bits = opc & MO_SIZE;
64741d99
RH
1656 unsigned a_mask;
1657
1658 h->aa = atom_and_align_for_opc(s, opc,
1659 have_lse2 ? MO_ATOM_WITHIN16
1660 : MO_ATOM_IFALIGN,
929124ec 1661 s_bits == MO_128);
64741d99 1662 a_mask = (1 << h->aa.align) - 1;
1e612dd6
RH
1663
1664#ifdef CONFIG_SOFTMMU
85aa8081 1665 unsigned s_mask = (1u << s_bits) - 1;
1e612dd6 1666 unsigned mem_index = get_mmuidx(oi);
285a691f 1667 TCGReg addr_adj;
f7bcd966
RH
1668 TCGType mask_type;
1669 uint64_t compare_mask;
1670
1e612dd6
RH
1671 ldst = new_ldst_label(s);
1672 ldst->is_ld = is_ld;
1673 ldst->oi = oi;
1674 ldst->addrlo_reg = addr_reg;
1675
a66efde1 1676 mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
f7bcd966
RH
1677 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1678
285a691f 1679 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
1e612dd6
RH
1680 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1681 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
285a691f 1682 tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
d0a9bb5e 1683 tlb_mask_table_ofs(s, mem_index), 1, 0);
f7bcd966
RH
1684
1685 /* Extract the TLB index from the address into X0. */
1686 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
285a691f 1687 TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
aece72b7 1688 s->page_bits - CPU_TLB_ENTRY_BITS);
f7bcd966 1689
285a691f
RH
1690 /* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */
1691 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
f7bcd966 1692
285a691f 1693 /* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */
238f4380 1694 QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
285a691f 1695 tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
1e612dd6
RH
1696 is_ld ? offsetof(CPUTLBEntry, addr_read)
1697 : offsetof(CPUTLBEntry, addr_write));
285a691f 1698 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
f7bcd966 1699 offsetof(CPUTLBEntry, addend));
9ee14902 1700
1e612dd6
RH
1701 /*
1702 * For aligned accesses, we check the first byte and include the alignment
 1703 * bits within the address. For unaligned accesses, we use the address
 1704 * of the last byte of the access to check that we don't cross a page.
1705 */
64741d99 1706 if (a_mask >= s_mask) {
285a691f 1707 addr_adj = addr_reg;
9ee14902 1708 } else {
285a691f 1709 addr_adj = TCG_REG_TMP2;
60c452a1 1710 tcg_out_insn(s, 3401, ADDI, addr_type,
285a691f 1711 addr_adj, addr_reg, s_mask - a_mask);
9ee14902 1712 }
aece72b7 1713 compare_mask = (uint64_t)s->page_mask | a_mask;
6f472467 1714
285a691f
RH
1715 /* Store the page mask part of the address into TMP2. */
1716 tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
1717 addr_adj, compare_mask);
6f472467 1718
c6d8ed24 1719 /* Perform the address comparison. */
285a691f 1720 tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
6f472467 1721
c6d8ed24 1722 /* If not equal, we jump to the slow path. */
1e612dd6 1723 ldst->label_ptr[0] = s->code_ptr;
733589b3 1724 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
c6d8ed24 1725
285a691f 1726 h->base = TCG_REG_TMP1;
64741d99
RH
1727 h->index = addr_reg;
1728 h->index_ext = addr_type;
f85ab3d2 1729#else
1e612dd6
RH
1730 if (a_mask) {
1731 ldst = new_ldst_label(s);
f85ab3d2 1732
1e612dd6
RH
1733 ldst->is_ld = is_ld;
1734 ldst->oi = oi;
1735 ldst->addrlo_reg = addr_reg;
f85ab3d2 1736
1e612dd6
RH
1737 /* tst addr, #mask */
1738 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
f85ab3d2 1739
1e612dd6
RH
1740 /* b.ne slow_path */
1741 ldst->label_ptr[0] = s->code_ptr;
1742 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
f85ab3d2
RH
1743 }
1744
52533157 1745 if (guest_base || addr_type == TCG_TYPE_I32) {
64741d99
RH
1746 h->base = TCG_REG_GUEST_BASE;
1747 h->index = addr_reg;
1748 h->index_ext = addr_type;
1e612dd6 1749 } else {
64741d99
RH
1750 h->base = addr_reg;
1751 h->index = TCG_REG_XZR;
1752 h->index_ext = TCG_TYPE_I64;
1e612dd6
RH
1753 }
1754#endif
f85ab3d2 1755
1e612dd6 1756 return ldst;
f85ab3d2 1757}
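
/*
 * As a sketch (not emitted verbatim), the softmmu fast path above
 * assembles to roughly the following for an aligned 64-bit guest access,
 * with TMP0=x16, TMP1=x17, TMP2=x30 and env in x19:
 *
 *    ldp   x16, x17, [x19, #mask_table_ofs]   // f[mmu_idx].{mask,table}
 *    and   x16, x16, addr, lsr #(page_bits - CPU_TLB_ENTRY_BITS)
 *    add   x17, x17, x16                      // &table[index]
 *    ldr   x16, [x17, #addr_read/addr_write]  // tlb comparator
 *    ldr   x17, [x17, #addend]
 *    and   x30, addr, #(page_mask | a_mask)
 *    cmp   x16, x30
 *    b.ne  slow_path
 *    // the fast-path host address is then x17 + addr
 */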
6a91c7c9 1758
14776ab5 1759static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
7f65be51 1760 TCGReg data_r, HostAddress h)
6a91c7c9 1761{
9e4177ad
RH
1762 switch (memop & MO_SSIZE) {
1763 case MO_UB:
7f65be51 1764 tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
6a91c7c9 1765 break;
9e4177ad 1766 case MO_SB:
9c53889b 1767 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
7f65be51 1768 data_r, h.base, h.index_ext, h.index);
6a91c7c9 1769 break;
9e4177ad 1770 case MO_UW:
7f65be51 1771 tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
6a91c7c9 1772 break;
9e4177ad 1773 case MO_SW:
51c559c7 1774 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
7f65be51 1775 data_r, h.base, h.index_ext, h.index);
6a91c7c9 1776 break;
9e4177ad 1777 case MO_UL:
7f65be51 1778 tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
6a91c7c9 1779 break;
9e4177ad 1780 case MO_SL:
7f65be51 1781 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
6a91c7c9 1782 break;
fc313c64 1783 case MO_UQ:
7f65be51 1784 tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
6a91c7c9
JK
1785 break;
1786 default:
732e89f4 1787 g_assert_not_reached();
6a91c7c9
JK
1788 }
1789}
1790
14776ab5 1791static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
7f65be51 1792 TCGReg data_r, HostAddress h)
6a91c7c9 1793{
9e4177ad
RH
1794 switch (memop & MO_SIZE) {
1795 case MO_8:
7f65be51 1796 tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
6a91c7c9 1797 break;
9e4177ad 1798 case MO_16:
7f65be51 1799 tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
6a91c7c9 1800 break;
9e4177ad 1801 case MO_32:
7f65be51 1802 tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
6a91c7c9 1803 break;
9e4177ad 1804 case MO_64:
7f65be51 1805 tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
6a91c7c9
JK
1806 break;
1807 default:
732e89f4 1808 g_assert_not_reached();
6a91c7c9
JK
1809 }
1810}
4a136e0a 1811
667b1cdd 1812static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
ff0cc85e 1813 MemOpIdx oi, TCGType data_type)
4a136e0a 1814{
1e612dd6 1815 TCGLabelQemuLdst *ldst;
7f65be51 1816 HostAddress h;
f85ab3d2 1817
1e612dd6
RH
1818 ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1819 tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
f85ab3d2 1820
1e612dd6
RH
1821 if (ldst) {
1822 ldst->type = data_type;
1823 ldst->datalo_reg = data_reg;
1824 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
352bcb0a 1825 }
4a136e0a
CF
1826}
1827
667b1cdd 1828static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
ff0cc85e 1829 MemOpIdx oi, TCGType data_type)
4a136e0a 1830{
1e612dd6 1831 TCGLabelQemuLdst *ldst;
7f65be51 1832 HostAddress h;
f85ab3d2 1833
1e612dd6
RH
1834 ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1835 tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
7f65be51 1836
1e612dd6
RH
1837 if (ldst) {
1838 ldst->type = data_type;
1839 ldst->datalo_reg = data_reg;
1840 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
f85ab3d2 1841 }
4a136e0a
CF
1842}
1843
929124ec
RH
1844static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1845 TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1846{
1847 TCGLabelQemuLdst *ldst;
1848 HostAddress h;
1849 TCGReg base;
1850 bool use_pair;
1851
1852 ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1853
1854 /* Compose the final address, as LDP/STP have no indexing. */
1855 if (h.index == TCG_REG_XZR) {
1856 base = h.base;
1857 } else {
1858 base = TCG_REG_TMP2;
1859 if (h.index_ext == TCG_TYPE_I32) {
1860 /* add base, base, index, uxtw */
1861 tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
1862 h.base, h.index, MO_32, 0);
1863 } else {
1864 /* add base, base, index */
1865 tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
1866 }
1867 }
1868
1869 use_pair = h.aa.atom < MO_128 || have_lse2;
1870
1871 if (!use_pair) {
1872 tcg_insn_unit *branch = NULL;
1873 TCGReg ll, lh, sl, sh;
1874
1875 /*
1876 * If we have already checked for 16-byte alignment, that's all
1877 * we need. Otherwise we have determined that misaligned atomicity
1878 * may be handled with two 8-byte loads.
1879 */
1880 if (h.aa.align < MO_128) {
1881 /*
 1882 * TODO: align should be MO_64, so we would need to test only bit 3,
1883 * which means we could use TBNZ instead of ANDS+B_C.
1884 */
1885 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
1886 branch = s->code_ptr;
1887 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1888 use_pair = true;
1889 }
1890
1891 if (is_ld) {
1892 /*
1893 * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1894 * ldxp lo, hi, [base]
1895 * stxp t0, lo, hi, [base]
1896 * cbnz t0, .-8
1897 * Require no overlap between data{lo,hi} and base.
1898 */
1899 if (datalo == base || datahi == base) {
1900 tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
1901 base = TCG_REG_TMP2;
1902 }
1903 ll = sl = datalo;
1904 lh = sh = datahi;
1905 } else {
1906 /*
1907 * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1908 * 1: ldxp t0, t1, [base]
1909 * stxp t0, lo, hi, [base]
1910 * cbnz t0, 1b
1911 */
1912 tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
1913 ll = TCG_REG_TMP0;
1914 lh = TCG_REG_TMP1;
1915 sl = datalo;
1916 sh = datahi;
1917 }
1918
1919 tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
1920 tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
1921 tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
1922
1923 if (use_pair) {
1924 /* "b .+8", branching across the one insn of use_pair. */
1925 tcg_out_insn(s, 3206, B, 2);
1926 reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
1927 }
1928 }
1929
1930 if (use_pair) {
1931 if (is_ld) {
1932 tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
1933 } else {
1934 tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
1935 }
1936 }
1937
1938 if (ldst) {
1939 ldst->type = TCG_TYPE_I128;
1940 ldst->datalo_reg = datalo;
1941 ldst->datahi_reg = datahi;
1942 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1943 }
1944}
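
/*
 * Illustrative shape of the code emitted above for a 16-byte atomic
 * load without LSE2, when 16-byte alignment has not been proven:
 *
 *       tst   addr, #15
 *       b.ne  1f                  // misaligned: 8-byte halves suffice
 *       ldxp  lo, hi, [base]      // aligned: full 16-byte atomicity
 *       stxp  w16, lo, hi, [base]
 *       cbnz  w16, .-8
 *       b     2f
 *    1: ldp   lo, hi, [base]
 *    2:
 */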
1945
ffba3eb3 1946static const tcg_insn_unit *tb_ret_addr;
4a136e0a 1947
b55a8d9d
RH
1948static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1949{
1950 /* Reuse the zeroing that exists for goto_ptr. */
1951 if (a0 == 0) {
1952 tcg_out_goto_long(s, tcg_code_gen_epilogue);
1953 } else {
1954 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1955 tcg_out_goto_long(s, tb_ret_addr);
1956 }
1957}
1958
cf7d6b8e
RH
1959static void tcg_out_goto_tb(TCGContext *s, int which)
1960{
1961 /*
d59d83a1
RH
1962 * Direct branch, or indirect address load, will be patched
1963 * by tb_target_set_jmp_target. Assert indirect load offset
1964 * in range early, regardless of direct branch distance.
cf7d6b8e 1965 */
d59d83a1
RH
1966 intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1967 tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1968
cf7d6b8e 1969 set_jmp_insn_offset(s, which);
d59d83a1 1970 tcg_out32(s, I3206_B);
d67bcbdd 1971 tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
cf7d6b8e
RH
1972 set_jmp_reset_offset(s, which);
1973}
1974
d59d83a1
RH
1975void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1976 uintptr_t jmp_rx, uintptr_t jmp_rw)
1977{
1978 uintptr_t d_addr = tb->jmp_target_addr[n];
1979 ptrdiff_t d_offset = d_addr - jmp_rx;
1980 tcg_insn_unit insn;
1981
1982 /* Either directly branch, or indirect branch load. */
1983 if (d_offset == sextract64(d_offset, 0, 28)) {
1984 insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
1985 } else {
1986 uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
1987 ptrdiff_t i_offset = i_addr - jmp_rx;
1988
1989 /* Note that we asserted this in range in tcg_out_goto_tb. */
d67bcbdd 1990 insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
d59d83a1
RH
1991 }
1992 qatomic_set((uint32_t *)jmp_rw, insn);
1993 flush_idcache_range(jmp_rx, jmp_rw, 4);
1994}
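
/*
 * For illustration, each goto_tb slot therefore toggles between two
 * forms, with only the first insn ever rewritten:
 *
 *    b     tb_target            // destination within +/-128MB
 *    br    x16                  // (not reached)
 * vs
 *    ldr   x16, jmp_target_addr // pc-relative literal load
 *    br    x16
 */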
1995
4a136e0a 1996static void tcg_out_op(TCGContext *s, TCGOpcode opc,
8d8db193
RH
1997 const TCGArg args[TCG_MAX_OP_ARGS],
1998 const int const_args[TCG_MAX_OP_ARGS])
4a136e0a 1999{
f0293414
RH
 2000 /* Most of the time, we can select the 32-bit (W) or 64-bit (X) form
 2001 of an operation by checking whether the opcode handles 64-bit data. */
2002 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
4a136e0a 2003
8d8db193
RH
2004 /* Hoist the loads of the most common arguments. */
2005 TCGArg a0 = args[0];
2006 TCGArg a1 = args[1];
2007 TCGArg a2 = args[2];
2008 int c2 = const_args[2];
2009
04ce397b
RH
 2010 /* Some operands use the "rZ" constraint: a register or the zero register.
 2011 Zero is the only constant admitted, so testing const_args[I] suffices. */
2012#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
2013
4a136e0a 2014 switch (opc) {
b19f0c2e
RH
2015 case INDEX_op_goto_ptr:
2016 tcg_out_insn(s, 3207, BR, a0);
2017 break;
2018
4a136e0a 2019 case INDEX_op_br:
bec16311 2020 tcg_out_goto_label(s, arg_label(a0));
4a136e0a
CF
2021 break;
2022
4a136e0a 2023 case INDEX_op_ld8u_i32:
4a136e0a 2024 case INDEX_op_ld8u_i64:
14e4c1e2 2025 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
dc73dfd4
RH
2026 break;
2027 case INDEX_op_ld8s_i32:
14e4c1e2 2028 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
dc73dfd4 2029 break;
4a136e0a 2030 case INDEX_op_ld8s_i64:
14e4c1e2 2031 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
dc73dfd4
RH
2032 break;
2033 case INDEX_op_ld16u_i32:
4a136e0a 2034 case INDEX_op_ld16u_i64:
14e4c1e2 2035 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
dc73dfd4
RH
2036 break;
2037 case INDEX_op_ld16s_i32:
14e4c1e2 2038 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
dc73dfd4 2039 break;
4a136e0a 2040 case INDEX_op_ld16s_i64:
14e4c1e2 2041 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
dc73dfd4
RH
2042 break;
2043 case INDEX_op_ld_i32:
4a136e0a 2044 case INDEX_op_ld32u_i64:
14e4c1e2 2045 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
dc73dfd4 2046 break;
4a136e0a 2047 case INDEX_op_ld32s_i64:
14e4c1e2 2048 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
e81864a1 2049 break;
dc73dfd4 2050 case INDEX_op_ld_i64:
14e4c1e2 2051 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
dc73dfd4
RH
2052 break;
2053
4a136e0a
CF
2054 case INDEX_op_st8_i32:
2055 case INDEX_op_st8_i64:
14e4c1e2 2056 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
dc73dfd4 2057 break;
4a136e0a
CF
2058 case INDEX_op_st16_i32:
2059 case INDEX_op_st16_i64:
14e4c1e2 2060 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
dc73dfd4
RH
2061 break;
2062 case INDEX_op_st_i32:
4a136e0a 2063 case INDEX_op_st32_i64:
14e4c1e2 2064 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
dc73dfd4
RH
2065 break;
2066 case INDEX_op_st_i64:
14e4c1e2 2067 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
4a136e0a
CF
2068 break;
2069
4a136e0a 2070 case INDEX_op_add_i32:
90f1cd91
RH
2071 a2 = (int32_t)a2;
2072 /* FALLTHRU */
2073 case INDEX_op_add_i64:
2074 if (c2) {
2075 tcg_out_addsubi(s, ext, a0, a1, a2);
2076 } else {
2077 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2078 }
4a136e0a
CF
2079 break;
2080
4a136e0a 2081 case INDEX_op_sub_i32:
90f1cd91
RH
2082 a2 = (int32_t)a2;
2083 /* FALLTHRU */
2084 case INDEX_op_sub_i64:
2085 if (c2) {
2086 tcg_out_addsubi(s, ext, a0, a1, -a2);
2087 } else {
2088 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2089 }
4a136e0a
CF
2090 break;
2091
14b155dd
RH
2092 case INDEX_op_neg_i64:
2093 case INDEX_op_neg_i32:
2094 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2095 break;
2096
4a136e0a 2097 case INDEX_op_and_i32:
e029f293
RH
2098 a2 = (int32_t)a2;
2099 /* FALLTHRU */
2100 case INDEX_op_and_i64:
2101 if (c2) {
2102 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2103 } else {
2104 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2105 }
4a136e0a
CF
2106 break;
2107
14b155dd
RH
2108 case INDEX_op_andc_i32:
2109 a2 = (int32_t)a2;
2110 /* FALLTHRU */
2111 case INDEX_op_andc_i64:
2112 if (c2) {
2113 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2114 } else {
2115 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2116 }
2117 break;
2118
4a136e0a 2119 case INDEX_op_or_i32:
e029f293
RH
2120 a2 = (int32_t)a2;
2121 /* FALLTHRU */
2122 case INDEX_op_or_i64:
2123 if (c2) {
2124 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2125 } else {
2126 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2127 }
4a136e0a
CF
2128 break;
2129
14b155dd
RH
2130 case INDEX_op_orc_i32:
2131 a2 = (int32_t)a2;
2132 /* FALLTHRU */
2133 case INDEX_op_orc_i64:
2134 if (c2) {
2135 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2136 } else {
2137 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2138 }
2139 break;
2140
4a136e0a 2141 case INDEX_op_xor_i32:
e029f293
RH
2142 a2 = (int32_t)a2;
2143 /* FALLTHRU */
2144 case INDEX_op_xor_i64:
2145 if (c2) {
2146 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2147 } else {
2148 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2149 }
4a136e0a
CF
2150 break;
2151
14b155dd
RH
2152 case INDEX_op_eqv_i32:
2153 a2 = (int32_t)a2;
2154 /* FALLTHRU */
2155 case INDEX_op_eqv_i64:
2156 if (c2) {
2157 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2158 } else {
2159 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2160 }
2161 break;
2162
2163 case INDEX_op_not_i64:
2164 case INDEX_op_not_i32:
2165 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2166 break;
2167
4a136e0a 2168 case INDEX_op_mul_i64:
4a136e0a 2169 case INDEX_op_mul_i32:
8678b71c
RH
2170 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2171 break;
2172
2173 case INDEX_op_div_i64:
2174 case INDEX_op_div_i32:
2175 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2176 break;
2177 case INDEX_op_divu_i64:
2178 case INDEX_op_divu_i32:
2179 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2180 break;
2181
2182 case INDEX_op_rem_i64:
2183 case INDEX_op_rem_i32:
d67bcbdd
RH
2184 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
2185 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
8678b71c
RH
2186 break;
2187 case INDEX_op_remu_i64:
2188 case INDEX_op_remu_i32:
d67bcbdd
RH
2189 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
2190 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
4a136e0a
CF
2191 break;
2192
2193 case INDEX_op_shl_i64:
4a136e0a 2194 case INDEX_op_shl_i32:
df9351e3 2195 if (c2) {
8d8db193 2196 tcg_out_shl(s, ext, a0, a1, a2);
df9351e3
RH
2197 } else {
2198 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
4a136e0a
CF
2199 }
2200 break;
2201
2202 case INDEX_op_shr_i64:
4a136e0a 2203 case INDEX_op_shr_i32:
df9351e3 2204 if (c2) {
8d8db193 2205 tcg_out_shr(s, ext, a0, a1, a2);
df9351e3
RH
2206 } else {
2207 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
4a136e0a
CF
2208 }
2209 break;
2210
2211 case INDEX_op_sar_i64:
4a136e0a 2212 case INDEX_op_sar_i32:
df9351e3 2213 if (c2) {
8d8db193 2214 tcg_out_sar(s, ext, a0, a1, a2);
df9351e3
RH
2215 } else {
2216 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
4a136e0a
CF
2217 }
2218 break;
2219
2220 case INDEX_op_rotr_i64:
4a136e0a 2221 case INDEX_op_rotr_i32:
df9351e3 2222 if (c2) {
8d8db193 2223 tcg_out_rotr(s, ext, a0, a1, a2);
df9351e3
RH
2224 } else {
2225 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
4a136e0a
CF
2226 }
2227 break;
2228
2229 case INDEX_op_rotl_i64:
df9351e3
RH
2230 case INDEX_op_rotl_i32:
2231 if (c2) {
8d8db193 2232 tcg_out_rotl(s, ext, a0, a1, a2);
4a136e0a 2233 } else {
d67bcbdd
RH
2234 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
2235 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
4a136e0a
CF
2236 }
2237 break;
2238
53c76c19
RH
2239 case INDEX_op_clz_i64:
2240 case INDEX_op_clz_i32:
2241 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2242 break;
2243 case INDEX_op_ctz_i64:
2244 case INDEX_op_ctz_i32:
2245 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2246 break;
2247
8d8db193 2248 case INDEX_op_brcond_i32:
90f1cd91
RH
2249 a1 = (int32_t)a1;
2250 /* FALLTHRU */
2251 case INDEX_op_brcond_i64:
bec16311 2252 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
4a136e0a
CF
2253 break;
2254
4a136e0a 2255 case INDEX_op_setcond_i32:
90f1cd91
RH
2256 a2 = (int32_t)a2;
2257 /* FALLTHRU */
2258 case INDEX_op_setcond_i64:
2259 tcg_out_cmp(s, ext, a1, a2, c2);
ed7a0aa8
RH
2260 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
2261 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2262 TCG_REG_XZR, tcg_invert_cond(args[3]));
4a136e0a
CF
2263 break;
2264
f58a7dea
RH
2265 case INDEX_op_negsetcond_i32:
2266 a2 = (int32_t)a2;
2267 /* FALLTHRU */
2268 case INDEX_op_negsetcond_i64:
2269 tcg_out_cmp(s, ext, a1, a2, c2);
2270 /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). */
2271 tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR,
2272 TCG_REG_XZR, tcg_invert_cond(args[3]));
2273 break;
2274
04ce397b
RH
2275 case INDEX_op_movcond_i32:
2276 a2 = (int32_t)a2;
2277 /* FALLTHRU */
2278 case INDEX_op_movcond_i64:
2279 tcg_out_cmp(s, ext, a1, a2, c2);
2280 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2281 break;
2282
fecccfcc
RH
2283 case INDEX_op_qemu_ld_a32_i32:
2284 case INDEX_op_qemu_ld_a64_i32:
2285 case INDEX_op_qemu_ld_a32_i64:
2286 case INDEX_op_qemu_ld_a64_i64:
59227d5d 2287 tcg_out_qemu_ld(s, a0, a1, a2, ext);
4a136e0a 2288 break;
fecccfcc
RH
2289 case INDEX_op_qemu_st_a32_i32:
2290 case INDEX_op_qemu_st_a64_i32:
2291 case INDEX_op_qemu_st_a32_i64:
2292 case INDEX_op_qemu_st_a64_i64:
ff0cc85e 2293 tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
4a136e0a 2294 break;
929124ec
RH
2295 case INDEX_op_qemu_ld_a32_i128:
2296 case INDEX_op_qemu_ld_a64_i128:
2297 tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
2298 break;
2299 case INDEX_op_qemu_st_a32_i128:
2300 case INDEX_op_qemu_st_a64_i128:
2301 tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
2302 break;
4a136e0a 2303
f0293414 2304 case INDEX_op_bswap64_i64:
dfa24dfa 2305 tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
edd8824c
RH
2306 break;
2307 case INDEX_op_bswap32_i64:
8fcfc6bf
RH
2308 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2309 if (a2 & TCG_BSWAP_OS) {
52bf3398 2310 tcg_out_ext32s(s, a0, a0);
8fcfc6bf
RH
2311 }
2312 break;
9c4a059d 2313 case INDEX_op_bswap32_i32:
dfa24dfa 2314 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
9c4a059d
CF
2315 break;
2316 case INDEX_op_bswap16_i64:
2317 case INDEX_op_bswap16_i32:
dfa24dfa 2318 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
8fcfc6bf
RH
2319 if (a2 & TCG_BSWAP_OS) {
2320 /* Output must be sign-extended. */
753e42ea 2321 tcg_out_ext16s(s, ext, a0, a0);
8fcfc6bf
RH
2322 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2323 /* Output must be zero-extended, but input isn't. */
379afdff 2324 tcg_out_ext16u(s, a0, a0);
8fcfc6bf 2325 }
9c4a059d
CF
2326 break;
2327
b3c56df7
RH
2328 case INDEX_op_deposit_i64:
2329 case INDEX_op_deposit_i32:
2330 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2331 break;
2332
e2179f94
RH
2333 case INDEX_op_extract_i64:
2334 case INDEX_op_extract_i32:
2335 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2336 break;
2337
2338 case INDEX_op_sextract_i64:
2339 case INDEX_op_sextract_i32:
2340 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2341 break;
2342
464c2969
RH
2343 case INDEX_op_extract2_i64:
2344 case INDEX_op_extract2_i32:
1789d427 2345 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
464c2969
RH
2346 break;
2347
c6e929e7
RH
2348 case INDEX_op_add2_i32:
2349 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2350 (int32_t)args[4], args[5], const_args[4],
2351 const_args[5], false);
2352 break;
2353 case INDEX_op_add2_i64:
2354 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2355 args[5], const_args[4], const_args[5], false);
2356 break;
2357 case INDEX_op_sub2_i32:
2358 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2359 (int32_t)args[4], args[5], const_args[4],
2360 const_args[5], true);
2361 break;
2362 case INDEX_op_sub2_i64:
2363 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2364 args[5], const_args[4], const_args[5], true);
2365 break;
2366
1fcc9ddf
RH
2367 case INDEX_op_muluh_i64:
2368 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2369 break;
2370 case INDEX_op_mulsh_i64:
2371 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2372 break;
2373
c7a59c2a
PK
2374 case INDEX_op_mb:
2375 tcg_out_mb(s, a0);
2376 break;
2377
96d0ee7f 2378 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
a51a6b6a 2379 case INDEX_op_mov_i64:
96d0ee7f 2380 case INDEX_op_call: /* Always emitted via tcg_out_call. */
b55a8d9d 2381 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */
cf7d6b8e 2382 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */
678155b2
RH
2383 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */
2384 case INDEX_op_ext8s_i64:
d0e66c89
RH
2385 case INDEX_op_ext8u_i32:
2386 case INDEX_op_ext8u_i64:
753e42ea
RH
2387 case INDEX_op_ext16s_i64:
2388 case INDEX_op_ext16s_i32:
379afdff
RH
2389 case INDEX_op_ext16u_i64:
2390 case INDEX_op_ext16u_i32:
52bf3398 2391 case INDEX_op_ext32s_i64:
9ecf5f61 2392 case INDEX_op_ext32u_i64:
9c6aa274 2393 case INDEX_op_ext_i32_i64:
b9bfe000 2394 case INDEX_op_extu_i32_i64:
b8b94ac6 2395 case INDEX_op_extrl_i64_i32:
4a136e0a 2396 default:
14e4c1e2 2397 g_assert_not_reached();
4a136e0a 2398 }
04ce397b
RH
2399
2400#undef REG0
4a136e0a
CF
2401}
2402
14e4c1e2
RH
2403static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2404 unsigned vecl, unsigned vece,
5e8892db
MR
2405 const TCGArg args[TCG_MAX_OP_ARGS],
2406 const int const_args[TCG_MAX_OP_ARGS])
14e4c1e2 2407{
d81bad24 2408 static const AArch64Insn cmp_vec_insn[16] = {
14e4c1e2
RH
2409 [TCG_COND_EQ] = I3616_CMEQ,
2410 [TCG_COND_GT] = I3616_CMGT,
2411 [TCG_COND_GE] = I3616_CMGE,
2412 [TCG_COND_GTU] = I3616_CMHI,
2413 [TCG_COND_GEU] = I3616_CMHS,
2414 };
d81bad24
RH
2415 static const AArch64Insn cmp_scalar_insn[16] = {
2416 [TCG_COND_EQ] = I3611_CMEQ,
2417 [TCG_COND_GT] = I3611_CMGT,
2418 [TCG_COND_GE] = I3611_CMGE,
2419 [TCG_COND_GTU] = I3611_CMHI,
2420 [TCG_COND_GEU] = I3611_CMHS,
2421 };
2422 static const AArch64Insn cmp0_vec_insn[16] = {
14e4c1e2
RH
2423 [TCG_COND_EQ] = I3617_CMEQ0,
2424 [TCG_COND_GT] = I3617_CMGT0,
2425 [TCG_COND_GE] = I3617_CMGE0,
2426 [TCG_COND_LT] = I3617_CMLT0,
2427 [TCG_COND_LE] = I3617_CMLE0,
2428 };
d81bad24
RH
2429 static const AArch64Insn cmp0_scalar_insn[16] = {
2430 [TCG_COND_EQ] = I3612_CMEQ0,
2431 [TCG_COND_GT] = I3612_CMGT0,
2432 [TCG_COND_GE] = I3612_CMGE0,
2433 [TCG_COND_LT] = I3612_CMLT0,
2434 [TCG_COND_LE] = I3612_CMLE0,
2435 };
14e4c1e2
RH
2436
2437 TCGType type = vecl + TCG_TYPE_V64;
2438 unsigned is_q = vecl;
d81bad24 2439 bool is_scalar = !is_q && vece == MO_64;
a9e434a5 2440 TCGArg a0, a1, a2, a3;
9e27f58b 2441 int cmode, imm8;
14e4c1e2
RH
2442
2443 a0 = args[0];
2444 a1 = args[1];
2445 a2 = args[2];
2446
2447 switch (opc) {
2448 case INDEX_op_ld_vec:
2449 tcg_out_ld(s, type, a0, a1, a2);
2450 break;
2451 case INDEX_op_st_vec:
2452 tcg_out_st(s, type, a0, a1, a2);
2453 break;
37ee55a0
RH
2454 case INDEX_op_dupm_vec:
2455 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2456 break;
14e4c1e2 2457 case INDEX_op_add_vec:
d81bad24
RH
2458 if (is_scalar) {
2459 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2460 } else {
2461 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2462 }
14e4c1e2
RH
2463 break;
2464 case INDEX_op_sub_vec:
d81bad24
RH
2465 if (is_scalar) {
2466 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2467 } else {
2468 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2469 }
14e4c1e2
RH
2470 break;
2471 case INDEX_op_mul_vec:
2472 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2473 break;
2474 case INDEX_op_neg_vec:
d81bad24
RH
2475 if (is_scalar) {
2476 tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2477 } else {
2478 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2479 }
14e4c1e2 2480 break;
a456394a 2481 case INDEX_op_abs_vec:
d81bad24
RH
2482 if (is_scalar) {
2483 tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2484 } else {
2485 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2486 }
a456394a 2487 break;
14e4c1e2 2488 case INDEX_op_and_vec:
9e27f58b
RH
2489 if (const_args[2]) {
2490 is_shimm1632(~a2, &cmode, &imm8);
2491 if (a0 == a1) {
2492 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2493 return;
2494 }
2495 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2496 a2 = a0;
2497 }
14e4c1e2
RH
2498 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2499 break;
2500 case INDEX_op_or_vec:
9e27f58b
RH
2501 if (const_args[2]) {
2502 is_shimm1632(a2, &cmode, &imm8);
2503 if (a0 == a1) {
2504 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2505 return;
2506 }
2507 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2508 a2 = a0;
2509 }
14e4c1e2
RH
2510 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2511 break;
14e4c1e2 2512 case INDEX_op_andc_vec:
9e27f58b
RH
2513 if (const_args[2]) {
2514 is_shimm1632(a2, &cmode, &imm8);
2515 if (a0 == a1) {
2516 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2517 return;
2518 }
2519 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2520 a2 = a0;
2521 }
14e4c1e2
RH
2522 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2523 break;
2524 case INDEX_op_orc_vec:
9e27f58b
RH
2525 if (const_args[2]) {
2526 is_shimm1632(~a2, &cmode, &imm8);
2527 if (a0 == a1) {
2528 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2529 return;
2530 }
2531 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2532 a2 = a0;
2533 }
14e4c1e2
RH
2534 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2535 break;
9e27f58b
RH
2536 case INDEX_op_xor_vec:
2537 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2538 break;
d32648d4 2539 case INDEX_op_ssadd_vec:
d81bad24
RH
2540 if (is_scalar) {
2541 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2542 } else {
2543 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2544 }
d32648d4
RH
2545 break;
2546 case INDEX_op_sssub_vec:
d81bad24
RH
2547 if (is_scalar) {
2548 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2549 } else {
2550 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2551 }
d32648d4
RH
2552 break;
2553 case INDEX_op_usadd_vec:
d81bad24
RH
2554 if (is_scalar) {
2555 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2556 } else {
2557 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2558 }
d32648d4
RH
2559 break;
2560 case INDEX_op_ussub_vec:
d81bad24
RH
2561 if (is_scalar) {
2562 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2563 } else {
2564 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2565 }
d32648d4 2566 break;
93f332a5
RH
2567 case INDEX_op_smax_vec:
2568 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2569 break;
2570 case INDEX_op_smin_vec:
2571 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2572 break;
2573 case INDEX_op_umax_vec:
2574 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2575 break;
2576 case INDEX_op_umin_vec:
2577 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2578 break;
14e4c1e2
RH
2579 case INDEX_op_not_vec:
2580 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2581 break;
14e4c1e2 2582 case INDEX_op_shli_vec:
d81bad24
RH
2583 if (is_scalar) {
2584 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2585 } else {
2586 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2587 }
14e4c1e2
RH
2588 break;
2589 case INDEX_op_shri_vec:
d81bad24
RH
2590 if (is_scalar) {
2591 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2592 } else {
2593 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2594 }
14e4c1e2
RH
2595 break;
2596 case INDEX_op_sari_vec:
d81bad24
RH
2597 if (is_scalar) {
2598 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2599 } else {
2600 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2601 }
14e4c1e2 2602 break;
7cff8988 2603 case INDEX_op_aa64_sli_vec:
d81bad24
RH
2604 if (is_scalar) {
2605 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2606 } else {
2607 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2608 }
7cff8988 2609 break;
79525dfd 2610 case INDEX_op_shlv_vec:
d81bad24
RH
2611 if (is_scalar) {
2612 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2613 } else {
2614 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2615 }
79525dfd
RH
2616 break;
2617 case INDEX_op_aa64_sshl_vec:
d81bad24
RH
2618 if (is_scalar) {
2619 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2620 } else {
2621 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2622 }
79525dfd 2623 break;
14e4c1e2
RH
2624 case INDEX_op_cmp_vec:
2625 {
2626 TCGCond cond = args[3];
2627 AArch64Insn insn;
2628
2629 if (cond == TCG_COND_NE) {
2630 if (const_args[2]) {
d81bad24
RH
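                /*
                 * Comparing != 0 against a constant zero operand is a
                 * per-lane bit test: CMTST writes -1 to a lane when
                 * (a1 & a1) != 0, i.e. when the lane is nonzero.
                 */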
2631 if (is_scalar) {
2632 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2633 } else {
2634 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2635 }
14e4c1e2 2636 } else {
d81bad24
RH
2637 if (is_scalar) {
2638 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2639 } else {
2640 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2641 }
14e4c1e2
RH
2642 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2643 }
2644 } else {
2645 if (const_args[2]) {
d81bad24
RH
2646 if (is_scalar) {
2647 insn = cmp0_scalar_insn[cond];
2648 if (insn) {
2649 tcg_out_insn_3612(s, insn, vece, a0, a1);
2650 break;
2651 }
2652 } else {
2653 insn = cmp0_vec_insn[cond];
2654 if (insn) {
2655 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2656 break;
2657 }
14e4c1e2 2658 }
d67bcbdd
RH
2659 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
2660 a2 = TCG_VEC_TMP0;
14e4c1e2 2661 }
d81bad24
RH
2662 if (is_scalar) {
2663 insn = cmp_scalar_insn[cond];
2664 if (insn == 0) {
2665 TCGArg t;
2666 t = a1, a1 = a2, a2 = t;
2667 cond = tcg_swap_cond(cond);
2668 insn = cmp_scalar_insn[cond];
2669 tcg_debug_assert(insn != 0);
2670 }
2671 tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2672 } else {
2673 insn = cmp_vec_insn[cond];
2674 if (insn == 0) {
2675 TCGArg t;
2676 t = a1, a1 = a2, a2 = t;
2677 cond = tcg_swap_cond(cond);
2678 insn = cmp_vec_insn[cond];
2679 tcg_debug_assert(insn != 0);
2680 }
2681 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
14e4c1e2 2682 }
14e4c1e2
RH
2683 }
2684 }
2685 break;
bab1671f 2686
a9e434a5
RH
2687 case INDEX_op_bitsel_vec:
2688 a3 = args[3];
2689 if (a0 == a3) {
2690 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2691 } else if (a0 == a2) {
2692 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2693 } else {
2694 if (a0 != a1) {
2695 tcg_out_mov(s, type, a0, a1);
2696 }
2697 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2698 }
2699 break;
2700
bab1671f 2701 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
bab1671f 2702 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
14e4c1e2
RH
2703 default:
2704 g_assert_not_reached();
2705 }
2706}
2707
2708int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2709{
2710 switch (opc) {
2711 case INDEX_op_add_vec:
2712 case INDEX_op_sub_vec:
14e4c1e2
RH
2713 case INDEX_op_and_vec:
2714 case INDEX_op_or_vec:
2715 case INDEX_op_xor_vec:
2716 case INDEX_op_andc_vec:
2717 case INDEX_op_orc_vec:
2718 case INDEX_op_neg_vec:
a456394a 2719 case INDEX_op_abs_vec:
14e4c1e2
RH
2720 case INDEX_op_not_vec:
2721 case INDEX_op_cmp_vec:
2722 case INDEX_op_shli_vec:
2723 case INDEX_op_shri_vec:
2724 case INDEX_op_sari_vec:
d32648d4
RH
2725 case INDEX_op_ssadd_vec:
2726 case INDEX_op_sssub_vec:
2727 case INDEX_op_usadd_vec:
2728 case INDEX_op_ussub_vec:
79525dfd 2729 case INDEX_op_shlv_vec:
a9e434a5 2730 case INDEX_op_bitsel_vec:
14e4c1e2 2731 return 1;
7cff8988 2732 case INDEX_op_rotli_vec:
79525dfd
RH
2733 case INDEX_op_shrv_vec:
2734 case INDEX_op_sarv_vec:
7cff8988
RH
2735 case INDEX_op_rotlv_vec:
2736 case INDEX_op_rotrv_vec:
79525dfd 2737 return -1;
e65a5f22 2738 case INDEX_op_mul_vec:
a7b6d286
RH
2739 case INDEX_op_smax_vec:
2740 case INDEX_op_smin_vec:
2741 case INDEX_op_umax_vec:
2742 case INDEX_op_umin_vec:
e65a5f22 2743 return vece < MO_64;
14e4c1e2
RH
2744
2745 default:
2746 return 0;
2747 }
2748}
2749
2750void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2751 TCGArg a0, ...)
2752{
79525dfd 2753 va_list va;
10061ffe 2754 TCGv_vec v0, v1, v2, t1, t2, c1;
7cff8988 2755 TCGArg a2;
79525dfd
RH
2756
2757 va_start(va, a0);
2758 v0 = temp_tcgv_vec(arg_temp(a0));
2759 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
7cff8988 2760 a2 = va_arg(va, TCGArg);
2dfa2f18 2761 va_end(va);
79525dfd
RH
2762
2763 switch (opc) {
7cff8988
RH
2764 case INDEX_op_rotli_vec:
2765 t1 = tcg_temp_new_vec(type);
2766 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2767 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2768 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2769 tcg_temp_free_vec(t1);
2770 break;
2771
79525dfd
RH
2772 case INDEX_op_shrv_vec:
2773 case INDEX_op_sarv_vec:
2774 /* Right shifts are negative left shifts for AArch64. */
2dfa2f18 2775 v2 = temp_tcgv_vec(arg_temp(a2));
79525dfd
RH
2776 t1 = tcg_temp_new_vec(type);
2777 tcg_gen_neg_vec(vece, t1, v2);
2778 opc = (opc == INDEX_op_shrv_vec
2779 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2780 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2781 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2782 tcg_temp_free_vec(t1);
2783 break;
2784
7cff8988 2785 case INDEX_op_rotlv_vec:
2dfa2f18 2786 v2 = temp_tcgv_vec(arg_temp(a2));
7cff8988 2787 t1 = tcg_temp_new_vec(type);
10061ffe
RH
2788 c1 = tcg_constant_vec(type, vece, 8 << vece);
2789 tcg_gen_sub_vec(vece, t1, v2, c1);
7cff8988
RH
2790 /* Right shifts are negative left shifts for AArch64. */
2791 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2792 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2793 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2794 tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2795 tcg_gen_or_vec(vece, v0, v0, t1);
2796 tcg_temp_free_vec(t1);
2797 break;
2798
2799 case INDEX_op_rotrv_vec:
2dfa2f18 2800 v2 = temp_tcgv_vec(arg_temp(a2));
7cff8988
RH
2801 t1 = tcg_temp_new_vec(type);
2802 t2 = tcg_temp_new_vec(type);
10061ffe 2803 c1 = tcg_constant_vec(type, vece, 8 << vece);
7cff8988 2804 tcg_gen_neg_vec(vece, t1, v2);
10061ffe 2805 tcg_gen_sub_vec(vece, t2, c1, v2);
7cff8988
RH
2806 /* Right shifts are negative left shifts for AArch64. */
2807 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2808 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2809 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2810 tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2811 tcg_gen_or_vec(vece, v0, t1, t2);
2812 tcg_temp_free_vec(t1);
2813 tcg_temp_free_vec(t2);
2814 break;
2815
79525dfd
RH
2816 default:
2817 g_assert_not_reached();
2818 }
14e4c1e2
RH
2819}
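
/*
 * Worked example of the rotlv expansion above, for 32-bit lanes:
 * rotl(x, n) == (x << n) | (x >> (32 - n)). AArch64 has only a
 * register-count left shift (USHL, where a negative count shifts
 * right), so USHL(x, n - 32) produces the right shift by 32 - n,
 * USHL(x, n) produces the left part, and the two are ORed into v0.
 */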
2820
39e7522b
RH
2821static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2822{
1897cc2e
RH
2823 switch (op) {
2824 case INDEX_op_goto_ptr:
39e7522b 2825 return C_O0_I1(r);
f69d277e 2826
1897cc2e
RH
2827 case INDEX_op_ld8u_i32:
2828 case INDEX_op_ld8s_i32:
2829 case INDEX_op_ld16u_i32:
2830 case INDEX_op_ld16s_i32:
2831 case INDEX_op_ld_i32:
2832 case INDEX_op_ld8u_i64:
2833 case INDEX_op_ld8s_i64:
2834 case INDEX_op_ld16u_i64:
2835 case INDEX_op_ld16s_i64:
2836 case INDEX_op_ld32u_i64:
2837 case INDEX_op_ld32s_i64:
2838 case INDEX_op_ld_i64:
2839 case INDEX_op_neg_i32:
2840 case INDEX_op_neg_i64:
2841 case INDEX_op_not_i32:
2842 case INDEX_op_not_i64:
2843 case INDEX_op_bswap16_i32:
2844 case INDEX_op_bswap32_i32:
2845 case INDEX_op_bswap16_i64:
2846 case INDEX_op_bswap32_i64:
2847 case INDEX_op_bswap64_i64:
2848 case INDEX_op_ext8s_i32:
2849 case INDEX_op_ext16s_i32:
2850 case INDEX_op_ext8u_i32:
2851 case INDEX_op_ext16u_i32:
2852 case INDEX_op_ext8s_i64:
2853 case INDEX_op_ext16s_i64:
2854 case INDEX_op_ext32s_i64:
2855 case INDEX_op_ext8u_i64:
2856 case INDEX_op_ext16u_i64:
2857 case INDEX_op_ext32u_i64:
2858 case INDEX_op_ext_i32_i64:
2859 case INDEX_op_extu_i32_i64:
2860 case INDEX_op_extract_i32:
2861 case INDEX_op_extract_i64:
2862 case INDEX_op_sextract_i32:
2863 case INDEX_op_sextract_i64:
39e7522b 2864 return C_O1_I1(r, r);
1897cc2e
RH
2865
2866 case INDEX_op_st8_i32:
2867 case INDEX_op_st16_i32:
2868 case INDEX_op_st_i32:
2869 case INDEX_op_st8_i64:
2870 case INDEX_op_st16_i64:
2871 case INDEX_op_st32_i64:
2872 case INDEX_op_st_i64:
39e7522b 2873 return C_O0_I2(rZ, r);
1897cc2e
RH
2874
2875 case INDEX_op_add_i32:
2876 case INDEX_op_add_i64:
2877 case INDEX_op_sub_i32:
2878 case INDEX_op_sub_i64:
2879 case INDEX_op_setcond_i32:
2880 case INDEX_op_setcond_i64:
f58a7dea
RH
2881 case INDEX_op_negsetcond_i32:
2882 case INDEX_op_negsetcond_i64:
39e7522b 2883 return C_O1_I2(r, r, rA);
1897cc2e
RH
2884
2885 case INDEX_op_mul_i32:
2886 case INDEX_op_mul_i64:
2887 case INDEX_op_div_i32:
2888 case INDEX_op_div_i64:
2889 case INDEX_op_divu_i32:
2890 case INDEX_op_divu_i64:
2891 case INDEX_op_rem_i32:
2892 case INDEX_op_rem_i64:
2893 case INDEX_op_remu_i32:
2894 case INDEX_op_remu_i64:
2895 case INDEX_op_muluh_i64:
2896 case INDEX_op_mulsh_i64:
39e7522b 2897 return C_O1_I2(r, r, r);
1897cc2e
RH
2898
2899 case INDEX_op_and_i32:
2900 case INDEX_op_and_i64:
2901 case INDEX_op_or_i32:
2902 case INDEX_op_or_i64:
2903 case INDEX_op_xor_i32:
2904 case INDEX_op_xor_i64:
2905 case INDEX_op_andc_i32:
2906 case INDEX_op_andc_i64:
2907 case INDEX_op_orc_i32:
2908 case INDEX_op_orc_i64:
2909 case INDEX_op_eqv_i32:
2910 case INDEX_op_eqv_i64:
39e7522b 2911 return C_O1_I2(r, r, rL);
1897cc2e
RH
2912
2913 case INDEX_op_shl_i32:
2914 case INDEX_op_shr_i32:
2915 case INDEX_op_sar_i32:
2916 case INDEX_op_rotl_i32:
2917 case INDEX_op_rotr_i32:
2918 case INDEX_op_shl_i64:
2919 case INDEX_op_shr_i64:
2920 case INDEX_op_sar_i64:
2921 case INDEX_op_rotl_i64:
2922 case INDEX_op_rotr_i64:
39e7522b 2923 return C_O1_I2(r, r, ri);
1897cc2e
RH
2924
2925 case INDEX_op_clz_i32:
2926 case INDEX_op_ctz_i32:
2927 case INDEX_op_clz_i64:
2928 case INDEX_op_ctz_i64:
39e7522b 2929 return C_O1_I2(r, r, rAL);
1897cc2e
RH
2930
2931 case INDEX_op_brcond_i32:
2932 case INDEX_op_brcond_i64:
39e7522b 2933 return C_O0_I2(r, rA);
1897cc2e
RH
2934
2935 case INDEX_op_movcond_i32:
2936 case INDEX_op_movcond_i64:
39e7522b 2937 return C_O1_I4(r, r, rA, rZ, rZ);
1897cc2e 2938
fecccfcc
RH
2939 case INDEX_op_qemu_ld_a32_i32:
2940 case INDEX_op_qemu_ld_a64_i32:
2941 case INDEX_op_qemu_ld_a32_i64:
2942 case INDEX_op_qemu_ld_a64_i64:
285a691f 2943 return C_O1_I1(r, r);
929124ec
RH
2944 case INDEX_op_qemu_ld_a32_i128:
2945 case INDEX_op_qemu_ld_a64_i128:
2946 return C_O2_I1(r, r, r);
fecccfcc
RH
2947 case INDEX_op_qemu_st_a32_i32:
2948 case INDEX_op_qemu_st_a64_i32:
2949 case INDEX_op_qemu_st_a32_i64:
2950 case INDEX_op_qemu_st_a64_i64:
285a691f 2951 return C_O0_I2(rZ, r);
929124ec
RH
2952 case INDEX_op_qemu_st_a32_i128:
2953 case INDEX_op_qemu_st_a64_i128:
2954 return C_O0_I3(rZ, rZ, r);
1897cc2e
RH
2955
2956 case INDEX_op_deposit_i32:
2957 case INDEX_op_deposit_i64:
39e7522b 2958 return C_O1_I2(r, 0, rZ);
1897cc2e 2959
464c2969
RH
2960 case INDEX_op_extract2_i32:
2961 case INDEX_op_extract2_i64:
39e7522b 2962 return C_O1_I2(r, rZ, rZ);
464c2969 2963
1897cc2e
RH
2964 case INDEX_op_add2_i32:
2965 case INDEX_op_add2_i64:
2966 case INDEX_op_sub2_i32:
2967 case INDEX_op_sub2_i64:
39e7522b 2968 return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
1897cc2e 2969
14e4c1e2
RH
2970 case INDEX_op_add_vec:
2971 case INDEX_op_sub_vec:
2972 case INDEX_op_mul_vec:
14e4c1e2 2973 case INDEX_op_xor_vec:
d32648d4
RH
2974 case INDEX_op_ssadd_vec:
2975 case INDEX_op_sssub_vec:
2976 case INDEX_op_usadd_vec:
2977 case INDEX_op_ussub_vec:
93f332a5
RH
2978 case INDEX_op_smax_vec:
2979 case INDEX_op_smin_vec:
2980 case INDEX_op_umax_vec:
2981 case INDEX_op_umin_vec:
79525dfd
RH
2982 case INDEX_op_shlv_vec:
2983 case INDEX_op_shrv_vec:
2984 case INDEX_op_sarv_vec:
2985 case INDEX_op_aa64_sshl_vec:
39e7522b 2986 return C_O1_I2(w, w, w);
14e4c1e2
RH
2987 case INDEX_op_not_vec:
2988 case INDEX_op_neg_vec:
a456394a 2989 case INDEX_op_abs_vec:
14e4c1e2
RH
2990 case INDEX_op_shli_vec:
2991 case INDEX_op_shri_vec:
2992 case INDEX_op_sari_vec:
39e7522b 2993 return C_O1_I1(w, w);
14e4c1e2 2994 case INDEX_op_ld_vec:
37ee55a0 2995 case INDEX_op_dupm_vec:
39e7522b
RH
2996 return C_O1_I1(w, r);
2997 case INDEX_op_st_vec:
2998 return C_O0_I2(w, r);
14e4c1e2 2999 case INDEX_op_dup_vec:
39e7522b 3000 return C_O1_I1(w, wr);
9e27f58b
RH
3001 case INDEX_op_or_vec:
3002 case INDEX_op_andc_vec:
39e7522b 3003 return C_O1_I2(w, w, wO);
9e27f58b
RH
3004 case INDEX_op_and_vec:
3005 case INDEX_op_orc_vec:
39e7522b 3006 return C_O1_I2(w, w, wN);
14e4c1e2 3007 case INDEX_op_cmp_vec:
39e7522b 3008 return C_O1_I2(w, w, wZ);
a9e434a5 3009 case INDEX_op_bitsel_vec:
39e7522b 3010 return C_O1_I3(w, w, w, w);
7cff8988 3011 case INDEX_op_aa64_sli_vec:
39e7522b 3012 return C_O1_I2(w, 0, w);
14e4c1e2 3013
1897cc2e 3014 default:
39e7522b 3015 g_assert_not_reached();
f69d277e 3016 }
f69d277e
RH
3017}
3018
4a136e0a
CF
3019static void tcg_target_init(TCGContext *s)
3020{
f46934df
RH
3021 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
3022 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
14e4c1e2
RH
3023 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3024 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
f46934df 3025
14e4c1e2 3026 tcg_target_call_clobber_regs = -1ull;
f46934df
RH
3027 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
3028 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
3029 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
3030 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
3031 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
3032 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
3033 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
3034 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
3035 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
3036 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
3037 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
14e4c1e2
RH
3038 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3039 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3040 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3041 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3042 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3043 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3044 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3045 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
4a136e0a 3046
ccb1bb66 3047 s->reserved_regs = 0;
4a136e0a
CF
3048 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3049 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
4a136e0a 3050 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
d67bcbdd 3051 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
da4d0d95
RH
3052 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3053 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
d67bcbdd 3054 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
4a136e0a
CF
3055}
3056
38d195aa
RH
3057/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
3058#define PUSH_SIZE ((30 - 19 + 1) * 8)
3059
3060#define FRAME_SIZE \
3061 ((PUSH_SIZE \
3062 + TCG_STATIC_CALL_ARGS_SIZE \
3063 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
3064 + TCG_TARGET_STACK_ALIGN - 1) \
3065 & ~(TCG_TARGET_STACK_ALIGN - 1))
3066
 3067/* We're expecting a 2-byte uleb128 encoded value. */
3068QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3069
3070/* We're expecting to use a single ADDI insn. */
3071QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
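
/*
 * Worked out: PUSH_SIZE covers the pairs (x19,x20) .. (x27,x28) plus
 * (fp, lr): 12 registers * 8 bytes = 96 bytes. The residue
 * FRAME_SIZE - PUSH_SIZE must fit the unscaled 12-bit immediate of the
 * single ADDI/SUBI used when the prologue and epilogue adjust SP.
 */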
3072
4a136e0a
CF
3073static void tcg_target_qemu_prologue(TCGContext *s)
3074{
4a136e0a
CF
3075 TCGReg r;
3076
95f72aa9
RH
3077 /* Push (FP, LR) and allocate space for all saved registers. */
3078 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
38d195aa 3079 TCG_REG_SP, -PUSH_SIZE, 1, 1);
4a136e0a 3080
d82b78e4 3081 /* Set up frame pointer for canonical unwinding. */
929f8b55 3082 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
4a136e0a 3083
d82b78e4 3084 /* Store callee-preserved regs x19..x28. */
4a136e0a 3085 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
95f72aa9
RH
3086 int ofs = (r - TCG_REG_X19 + 2) * 8;
3087 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
4a136e0a
CF
3088 }
3089
096c46c0
RH
3090 /* Make stack space for TCG locals. */
3091 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
38d195aa 3092 FRAME_SIZE - PUSH_SIZE);
096c46c0 3093
95f72aa9 3094 /* Inform TCG about how to find TCG locals with register, offset, size. */
4a136e0a
CF
3095 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3096 CPU_TEMP_BUF_NLONGS * sizeof(long));
3097
4cbea598 3098#if !defined(CONFIG_SOFTMMU)
52533157
RH
3099 /*
3100 * Note that XZR cannot be encoded in the address base register slot,
4daad8d9 3101 * as that actually encodes SP. Depending on the guest, we may need
52533157
RH
3102 * to zero-extend the guest address via the address index register slot,
3103 * therefore we need to load even a zero guest base into a register.
3104 */
3105 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3106 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
6a91c7c9
JK
3107#endif
3108
4a136e0a 3109 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
81d8a5ee 3110 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
4a136e0a 3111
b19f0c2e
RH
3112 /*
3113 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3114 * and fall through to the rest of the epilogue.
3115 */
c8bc1168 3116 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
b19f0c2e
RH
3117 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3118
3119 /* TB epilogue */
ffba3eb3 3120 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
4a136e0a 3121
096c46c0
RH
3122 /* Remove TCG locals stack space. */
3123 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
38d195aa 3124 FRAME_SIZE - PUSH_SIZE);
4a136e0a 3125
95f72aa9 3126 /* Restore registers x19..x28. */
4a136e0a 3127 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
95f72aa9
RH
3128 int ofs = (r - TCG_REG_X19 + 2) * 8;
3129 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
4a136e0a
CF
3130 }
3131
95f72aa9
RH
3132 /* Pop (FP, LR), restore SP to previous frame. */
3133 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
38d195aa 3134 TCG_REG_SP, PUSH_SIZE, 0, 1);
81d8a5ee 3135 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
4a136e0a 3136}
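
/*
 * Roughly, the prologue above assembles to the following (user-only
 * builds additionally load guest_base into x28):
 *
 *    stp   x29, x30, [sp, #-96]!      // push (fp, lr); PUSH_SIZE = 96
 *    mov   x29, sp
 *    stp   x19, x20, [sp, #16]
 *    ...
 *    stp   x27, x28, [sp, #80]
 *    sub   sp, sp, #(FRAME_SIZE - 96)
 *    mov   x19, x0                    // AREG0 = env
 *    br    x1                         // enter the generated code
 */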
38d195aa 3137
55129955
RH
3138static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3139{
3140 int i;
3141 for (i = 0; i < count; ++i) {
3142 p[i] = NOP;
3143 }
3144}
3145
38d195aa 3146typedef struct {
3d9bddb3 3147 DebugFrameHeader h;
38d195aa
RH
3148 uint8_t fde_def_cfa[4];
3149 uint8_t fde_reg_ofs[24];
3150} DebugFrame;
3151
3152#define ELF_HOST_MACHINE EM_AARCH64
3153
3d9bddb3
RH
3154static const DebugFrame debug_frame = {
3155 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3156 .h.cie.id = -1,
3157 .h.cie.version = 1,
3158 .h.cie.code_align = 1,
3159 .h.cie.data_align = 0x78, /* sleb128 -8 */
3160 .h.cie.return_column = TCG_REG_LR,
38d195aa
RH
3161
3162 /* Total FDE size does not include the "len" member. */
3d9bddb3 3163 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
38d195aa
RH
3164
3165 .fde_def_cfa = {
3166 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
3167 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
3168 (FRAME_SIZE >> 7)
3169 },
3170 .fde_reg_ofs = {
3171 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
3172 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
3173 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
3174 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
3175 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
3176 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
3177 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
3178 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
3179 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
 3180 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
3181 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
3182 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
3183 }
3184};
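
/*
 * Example of the two-byte uleb128 encoding used for FRAME_SIZE above:
 * a hypothetical frame of 1248 (0x4e0) bytes is emitted as 0xe0 0x09,
 * i.e. (0x4e0 & 0x7f) | 0x80 followed by 0x4e0 >> 7. The build-time
 * assertion FRAME_SIZE < (1 << 14) guarantees two bytes always suffice.
 */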
3185
755bf9e5 3186void tcg_register_jit(const void *buf, size_t buf_size)
38d195aa 3187{
38d195aa
RH
3188 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3189}