/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X16 reserved as temporary */
    /* X17 reserved as temporary */
    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP0  TCG_REG_X16
#define TCG_REG_TMP1  TCG_REG_X17
#define TCG_REG_TMP2  TCG_REG_X30
#define TCG_VEC_TMP0  TCG_REG_V31

#define TCG_REG_GUEST_BASE  TCG_REG_X28

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

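/*
 * For example, 0xfff and 0xfff000 are both valid addition immediates
 * (the latter via the LSL #12 form), while 0x1001 is not, since its
 * set bits do not fit within either 12-bit window.
 */
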
/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}

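/*
 * A worked example of the run-of-ones test above: for val = 0x0ff0,
 * val & -val is the lowest set bit, 0x0010; adding it carries through
 * the run of ones and yields 0x1000, a power of two, so the final
 * test accepts.  For val = 0x0f0f the sum is 0x0f10, not a power of
 * two, so the scattered pattern is rejected.
 */
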
/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}

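/*
 * An illustrative case: v32 = 0x00ff00ff is not a valid shifted
 * immediate by itself, but masking out the byte at i = 4 leaves
 * 0x000000ff, a valid MOVI (cmode 0x0, imm8 0xff); the caller then
 * ORRs back extract32(v32, 16, 8) = 0xff using cmode 4.
 */
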
/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store exclusive.  */
    I3306_LDXP      = 0xc8600000,
    I3306_STXP      = 0xc8200000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract extended register instructions.  */
    I3501_ADD       = 0x0b200000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP       = 0x0e000400,
    I3605_INS       = 0x4e001c00,
    I3605_UMOV      = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,

    BTI_C           = 0xd503245f,
    BTI_J           = 0xd503249f,
    BTI_JC          = 0xd50324df,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)

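/*
 * For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands
 * to tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm), so pairing
 * an opcode with the wrong format fails to compile instead of silently
 * mis-encoding.
 */
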
static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
                              TCGReg rt, TCGReg rt2, TCGReg rn)
{
    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field.  */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
                                     TCGType sf, TCGReg rd, TCGReg rn,
                                     TCGReg rm, int opt, int imm3)
{
    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
              imm3 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for the
   rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
{
    /*
     * While BTI insns are nops on hosts without FEAT_BTI,
     * there is no point in emitting them in that case either.
     */
    if (cpuinfo & CPUINFO_BTI) {
        tcg_out32(s, insn);
    }
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP). */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}

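/*
 * A worked example: limm = 0x0ff0 has l = 4 trailing zeros and
 * h = 52 leading zeros, giving r = 64 - 4 = 60 and c = 60 - 52 - 1 = 7,
 * i.e. a run of c + 1 = 8 ones rotated right by 60:
 * 0xff ror 60 = 0x0ff0.
 */
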
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

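/*
 * As an example of the all-bytes-0x00-or-0xff case above: for
 * v64 = 0x00ff00ff00ff00ff, bytes 0, 2, 4 and 6 are 0xff, so
 * imm8 = 0x55 and a single MOVI (op = 1, cmode 0xe) expands each
 * imm8 bit to a full byte of the vector.
 */
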
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP0;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}

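/*
 * For example, value = 0xffffffff0000ffff has 48 bits set, so the MOVN
 * path is chosen: t0 = ~value = 0x00000000ffff0000, s0 = 16, t1 = 0,
 * and the single insn MOVN rd, #0xffff, lsl #16 materializes it.
 */
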
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference.  */
    g_assert_not_reached();
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding.  */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
}

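/*
 * For example, with lgsize = 3: offset 0x7ff8 is aligned and scales to
 * 0xfff, so the uimm12 form is used; offset -8 falls back to the
 * unscaled 9-bit signed form; offset 0x12345 is neither, and needs the
 * register-offset form via TMP0.
 */
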
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
}

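/*
 * The above computes the standard LSL alias of UBFM: e.g. a left shift
 * by 8 of a 64-bit value becomes UBFM rd, rn, #56, #55, rotating the
 * field so that bit 0 of rn lands in bit 8 of rd.
 */
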
static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

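/*
 * For instance, tcg_out_dep(s, 1, rd, rn, 8, 8) computes a = 56 and
 * b = 7, emitting BFM rd, rn, #56, #7, the BFI rd, rn, #8, #8 alias,
 * which deposits the low 8 bits of rn at bit 8 of rd.
 */
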
static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP0;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP0;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP0;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
    }
}

typedef struct {
    TCGReg base;
    TCGReg index;
    TCGType index_ext;
    TCGAtomAlign aa;
} HostAddress;

bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}

static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
    tcg_out_goto(s, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
    return true;
}

/* We expect to use a 7-bit scaled negative offset from ENV.  */
#define MIN_TLB_MASK_TABLE_OFS  -512

1e612dd6 1643/*
7893e42d
PMD
1644 * For system-mode, perform the TLB load and compare.
1645 * For user-mode, perform any required alignment tests.
1e612dd6
RH
1646 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1647 * is required and fill in @h with the host address for the fast path.
1648 */
1649static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1650 TCGReg addr_reg, MemOpIdx oi,
1651 bool is_ld)
c6d8ed24 1652{
60c452a1 1653 TCGType addr_type = s->addr_type;
1e612dd6
RH
1654 TCGLabelQemuLdst *ldst = NULL;
1655 MemOp opc = get_memop(oi);
929124ec 1656 MemOp s_bits = opc & MO_SIZE;
64741d99
RH
1657 unsigned a_mask;
1658
1659 h->aa = atom_and_align_for_opc(s, opc,
1660 have_lse2 ? MO_ATOM_WITHIN16
1661 : MO_ATOM_IFALIGN,
929124ec 1662 s_bits == MO_128);
64741d99 1663 a_mask = (1 << h->aa.align) - 1;

    if (tcg_use_softmmu) {
        unsigned s_mask = (1u << s_bits) - 1;
        unsigned mem_index = get_mmuidx(oi);
        TCGReg addr_adj;
        TCGType mask_type;
        uint64_t compare_mask;

        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addr_reg;

        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
                     ? TCG_TYPE_I64 : TCG_TYPE_I32);

        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
                     tlb_mask_table_ofs(s, mem_index), 1, 0);

        /* Extract the TLB index from the address into TMP0. */
        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
                     s->page_bits - CPU_TLB_ENTRY_BITS);

        /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);

        /* Load the tlb comparator into TMP0, and the fast path addend. */
        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
                   is_ld ? offsetof(CPUTLBEntry, addr_read)
                         : offsetof(CPUTLBEntry, addr_write));
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
                   offsetof(CPUTLBEntry, addend));

        /*
         * For aligned accesses, we check the first byte and include
         * the alignment bits within the address.  For unaligned access,
         * we check that we don't cross pages using the address of the
         * last byte of the access.
         */
        if (a_mask >= s_mask) {
            addr_adj = addr_reg;
        } else {
            addr_adj = TCG_REG_TMP2;
            tcg_out_insn(s, 3401, ADDI, addr_type,
                         addr_adj, addr_reg, s_mask - a_mask);
        }
        compare_mask = (uint64_t)s->page_mask | a_mask;

        /* Store the page mask part of the address into TMP2. */
        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
                         addr_adj, compare_mask);

        /* Perform the address comparison. */
        tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);

        /* If not equal, we jump to the slow path. */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

        h->base = TCG_REG_TMP1;
        h->index = addr_reg;
        h->index_ext = addr_type;
    } else {
        if (a_mask) {
            ldst = new_ldst_label(s);

            ldst->is_ld = is_ld;
            ldst->oi = oi;
            ldst->addrlo_reg = addr_reg;

            /* tst addr, #mask */
            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

            /* b.ne slow_path */
            ldst->label_ptr[0] = s->code_ptr;
            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
        }

        if (guest_base || addr_type == TCG_TYPE_I32) {
            h->base = TCG_REG_GUEST_BASE;
            h->index = addr_reg;
            h->index_ext = addr_type;
        } else {
            h->base = addr_reg;
            h->index = TCG_REG_XZR;
            h->index_ext = TCG_TYPE_I64;
        }
    }

    return ldst;
}
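/*
 * For reference, the softmmu fast path above expands roughly to the
 * following (tmp0 = x16, tmp1 = x17, tmp2 = x30; a sketch, offsets
 * and register choices are illustrative):
 *
 *     ldp   x16, x17, [x19, #tlb_mask_table_ofs]
 *     and   x16, x16, addr, lsr #(page_bits - CPU_TLB_ENTRY_BITS)
 *     add   x17, x17, x16                  ; &tlb[index]
 *     ldr   x16, [x17, #addr_read/addr_write]
 *     ldr   x17, [x17, #addend]
 *     and   x30, addr_adj, #(page_mask | a_mask)
 *     cmp   x16, x30
 *     b.ne  slow_path
 *
 * after which the fast-path host address is x17 + addr.
 */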

static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SW:
        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SL:
        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UQ:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_16:
        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_32:
        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_64:
        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;
    TCGReg base;
    bool use_pair;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

    /* Compose the final address, as LDP/STP have no indexing. */
    if (h.index == TCG_REG_XZR) {
        base = h.base;
    } else {
        base = TCG_REG_TMP2;
        if (h.index_ext == TCG_TYPE_I32) {
            /* add base, base, index, uxtw */
            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
                         h.base, h.index, MO_32, 0);
        } else {
            /* add base, base, index */
            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
        }
    }

    use_pair = h.aa.atom < MO_128 || have_lse2;

    if (!use_pair) {
        tcg_insn_unit *branch = NULL;
        TCGReg ll, lh, sl, sh;

        /*
         * If we have already checked for 16-byte alignment, that's all
         * we need.  Otherwise we have determined that misaligned atomicity
         * may be handled with two 8-byte loads.
         */
        if (h.aa.align < MO_128) {
            /*
             * TODO: align should be MO_64, so we only need test bit 3,
             * which means we could use TBNZ instead of ANDS+B_C.
             */
            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
            branch = s->code_ptr;
            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
            use_pair = true;
        }

        if (is_ld) {
            /*
             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
             *    ldxp lo, hi, [base]
             *    stxp t0, lo, hi, [base]
             *    cbnz t0, .-8
             * Require no overlap between data{lo,hi} and base.
             */
            if (datalo == base || datahi == base) {
                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
                base = TCG_REG_TMP2;
            }
            ll = sl = datalo;
            lh = sh = datahi;
        } else {
            /*
             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
             *    1: ldxp t0, t1, [base]
             *       stxp t0, lo, hi, [base]
             *       cbnz t0, 1b
             */
            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
            ll = TCG_REG_TMP0;
            lh = TCG_REG_TMP1;
            sl = datalo;
            sh = datahi;
        }

        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
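        /* CBNZ counts in instructions: -2 loops back to the LDXP. */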
        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);

        if (use_pair) {
            /* "b .+8", branching across the one insn of use_pair. */
            tcg_out_insn(s, 3206, B, 2);
            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
        }
    }

    if (use_pair) {
        if (is_ld) {
            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
        } else {
            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
        }
    }

    if (ldst) {
        ldst->type = TCG_TYPE_I128;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static const tcg_insn_unit *tb_ret_addr;

static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
    const tcg_insn_unit *target;
    ptrdiff_t offset;

    /* Reuse the zeroing that exists for goto_ptr. */
    if (a0 == 0) {
        target = tcg_code_gen_epilogue;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
        target = tb_ret_addr;
    }

    offset = tcg_pcrel_diff(s, target) >> 2;
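    /* B encodes a signed 26-bit word offset, i.e. +/-128MiB of reach. */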
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        /*
         * Only x16/x17 generate BTI type Jump (2),
         * other registers generate BTI type Jump|Call (3).
         */
        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
    }
}

static void tcg_out_goto_tb(TCGContext *s, int which)
{
    /*
     * Direct branch, or indirect address load, will be patched
     * by tb_target_set_jmp_target.  Assert indirect load offset
     * in range early, regardless of direct branch distance.
     */
    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));

    set_jmp_insn_offset(s, which);
    tcg_out32(s, I3206_B);
    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
    set_jmp_reset_offset(s, which);
    tcg_out_bti(s, BTI_J);
}
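/*
 * At runtime, tb_target_set_jmp_target below rewrites the first of the
 * two instructions emitted above, giving one of these sequences
 * (a sketch):
 *
 *     b    <target TB>              ; taken: the br is never reached
 *     br   x16
 * or
 *     ldr  x16, <jmp_target_addr>   ; pc-relative literal load
 *     br   x16
 */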

void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    uintptr_t d_addr = tb->jmp_target_addr[n];
    ptrdiff_t d_offset = d_addr - jmp_rx;
    tcg_insn_unit insn;

    /* Either directly branch, or indirect branch load. */
    if (d_offset == sextract64(d_offset, 0, 28)) {
        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
    } else {
        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
        ptrdiff_t i_offset = i_addr - jmp_rx;

        /* Note that we asserted this in range in tcg_out_goto_tb. */
        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
    }
    qatomic_set((uint32_t *)jmp_rw, insn);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
}

static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data. */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments. */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0. */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
        break;

    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
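            /* There is no ROLV; rotate left by N is rotate right by -N. */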
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
        }
        break;

    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_negsetcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_negsetcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). */
        tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
        break;
    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
        break;
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, a0, a0);
        }
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            /* Output must be sign-extended. */
            tcg_out_ext16s(s, ext, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            /* Output must be zero-extended, but input isn't. */
            tcg_out_ext16u(s, a0, a0);
        }
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;

    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov. */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call. */
    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb. */
    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb. */
    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op. */
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    default:
        g_assert_not_reached();
    }

#undef REG0
}

static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    static const AArch64Insn cmp_vec_insn[16] = {
        [TCG_COND_EQ] = I3616_CMEQ,
        [TCG_COND_GT] = I3616_CMGT,
        [TCG_COND_GE] = I3616_CMGE,
        [TCG_COND_GTU] = I3616_CMHI,
        [TCG_COND_GEU] = I3616_CMHS,
    };
    static const AArch64Insn cmp_scalar_insn[16] = {
        [TCG_COND_EQ] = I3611_CMEQ,
        [TCG_COND_GT] = I3611_CMGT,
        [TCG_COND_GE] = I3611_CMGE,
        [TCG_COND_GTU] = I3611_CMHI,
        [TCG_COND_GEU] = I3611_CMHS,
    };
    static const AArch64Insn cmp0_vec_insn[16] = {
        [TCG_COND_EQ] = I3617_CMEQ0,
        [TCG_COND_GT] = I3617_CMGT0,
        [TCG_COND_GE] = I3617_CMGE0,
        [TCG_COND_LT] = I3617_CMLT0,
        [TCG_COND_LE] = I3617_CMLE0,
    };
    static const AArch64Insn cmp0_scalar_insn[16] = {
        [TCG_COND_EQ] = I3612_CMEQ0,
        [TCG_COND_GT] = I3612_CMGT0,
        [TCG_COND_GE] = I3612_CMGE0,
        [TCG_COND_LT] = I3612_CMLT0,
        [TCG_COND_LE] = I3612_CMLE0,
    };

    TCGType type = vecl + TCG_TYPE_V64;
    unsigned is_q = vecl;
    bool is_scalar = !is_q && vece == MO_64;
    TCGArg a0, a1, a2, a3;
    int cmode, imm8;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_mul_vec:
        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_neg_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_abs_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_and_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_or_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_andc_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_orc_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_xor_vec:
        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_ssadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sssub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_usadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_ussub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_smax_vec:
        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_smin_vec:
        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umax_vec:
        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umin_vec:
        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_not_vec:
        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
        break;
    case INDEX_op_shli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
        }
        break;
    case INDEX_op_shri_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_sari_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_aa64_sli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
        }
        break;
    case INDEX_op_shlv_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_aa64_sshl_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_cmp_vec:
        {
            TCGCond cond = args[3];
            AArch64Insn insn;

            if (cond == TCG_COND_NE) {
                if (const_args[2]) {
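                    /*
                     * Comparing != 0: CMTST d, n, n sets each lane
                     * to -1 iff (n & n) != 0, i.e. n != 0.
                     */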
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
                    } else {
                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
                    }
                } else {
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
                    } else {
                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
                    }
                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
                }
            } else {
                if (const_args[2]) {
                    if (is_scalar) {
                        insn = cmp0_scalar_insn[cond];
                        if (insn) {
                            tcg_out_insn_3612(s, insn, vece, a0, a1);
                            break;
                        }
                    } else {
                        insn = cmp0_vec_insn[cond];
                        if (insn) {
                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
                            break;
                        }
                    }
                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
                    a2 = TCG_VEC_TMP0;
                }
                if (is_scalar) {
                    insn = cmp_scalar_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_scalar_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
                } else {
                    insn = cmp_vec_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_vec_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
                }
            }
        }
        break;

    case INDEX_op_bitsel_vec:
        a3 = args[3];
        if (a0 == a3) {
            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
        } else if (a0 == a2) {
            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
        } else {
            if (a0 != a1) {
                tcg_out_mov(s, type, a0, a1);
            }
            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
        }
        break;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov. */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec. */
    default:
        g_assert_not_reached();
    }
}

int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_not_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_bitsel_vec:
        return 1;
    case INDEX_op_rotli_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return -1;
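    /* AdvSIMD implements these only for 8-, 16- and 32-bit elements. */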
    case INDEX_op_mul_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
        return vece < MO_64;

    default:
        return 0;
    }
}

void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, t1, t2, c1;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);
    va_end(va);

    switch (opc) {
    case INDEX_op_rotli_vec:
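        /* Rotate: v0 = (v1 << a2) | (v1 >> (width - a2)); SLI merges. */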
        t1 = tcg_temp_new_vec(type);
        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /* Right shifts are negative left shifts for AArch64. */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t1, v2);
        opc = (opc == INDEX_op_shrv_vec
               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotlv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_sub_vec(vece, t1, v2, c1);
        /* Right shifts are negative left shifts for AArch64. */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        tcg_gen_or_vec(vece, v0, v0, t1);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotrv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_neg_vec(vece, t1, v2);
        tcg_gen_sub_vec(vece, t2, c1, v2);
        /* Right shifts are negative left shifts for AArch64. */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
        tcg_gen_or_vec(vece, v0, t1, t2);
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    default:
        g_assert_not_reached();
    }
}

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_sextract_i64:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(rZ, r);

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
    case INDEX_op_negsetcond_i32:
    case INDEX_op_negsetcond_i64:
        return C_O1_I2(r, r, rA);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
    case INDEX_op_muluh_i64:
    case INDEX_op_mulsh_i64:
        return C_O1_I2(r, r, r);

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        return C_O1_I2(r, r, rL);

    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rAL);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, rA);

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, rA, rZ, rZ);

    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
        return C_O1_I1(r, r);
    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
        return C_O2_I1(r, r, r);
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return C_O0_I2(rZ, r);
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return C_O0_I3(rZ, rZ, r);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);

    case INDEX_op_extract2_i32:
    case INDEX_op_extract2_i64:
        return C_O1_I2(r, rZ, rZ);

    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_aa64_sshl_vec:
        return C_O1_I2(w, w, w);
    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return C_O1_I1(w, w);
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(w, r);
    case INDEX_op_st_vec:
        return C_O0_I2(w, r);
    case INDEX_op_dup_vec:
        return C_O1_I1(w, wr);
    case INDEX_op_or_vec:
    case INDEX_op_andc_vec:
        return C_O1_I2(w, w, wO);
    case INDEX_op_and_vec:
    case INDEX_op_orc_vec:
        return C_O1_I2(w, w, wN);
    case INDEX_op_cmp_vec:
        return C_O1_I2(w, w, wZ);
    case INDEX_op_bitsel_vec:
        return C_O1_I3(w, w, w, w);
    case INDEX_op_aa64_sli_vec:
        return C_O1_I2(w, 0, w);

    default:
        g_assert_not_reached();
    }
}

static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;

    tcg_target_call_clobber_regs = -1ull;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)
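/* That is (30 - 19 + 1) = 12 registers of 8 bytes: a 96-byte push area. */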

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2-byte uleb128 encoded value. */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn. */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
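/*
 * Frame layout built by the prologue, from sp upward (a sketch;
 * the exact offsets follow from the stores below):
 *
 *     [sp + 0]                            outgoing static call arguments
 *     [sp + TCG_STATIC_CALL_ARGS_SIZE]    CPU_TEMP_BUF_NLONGS temp buffer
 *     ... padding to TCG_TARGET_STACK_ALIGN ...
 *     [sp + FRAME_SIZE - PUSH_SIZE]       saved fp, lr
 *     [sp + FRAME_SIZE - PUSH_SIZE + 16]  saved x19 .. x28
 */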

static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    tcg_out_bti(s, BTI_C);

    /* Push (FP, LR) and allocate space for all saved registers. */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding. */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals. */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size. */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    if (!tcg_use_softmmu) {
        /*
         * Note that XZR cannot be encoded in the address base register slot,
         * as that actually encodes SP.  Depending on the guest, we may need
         * to zero-extend the guest address via the address index register
         * slot, therefore we need to load even a zero guest base into a
         * register.
         */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }

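    /*
     * The prologue is entered as tcg_qemu_tb_exec(env, tb_ptr):
     * move env (x0) into AREG0 and jump to the translated code (x1).
     */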
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_bti(s, BTI_J);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_bti(s, BTI_J);

    /* Remove TCG locals stack space. */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame. */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

static void tcg_out_tb_start(TCGContext *s)
{
    tcg_out_bti(s, BTI_J);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,               /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member. */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28, -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset, lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset, fp, -96 */
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}