/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"
#ifdef __linux__
#include <asm/hwcap.h>
#endif
#ifdef CONFIG_DARWIN
#include <sys/sysctl.h>
#endif

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

bool have_lse;
bool have_lse2;

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
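
/*
 * For example, 0xabc and 0xabc000 both satisfy is_aimm() (the latter
 * via LSL #12), while 0xabc0 straddles the two fields and does not.
 */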

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
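
/*
 * Under this simplified test, 0x00ff (0....01....1) and 0x0ff0
 * (0..01..10..0) match, as do their inverses.  A replicated pattern
 * such as 0x0f0f0f0f0f0f0f0f is rejected here even though the full
 * DecodeBitMasks rules could encode it.
 */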

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
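
/*
 * In these cmode encodings, imm8 is placed at a byte position and, for
 * the "shifting ones" forms, ones fill in below it: e.g. 0x000012ff is
 * cmode 0xc with imm8 0x12 ((0x12 << 8) | 0xff), and 0x0034ffff is
 * cmode 0xd with imm8 0x34 ((0x34 << 16) | 0xffff).
 */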

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}
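
/*
 * These match the FMOV (immediate) imm8 format: sign, a restricted
 * exponent, and the top fraction bits.  For instance, float64 1.0
 * (0x3ff0000000000000) passes the test above and packs to imm8 0x70.
 */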

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
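
/*
 * E.g. for v32 = 0xab0000cd, masking out the top byte leaves 0x000000cd,
 * a valid shifted immediate, so the return value is i = 6: MOVI with
 * (cmode 0x0, imm8 0xcd), then ORR with cmode 6 inserting 0xab at
 * bits 24..31.
 */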

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,      /* store */
    LDST_LD = 1,      /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP       = 0x0e000400,
    I3605_INS       = 0x4e001c00,
    I3605_UMOV      = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)

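/*
 * For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
 * tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm), so the opcode
 * constant and the emitting function are checked against the same format.
 */
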
static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field.  */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift).  */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
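
/*
 * Worked example: limm = 0x0000000000ff0000 has h = 40 leading and
 * l = 16 trailing zeros, giving r = 64 - 16 = 48 and c = r - h - 1 = 7,
 * i.e. a run of 8 ones rotated right by 48, which is 0xff << 16.
 */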

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff.  */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
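
/*
 * E.g. value 0xdeadbeef00000000 (population count 24) begins with MOVZ:
 * s0 = 32, t1 = 0xdead000000000000, s1 = 48, t2 = 0, so we emit
 * MOVZ rd, #0xbeef, lsl #32 followed by MOVK rd, #0xdead, lsl #48.
 * Anything still nonzero after two halfwords comes from the pool instead.
 */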

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference.  */
    g_assert_not_reached();
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}
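
/*
 * For a 32-bit access (lgsize = 2), offset 8 uses the scaled uimm12 form
 * (scaled_uimm = 2), offset -16 uses the unscaled signed form, and an
 * unaligned or distant offset such as 0x123456 falls back to
 * movi plus register-offset addressing.
 */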

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        /* Choose X9 as a call-clobbered non-LR temporary.  */
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
    }
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
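
/*
 * The DMB_LD and DMB_ST bits adjust the barrier option field, so
 * DMB_ISH | DMB_LD | DMB_ST encodes a full "dmb ish", while
 * DMB_ISH | DMB_LD alone is "dmb ishld" and DMB_ISH | DMB_ST is
 * "dmb ishst".
 */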

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}

typedef struct {
    TCGReg base;
    TCGReg index;
    TCGType index_ext;
} HostAddress;

static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
    tcg_out_goto(s, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
    return true;
}

1e612dd6
RH
1631/*
1632 * For softmmu, perform the TLB load and compare.
1633 * For useronly, perform any required alignment tests.
1634 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1635 * is required and fill in @h with the host address for the fast path.
1636 */
1637static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1638 TCGReg addr_reg, MemOpIdx oi,
1639 bool is_ld)
c6d8ed24 1640{
1e612dd6
RH
1641 TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1642 TCGLabelQemuLdst *ldst = NULL;
1643 MemOp opc = get_memop(oi);
85aa8081 1644 unsigned a_bits = get_alignment_bits(opc);
85aa8081 1645 unsigned a_mask = (1u << a_bits) - 1;
1e612dd6
RH
1646
1647#ifdef CONFIG_SOFTMMU
1648 unsigned s_bits = opc & MO_SIZE;
85aa8081 1649 unsigned s_mask = (1u << s_bits) - 1;
1e612dd6 1650 unsigned mem_index = get_mmuidx(oi);
65b23204 1651 TCGReg x3;
f7bcd966
RH
1652 TCGType mask_type;
1653 uint64_t compare_mask;
1654
1e612dd6
RH
1655 ldst = new_ldst_label(s);
1656 ldst->is_ld = is_ld;
1657 ldst->oi = oi;
1658 ldst->addrlo_reg = addr_reg;
1659
f7bcd966
RH
1660 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1661 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1662
65b23204 1663 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
1e612dd6
RH
1664 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1665 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1666 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1667 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
65b23204
RH
1668 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1669 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
f7bcd966
RH
1670
1671 /* Extract the TLB index from the address into X0. */
1672 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1673 TCG_REG_X0, TCG_REG_X0, addr_reg,
1674 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1675
1676 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
1677 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1678
1679 /* Load the tlb comparator into X0, and the fast path addend into X1. */
1e612dd6
RH
1680 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1,
1681 is_ld ? offsetof(CPUTLBEntry, addr_read)
1682 : offsetof(CPUTLBEntry, addr_write));
f7bcd966
RH
1683 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1684 offsetof(CPUTLBEntry, addend));
9ee14902 1685
1e612dd6
RH
1686 /*
1687 * For aligned accesses, we check the first byte and include the alignment
1688 * bits within the address. For unaligned access, we check that we don't
1689 * cross pages using the address of the last byte of the access.
1690 */
85aa8081 1691 if (a_bits >= s_bits) {
9ee14902
RH
1692 x3 = addr_reg;
1693 } else {
1694 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
85aa8081 1695 TCG_REG_X3, addr_reg, s_mask - a_mask);
9ee14902
RH
1696 x3 = TCG_REG_X3;
1697 }
    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Store the page mask part of the address into X3. */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    ldst->label_ptr[0] = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

    *h = (HostAddress){
        .base = TCG_REG_X1,
        .index = addr_reg,
        .index_ext = addr_type
    };
#else
    if (a_mask) {
        ldst = new_ldst_label(s);

        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addr_reg;

        /* tst addr, #mask */
        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

        /* b.ne slow_path */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
    }

    if (USE_GUEST_BASE) {
        *h = (HostAddress){
            .base = TCG_REG_GUEST_BASE,
            .index = addr_reg,
            .index_ext = addr_type
        };
    } else {
        *h = (HostAddress){
            .base = addr_reg,
            .index = TCG_REG_XZR,
            .index_ext = TCG_TYPE_I64
        };
    }
#endif

    return ldst;
}

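/*
 * Shape of the softmmu fast path emitted above, a sketch assuming
 * TARGET_PAGE_BITS == 12 and CPU_TLB_ENTRY_BITS == 5 (both vary by
 * configuration), not the literal output:
 *
 *     ldp  x0, x1, [env, #tlb_ofs]        // f[mmu_idx].{mask,table}
 *     and  x0, x0, addr, lsr #7           // index, pre-scaled by 32
 *     add  x1, x1, x0                     // x1 = &table[index]
 *     ldr  x0, [x1, #addr_read]           // or addr_write for stores
 *     ldr  x1, [x1, #addend]              // host - guest displacement
 *     and  x3, addr, #page_mask | a_mask
 *     cmp  x0, x3
 *     b.ne slow_path
 *
 * after which the access itself is issued against [x1, addr].
 */
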
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SW:
        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                       data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_SL:
        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_UQ:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                   TCGReg data_r, HostAddress h)
{
    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_16:
        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_32:
        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
        break;
    case MO_64:
        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static const tcg_insn_unit *tb_ret_addr;

static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
    /* Reuse the zeroing that exists for goto_ptr.  */
    if (a0 == 0) {
        tcg_out_goto_long(s, tcg_code_gen_epilogue);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
        tcg_out_goto_long(s, tb_ret_addr);
    }
}

static void tcg_out_goto_tb(TCGContext *s, int which)
{
    /*
     * Direct branch, or indirect address load, will be patched
     * by tb_target_set_jmp_target.  Assert indirect load offset
     * in range early, regardless of direct branch distance.
     */
    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));

    set_jmp_insn_offset(s, which);
    tcg_out32(s, I3206_B);
    tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    set_jmp_reset_offset(s, which);
}

void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    uintptr_t d_addr = tb->jmp_target_addr[n];
    ptrdiff_t d_offset = d_addr - jmp_rx;
    tcg_insn_unit insn;

    /* Either directly branch, or indirect branch load. */
    if (d_offset == sextract64(d_offset, 0, 28)) {
        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
    } else {
        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
        ptrdiff_t i_offset = i_addr - jmp_rx;

        /* Note that we asserted this in range in tcg_out_goto_tb. */
        insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
    }
    qatomic_set((uint32_t *)jmp_rw, insn);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
}

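/*
 * Patching note (illustrative): a direct B reaches +/-128MB through its
 * 26-bit word offset.  Beyond that, the same slot becomes a pc-relative
 * LDR of jmp_target_addr[n] into TCG_REG_TMP, and the BR emitted in
 * tcg_out_goto_tb completes the indirect jump.  Either form is a single
 * aligned 4-byte store, so the slot can be retargeted atomically.
 */
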
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0.  */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;
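    /*
     * AArch64 has no remainder instruction; the two cases above compute
     * a0 = a1 - (a1 / a2) * a2 with [SU]DIV followed by MSUB.
     */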

    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;
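    /*
     * AArch64 only rotates right: a variable rotate-left is RORV by the
     * negated count, which is congruent modulo the operand width.
     */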

    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;
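    /*
     * CSINC Wd, WZR, WZR, invert(cond) selects WZR when the inverted
     * condition holds and WZR + 1 otherwise, i.e. cond ? 1 : 0; that is
     * exactly the CSET alias named above.
     */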

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, a0, a0);
        }
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            /* Output must be sign-extended. */
            tcg_out_ext16s(s, ext, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            /* Output must be zero-extended, but input isn't. */
            tcg_out_ext16u(s, a0, a0);
        }
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;

    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    default:
        g_assert_not_reached();
    }

#undef REG0
}

static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    static const AArch64Insn cmp_vec_insn[16] = {
        [TCG_COND_EQ] = I3616_CMEQ,
        [TCG_COND_GT] = I3616_CMGT,
        [TCG_COND_GE] = I3616_CMGE,
        [TCG_COND_GTU] = I3616_CMHI,
        [TCG_COND_GEU] = I3616_CMHS,
    };
    static const AArch64Insn cmp_scalar_insn[16] = {
        [TCG_COND_EQ] = I3611_CMEQ,
        [TCG_COND_GT] = I3611_CMGT,
        [TCG_COND_GE] = I3611_CMGE,
        [TCG_COND_GTU] = I3611_CMHI,
        [TCG_COND_GEU] = I3611_CMHS,
    };
    static const AArch64Insn cmp0_vec_insn[16] = {
        [TCG_COND_EQ] = I3617_CMEQ0,
        [TCG_COND_GT] = I3617_CMGT0,
        [TCG_COND_GE] = I3617_CMGE0,
        [TCG_COND_LT] = I3617_CMLT0,
        [TCG_COND_LE] = I3617_CMLE0,
    };
    static const AArch64Insn cmp0_scalar_insn[16] = {
        [TCG_COND_EQ] = I3612_CMEQ0,
        [TCG_COND_GT] = I3612_CMGT0,
        [TCG_COND_GE] = I3612_CMGE0,
        [TCG_COND_LT] = I3612_CMLT0,
        [TCG_COND_LE] = I3612_CMLE0,
    };

    TCGType type = vecl + TCG_TYPE_V64;
    unsigned is_q = vecl;
    bool is_scalar = !is_q && vece == MO_64;
    TCGArg a0, a1, a2, a3;
    int cmode, imm8;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_mul_vec:
        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_neg_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_abs_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_and_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_or_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_andc_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_orc_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_xor_vec:
        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_ssadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sssub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_usadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_ussub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_smax_vec:
        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_smin_vec:
        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umax_vec:
        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umin_vec:
        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_not_vec:
        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
        break;
    case INDEX_op_shli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
        }
        break;
    case INDEX_op_shri_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_sari_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
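    /*
     * Encoding note: AdvSIMD shift-immediates pack element size and
     * shift into one immh:immb field, as esize + shift for SHL and as
     * 2 * esize - shift for USHR/SSHR; hence the a2 + (8 << vece) and
     * (16 << vece) - a2 terms above.
     */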
    case INDEX_op_aa64_sli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
        }
        break;
    case INDEX_op_shlv_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_aa64_sshl_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_cmp_vec:
        {
            TCGCond cond = args[3];
            AArch64Insn insn;

            if (cond == TCG_COND_NE) {
                if (const_args[2]) {
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
                    } else {
                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
                    }
                } else {
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
                    } else {
                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
                    }
                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
                }
            } else {
                if (const_args[2]) {
                    if (is_scalar) {
                        insn = cmp0_scalar_insn[cond];
                        if (insn) {
                            tcg_out_insn_3612(s, insn, vece, a0, a1);
                            break;
                        }
                    } else {
                        insn = cmp0_vec_insn[cond];
                        if (insn) {
                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
                            break;
                        }
                    }
                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
                    a2 = TCG_VEC_TMP;
                }
                if (is_scalar) {
                    insn = cmp_scalar_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_scalar_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
                } else {
                    insn = cmp_vec_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_vec_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
                }
            }
        }
        break;

    case INDEX_op_bitsel_vec:
        a3 = args[3];
        if (a0 == a3) {
            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
        } else if (a0 == a2) {
            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
        } else {
            if (a0 != a1) {
                tcg_out_mov(s, type, a0, a1);
            }
            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
        }
        break;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }
}

int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_not_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_bitsel_vec:
        return 1;
    case INDEX_op_rotli_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return -1;
    case INDEX_op_mul_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
        return vece < MO_64;

    default:
        return 0;
    }
}

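/*
 * The return values follow the TCG contract: 1 means handled directly
 * in tcg_out_vec_op, -1 means supported via expansion in
 * tcg_expand_vec_op below, and 0 means unsupported.
 */
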
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, t1, t2, c1;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);
    va_end(va);

    switch (opc) {
    case INDEX_op_rotli_vec:
        t1 = tcg_temp_new_vec(type);
        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /* Right shifts are negative left shifts for AArch64.  */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t1, v2);
        opc = (opc == INDEX_op_shrv_vec
               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotlv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_sub_vec(vece, t1, v2, c1);
        /* Right shifts are negative left shifts for AArch64.  */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        tcg_gen_or_vec(vece, v0, v0, t1);
        tcg_temp_free_vec(t1);
        break;
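    /*
     * Sketch of the rotlv expansion above: t1 = v2 - esize is negative,
     * so the first USHL is effectively v1 >> (esize - v2); OR-ing that
     * with v1 << v2 completes the rotate-left.
     */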

    case INDEX_op_rotrv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_neg_vec(vece, t1, v2);
        tcg_gen_sub_vec(vece, t2, c1, v2);
        /* Right shifts are negative left shifts for AArch64.  */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
        tcg_gen_or_vec(vece, v0, t1, t2);
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    default:
        g_assert_not_reached();
    }
}

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_sextract_i64:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(rZ, r);

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        return C_O1_I2(r, r, rA);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
    case INDEX_op_muluh_i64:
    case INDEX_op_mulsh_i64:
        return C_O1_I2(r, r, r);

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        return C_O1_I2(r, r, rL);

    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rAL);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, rA);

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, rA, rZ, rZ);

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return C_O1_I1(r, l);
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        return C_O0_I2(lZ, l);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);

    case INDEX_op_extract2_i32:
    case INDEX_op_extract2_i64:
        return C_O1_I2(r, rZ, rZ);

    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_aa64_sshl_vec:
        return C_O1_I2(w, w, w);
    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return C_O1_I1(w, w);
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(w, r);
    case INDEX_op_st_vec:
        return C_O0_I2(w, r);
    case INDEX_op_dup_vec:
        return C_O1_I1(w, wr);
    case INDEX_op_or_vec:
    case INDEX_op_andc_vec:
        return C_O1_I2(w, w, wO);
    case INDEX_op_and_vec:
    case INDEX_op_orc_vec:
        return C_O1_I2(w, w, wN);
    case INDEX_op_cmp_vec:
        return C_O1_I2(w, w, wZ);
    case INDEX_op_bitsel_vec:
        return C_O1_I3(w, w, w, w);
    case INDEX_op_aa64_sli_vec:
        return C_O1_I2(w, 0, w);

    default:
        g_assert_not_reached();
    }
}

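/*
 * Key to the constraint letters above, per this backend's
 * tcg-target-con-str.h: 'r' any general register, 'w' any vector
 * register, 'l' a register usable in qemu_ld/st addressing, 'i' any
 * immediate, 'Z' the constant zero (XZR), 'A' an add/sub immediate,
 * 'L' a logical immediate, 'M' minus one, 'O'/'N' vector bitwise
 * immediates, and '0' an input tied to output 0.
 */
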
#ifdef CONFIG_DARWIN
static bool sysctl_for_bool(const char *name)
{
    int val = 0;
    size_t len = sizeof(val);

    if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
        return val != 0;
    }

    /*
     * We might in the future ask for properties not present in older
     * kernels, but we're only asking about static properties, all of
     * which should be 'int'.  So we shouldn't see ENOMEM (val too
     * small), or any of the other more exotic errors.
     */
    assert(errno == ENOENT);
    return false;
}
#endif

static void tcg_target_init(TCGContext *s)
{
#ifdef __linux__
    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
    have_lse = hwcap & HWCAP_ATOMICS;
    have_lse2 = hwcap & HWCAP_USCAT;
#endif
#ifdef CONFIG_DARWIN
    have_lse = sysctl_for_bool("hw.optional.arm.FEAT_LSE");
    have_lse2 = sysctl_for_bool("hw.optional.arm.FEAT_LSE2");
#endif

    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;

    tcg_target_call_clobber_regs = -1ull;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);

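/*
 * Worked example (a sketch; the constants depend on the build): with
 * TCG_STATIC_CALL_ARGS_SIZE = 128, CPU_TEMP_BUF_NLONGS = 128 and
 * 16-byte stack alignment, PUSH_SIZE = 96 and
 * FRAME_SIZE = align16(96 + 128 + 1024) = 1248, so the prologue
 * adjusts SP by 1152, comfortably inside both limits asserted above.
 */
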
static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    /* Push (FP, LR) and allocate space for all saved registers.  */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding.  */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals.  */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size.  */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if !defined(CONFIG_SOFTMMU)
    if (USE_GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);

    /* Remove TCG locals stack space.  */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

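/*
 * Resulting frame layout, as offsets from SP once the SUBI has executed
 * (a sketch):
 *
 *     [sp + FRAME_SIZE - PUSH_SIZE]        saved FP, LR
 *     [sp + FRAME_SIZE - PUSH_SIZE + 16]   x19/x20 .. x27/x28 pairs
 *     [sp + TCG_STATIC_CALL_ARGS_SIZE]     CPU_TEMP_BUF_NLONGS longs
 *     [sp + 0]                             outgoing stack arguments
 */
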
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28, -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset, lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset, fp, -96 */
    }
};

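/*
 * Decoding note: each .fde_reg_ofs pair is DW_CFA_offset (0x80 | regno)
 * followed by a uleb128 offset in data_align (-8 byte) units, so
 * "0x80 + 28, 1" records x28 saved at CFA - 8, matching the push order
 * in tcg_target_qemu_prologue above.
 */
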
void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}