/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}
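
/* In reloc_pc26 above and reloc_pc19 below, the offset is counted in
   32-bit instruction units (tcg_insn_unit is 4 bytes on aarch64), so
   the signed 26-bit and 19-bit fields reach +/-128MB and +/-1MB
   respectively from the instruction being patched. */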

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
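
/* For example, val = 0x0ff0 matches the 0..01..10..0 form: its lowest
   set bit is 0x0010, so val + (val & -val) = 0x1000, a power of two,
   and the final test passes.  A value with two runs of ones such as
   0x0f0f sums to 0x0f10 instead and is correctly rejected. */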

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
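
/* For example, v32 = 0x12000034 is not a shifted immediate by itself,
   but masking the byte at bit 24 leaves 0x00000034, which MOVI can load;
   the loop stops at i = 6, and i doubles as the ORR cmode (a shift of
   i * 4 = 24 bits), with imm8 = extract32(v32, 24, 8) = 0x12. */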

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP       = 0x0e000400,
    I3605_INS       = 0x4e001c00,
    I3605_UMOV      = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)

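/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands
   to tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm); a mismatch
   between the format number and the opcode name fails to resolve either
   the I3401_* constant or the emitter function at compile time. */
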
static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}
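
/* For example, aimm = 0x123000 is encoded as 0x123 with the LSL-12 bit
   set, while aimm = 0x123 is encoded as-is; a value with bits in both
   12-bit groups trips the assertions above, matching is_aimm(). */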

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field.  */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift).  */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
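
/* Worked example: limm = 0x0ff0 gives h = clz64 = 52 and l = ctz64 = 4,
   hence r = 64 - 4 = 60 and c = 60 - 52 - 1 = 7.  DecodeBitMasks maps
   (immr = 60, imms = 7) back to an 8-bit run of ones rotated right by
   60 positions, which is 0x0ff0 again. */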

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff.  */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffUL << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffUL << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
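
/* For example, value = 0x0000123400005678 takes the MOVZ path: s0 = 0
   captures 0x5678, t1 leaves only the 0x1234 halfword so s1 = 32, and
   t2 == 0, giving MOVZ rd, #0x5678 followed by MOVK rd, #0x1234, lsl #32.
   Anything still needing a third halfword is loaded from the pool. */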

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
}
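
/* There is no rotate-left instruction; both helpers above use the ROR
   alias of EXTR with the source register given twice, and rotl simply
   negates the shift count modulo the operand size, since a left rotate
   by m equals a right rotate by (size - m). */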

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    }
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_callr(s, TCG_REG_TMP);
    }
}

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
                              uintptr_t jmp_rw, uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_rx;

    if (offset == sextract64(offset, 0, 26)) {
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        offset = (addr >> 12) - (jmp_rx >> 12);

        /* patch ADRP */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    pair = (uint64_t)i2 << 32 | i1;
    qatomic_set((uint64_t *)jmp_rw, pair);
    flush_idcache_range(jmp_rx, jmp_rw, 8);
}
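
/* Note that the patch above rewrites both instructions with a single
   64-bit qatomic_set.  This relies on the jump slot being 8-byte
   aligned, so that a cpu concurrently executing this TB observes
   either the old pair or the new pair, never a mixed sequence; the
   icache flush then publishes the change to the instruction stream. */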

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
}

static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}
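
/* For example, a 32-bit ctz with fallback b = -1 becomes: RBIT tmp, a0;
   CMP a0, #0; CLZ tmp, tmp; CSINV d, tmp, wzr, ne -- i.e. the count of
   the bit-reversed value when a0 is non-zero, ~wzr = -1 when it is zero. */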
1556
4a136e0a 1557#ifdef CONFIG_SOFTMMU
139c1837 1558#include "../tcg-ldst.c.inc"
659ef5cb 1559
023261ef 1560/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
3972ef6f 1561 * TCGMemOpIdx oi, uintptr_t ra)
023261ef 1562 */
8587c30c 1563static void * const qemu_ld_helpers[16] = {
de61d14f
RH
1564 [MO_UB] = helper_ret_ldub_mmu,
1565 [MO_LEUW] = helper_le_lduw_mmu,
1566 [MO_LEUL] = helper_le_ldul_mmu,
1567 [MO_LEQ] = helper_le_ldq_mmu,
1568 [MO_BEUW] = helper_be_lduw_mmu,
1569 [MO_BEUL] = helper_be_ldul_mmu,
1570 [MO_BEQ] = helper_be_ldq_mmu,
4a136e0a
CF
1571};
1572
023261ef 1573/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
3972ef6f
RH
1574 * uintxx_t val, TCGMemOpIdx oi,
1575 * uintptr_t ra)
023261ef 1576 */
8587c30c 1577static void * const qemu_st_helpers[16] = {
de61d14f
RH
1578 [MO_UB] = helper_ret_stb_mmu,
1579 [MO_LEUW] = helper_le_stw_mmu,
1580 [MO_LEUL] = helper_le_stl_mmu,
1581 [MO_LEQ] = helper_le_stq_mmu,
1582 [MO_BEUW] = helper_be_stw_mmu,
1583 [MO_BEUL] = helper_be_stl_mmu,
1584 [MO_BEQ] = helper_be_stq_mmu,
4a136e0a
CF
1585};
1586
ffba3eb3 1587static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
dc0c8aaf 1588{
8587c30c 1589 ptrdiff_t offset = tcg_pcrel_diff(s, target);
eabb7b91 1590 tcg_debug_assert(offset == sextract64(offset, 0, 21));
8587c30c 1591 tcg_out_insn(s, 3406, ADR, rd, offset);
dc0c8aaf
RH
1592}
1593
aeee05f5 1594static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
c6d8ed24 1595{
3972ef6f 1596 TCGMemOpIdx oi = lb->oi;
14776ab5
TN
1597 MemOp opc = get_memop(oi);
1598 MemOp size = opc & MO_SIZE;
929f8b55 1599
ffba3eb3 1600 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
aeee05f5
RH
1601 return false;
1602 }
017a86f7 1603
3972ef6f 1604 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
b825025f 1605 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
3972ef6f 1606 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
8587c30c 1607 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
2b7ec66f 1608 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
929f8b55 1609 if (opc & MO_SIGN) {
9c53889b 1610 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
c6d8ed24 1611 } else {
b825025f 1612 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
c6d8ed24
JK
1613 }
1614
8587c30c 1615 tcg_out_goto(s, lb->raddr);
aeee05f5 1616 return true;
c6d8ed24
JK
1617}
1618
aeee05f5 1619static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
c6d8ed24 1620{
3972ef6f 1621 TCGMemOpIdx oi = lb->oi;
14776ab5
TN
1622 MemOp opc = get_memop(oi);
1623 MemOp size = opc & MO_SIZE;
929f8b55 1624
ffba3eb3 1625 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
aeee05f5
RH
1626 return false;
1627 }
c6d8ed24 1628
3972ef6f 1629 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
b825025f
RH
1630 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1631 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
3972ef6f 1632 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
8587c30c 1633 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
2b7ec66f 1634 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
8587c30c 1635 tcg_out_goto(s, lb->raddr);
aeee05f5 1636 return true;
c6d8ed24
JK
1637}
1638
3972ef6f 1639static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
9c53889b 1640 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
3972ef6f 1641 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
c6d8ed24 1642{
9ecefc84 1643 TCGLabelQemuLdst *label = new_ldst_label(s);
c6d8ed24 1644
c6d8ed24 1645 label->is_ld = is_ld;
3972ef6f 1646 label->oi = oi;
9c53889b 1647 label->type = ext;
c6d8ed24
JK
1648 label->datalo_reg = data_reg;
1649 label->addrlo_reg = addr_reg;
e5e2e4c7 1650 label->raddr = tcg_splitwx_to_rx(raddr);
c6d8ed24
JK
1651 label->label_ptr[0] = label_ptr;
1652}
1653
269bd5d8
RH
1654/* We expect to use a 7-bit scaled negative offset from ENV. */
1655QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1656QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
f7bcd966 1657
65b23204
RH
1658/* These offsets are built into the LDP below. */
1659QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1660QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1661
c6d8ed24
JK
1662/* Load and compare a TLB entry, emitting the conditional jump to the
1663 slow path for the failure case, which will be patched later when finalizing
1664 the slow path. Generated code returns the host addend in X1,
1665 clobbers X0,X2,X3,TMP. */
14776ab5 1666static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
8587c30c
RH
1667 tcg_insn_unit **label_ptr, int mem_index,
1668 bool is_read)
c6d8ed24 1669{
85aa8081
RH
1670 unsigned a_bits = get_alignment_bits(opc);
1671 unsigned s_bits = opc & MO_SIZE;
1672 unsigned a_mask = (1u << a_bits) - 1;
1673 unsigned s_mask = (1u << s_bits) - 1;
65b23204 1674 TCGReg x3;
f7bcd966
RH
1675 TCGType mask_type;
1676 uint64_t compare_mask;
1677
f7bcd966
RH
1678 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1679 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1680
65b23204
RH
1681 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
1682 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1683 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
f7bcd966
RH
1684
1685 /* Extract the TLB index from the address into X0. */
1686 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1687 TCG_REG_X0, TCG_REG_X0, addr_reg,
1688 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
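    /* This single AND with a shifted register computes
       x0 = f.mask & (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)),
       and since f.mask is (n_entries - 1) << CPU_TLB_ENTRY_BITS, the
       result is the byte offset of the matching entry in the table.  */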
1689
1690 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
1691 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1692
1693 /* Load the tlb comparator into X0, and the fast path addend into X1. */
1694 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1695 ? offsetof(CPUTLBEntry, addr_read)
1696 : offsetof(CPUTLBEntry, addr_write));
1697 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1698 offsetof(CPUTLBEntry, addend));
9ee14902
RH
1699
1700 /* For aligned accesses, we check the first byte and include the alignment
1701        bits within the address. For unaligned accesses, we check that we don't
1702 cross pages using the address of the last byte of the access. */
85aa8081 1703 if (a_bits >= s_bits) {
9ee14902
RH
1704 x3 = addr_reg;
1705 } else {
1706 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
85aa8081 1707 TCG_REG_X3, addr_reg, s_mask - a_mask);
9ee14902
RH
1708 x3 = TCG_REG_X3;
1709 }
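    /* E.g. a 4-byte access with 2-byte guaranteed alignment has
       s_mask = 3, a_mask = 1, so we add 2: for an aligned address the
       sum stays in the same page exactly when the last byte does,
       while a misaligned address sets the low bit and fails below.  */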
f7bcd966 1710 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
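    /* The TLB comparator holds the page-aligned guest address, so
       folding a_mask into the compare sends any access with a set
       alignment bit (or any TLB flag bit in the comparator) through
       the slow path.  */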
6f472467 1711
9ee14902
RH
1712 /* Store the page mask part of the address into X3. */
1713 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
f7bcd966 1714 TCG_REG_X3, x3, compare_mask);
6f472467 1715
c6d8ed24 1716 /* Perform the address comparison. */
f7bcd966 1717 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
6f472467 1718
c6d8ed24 1719 /* If not equal, we jump to the slow path. */
6f472467 1720 *label_ptr = s->code_ptr;
733589b3 1721 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
c6d8ed24
JK
1722}
1723
1724#endif /* CONFIG_SOFTMMU */
6a91c7c9 1725
14776ab5 1726static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
ffc63728
PB
1727 TCGReg data_r, TCGReg addr_r,
1728 TCGType otype, TCGReg off_r)
6a91c7c9 1729{
14776ab5 1730 const MemOp bswap = memop & MO_BSWAP;
9e4177ad
RH
1731
1732 switch (memop & MO_SSIZE) {
1733 case MO_UB:
6c0f0c0f 1734 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
6a91c7c9 1735 break;
9e4177ad 1736 case MO_SB:
9c53889b 1737 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
6c0f0c0f 1738 data_r, addr_r, otype, off_r);
6a91c7c9 1739 break;
9e4177ad 1740 case MO_UW:
6c0f0c0f 1741 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
9e4177ad 1742 if (bswap) {
edd8824c 1743 tcg_out_rev16(s, data_r, data_r);
6a91c7c9
JK
1744 }
1745 break;
9e4177ad
RH
1746 case MO_SW:
1747 if (bswap) {
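            /* Load zero-extended and byte-swap before sign-extending;
               swapping an already sign-extended value would smear the
               old sign bits into the result.  */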
6c0f0c0f 1748 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
edd8824c 1749 tcg_out_rev16(s, data_r, data_r);
9c53889b 1750 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
6a91c7c9 1751 } else {
6c0f0c0f
PB
1752 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1753 data_r, addr_r, otype, off_r);
6a91c7c9
JK
1754 }
1755 break;
9e4177ad 1756 case MO_UL:
6c0f0c0f 1757 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
9e4177ad 1758 if (bswap) {
edd8824c 1759 tcg_out_rev32(s, data_r, data_r);
6a91c7c9
JK
1760 }
1761 break;
9e4177ad
RH
1762 case MO_SL:
1763 if (bswap) {
6c0f0c0f 1764 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
edd8824c 1765 tcg_out_rev32(s, data_r, data_r);
929f8b55 1766 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
6a91c7c9 1767 } else {
6c0f0c0f 1768 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
6a91c7c9
JK
1769 }
1770 break;
9e4177ad 1771 case MO_Q:
6c0f0c0f 1772 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
9e4177ad 1773 if (bswap) {
edd8824c 1774 tcg_out_rev64(s, data_r, data_r);
6a91c7c9
JK
1775 }
1776 break;
1777 default:
1778 tcg_abort();
1779 }
1780}
1781
14776ab5 1782static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
ffc63728
PB
1783 TCGReg data_r, TCGReg addr_r,
1784 TCGType otype, TCGReg off_r)
6a91c7c9 1785{
14776ab5 1786 const MemOp bswap = memop & MO_BSWAP;
9e4177ad
RH
1787
1788 switch (memop & MO_SIZE) {
1789 case MO_8:
6c0f0c0f 1790 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
6a91c7c9 1791 break;
9e4177ad 1792 case MO_16:
e81864a1 1793 if (bswap && data_r != TCG_REG_XZR) {
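            /* XZR byte-swaps to itself, so zero stores skip the swap;
               otherwise swap into TMP to leave data_r unmodified.  */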
edd8824c 1794 tcg_out_rev16(s, TCG_REG_TMP, data_r);
9e4177ad 1795 data_r = TCG_REG_TMP;
6a91c7c9 1796 }
6c0f0c0f 1797 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
6a91c7c9 1798 break;
9e4177ad 1799 case MO_32:
e81864a1 1800 if (bswap && data_r != TCG_REG_XZR) {
edd8824c 1801 tcg_out_rev32(s, TCG_REG_TMP, data_r);
9e4177ad 1802 data_r = TCG_REG_TMP;
6a91c7c9 1803 }
6c0f0c0f 1804 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
6a91c7c9 1805 break;
9e4177ad 1806 case MO_64:
e81864a1 1807 if (bswap && data_r != TCG_REG_XZR) {
edd8824c 1808 tcg_out_rev64(s, TCG_REG_TMP, data_r);
9e4177ad 1809 data_r = TCG_REG_TMP;
6a91c7c9 1810 }
6c0f0c0f 1811 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
6a91c7c9
JK
1812 break;
1813 default:
1814 tcg_abort();
1815 }
1816}
4a136e0a 1817
667b1cdd 1818static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
59227d5d 1819 TCGMemOpIdx oi, TCGType ext)
4a136e0a 1820{
14776ab5 1821 MemOp memop = get_memop(oi);
80adb8fc 1822 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
4a136e0a 1823#ifdef CONFIG_SOFTMMU
59227d5d 1824 unsigned mem_index = get_mmuidx(oi);
8587c30c 1825 tcg_insn_unit *label_ptr;
4a136e0a 1826
9ee14902 1827 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
80adb8fc
RH
1828 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1829 TCG_REG_X1, otype, addr_reg);
3972ef6f
RH
1830 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1831 s->code_ptr, label_ptr);
4a136e0a 1832#else /* !CONFIG_SOFTMMU */
352bcb0a
RH
1833 if (USE_GUEST_BASE) {
1834 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1835 TCG_REG_GUEST_BASE, otype, addr_reg);
1836 } else {
1837 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1838 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1839 }
6a91c7c9 1840#endif /* CONFIG_SOFTMMU */
4a136e0a
CF
1841}
1842
667b1cdd 1843static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
59227d5d 1844 TCGMemOpIdx oi)
4a136e0a 1845{
14776ab5 1846 MemOp memop = get_memop(oi);
80adb8fc 1847 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
4a136e0a 1848#ifdef CONFIG_SOFTMMU
59227d5d 1849 unsigned mem_index = get_mmuidx(oi);
8587c30c 1850 tcg_insn_unit *label_ptr;
4a136e0a 1851
9ee14902 1852 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
80adb8fc
RH
1853 tcg_out_qemu_st_direct(s, memop, data_reg,
1854 TCG_REG_X1, otype, addr_reg);
9ee14902
RH
1855     add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1856 data_reg, addr_reg, s->code_ptr, label_ptr);
4a136e0a 1857#else /* !CONFIG_SOFTMMU */
352bcb0a
RH
1858 if (USE_GUEST_BASE) {
1859 tcg_out_qemu_st_direct(s, memop, data_reg,
1860 TCG_REG_GUEST_BASE, otype, addr_reg);
1861 } else {
1862 tcg_out_qemu_st_direct(s, memop, data_reg,
1863 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1864 }
6a91c7c9 1865#endif /* CONFIG_SOFTMMU */
4a136e0a
CF
1866}
1867
ffba3eb3 1868static const tcg_insn_unit *tb_ret_addr;
4a136e0a 1869
4a136e0a 1870static void tcg_out_op(TCGContext *s, TCGOpcode opc,
8d8db193
RH
1871 const TCGArg args[TCG_MAX_OP_ARGS],
1872 const int const_args[TCG_MAX_OP_ARGS])
4a136e0a 1873{
f0293414
RH
1874 /* 99% of the time, we can signal the use of extension registers
1875 by looking to see if the opcode handles 64-bit data. */
1876 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
4a136e0a 1877
8d8db193
RH
1878 /* Hoist the loads of the most common arguments. */
1879 TCGArg a0 = args[0];
1880 TCGArg a1 = args[1];
1881 TCGArg a2 = args[2];
1882 int c2 = const_args[2];
1883
04ce397b
RH
1884 /* Some operands are defined with "rZ" constraint, a register or
1885 the zero register. These need not actually test args[I] == 0. */
1886#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1887
4a136e0a
CF
1888 switch (opc) {
1889 case INDEX_op_exit_tb:
b19f0c2e
RH
1890 /* Reuse the zeroing that exists for goto_ptr. */
1891 if (a0 == 0) {
8b5c2b62 1892 tcg_out_goto_long(s, tcg_code_gen_epilogue);
b19f0c2e
RH
1893 } else {
1894 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
23b7aa1d 1895 tcg_out_goto_long(s, tb_ret_addr);
b19f0c2e 1896 }
4a136e0a
CF
1897 break;
1898
1899 case INDEX_op_goto_tb:
2acee8b2 1900 if (s->tb_jmp_insn_offset != NULL) {
a8583393 1901 /* TCG_TARGET_HAS_direct_jump */
2acee8b2
PK
1902 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1903 write can be used to patch the target address. */
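            /* The ADRP+ADD pair can reach any pc-relative target
               within +/- 4GiB, and the 8-byte alignment lets both
               insns be rewritten with one 64-bit store.  */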
1904 if ((uintptr_t)s->code_ptr & 7) {
1905 tcg_out32(s, NOP);
1906 }
1907 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1908             /* The actual branch destination will be patched by
a8583393 1909 tb_target_set_jmp_target later. */
2acee8b2
PK
1910 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1911 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1912 } else {
a8583393 1913 /* !TCG_TARGET_HAS_direct_jump */
2acee8b2
PK
1914 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1915 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1916 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
b68686bd 1917 }
b68686bd 1918 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
9f754620 1919 set_jmp_reset_offset(s, a0);
4a136e0a
CF
1920 break;
1921
b19f0c2e
RH
1922 case INDEX_op_goto_ptr:
1923 tcg_out_insn(s, 3207, BR, a0);
1924 break;
1925
4a136e0a 1926 case INDEX_op_br:
bec16311 1927 tcg_out_goto_label(s, arg_label(a0));
4a136e0a
CF
1928 break;
1929
4a136e0a 1930 case INDEX_op_ld8u_i32:
4a136e0a 1931 case INDEX_op_ld8u_i64:
14e4c1e2 1932 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
dc73dfd4
RH
1933 break;
1934 case INDEX_op_ld8s_i32:
14e4c1e2 1935 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
dc73dfd4 1936 break;
4a136e0a 1937 case INDEX_op_ld8s_i64:
14e4c1e2 1938 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
dc73dfd4
RH
1939 break;
1940 case INDEX_op_ld16u_i32:
4a136e0a 1941 case INDEX_op_ld16u_i64:
14e4c1e2 1942 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
dc73dfd4
RH
1943 break;
1944 case INDEX_op_ld16s_i32:
14e4c1e2 1945 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
dc73dfd4 1946 break;
4a136e0a 1947 case INDEX_op_ld16s_i64:
14e4c1e2 1948 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
dc73dfd4
RH
1949 break;
1950 case INDEX_op_ld_i32:
4a136e0a 1951 case INDEX_op_ld32u_i64:
14e4c1e2 1952 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
dc73dfd4 1953 break;
4a136e0a 1954 case INDEX_op_ld32s_i64:
14e4c1e2 1955 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
e81864a1 1956 break;
dc73dfd4 1957 case INDEX_op_ld_i64:
14e4c1e2 1958 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
dc73dfd4
RH
1959 break;
1960
4a136e0a
CF
1961 case INDEX_op_st8_i32:
1962 case INDEX_op_st8_i64:
14e4c1e2 1963 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
dc73dfd4 1964 break;
4a136e0a
CF
1965 case INDEX_op_st16_i32:
1966 case INDEX_op_st16_i64:
14e4c1e2 1967 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
dc73dfd4
RH
1968 break;
1969 case INDEX_op_st_i32:
4a136e0a 1970 case INDEX_op_st32_i64:
14e4c1e2 1971 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
dc73dfd4
RH
1972 break;
1973 case INDEX_op_st_i64:
14e4c1e2 1974 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
4a136e0a
CF
1975 break;
1976
4a136e0a 1977 case INDEX_op_add_i32:
90f1cd91
RH
1978 a2 = (int32_t)a2;
1979 /* FALLTHRU */
1980 case INDEX_op_add_i64:
1981 if (c2) {
1982 tcg_out_addsubi(s, ext, a0, a1, a2);
1983 } else {
1984 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1985 }
4a136e0a
CF
1986 break;
1987
4a136e0a 1988 case INDEX_op_sub_i32:
90f1cd91
RH
1989 a2 = (int32_t)a2;
1990 /* FALLTHRU */
1991 case INDEX_op_sub_i64:
1992 if (c2) {
1993 tcg_out_addsubi(s, ext, a0, a1, -a2);
1994 } else {
1995 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1996 }
4a136e0a
CF
1997 break;
1998
14b155dd
RH
1999 case INDEX_op_neg_i64:
2000 case INDEX_op_neg_i32:
2001 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2002 break;
2003
4a136e0a 2004 case INDEX_op_and_i32:
e029f293
RH
2005 a2 = (int32_t)a2;
2006 /* FALLTHRU */
2007 case INDEX_op_and_i64:
2008 if (c2) {
2009 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2010 } else {
2011 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2012 }
4a136e0a
CF
2013 break;
2014
14b155dd
RH
2015 case INDEX_op_andc_i32:
2016 a2 = (int32_t)a2;
2017 /* FALLTHRU */
2018 case INDEX_op_andc_i64:
2019 if (c2) {
2020 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2021 } else {
2022 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2023 }
2024 break;
2025
4a136e0a 2026 case INDEX_op_or_i32:
e029f293
RH
2027 a2 = (int32_t)a2;
2028 /* FALLTHRU */
2029 case INDEX_op_or_i64:
2030 if (c2) {
2031 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2032 } else {
2033 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2034 }
4a136e0a
CF
2035 break;
2036
14b155dd
RH
2037 case INDEX_op_orc_i32:
2038 a2 = (int32_t)a2;
2039 /* FALLTHRU */
2040 case INDEX_op_orc_i64:
2041 if (c2) {
2042 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2043 } else {
2044 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2045 }
2046 break;
2047
4a136e0a 2048 case INDEX_op_xor_i32:
e029f293
RH
2049 a2 = (int32_t)a2;
2050 /* FALLTHRU */
2051 case INDEX_op_xor_i64:
2052 if (c2) {
2053 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2054 } else {
2055 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2056 }
4a136e0a
CF
2057 break;
2058
14b155dd
RH
2059 case INDEX_op_eqv_i32:
2060 a2 = (int32_t)a2;
2061 /* FALLTHRU */
2062 case INDEX_op_eqv_i64:
2063 if (c2) {
2064 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2065 } else {
2066 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2067 }
2068 break;
2069
2070 case INDEX_op_not_i64:
2071 case INDEX_op_not_i32:
2072 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2073 break;
2074
4a136e0a 2075 case INDEX_op_mul_i64:
4a136e0a 2076 case INDEX_op_mul_i32:
8678b71c
RH
2077 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2078 break;
2079
2080 case INDEX_op_div_i64:
2081 case INDEX_op_div_i32:
2082 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2083 break;
2084 case INDEX_op_divu_i64:
2085 case INDEX_op_divu_i32:
2086 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2087 break;
2088
2089 case INDEX_op_rem_i64:
2090 case INDEX_op_rem_i32:
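        /* There is no hardware remainder insn: compute the quotient
           into TMP, then a0 = a1 - (a1 / a2) * a2 via MSUB, matching
           C's % semantics.  */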
2091 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2092 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2093 break;
2094 case INDEX_op_remu_i64:
2095 case INDEX_op_remu_i32:
2096 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2097 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
4a136e0a
CF
2098 break;
2099
2100 case INDEX_op_shl_i64:
4a136e0a 2101 case INDEX_op_shl_i32:
df9351e3 2102 if (c2) {
8d8db193 2103 tcg_out_shl(s, ext, a0, a1, a2);
df9351e3
RH
2104 } else {
2105 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
4a136e0a
CF
2106 }
2107 break;
2108
2109 case INDEX_op_shr_i64:
4a136e0a 2110 case INDEX_op_shr_i32:
df9351e3 2111 if (c2) {
8d8db193 2112 tcg_out_shr(s, ext, a0, a1, a2);
df9351e3
RH
2113 } else {
2114 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
4a136e0a
CF
2115 }
2116 break;
2117
2118 case INDEX_op_sar_i64:
4a136e0a 2119 case INDEX_op_sar_i32:
df9351e3 2120 if (c2) {
8d8db193 2121 tcg_out_sar(s, ext, a0, a1, a2);
df9351e3
RH
2122 } else {
2123 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
4a136e0a
CF
2124 }
2125 break;
2126
2127 case INDEX_op_rotr_i64:
4a136e0a 2128 case INDEX_op_rotr_i32:
df9351e3 2129 if (c2) {
8d8db193 2130 tcg_out_rotr(s, ext, a0, a1, a2);
df9351e3
RH
2131 } else {
2132 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
4a136e0a
CF
2133 }
2134 break;
2135
2136 case INDEX_op_rotl_i64:
df9351e3
RH
2137 case INDEX_op_rotl_i32:
2138 if (c2) {
8d8db193 2139 tcg_out_rotl(s, ext, a0, a1, a2);
4a136e0a 2140 } else {
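            /* There is no variable rotate-left insn; since
               rotl(x, n) == rotr(x, -n & (bits - 1)) and RORV only
               uses the low bits of the count, rotate right by -a2.  */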
50573c66 2141 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
df9351e3 2142 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
4a136e0a
CF
2143 }
2144 break;
2145
53c76c19
RH
2146 case INDEX_op_clz_i64:
2147 case INDEX_op_clz_i32:
2148 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2149 break;
2150 case INDEX_op_ctz_i64:
2151 case INDEX_op_ctz_i32:
2152 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2153 break;
2154
8d8db193 2155 case INDEX_op_brcond_i32:
90f1cd91
RH
2156 a1 = (int32_t)a1;
2157 /* FALLTHRU */
2158 case INDEX_op_brcond_i64:
bec16311 2159 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
4a136e0a
CF
2160 break;
2161
4a136e0a 2162 case INDEX_op_setcond_i32:
90f1cd91
RH
2163 a2 = (int32_t)a2;
2164 /* FALLTHRU */
2165 case INDEX_op_setcond_i64:
2166 tcg_out_cmp(s, ext, a1, a2, c2);
ed7a0aa8
RH
2167 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
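        /* CSINC Rd, WZR, WZR, cond yields Rd = cond ? WZR : WZR + 1;
           with the inverted condition this is 1 when the original
           condition holds and 0 when it does not.  */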
2168 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2169 TCG_REG_XZR, tcg_invert_cond(args[3]));
4a136e0a
CF
2170 break;
2171
04ce397b
RH
2172 case INDEX_op_movcond_i32:
2173 a2 = (int32_t)a2;
2174 /* FALLTHRU */
2175 case INDEX_op_movcond_i64:
2176 tcg_out_cmp(s, ext, a1, a2, c2);
2177 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2178 break;
2179
de61d14f
RH
2180 case INDEX_op_qemu_ld_i32:
2181 case INDEX_op_qemu_ld_i64:
59227d5d 2182 tcg_out_qemu_ld(s, a0, a1, a2, ext);
4a136e0a 2183 break;
de61d14f
RH
2184 case INDEX_op_qemu_st_i32:
2185 case INDEX_op_qemu_st_i64:
59227d5d 2186 tcg_out_qemu_st(s, REG0(0), a1, a2);
4a136e0a
CF
2187 break;
2188
f0293414 2189 case INDEX_op_bswap64_i64:
edd8824c
RH
2190 tcg_out_rev64(s, a0, a1);
2191 break;
2192 case INDEX_op_bswap32_i64:
9c4a059d 2193 case INDEX_op_bswap32_i32:
edd8824c 2194 tcg_out_rev32(s, a0, a1);
9c4a059d
CF
2195 break;
2196 case INDEX_op_bswap16_i64:
2197 case INDEX_op_bswap16_i32:
edd8824c 2198 tcg_out_rev16(s, a0, a1);
9c4a059d
CF
2199 break;
2200
31f1275b 2201 case INDEX_op_ext8s_i64:
31f1275b 2202 case INDEX_op_ext8s_i32:
929f8b55 2203 tcg_out_sxt(s, ext, MO_8, a0, a1);
31f1275b
CF
2204 break;
2205 case INDEX_op_ext16s_i64:
31f1275b 2206 case INDEX_op_ext16s_i32:
929f8b55 2207 tcg_out_sxt(s, ext, MO_16, a0, a1);
31f1275b 2208 break;
4f2331e5 2209 case INDEX_op_ext_i32_i64:
31f1275b 2210 case INDEX_op_ext32s_i64:
929f8b55 2211 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
31f1275b
CF
2212 break;
2213 case INDEX_op_ext8u_i64:
2214 case INDEX_op_ext8u_i32:
929f8b55 2215 tcg_out_uxt(s, MO_8, a0, a1);
31f1275b
CF
2216 break;
2217 case INDEX_op_ext16u_i64:
2218 case INDEX_op_ext16u_i32:
929f8b55 2219 tcg_out_uxt(s, MO_16, a0, a1);
31f1275b 2220 break;
4f2331e5 2221 case INDEX_op_extu_i32_i64:
31f1275b 2222 case INDEX_op_ext32u_i64:
929f8b55 2223 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
31f1275b
CF
2224 break;
2225
b3c56df7
RH
2226 case INDEX_op_deposit_i64:
2227 case INDEX_op_deposit_i32:
2228 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2229 break;
2230
e2179f94
RH
2231 case INDEX_op_extract_i64:
2232 case INDEX_op_extract_i32:
2233 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2234 break;
2235
2236 case INDEX_op_sextract_i64:
2237 case INDEX_op_sextract_i32:
2238 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2239 break;
2240
464c2969
RH
2241 case INDEX_op_extract2_i64:
2242 case INDEX_op_extract2_i32:
1789d427 2243 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
464c2969
RH
2244 break;
2245
c6e929e7
RH
2246 case INDEX_op_add2_i32:
2247 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2248 (int32_t)args[4], args[5], const_args[4],
2249 const_args[5], false);
2250 break;
2251 case INDEX_op_add2_i64:
2252 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2253 args[5], const_args[4], const_args[5], false);
2254 break;
2255 case INDEX_op_sub2_i32:
2256 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2257 (int32_t)args[4], args[5], const_args[4],
2258 const_args[5], true);
2259 break;
2260 case INDEX_op_sub2_i64:
2261 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2262 args[5], const_args[4], const_args[5], true);
2263 break;
2264
1fcc9ddf
RH
2265 case INDEX_op_muluh_i64:
2266 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2267 break;
2268 case INDEX_op_mulsh_i64:
2269 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2270 break;
2271
c7a59c2a
PK
2272 case INDEX_op_mb:
2273 tcg_out_mb(s, a0);
2274 break;
2275
96d0ee7f 2276 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
a51a6b6a 2277 case INDEX_op_mov_i64:
96d0ee7f 2278 case INDEX_op_call: /* Always emitted via tcg_out_call. */
4a136e0a 2279 default:
14e4c1e2 2280 g_assert_not_reached();
4a136e0a 2281 }
04ce397b
RH
2282
2283#undef REG0
4a136e0a
CF
2284}
2285
14e4c1e2
RH
2286static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2287 unsigned vecl, unsigned vece,
5e8892db
MR
2288 const TCGArg args[TCG_MAX_OP_ARGS],
2289 const int const_args[TCG_MAX_OP_ARGS])
14e4c1e2 2290{
d81bad24 2291 static const AArch64Insn cmp_vec_insn[16] = {
14e4c1e2
RH
2292 [TCG_COND_EQ] = I3616_CMEQ,
2293 [TCG_COND_GT] = I3616_CMGT,
2294 [TCG_COND_GE] = I3616_CMGE,
2295 [TCG_COND_GTU] = I3616_CMHI,
2296 [TCG_COND_GEU] = I3616_CMHS,
2297 };
d81bad24
RH
2298 static const AArch64Insn cmp_scalar_insn[16] = {
2299 [TCG_COND_EQ] = I3611_CMEQ,
2300 [TCG_COND_GT] = I3611_CMGT,
2301 [TCG_COND_GE] = I3611_CMGE,
2302 [TCG_COND_GTU] = I3611_CMHI,
2303 [TCG_COND_GEU] = I3611_CMHS,
2304 };
2305 static const AArch64Insn cmp0_vec_insn[16] = {
14e4c1e2
RH
2306 [TCG_COND_EQ] = I3617_CMEQ0,
2307 [TCG_COND_GT] = I3617_CMGT0,
2308 [TCG_COND_GE] = I3617_CMGE0,
2309 [TCG_COND_LT] = I3617_CMLT0,
2310 [TCG_COND_LE] = I3617_CMLE0,
2311 };
d81bad24
RH
2312 static const AArch64Insn cmp0_scalar_insn[16] = {
2313 [TCG_COND_EQ] = I3612_CMEQ0,
2314 [TCG_COND_GT] = I3612_CMGT0,
2315 [TCG_COND_GE] = I3612_CMGE0,
2316 [TCG_COND_LT] = I3612_CMLT0,
2317 [TCG_COND_LE] = I3612_CMLE0,
2318 };
14e4c1e2
RH
2319
2320 TCGType type = vecl + TCG_TYPE_V64;
2321 unsigned is_q = vecl;
d81bad24 2322 bool is_scalar = !is_q && vece == MO_64;
a9e434a5 2323 TCGArg a0, a1, a2, a3;
9e27f58b 2324 int cmode, imm8;
14e4c1e2
RH
2325
2326 a0 = args[0];
2327 a1 = args[1];
2328 a2 = args[2];
2329
2330 switch (opc) {
2331 case INDEX_op_ld_vec:
2332 tcg_out_ld(s, type, a0, a1, a2);
2333 break;
2334 case INDEX_op_st_vec:
2335 tcg_out_st(s, type, a0, a1, a2);
2336 break;
37ee55a0
RH
2337 case INDEX_op_dupm_vec:
2338 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2339 break;
14e4c1e2 2340 case INDEX_op_add_vec:
d81bad24
RH
2341 if (is_scalar) {
2342 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2343 } else {
2344 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2345 }
14e4c1e2
RH
2346 break;
2347 case INDEX_op_sub_vec:
d81bad24
RH
2348 if (is_scalar) {
2349 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2350 } else {
2351 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2352 }
14e4c1e2
RH
2353 break;
2354 case INDEX_op_mul_vec:
2355 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2356 break;
2357 case INDEX_op_neg_vec:
d81bad24
RH
2358 if (is_scalar) {
2359 tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2360 } else {
2361 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2362 }
14e4c1e2 2363 break;
a456394a 2364 case INDEX_op_abs_vec:
d81bad24
RH
2365 if (is_scalar) {
2366 tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2367 } else {
2368 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2369 }
a456394a 2370 break;
14e4c1e2 2371 case INDEX_op_and_vec:
9e27f58b
RH
2372 if (const_args[2]) {
2373 is_shimm1632(~a2, &cmode, &imm8);
2374 if (a0 == a1) {
2375 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2376 return;
2377 }
2378 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2379 a2 = a0;
2380 }
14e4c1e2
RH
2381 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2382 break;
2383 case INDEX_op_or_vec:
9e27f58b
RH
2384 if (const_args[2]) {
2385 is_shimm1632(a2, &cmode, &imm8);
2386 if (a0 == a1) {
2387 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2388 return;
2389 }
2390 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2391 a2 = a0;
2392 }
14e4c1e2
RH
2393 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2394 break;
14e4c1e2 2395 case INDEX_op_andc_vec:
9e27f58b
RH
2396 if (const_args[2]) {
2397 is_shimm1632(a2, &cmode, &imm8);
2398 if (a0 == a1) {
2399 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2400 return;
2401 }
2402 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2403 a2 = a0;
2404 }
14e4c1e2
RH
2405 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2406 break;
2407 case INDEX_op_orc_vec:
9e27f58b
RH
2408 if (const_args[2]) {
2409 is_shimm1632(~a2, &cmode, &imm8);
2410 if (a0 == a1) {
2411 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2412 return;
2413 }
2414 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2415 a2 = a0;
2416 }
14e4c1e2
RH
2417 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2418 break;
9e27f58b
RH
2419 case INDEX_op_xor_vec:
2420 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2421 break;
d32648d4 2422 case INDEX_op_ssadd_vec:
d81bad24
RH
2423 if (is_scalar) {
2424 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2425 } else {
2426 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2427 }
d32648d4
RH
2428 break;
2429 case INDEX_op_sssub_vec:
d81bad24
RH
2430 if (is_scalar) {
2431 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2432 } else {
2433 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2434 }
d32648d4
RH
2435 break;
2436 case INDEX_op_usadd_vec:
d81bad24
RH
2437 if (is_scalar) {
2438 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2439 } else {
2440 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2441 }
d32648d4
RH
2442 break;
2443 case INDEX_op_ussub_vec:
d81bad24
RH
2444 if (is_scalar) {
2445 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2446 } else {
2447 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2448 }
d32648d4 2449 break;
93f332a5
RH
2450 case INDEX_op_smax_vec:
2451 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2452 break;
2453 case INDEX_op_smin_vec:
2454 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2455 break;
2456 case INDEX_op_umax_vec:
2457 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2458 break;
2459 case INDEX_op_umin_vec:
2460 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2461 break;
14e4c1e2
RH
2462 case INDEX_op_not_vec:
2463 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2464 break;
14e4c1e2 2465 case INDEX_op_shli_vec:
d81bad24
RH
2466 if (is_scalar) {
2467 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2468 } else {
2469 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2470 }
14e4c1e2
RH
2471 break;
2472 case INDEX_op_shri_vec:
d81bad24
RH
2473 if (is_scalar) {
2474 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2475 } else {
2476 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2477 }
14e4c1e2
RH
2478 break;
2479 case INDEX_op_sari_vec:
d81bad24
RH
2480 if (is_scalar) {
2481 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2482 } else {
2483 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2484 }
14e4c1e2 2485 break;
7cff8988 2486 case INDEX_op_aa64_sli_vec:
d81bad24
RH
2487 if (is_scalar) {
2488 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2489 } else {
2490 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2491 }
7cff8988 2492 break;
79525dfd 2493 case INDEX_op_shlv_vec:
d81bad24
RH
2494 if (is_scalar) {
2495 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2496 } else {
2497 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2498 }
79525dfd
RH
2499 break;
2500 case INDEX_op_aa64_sshl_vec:
d81bad24
RH
2501 if (is_scalar) {
2502 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2503 } else {
2504 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2505 }
79525dfd 2506 break;
14e4c1e2
RH
2507 case INDEX_op_cmp_vec:
2508 {
2509 TCGCond cond = args[3];
2510 AArch64Insn insn;
2511
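            /* NE has no direct vector encoding.  Against zero, CMTST
               (set lanes where (a & b) != 0) with both operands a1
               computes a1 != 0 directly; otherwise CMEQ and invert.  */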
2512 if (cond == TCG_COND_NE) {
2513 if (const_args[2]) {
d81bad24
RH
2514 if (is_scalar) {
2515 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2516 } else {
2517 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2518 }
14e4c1e2 2519 } else {
d81bad24
RH
2520 if (is_scalar) {
2521 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2522 } else {
2523 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2524 }
14e4c1e2
RH
2525 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2526 }
2527 } else {
2528 if (const_args[2]) {
d81bad24
RH
2529 if (is_scalar) {
2530 insn = cmp0_scalar_insn[cond];
2531 if (insn) {
2532 tcg_out_insn_3612(s, insn, vece, a0, a1);
2533 break;
2534 }
2535 } else {
2536 insn = cmp0_vec_insn[cond];
2537 if (insn) {
2538 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2539 break;
2540 }
14e4c1e2 2541 }
4e186175 2542 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
14e4c1e2
RH
2543 a2 = TCG_VEC_TMP;
2544 }
d81bad24
RH
2545 if (is_scalar) {
2546 insn = cmp_scalar_insn[cond];
2547 if (insn == 0) {
2548 TCGArg t;
2549 t = a1, a1 = a2, a2 = t;
2550 cond = tcg_swap_cond(cond);
2551 insn = cmp_scalar_insn[cond];
2552 tcg_debug_assert(insn != 0);
2553 }
2554 tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2555 } else {
2556 insn = cmp_vec_insn[cond];
2557 if (insn == 0) {
2558 TCGArg t;
2559 t = a1, a1 = a2, a2 = t;
2560 cond = tcg_swap_cond(cond);
2561 insn = cmp_vec_insn[cond];
2562 tcg_debug_assert(insn != 0);
2563 }
2564 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
14e4c1e2 2565 }
14e4c1e2
RH
2566 }
2567 }
2568 break;
bab1671f 2569
a9e434a5
RH
2570 case INDEX_op_bitsel_vec:
2571 a3 = args[3];
2572 if (a0 == a3) {
2573 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2574 } else if (a0 == a2) {
2575 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2576 } else {
2577 if (a0 != a1) {
2578 tcg_out_mov(s, type, a0, a1);
2579 }
2580 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2581 }
2582 break;
2583
bab1671f 2584 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
bab1671f 2585 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
14e4c1e2
RH
2586 default:
2587 g_assert_not_reached();
2588 }
2589}
2590
2591int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2592{
2593 switch (opc) {
2594 case INDEX_op_add_vec:
2595 case INDEX_op_sub_vec:
14e4c1e2
RH
2596 case INDEX_op_and_vec:
2597 case INDEX_op_or_vec:
2598 case INDEX_op_xor_vec:
2599 case INDEX_op_andc_vec:
2600 case INDEX_op_orc_vec:
2601 case INDEX_op_neg_vec:
a456394a 2602 case INDEX_op_abs_vec:
14e4c1e2
RH
2603 case INDEX_op_not_vec:
2604 case INDEX_op_cmp_vec:
2605 case INDEX_op_shli_vec:
2606 case INDEX_op_shri_vec:
2607 case INDEX_op_sari_vec:
d32648d4
RH
2608 case INDEX_op_ssadd_vec:
2609 case INDEX_op_sssub_vec:
2610 case INDEX_op_usadd_vec:
2611 case INDEX_op_ussub_vec:
79525dfd 2612 case INDEX_op_shlv_vec:
a9e434a5 2613 case INDEX_op_bitsel_vec:
14e4c1e2 2614 return 1;
7cff8988 2615 case INDEX_op_rotli_vec:
79525dfd
RH
2616 case INDEX_op_shrv_vec:
2617 case INDEX_op_sarv_vec:
7cff8988
RH
2618 case INDEX_op_rotlv_vec:
2619 case INDEX_op_rotrv_vec:
79525dfd 2620 return -1;
e65a5f22 2621 case INDEX_op_mul_vec:
a7b6d286
RH
2622 case INDEX_op_smax_vec:
2623 case INDEX_op_smin_vec:
2624 case INDEX_op_umax_vec:
2625 case INDEX_op_umin_vec:
e65a5f22 2626 return vece < MO_64;
14e4c1e2
RH
2627
2628 default:
2629 return 0;
2630 }
2631}
2632
2633void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2634 TCGArg a0, ...)
2635{
79525dfd 2636 va_list va;
10061ffe 2637 TCGv_vec v0, v1, v2, t1, t2, c1;
7cff8988 2638 TCGArg a2;
79525dfd
RH
2639
2640 va_start(va, a0);
2641 v0 = temp_tcgv_vec(arg_temp(a0));
2642 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
7cff8988 2643 a2 = va_arg(va, TCGArg);
2dfa2f18 2644 va_end(va);
79525dfd
RH
2645
2646 switch (opc) {
7cff8988
RH
2647 case INDEX_op_rotli_vec:
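        /* Expand rotate-left-immediate as an unsigned shift right by
           (width - a2) into a temp, then SLI (shift left and insert)
           merges v1 << a2 on top of it.  */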
2648 t1 = tcg_temp_new_vec(type);
2649 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2650 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2651 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2652 tcg_temp_free_vec(t1);
2653 break;
2654
79525dfd
RH
2655 case INDEX_op_shrv_vec:
2656 case INDEX_op_sarv_vec:
2657 /* Right shifts are negative left shifts for AArch64. */
2dfa2f18 2658 v2 = temp_tcgv_vec(arg_temp(a2));
79525dfd
RH
2659 t1 = tcg_temp_new_vec(type);
2660 tcg_gen_neg_vec(vece, t1, v2);
2661 opc = (opc == INDEX_op_shrv_vec
2662 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2663 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2664 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2665 tcg_temp_free_vec(t1);
2666 break;
2667
7cff8988 2668 case INDEX_op_rotlv_vec:
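        /* rotlv == shlv(v1, v2) | shlv(v1, v2 - width): USHL treats
           the negative count as a right shift by (width - v2).  */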
2dfa2f18 2669 v2 = temp_tcgv_vec(arg_temp(a2));
7cff8988 2670 t1 = tcg_temp_new_vec(type);
10061ffe
RH
2671 c1 = tcg_constant_vec(type, vece, 8 << vece);
2672 tcg_gen_sub_vec(vece, t1, v2, c1);
7cff8988
RH
2673 /* Right shifts are negative left shifts for AArch64. */
2674 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2675 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2676 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2677 tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2678 tcg_gen_or_vec(vece, v0, v0, t1);
2679 tcg_temp_free_vec(t1);
2680 break;
2681
2682 case INDEX_op_rotrv_vec:
2dfa2f18 2683 v2 = temp_tcgv_vec(arg_temp(a2));
7cff8988
RH
2684 t1 = tcg_temp_new_vec(type);
2685 t2 = tcg_temp_new_vec(type);
10061ffe 2686 c1 = tcg_constant_vec(type, vece, 8 << vece);
7cff8988 2687 tcg_gen_neg_vec(vece, t1, v2);
10061ffe 2688 tcg_gen_sub_vec(vece, t2, c1, v2);
7cff8988
RH
2689 /* Right shifts are negative left shifts for AArch64. */
2690 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2691 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2692 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2693 tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2694 tcg_gen_or_vec(vece, v0, t1, t2);
2695 tcg_temp_free_vec(t1);
2696 tcg_temp_free_vec(t2);
2697 break;
2698
79525dfd
RH
2699 default:
2700 g_assert_not_reached();
2701 }
14e4c1e2
RH
2702}
2703
39e7522b
RH
2704static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2705{
1897cc2e
RH
2706 switch (op) {
2707 case INDEX_op_goto_ptr:
39e7522b 2708 return C_O0_I1(r);
f69d277e 2709
1897cc2e
RH
2710 case INDEX_op_ld8u_i32:
2711 case INDEX_op_ld8s_i32:
2712 case INDEX_op_ld16u_i32:
2713 case INDEX_op_ld16s_i32:
2714 case INDEX_op_ld_i32:
2715 case INDEX_op_ld8u_i64:
2716 case INDEX_op_ld8s_i64:
2717 case INDEX_op_ld16u_i64:
2718 case INDEX_op_ld16s_i64:
2719 case INDEX_op_ld32u_i64:
2720 case INDEX_op_ld32s_i64:
2721 case INDEX_op_ld_i64:
2722 case INDEX_op_neg_i32:
2723 case INDEX_op_neg_i64:
2724 case INDEX_op_not_i32:
2725 case INDEX_op_not_i64:
2726 case INDEX_op_bswap16_i32:
2727 case INDEX_op_bswap32_i32:
2728 case INDEX_op_bswap16_i64:
2729 case INDEX_op_bswap32_i64:
2730 case INDEX_op_bswap64_i64:
2731 case INDEX_op_ext8s_i32:
2732 case INDEX_op_ext16s_i32:
2733 case INDEX_op_ext8u_i32:
2734 case INDEX_op_ext16u_i32:
2735 case INDEX_op_ext8s_i64:
2736 case INDEX_op_ext16s_i64:
2737 case INDEX_op_ext32s_i64:
2738 case INDEX_op_ext8u_i64:
2739 case INDEX_op_ext16u_i64:
2740 case INDEX_op_ext32u_i64:
2741 case INDEX_op_ext_i32_i64:
2742 case INDEX_op_extu_i32_i64:
2743 case INDEX_op_extract_i32:
2744 case INDEX_op_extract_i64:
2745 case INDEX_op_sextract_i32:
2746 case INDEX_op_sextract_i64:
39e7522b 2747 return C_O1_I1(r, r);
1897cc2e
RH
2748
2749 case INDEX_op_st8_i32:
2750 case INDEX_op_st16_i32:
2751 case INDEX_op_st_i32:
2752 case INDEX_op_st8_i64:
2753 case INDEX_op_st16_i64:
2754 case INDEX_op_st32_i64:
2755 case INDEX_op_st_i64:
39e7522b 2756 return C_O0_I2(rZ, r);
1897cc2e
RH
2757
2758 case INDEX_op_add_i32:
2759 case INDEX_op_add_i64:
2760 case INDEX_op_sub_i32:
2761 case INDEX_op_sub_i64:
2762 case INDEX_op_setcond_i32:
2763 case INDEX_op_setcond_i64:
39e7522b 2764 return C_O1_I2(r, r, rA);
1897cc2e
RH
2765
2766 case INDEX_op_mul_i32:
2767 case INDEX_op_mul_i64:
2768 case INDEX_op_div_i32:
2769 case INDEX_op_div_i64:
2770 case INDEX_op_divu_i32:
2771 case INDEX_op_divu_i64:
2772 case INDEX_op_rem_i32:
2773 case INDEX_op_rem_i64:
2774 case INDEX_op_remu_i32:
2775 case INDEX_op_remu_i64:
2776 case INDEX_op_muluh_i64:
2777 case INDEX_op_mulsh_i64:
39e7522b 2778 return C_O1_I2(r, r, r);
1897cc2e
RH
2779
2780 case INDEX_op_and_i32:
2781 case INDEX_op_and_i64:
2782 case INDEX_op_or_i32:
2783 case INDEX_op_or_i64:
2784 case INDEX_op_xor_i32:
2785 case INDEX_op_xor_i64:
2786 case INDEX_op_andc_i32:
2787 case INDEX_op_andc_i64:
2788 case INDEX_op_orc_i32:
2789 case INDEX_op_orc_i64:
2790 case INDEX_op_eqv_i32:
2791 case INDEX_op_eqv_i64:
39e7522b 2792 return C_O1_I2(r, r, rL);
1897cc2e
RH
2793
2794 case INDEX_op_shl_i32:
2795 case INDEX_op_shr_i32:
2796 case INDEX_op_sar_i32:
2797 case INDEX_op_rotl_i32:
2798 case INDEX_op_rotr_i32:
2799 case INDEX_op_shl_i64:
2800 case INDEX_op_shr_i64:
2801 case INDEX_op_sar_i64:
2802 case INDEX_op_rotl_i64:
2803 case INDEX_op_rotr_i64:
39e7522b 2804 return C_O1_I2(r, r, ri);
1897cc2e
RH
2805
2806 case INDEX_op_clz_i32:
2807 case INDEX_op_ctz_i32:
2808 case INDEX_op_clz_i64:
2809 case INDEX_op_ctz_i64:
39e7522b 2810 return C_O1_I2(r, r, rAL);
1897cc2e
RH
2811
2812 case INDEX_op_brcond_i32:
2813 case INDEX_op_brcond_i64:
39e7522b 2814 return C_O0_I2(r, rA);
1897cc2e
RH
2815
2816 case INDEX_op_movcond_i32:
2817 case INDEX_op_movcond_i64:
39e7522b 2818 return C_O1_I4(r, r, rA, rZ, rZ);
1897cc2e
RH
2819
2820 case INDEX_op_qemu_ld_i32:
2821 case INDEX_op_qemu_ld_i64:
39e7522b 2822 return C_O1_I1(r, l);
1897cc2e
RH
2823 case INDEX_op_qemu_st_i32:
2824 case INDEX_op_qemu_st_i64:
39e7522b 2825 return C_O0_I2(lZ, l);
1897cc2e
RH
2826
2827 case INDEX_op_deposit_i32:
2828 case INDEX_op_deposit_i64:
39e7522b 2829 return C_O1_I2(r, 0, rZ);
1897cc2e 2830
464c2969
RH
2831 case INDEX_op_extract2_i32:
2832 case INDEX_op_extract2_i64:
39e7522b 2833 return C_O1_I2(r, rZ, rZ);
464c2969 2834
1897cc2e
RH
2835 case INDEX_op_add2_i32:
2836 case INDEX_op_add2_i64:
2837 case INDEX_op_sub2_i32:
2838 case INDEX_op_sub2_i64:
39e7522b 2839 return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
1897cc2e 2840
14e4c1e2
RH
2841 case INDEX_op_add_vec:
2842 case INDEX_op_sub_vec:
2843 case INDEX_op_mul_vec:
14e4c1e2 2844 case INDEX_op_xor_vec:
d32648d4
RH
2845 case INDEX_op_ssadd_vec:
2846 case INDEX_op_sssub_vec:
2847 case INDEX_op_usadd_vec:
2848 case INDEX_op_ussub_vec:
93f332a5
RH
2849 case INDEX_op_smax_vec:
2850 case INDEX_op_smin_vec:
2851 case INDEX_op_umax_vec:
2852 case INDEX_op_umin_vec:
79525dfd
RH
2853 case INDEX_op_shlv_vec:
2854 case INDEX_op_shrv_vec:
2855 case INDEX_op_sarv_vec:
2856 case INDEX_op_aa64_sshl_vec:
39e7522b 2857 return C_O1_I2(w, w, w);
14e4c1e2
RH
2858 case INDEX_op_not_vec:
2859 case INDEX_op_neg_vec:
a456394a 2860 case INDEX_op_abs_vec:
14e4c1e2
RH
2861 case INDEX_op_shli_vec:
2862 case INDEX_op_shri_vec:
2863 case INDEX_op_sari_vec:
39e7522b 2864 return C_O1_I1(w, w);
14e4c1e2 2865 case INDEX_op_ld_vec:
37ee55a0 2866 case INDEX_op_dupm_vec:
39e7522b
RH
2867 return C_O1_I1(w, r);
2868 case INDEX_op_st_vec:
2869 return C_O0_I2(w, r);
14e4c1e2 2870 case INDEX_op_dup_vec:
39e7522b 2871 return C_O1_I1(w, wr);
9e27f58b
RH
2872 case INDEX_op_or_vec:
2873 case INDEX_op_andc_vec:
39e7522b 2874 return C_O1_I2(w, w, wO);
9e27f58b
RH
2875 case INDEX_op_and_vec:
2876 case INDEX_op_orc_vec:
39e7522b 2877 return C_O1_I2(w, w, wN);
14e4c1e2 2878 case INDEX_op_cmp_vec:
39e7522b 2879 return C_O1_I2(w, w, wZ);
a9e434a5 2880 case INDEX_op_bitsel_vec:
39e7522b 2881 return C_O1_I3(w, w, w, w);
7cff8988 2882 case INDEX_op_aa64_sli_vec:
39e7522b 2883 return C_O1_I2(w, 0, w);
14e4c1e2 2884
1897cc2e 2885 default:
39e7522b 2886 g_assert_not_reached();
f69d277e 2887 }
f69d277e
RH
2888}
2889
4a136e0a
CF
2890static void tcg_target_init(TCGContext *s)
2891{
f46934df
RH
2892 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2893 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
14e4c1e2
RH
2894 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2895 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
f46934df 2896
14e4c1e2 2897 tcg_target_call_clobber_regs = -1ull;
f46934df
RH
2898 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2899 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2900 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2901 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2902 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2903 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2904 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2905 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2906 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2907 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2908 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
14e4c1e2
RH
2909 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2910 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2911 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2912 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2913 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2914 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2915 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2916 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
4a136e0a 2917
ccb1bb66 2918 s->reserved_regs = 0;
4a136e0a
CF
2919 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2920 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2921 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2922 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
14e4c1e2 2923 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
4a136e0a
CF
2924}
2925
38d195aa
RH
2926/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2927#define PUSH_SIZE ((30 - 19 + 1) * 8)
2928
2929#define FRAME_SIZE \
2930 ((PUSH_SIZE \
2931 + TCG_STATIC_CALL_ARGS_SIZE \
2932 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2933 + TCG_TARGET_STACK_ALIGN - 1) \
2934 & ~(TCG_TARGET_STACK_ALIGN - 1))
2935
2936/* We're expecting a 2-byte uleb128-encoded value. */
2937QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
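/* uleb128 carries 7 value bits per byte, so anything below 1 << 14
   fits in the two bytes reserved in the frame description below.  */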
2938
2939/* We're expecting to use a single ADDI insn. */
2940QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2941
4a136e0a
CF
2942static void tcg_target_qemu_prologue(TCGContext *s)
2943{
4a136e0a
CF
2944 TCGReg r;
2945
95f72aa9
RH
2946 /* Push (FP, LR) and allocate space for all saved registers. */
2947 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
38d195aa 2948 TCG_REG_SP, -PUSH_SIZE, 1, 1);
4a136e0a 2949
d82b78e4 2950 /* Set up frame pointer for canonical unwinding. */
929f8b55 2951 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
4a136e0a 2952
d82b78e4 2953 /* Store callee-preserved regs x19..x28. */
4a136e0a 2954 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
95f72aa9
RH
2955 int ofs = (r - TCG_REG_X19 + 2) * 8;
2956 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
4a136e0a
CF
2957 }
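    /* With PUSH_SIZE == 96, FP/LR occupy SP+0/SP+8 and the loop just
       stored x19/x20 at +16/+24 through x27/x28 at +80/+88.  */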
2958
096c46c0
RH
2959 /* Make stack space for TCG locals. */
2960 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
38d195aa 2961 FRAME_SIZE - PUSH_SIZE);
096c46c0 2962
95f72aa9 2963 /* Inform TCG about how to find TCG locals with register, offset, size. */
4a136e0a
CF
2964 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2965 CPU_TEMP_BUF_NLONGS * sizeof(long));
2966
4cbea598 2967#if !defined(CONFIG_SOFTMMU)
352bcb0a 2968 if (USE_GUEST_BASE) {
b76f21a7 2969 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
6a91c7c9
JK
2970 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2971 }
2972#endif
2973
4a136e0a 2974 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
81d8a5ee 2975 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
4a136e0a 2976
b19f0c2e
RH
2977 /*
2978 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2979 * and fall through to the rest of the epilogue.
2980 */
c8bc1168 2981 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
b19f0c2e
RH
2982 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2983
2984 /* TB epilogue */
ffba3eb3 2985 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
4a136e0a 2986
096c46c0
RH
2987 /* Remove TCG locals stack space. */
2988 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
38d195aa 2989 FRAME_SIZE - PUSH_SIZE);
4a136e0a 2990
95f72aa9 2991 /* Restore registers x19..x28. */
4a136e0a 2992 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
95f72aa9
RH
2993 int ofs = (r - TCG_REG_X19 + 2) * 8;
2994 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
4a136e0a
CF
2995 }
2996
95f72aa9
RH
2997 /* Pop (FP, LR), restore SP to previous frame. */
2998 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
38d195aa 2999 TCG_REG_SP, PUSH_SIZE, 0, 1);
81d8a5ee 3000 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
4a136e0a 3001}
38d195aa 3002
55129955
RH
3003static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3004{
3005 int i;
3006 for (i = 0; i < count; ++i) {
3007 p[i] = NOP;
3008 }
3009}
3010
38d195aa 3011typedef struct {
3d9bddb3 3012 DebugFrameHeader h;
38d195aa
RH
3013 uint8_t fde_def_cfa[4];
3014 uint8_t fde_reg_ofs[24];
3015} DebugFrame;
3016
3017#define ELF_HOST_MACHINE EM_AARCH64
3018
3d9bddb3
RH
3019static const DebugFrame debug_frame = {
3020 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3021 .h.cie.id = -1,
3022 .h.cie.version = 1,
3023 .h.cie.code_align = 1,
3024 .h.cie.data_align = 0x78, /* sleb128 -8 */
3025 .h.cie.return_column = TCG_REG_LR,
38d195aa
RH
3026
3027 /* Total FDE size does not include the "len" member. */
3d9bddb3 3028 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
38d195aa
RH
3029
3030 .fde_def_cfa = {
3031 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
3032 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
3033 (FRAME_SIZE >> 7)
3034 },
3035 .fde_reg_ofs = {
3036 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
3037 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
3038 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
3039 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
3040 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
3041 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
3042 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
3043 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
3044 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
3045         0x80 + 19, 10,              /* DW_CFA_offset, x19, -80 */
3046 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
3047 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
3048 }
3049};
3050
755bf9e5 3051void tcg_register_jit(const void *buf, size_t buf_size)
38d195aa 3052{
38d195aa
RH
3053 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3054}