]> git.proxmox.com Git - mirror_qemu.git/blame - tcg/aarch64/tcg-target.inc.c
tcg/i386: Use umin/umax in expanding unsigned compare
[mirror_qemu.git] / tcg / aarch64 / tcg-target.inc.c
CommitLineData
4a136e0a
CF
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
55129955 13#include "tcg-pool.inc.c"
4a136e0a
CF
14#include "qemu/bitops.h"
15
7763ffa0
RH
16/* We're going to re-use TCGType in setting of the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
#ifdef CONFIG_DEBUG_TCG
/*
 * Register names for debug dumps, indexed by TCG register number.
 * Entries 0-31 are the general registers (x29 shown as "fp", x31 as "sp"
 * per the AAPCS64 roles used here); entries 32-63 are the AdvSIMD
 * vector registers.
 *
 * Fix: slot 61 previously read "fp", a copy-paste from the general
 * register row; the vector register v29 is not the frame pointer.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */
4a136e0a
CF
34
/* Order in which the register allocator prefers registers.
   Call-saved registers come first so live values survive helper calls;
   the argument registers x0-x7 come last since helpers clobber them. */
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

/* Integer argument registers, in AAPCS64 order (x0-x7). */
static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
/* Integer return register (x0). */
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};
68
d82b78e4 69#define TCG_REG_TMP TCG_REG_X30
14e4c1e2 70#define TCG_VEC_TMP TCG_REG_V31
4a136e0a 71
6a91c7c9 72#ifndef CONFIG_SOFTMMU
352bcb0a
RH
73/* Note that XZR cannot be encoded in the address base register slot,
74 as that actaully encodes SP. So if we need to zero-extend the guest
75 address, via the address index register slot, we need to load even
76 a zero guest base into a register. */
77#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
4cbea598 78#define TCG_REG_GUEST_BASE TCG_REG_X28
6a91c7c9
JK
79#endif
80
214bfe83 81static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
4a136e0a 82{
8587c30c 83 ptrdiff_t offset = target - code_ptr;
214bfe83
RH
84 if (offset == sextract64(offset, 0, 26)) {
85 /* read instruction, mask away previous PC_REL26 parameter contents,
86 set the proper offset, then write back the instruction. */
87 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
88 return true;
89 }
90 return false;
4a136e0a
CF
91}
92
214bfe83 93static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
4a136e0a 94{
8587c30c 95 ptrdiff_t offset = target - code_ptr;
214bfe83
RH
96 if (offset == sextract64(offset, 0, 19)) {
97 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
98 return true;
99 }
100 return false;
4a136e0a
CF
101}
102
6ac17786 103static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
2ba7fae2 104 intptr_t value, intptr_t addend)
4a136e0a 105{
eabb7b91 106 tcg_debug_assert(addend == 0);
4a136e0a
CF
107 switch (type) {
108 case R_AARCH64_JUMP26:
109 case R_AARCH64_CALL26:
214bfe83 110 return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
4a136e0a 111 case R_AARCH64_CONDBR19:
214bfe83 112 return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
4a136e0a 113 default:
214bfe83 114 g_assert_not_reached();
4a136e0a
CF
115 }
116}
117
170bf931
RH
118#define TCG_CT_CONST_AIMM 0x100
119#define TCG_CT_CONST_LIMM 0x200
120#define TCG_CT_CONST_ZERO 0x400
121#define TCG_CT_CONST_MONE 0x800
90f1cd91 122
/* parse target specific constraints */
/* Consume one constraint letter from CT_STR, updating CT accordingly.
   Returns the advanced string pointer, or NULL for an unknown letter. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'r': /* general registers */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= 0xffffffffu;          /* x0-x31 occupy the low 32 bits */
        break;
    case 'w': /* advsimd registers */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= 0xffffffff00000000ull; /* v0-v31 occupy the high 32 bits */
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffffffffu;
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2, and x3 for helper args, better to avoid using them. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
        break;
    case 'A': /* Valid for arithmetic immediate (positive or negative).  */
        ct->ct |= TCG_CT_CONST_AIMM;
        break;
    case 'L': /* Valid for logical immediate.  */
        ct->ct |= TCG_CT_CONST_LIMM;
        break;
    case 'M': /* minus one */
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'Z': /* zero */
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return NULL;
    }
    return ct_str;
}
165
/* Match a constant valid for addition (12-bit, optionally shifted).
   An arithmetic immediate is an unsigned 12-bit value, either as-is or
   shifted left by 12. */
static inline bool is_aimm(uint64_t val)
{
    if ((val & ~0xfffull) == 0) {
        return true;                    /* fits in the low 12 bits */
    }
    return (val & ~0xfff000ull) == 0;   /* fits with LSL #12 */
}
171
/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    uint64_t lsb;

    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
          0....01....1
          0..01..10..0
       and their inverses.  */

    /* Normalize so that the form with msb clear is tested. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }

    /* Adding the lowest set bit collapses the low run of 1s; what is
       left must be zero or a single power of two for a valid pattern. */
    lsb = val & -val;
    val += lsb;
    return (val & (val - 1)) == 0;
}
192
14e4c1e2
RH
/* Match a constant that is valid for vectors. */
/* On success, fill in the AdvSIMD "modified immediate" encoding fields
   that tcg_out_insn_3606 consumes: *OP and *CMODE select the expansion
   rule, *IMM8 carries the 8-bit payload. */
static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8)
{
    int i;

    *op = 0;
    /* Match replication across 8 bits. */
    if (v64 == dup_const(MO_8, v64)) {
        *cmode = 0xe;
        *imm8 = v64 & 0xff;
        return true;
    }
    /* Match replication across 16 bits. */
    if (v64 == dup_const(MO_16, v64)) {
        uint16_t v16 = v64;

        if (v16 == (v16 & 0xff)) {
            *cmode = 0x8;       /* imm8 in the low byte of each half */
            *imm8 = v16 & 0xff;
            return true;
        } else if (v16 == (v16 & 0xff00)) {
            *cmode = 0xa;       /* imm8 shifted into the high byte */
            *imm8 = v16 >> 8;
            return true;
        }
    }
    /* Match replication across 32 bits. */
    if (v64 == dup_const(MO_32, v64)) {
        uint32_t v32 = v64;

        /* cmode 0/2/4/6: imm8 placed in byte 0/1/2/3 of each word. */
        if (v32 == (v32 & 0xff)) {
            *cmode = 0x0;
            *imm8 = v32 & 0xff;
            return true;
        } else if (v32 == (v32 & 0xff00)) {
            *cmode = 0x2;
            *imm8 = (v32 >> 8) & 0xff;
            return true;
        } else if (v32 == (v32 & 0xff0000)) {
            *cmode = 0x4;
            *imm8 = (v32 >> 16) & 0xff;
            return true;
        } else if (v32 == (v32 & 0xff000000)) {
            *cmode = 0x6;
            *imm8 = v32 >> 24;
            return true;
        } else if ((v32 & 0xffff00ff) == 0xff) {
            *cmode = 0xc;       /* imm8 << 8, low byte filled with ones */
            *imm8 = (v32 >> 8) & 0xff;
            return true;
        } else if ((v32 & 0xff00ffff) == 0xffff) {
            *cmode = 0xd;       /* imm8 << 16, low halfword of ones */
            *imm8 = (v32 >> 16) & 0xff;
            return true;
        }
        /* Match forms of a float32. */
        if (extract32(v32, 0, 19) == 0
            && (extract32(v32, 25, 6) == 0x20
                || extract32(v32, 25, 6) == 0x1f)) {
            *cmode = 0xf;
            *imm8 = (extract32(v32, 31, 1) << 7)
                  | (extract32(v32, 25, 1) << 6)
                  | extract32(v32, 19, 6);
            return true;
        }
    }
    /* Match forms of a float64. */
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *op = 1;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    /* Match bytes of 0x00 and 0xff. */
    for (i = 0; i < 64; i += 8) {
        uint64_t byte = extract64(v64, i, 8);
        if (byte != 0 && byte != 0xff) {
            break;
        }
    }
    if (i == 64) {
        /* cmode 0xe with op = 1: each imm8 bit expands to a full byte. */
        *cmode = 0xe;
        *op = 1;
        *imm8 = (extract64(v64, 0, 1) << 0)
              | (extract64(v64, 8, 1) << 1)
              | (extract64(v64, 16, 1) << 2)
              | (extract64(v64, 24, 1) << 3)
              | (extract64(v64, 32, 1) << 4)
              | (extract64(v64, 40, 1) << 5)
              | (extract64(v64, 48, 1) << 6)
              | (extract64(v64, 56, 1) << 7);
        return true;
    }
    return false;
}
292
f6c6afc1 293static int tcg_target_const_match(tcg_target_long val, TCGType type,
90f1cd91 294 const TCGArgConstraint *arg_ct)
4a136e0a
CF
295{
296 int ct = arg_ct->ct;
297
298 if (ct & TCG_CT_CONST) {
299 return 1;
300 }
170bf931 301 if (type == TCG_TYPE_I32) {
90f1cd91
RH
302 val = (int32_t)val;
303 }
304 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
305 return 1;
306 }
e029f293
RH
307 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
308 return 1;
309 }
04ce397b
RH
310 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
311 return 1;
312 }
c6e929e7
RH
313 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
314 return 1;
315 }
4a136e0a
CF
316
317 return 0;
318}
319
/* AArch64 condition codes as encoded in the 4-bit cond field of
   conditional branches and conditional-select instructions. */
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,     /* Signed greater or equal */
    COND_LT = 0xb,     /* Signed less than */
    COND_GT = 0xc,     /* Signed greater than */
    COND_LE = 0xd,     /* Signed less or equal */
    COND_AL = 0xe,     /* Always */
    COND_NV = 0xf,     /* behaves like COND_AL here */
};

/* Map TCG comparison conditions to the corresponding AArch64 cond field. */
static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};
354
3d4299f4
RH
/* Values for the opc field of the load/store register encodings;
   shifted into place by the I3312_* definitions below. */
typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;
4a136e0a 361
50573c66
RH
362/* We encode the format of the insn into the beginning of the name, so that
363 we can have the preprocessor help "typecheck" the insn vs the output
364 function. Arm didn't provide us with nice names for the formats, so we
365 use the section number of the architecture reference manual in which the
366 instruction group is described. */
367typedef enum {
3d9e69a2
RH
368 /* Compare and branch (immediate). */
369 I3201_CBZ = 0x34000000,
370 I3201_CBNZ = 0x35000000,
371
81d8a5ee
RH
372 /* Conditional branch (immediate). */
373 I3202_B_C = 0x54000000,
374
375 /* Unconditional branch (immediate). */
376 I3206_B = 0x14000000,
377 I3206_BL = 0x94000000,
378
379 /* Unconditional branch (register). */
380 I3207_BR = 0xd61f0000,
381 I3207_BLR = 0xd63f0000,
382 I3207_RET = 0xd65f0000,
383
f23e5e15
RH
384 /* AdvSIMD load/store single structure. */
385 I3303_LD1R = 0x0d40c000,
386
2acee8b2
PK
387 /* Load literal for loading the address at pc-relative offset */
388 I3305_LDR = 0x58000000,
14e4c1e2
RH
389 I3305_LDR_v64 = 0x5c000000,
390 I3305_LDR_v128 = 0x9c000000,
391
3d4299f4
RH
392 /* Load/store register. Described here as 3.3.12, but the helper
393 that emits them can transform to 3.3.10 or 3.3.13. */
394 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
395 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
396 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
397 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
398
399 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
400 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
401 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
402 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
403
404 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
405 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
406
407 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
408 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
409 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
410
14e4c1e2
RH
411 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
412 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
413
414 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
415 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
416
417 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
418 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
419
6c0f0c0f 420 I3312_TO_I3310 = 0x00200800,
3d4299f4
RH
421 I3312_TO_I3313 = 0x01000000,
422
95f72aa9
RH
423 /* Load/store register pair instructions. */
424 I3314_LDP = 0x28400000,
425 I3314_STP = 0x28000000,
426
096c46c0
RH
427 /* Add/subtract immediate instructions. */
428 I3401_ADDI = 0x11000000,
429 I3401_ADDSI = 0x31000000,
430 I3401_SUBI = 0x51000000,
431 I3401_SUBSI = 0x71000000,
432
b3c56df7
RH
433 /* Bitfield instructions. */
434 I3402_BFM = 0x33000000,
435 I3402_SBFM = 0x13000000,
436 I3402_UBFM = 0x53000000,
437
438 /* Extract instruction. */
439 I3403_EXTR = 0x13800000,
440
e029f293
RH
441 /* Logical immediate instructions. */
442 I3404_ANDI = 0x12000000,
443 I3404_ORRI = 0x32000000,
444 I3404_EORI = 0x52000000,
445
582ab779
RH
446 /* Move wide immediate instructions. */
447 I3405_MOVN = 0x12800000,
448 I3405_MOVZ = 0x52800000,
449 I3405_MOVK = 0x72800000,
450
c6e310d9
RH
451 /* PC relative addressing instructions. */
452 I3406_ADR = 0x10000000,
453 I3406_ADRP = 0x90000000,
454
50573c66
RH
455 /* Add/subtract shifted register instructions (without a shift). */
456 I3502_ADD = 0x0b000000,
457 I3502_ADDS = 0x2b000000,
458 I3502_SUB = 0x4b000000,
459 I3502_SUBS = 0x6b000000,
460
461 /* Add/subtract shifted register instructions (with a shift). */
462 I3502S_ADD_LSL = I3502_ADD,
463
c6e929e7
RH
464 /* Add/subtract with carry instructions. */
465 I3503_ADC = 0x1a000000,
466 I3503_SBC = 0x5a000000,
467
04ce397b
RH
468 /* Conditional select instructions. */
469 I3506_CSEL = 0x1a800000,
470 I3506_CSINC = 0x1a800400,
53c76c19
RH
471 I3506_CSINV = 0x5a800000,
472 I3506_CSNEG = 0x5a800400,
04ce397b 473
edd8824c 474 /* Data-processing (1 source) instructions. */
53c76c19
RH
475 I3507_CLZ = 0x5ac01000,
476 I3507_RBIT = 0x5ac00000,
edd8824c
RH
477 I3507_REV16 = 0x5ac00400,
478 I3507_REV32 = 0x5ac00800,
479 I3507_REV64 = 0x5ac00c00,
480
df9351e3
RH
481 /* Data-processing (2 source) instructions. */
482 I3508_LSLV = 0x1ac02000,
483 I3508_LSRV = 0x1ac02400,
484 I3508_ASRV = 0x1ac02800,
485 I3508_RORV = 0x1ac02c00,
1fcc9ddf
RH
486 I3508_SMULH = 0x9b407c00,
487 I3508_UMULH = 0x9bc07c00,
8678b71c
RH
488 I3508_UDIV = 0x1ac00800,
489 I3508_SDIV = 0x1ac00c00,
490
491 /* Data-processing (3 source) instructions. */
492 I3509_MADD = 0x1b000000,
493 I3509_MSUB = 0x1b008000,
df9351e3 494
50573c66
RH
495 /* Logical shifted register instructions (without a shift). */
496 I3510_AND = 0x0a000000,
14b155dd 497 I3510_BIC = 0x0a200000,
50573c66 498 I3510_ORR = 0x2a000000,
14b155dd 499 I3510_ORN = 0x2a200000,
50573c66 500 I3510_EOR = 0x4a000000,
14b155dd 501 I3510_EON = 0x4a200000,
50573c66 502 I3510_ANDS = 0x6a000000,
c7a59c2a 503
f7bcd966
RH
504 /* Logical shifted register instructions (with a shift). */
505 I3502S_AND_LSR = I3510_AND | (1 << 22),
506
14e4c1e2
RH
507 /* AdvSIMD copy */
508 I3605_DUP = 0x0e000400,
509 I3605_INS = 0x4e001c00,
510 I3605_UMOV = 0x0e003c00,
511
512 /* AdvSIMD modified immediate */
513 I3606_MOVI = 0x0f000400,
514
515 /* AdvSIMD shift by immediate */
516 I3614_SSHR = 0x0f000400,
517 I3614_SSRA = 0x0f001400,
518 I3614_SHL = 0x0f005400,
519 I3614_USHR = 0x2f000400,
520 I3614_USRA = 0x2f001400,
521
522 /* AdvSIMD three same. */
523 I3616_ADD = 0x0e208400,
524 I3616_AND = 0x0e201c00,
525 I3616_BIC = 0x0e601c00,
526 I3616_EOR = 0x2e201c00,
527 I3616_MUL = 0x0e209c00,
528 I3616_ORR = 0x0ea01c00,
529 I3616_ORN = 0x0ee01c00,
530 I3616_SUB = 0x2e208400,
531 I3616_CMGT = 0x0e203400,
532 I3616_CMGE = 0x0e203c00,
533 I3616_CMTST = 0x0e208c00,
534 I3616_CMHI = 0x2e203400,
535 I3616_CMHS = 0x2e203c00,
536 I3616_CMEQ = 0x2e208c00,
93f332a5
RH
537 I3616_SMAX = 0x0e206400,
538 I3616_SMIN = 0x0e206c00,
79525dfd 539 I3616_SSHL = 0x0e204400,
d32648d4
RH
540 I3616_SQADD = 0x0e200c00,
541 I3616_SQSUB = 0x0e202c00,
93f332a5
RH
542 I3616_UMAX = 0x2e206400,
543 I3616_UMIN = 0x2e206c00,
d32648d4
RH
544 I3616_UQADD = 0x2e200c00,
545 I3616_UQSUB = 0x2e202c00,
79525dfd 546 I3616_USHL = 0x2e204400,
14e4c1e2
RH
547
548 /* AdvSIMD two-reg misc. */
549 I3617_CMGT0 = 0x0e208800,
550 I3617_CMEQ0 = 0x0e209800,
551 I3617_CMLT0 = 0x0e20a800,
552 I3617_CMGE0 = 0x2e208800,
553 I3617_CMLE0 = 0x2e20a800,
554 I3617_NOT = 0x2e205800,
a456394a 555 I3617_ABS = 0x0e20b800,
14e4c1e2
RH
556 I3617_NEG = 0x2e20b800,
557
c7a59c2a 558 /* System instructions. */
14e4c1e2 559 NOP = 0xd503201f,
c7a59c2a
PK
560 DMB_ISH = 0xd50338bf,
561 DMB_LD = 0x00000100,
562 DMB_ST = 0x00000200,
50573c66 563} AArch64Insn;
4a136e0a 564
4a136e0a
CF
565static inline uint32_t tcg_in32(TCGContext *s)
566{
567 uint32_t v = *(uint32_t *)s->code_ptr;
568 return v;
569}
570
50573c66
RH
571/* Emit an opcode with "type-checking" of the format. */
572#define tcg_out_insn(S, FMT, OP, ...) \
573 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
574
f23e5e15
RH
/* AdvSIMD load/store single structure (e.g. LD1R).  Q selects the
   128-bit form, SIZE the element size field at bits [11:10]. */
static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

/* Load literal: IMM19 is the pc-relative displacement placed in bits [23:5]. */
static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}
586
3d9e69a2
RH
/* Compare and branch immediate (CBZ/CBNZ): EXT selects the 64-bit form. */
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

/* Conditional branch immediate (B.cond). */
static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

/* Unconditional branch immediate (B/BL), 26-bit displacement. */
static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

/* Unconditional branch register (BR/BLR/RET). */
static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}
608
95f72aa9
RH
/* Load/store register pair (LDP/STP), always the 64-bit form.
   OFS is a byte offset, which must be 8-aligned and fit the signed
   scaled 7-bit field; PRE/W select pre-index and writeback. */
static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);  /* scaled imm7 at bits [21:15] */

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}
622
096c46c0
RH
/* Add/subtract immediate: AIMM must be a valid arithmetic immediate,
   i.e. a 12-bit value optionally pre-shifted left by 12 (see is_aimm). */
static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}
634
e029f293
RH
/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404 tcg_out_insn_3402

/* Extract (EXTR).  Note that the N bit (22) is emitted equal to SF (31). */
static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}
653
582ab779
RH
/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    /* Only shifts of 0, 16, 32 or 48 are encodable. */
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

/* PC-relative addressing (ADR/ADRP): DISP is split into the 2-bit immlo
   field at bits [30:29] and the 19-bit immhi field at bits [23:5]. */
static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}
668
50573c66
RH
/* This function is for both 3.5.2 (Add/Subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount. */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502
691
04ce397b
RH
/* Conditional select (CSEL/CSINC/CSINV/CSNEG): the condition is encoded
   in bits [15:12]. */
static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

/* Data-processing, 1 source register (CLZ, RBIT, REV*). */
static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

/* Data-processing, 3 source registers (MADD/MSUB): RA is the addend. */
static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}
710
14e4c1e2
RH
/* AdvSIMD copy (DUP/INS/UMOV). */
static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

/* AdvSIMD modified immediate (MOVI): IMM8 is split across abc (bits
   [18:16]) and defgh (bits [9:5]). */
static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

/* AdvSIMD shift by immediate: IMMHB is the combined immh:immb field. */
static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

/* AdvSIMD three registers of the same type: SIZE is the element size. */
static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

/* AdvSIMD two-register miscellaneous. */
static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}
747
/* Load/store with register offset. */
static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust. */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

/* Load/store with unscaled signed 9-bit immediate offset. */
static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

/* Load/store with scaled unsigned 12-bit immediate offset. */
static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust. */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}
770
7d11fc7c
RH
/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).
   Needed because in the ORR encoding register 31 means XZR, not SP. */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}
782
4ec4f0bd
RH
/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.
   R becomes the IMMR (rotation) field and C the IMMS field of the
   encoding; N is set from EXT in the tcg_out_insn_3404 call below. */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);    /* leading zeros */
    l = ctz64(limm);    /* trailing zeros */
    if (l == 0) {
        r = 0;                      /* form 0....01....1 */
        c = ctz64(~limm) - 1;       /* length of the low run of 1s, minus 1 */
        if (h == 0) {
            r = clz64(~limm);       /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;                 /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        /* Truncate rotation and run length to the 32-bit field width. */
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
813
/* Load the vector constant V64 (one 64-bit replication unit) into RD:
   a single MOVI when the value matches a modified-immediate form,
   otherwise a literal-pool load.  The 19-bit literal displacement
   reuses the R_AARCH64_CONDBR19 relocation. */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg rd, tcg_target_long v64)
{
    int op, cmode, imm8;

    if (is_fimm(v64, &op, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8);
    } else if (type == TCG_TYPE_V128) {
        /* Pool the 64-bit unit twice for a full 128-bit load. */
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}
829
e7632cfa
RH
830static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
831 TCGReg rd, TCGReg rs)
832{
833 int is_q = type - TCG_TYPE_V64;
834 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
835 return true;
836}
837
d6ecb4a9
RH
/* Load one VECE-sized element from BASE+OFFSET and replicate it to all
   lanes of vector R (LD1R).  LD1R takes no immediate offset, so the
   displacement is first folded into the temp register.  Always succeeds. */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        /* Out of range for the add/sub-immediate pair: materialize. */
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        /* At most two immediates: high 12 bits (LSL 12), then low 12. */
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}
866
582ab779
RH
/* Load the constant VALUE into register RD, choosing the shortest of:
   single MOVZ/MOVN, logical immediate (ORR with XZR), pc-relative
   ADR/ADRP+ADD, MOVZ/MOVN+MOVK pair, or a constant-pool load.
   Vector types are forwarded to tcg_out_dupi_vec. */
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);      /* general register */
        break;

    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(rd >= 32);     /* vector register */
        tcg_out_dupi_vec(s, type, rd, value);
        return;

    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        tcg_target_long disp = value - (intptr_t)s->code_ptr;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        /* ADRP gives the 4k page; ADDI supplies the low 12 bits. */
        disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    /* s0/s1 are the shifts of the lowest and next non-zero 16-bit
       chunks of t0; t2 is what remains after clearing both. */
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffUL << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffUL << s1);
    if (t2 == 0) {
        /* At most two chunks differ from the background: one MOVZ/MOVN
           plus, if needed, one MOVK with the original value's bits. */
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
963
3d4299f4
RH
964/* Define something more legible for general use. */
965#define tcg_out_ldst_r tcg_out_insn_3310
4a136e0a 966
14e4c1e2
RH
967static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
968 TCGReg rn, intptr_t offset, int lgsize)
4a136e0a 969{
3d4299f4
RH
970 /* If the offset is naturally aligned and in range, then we can
971 use the scaled uimm12 encoding */
14e4c1e2
RH
972 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
973 uintptr_t scaled_uimm = offset >> lgsize;
3d4299f4
RH
974 if (scaled_uimm <= 0xfff) {
975 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
976 return;
b1f6dc0d
CF
977 }
978 }
979
a056c9fa
RH
980 /* Small signed offsets can use the unscaled encoding. */
981 if (offset >= -256 && offset < 256) {
982 tcg_out_insn_3312(s, insn, rd, rn, offset);
983 return;
984 }
985
3d4299f4 986 /* Worst-case scenario, move offset to temp register, use reg offset. */
b1f6dc0d 987 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
6c0f0c0f 988 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
4a136e0a
CF
989}
990
/* Register-to-register move, handling both the general and the vector
   register files (register numbers >= 32 are vector registers). */
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            /* gp <- gp: plain register move. */
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            /* gp <- vec: move from vector element 0. */
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            /* vec <- gp: insert into vector element 0. */
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        /* vec <- vec: ORR Vd, Vn, Vn serves as a vector move. */
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}
1025
14e4c1e2
RH
1026static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1027 TCGReg base, intptr_t ofs)
4a136e0a 1028{
14e4c1e2
RH
1029 AArch64Insn insn;
1030 int lgsz;
1031
1032 switch (type) {
1033 case TCG_TYPE_I32:
1034 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1035 lgsz = 2;
1036 break;
1037 case TCG_TYPE_I64:
1038 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1039 lgsz = 3;
1040 break;
1041 case TCG_TYPE_V64:
1042 insn = I3312_LDRVD;
1043 lgsz = 3;
1044 break;
1045 case TCG_TYPE_V128:
1046 insn = I3312_LDRVQ;
1047 lgsz = 4;
1048 break;
1049 default:
1050 g_assert_not_reached();
1051 }
1052 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
4a136e0a
CF
1053}
1054
14e4c1e2
RH
1055static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1056 TCGReg base, intptr_t ofs)
4a136e0a 1057{
14e4c1e2
RH
1058 AArch64Insn insn;
1059 int lgsz;
1060
1061 switch (type) {
1062 case TCG_TYPE_I32:
1063 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1064 lgsz = 2;
1065 break;
1066 case TCG_TYPE_I64:
1067 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1068 lgsz = 3;
1069 break;
1070 case TCG_TYPE_V64:
1071 insn = I3312_STRVD;
1072 lgsz = 3;
1073 break;
1074 case TCG_TYPE_V128:
1075 insn = I3312_STRVQ;
1076 lgsz = 4;
1077 break;
1078 default:
1079 g_assert_not_reached();
1080 }
1081 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
4a136e0a
CF
1082}
1083
59d7c14e
RH
1084static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1085 TCGReg base, intptr_t ofs)
1086{
14e4c1e2 1087 if (type <= TCG_TYPE_I64 && val == 0) {
59d7c14e
RH
1088 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1089 return true;
1090 }
1091 return false;
1092}
1093
/* Bitfield move (BFM) with immr=a, imms=b; used to implement deposit. */
static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}
1099
/* Unsigned bitfield move (UBFM) with immr=a, imms=b; basis of the
   LSL/LSR/UXT* aliases emitted below. */
static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}
1105
/* Signed bitfield move (SBFM) with immr=a, imms=b; basis of the
   ASR/SXT* aliases emitted below. */
static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}
1111
/* Extract register (EXTR): rd = low bits of rn:rm shifted right by a.
   With rn == rm this is the ROR alias. */
static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}
1117
7763ffa0 1118static inline void tcg_out_shl(TCGContext *s, TCGType ext,
4a136e0a
CF
1119 TCGReg rd, TCGReg rn, unsigned int m)
1120{
b3c56df7
RH
1121 int bits = ext ? 64 : 32;
1122 int max = bits - 1;
4a136e0a
CF
1123 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1124}
1125
7763ffa0 1126static inline void tcg_out_shr(TCGContext *s, TCGType ext,
4a136e0a
CF
1127 TCGReg rd, TCGReg rn, unsigned int m)
1128{
1129 int max = ext ? 63 : 31;
1130 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1131}
1132
7763ffa0 1133static inline void tcg_out_sar(TCGContext *s, TCGType ext,
4a136e0a
CF
1134 TCGReg rd, TCGReg rn, unsigned int m)
1135{
1136 int max = ext ? 63 : 31;
1137 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1138}
1139
7763ffa0 1140static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
4a136e0a
CF
1141 TCGReg rd, TCGReg rn, unsigned int m)
1142{
1143 int max = ext ? 63 : 31;
1144 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1145}
1146
7763ffa0 1147static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
4a136e0a
CF
1148 TCGReg rd, TCGReg rn, unsigned int m)
1149{
b3c56df7
RH
1150 int bits = ext ? 64 : 32;
1151 int max = bits - 1;
4a136e0a
CF
1152 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1153}
1154
b3c56df7
RH
1155static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1156 TCGReg rn, unsigned lsb, unsigned width)
1157{
1158 unsigned size = ext ? 64 : 32;
1159 unsigned a = (size - lsb) & (size - 1);
1160 unsigned b = width - 1;
1161 tcg_out_bfm(s, ext, rd, rn, a, b);
1162}
1163
90f1cd91
RH
1164static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1165 tcg_target_long b, bool const_b)
4a136e0a 1166{
90f1cd91
RH
1167 if (const_b) {
1168 /* Using CMP or CMN aliases. */
1169 if (b >= 0) {
1170 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1171 } else {
1172 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1173 }
1174 } else {
1175 /* Using CMP alias SUBS wzr, Wn, Wm */
1176 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1177 }
4a136e0a
CF
1178}
1179
8587c30c 1180static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
4a136e0a 1181{
8587c30c 1182 ptrdiff_t offset = target - s->code_ptr;
eabb7b91 1183 tcg_debug_assert(offset == sextract64(offset, 0, 26));
81d8a5ee 1184 tcg_out_insn(s, 3206, B, offset);
4a136e0a
CF
1185}
1186
23b7aa1d
PK
1187static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
1188{
1189 ptrdiff_t offset = target - s->code_ptr;
1190 if (offset == sextract64(offset, 0, 26)) {
1191 tcg_out_insn(s, 3206, BL, offset);
1192 } else {
1193 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1194 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1195 }
1196}
1197
/* Indirect call: branch-and-link through REG. */
static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}
1202
8587c30c 1203static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
4a136e0a 1204{
8587c30c
RH
1205 ptrdiff_t offset = target - s->code_ptr;
1206 if (offset == sextract64(offset, 0, 26)) {
81d8a5ee 1207 tcg_out_insn(s, 3206, BL, offset);
8587c30c
RH
1208 } else {
1209 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1210 tcg_out_callr(s, TCG_REG_TMP);
4a136e0a
CF
1211 }
1212}
1213
/* Patch the goto_tb jump at JMP_ADDR to branch to ADDR.  The two
   instruction slots (B+NOP, or ADRP+ADDI feeding the BR TMP emitted at
   translation time) are rewritten with one atomic 64-bit store, then
   the icache is flushed for the patched range. */
void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                              uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_addr;

    if (offset == sextract64(offset, 0, 26)) {
        /* In range for a single direct branch; pad the pair with NOP. */
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        /* Out of range: rebuild the address into TMP with ADRP+ADDI. */
        offset = (addr >> 12) - (jmp_addr >> 12);

        /* patch ADRP */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    /* Both slots must change together; goto_tb aligned them to 8 bytes
       so a single 64-bit store is atomic. */
    pair = (uint64_t)i2 << 32 | i1;
    atomic_set((uint64_t *)jmp_addr, pair);
    flush_icache_range(jmp_addr, jmp_addr + 8);
}
1239
bec16311 1240static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
4a136e0a 1241{
4a136e0a 1242 if (!l->has_value) {
bec16311 1243 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
733589b3 1244 tcg_out_insn(s, 3206, B, 0);
4a136e0a 1245 } else {
8587c30c 1246 tcg_out_goto(s, l->u.value_ptr);
4a136e0a
CF
1247 }
1248}
1249
/* Conditional branch to label L on (a COND b).  Comparisons against
   zero for EQ/NE are fused into CBZ/CBNZ, avoiding the compare. */
static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        /* Unresolved label: record a CONDBR19 reloc and carry the
           current contents of the offset field forward. */
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = l->u.value_ptr - s->code_ptr;
        /* All three branch forms below carry a signed 19-bit offset. */
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}
1279
/* Byte-reverse the full 64-bit register (bswap64). */
static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
}
1284
/* Byte-reverse the low 32 bits (bswap32); 32-bit form zeroes the high half. */
static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
}
1289
/* Byte-reverse within 16-bit units; used for 16-bit byte-swapped accesses. */
static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
}
1294
929f8b55 1295static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
31f1275b
CF
1296 TCGReg rd, TCGReg rn)
1297{
b3c56df7 1298 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
929f8b55 1299 int bits = (8 << s_bits) - 1;
31f1275b
CF
1300 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1301}
1302
929f8b55 1303static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
31f1275b
CF
1304 TCGReg rd, TCGReg rn)
1305{
b3c56df7 1306 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
929f8b55 1307 int bits = (8 << s_bits) - 1;
31f1275b
CF
1308 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1309}
1310
90f1cd91
RH
1311static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1312 TCGReg rn, int64_t aimm)
1313{
1314 if (aimm >= 0) {
1315 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1316 } else {
1317 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1318 }
1319}
1320
/* Double-word add/subtract: (rh:rl) = (ah:al) +/- (bh:bl), with either
   half of B optionally constant.  The low half sets the carry (ADDS/
   SUBS); the high half consumes it (ADC/SBC). */
static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                                   TCGReg rh, TCGReg al, TCGReg ah,
                                   tcg_target_long bl, tcg_target_long bh,
                                   bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    /* If the low result would clobber an input still needed by the
       high half, compute it in TMP and move it into place at the end. */
    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        insn = I3401_ADDSI;
        if ((bl < 0) ^ sub) {
            insn = I3401_SUBSI;
            bl = -bl;
        }
        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero. */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}
1368
/* Emit a host memory barrier for the TCG ordering bits in a0.  Every
   combination involving a load barrier maps to DMB_ISH|DMB_LD; pure
   store-store maps to DMB_ISH|DMB_ST; everything else gets the full
   barrier (first initializer, overridden by the specific entries). */
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
1380
/* Emit clz or ctz of A0 into D.  B is the value to produce when A0 is
   zero (register, or constant when const_b).  ctz is implemented as
   bit-reverse followed by clz. */
static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        /* Requested zero-input result equals the operand width, which
           is what CLZ produces for zero anyway: no select needed. */
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        /* Compare the *original* operand against zero, then select
           between the CLZ result and B. */
        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                /* CSINV of XZR yields -1 in the zero-input arm. */
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                /* Materialize other constants in D; the select only
                   reads D in the a0 == 0 case. */
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}
1411
4a136e0a 1412#ifdef CONFIG_SOFTMMU
659ef5cb
RH
1413#include "tcg-ldst.inc.c"
1414
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     TCGMemOpIdx oi, uintptr_t ra)
 */
/* Indexed by MO_BSWAP | MO_SIZE; signed loads use the same unsigned
   helpers, with sign-extension applied by the slow-path caller. */
static void * const qemu_ld_helpers[16] = {
    [MO_UB] = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ] = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ] = helper_be_ldq_mmu,
};
1427
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, TCGMemOpIdx oi,
 *                                     uintptr_t ra)
 */
/* Indexed by MO_BSWAP | MO_SIZE, matching the ld table above. */
static void * const qemu_st_helpers[16] = {
    [MO_UB] = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ] = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ] = helper_be_stq_mmu,
};
1441
8587c30c 1442static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
dc0c8aaf 1443{
8587c30c 1444 ptrdiff_t offset = tcg_pcrel_diff(s, target);
eabb7b91 1445 tcg_debug_assert(offset == sextract64(offset, 0, 21));
8587c30c 1446 tcg_out_insn(s, 3406, ADR, rd, offset);
dc0c8aaf
RH
1447}
1448
/* Emit the slow path for a guest load that missed the TLB: patch the
   fast path's forward branch to here, call the helper, extend the
   result into the destination, and jump back to the fast path. */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp size = opc & MO_SIZE;

    /* Resolve the conditional branch left by tcg_out_tlb_read. */
    if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
        return false;
    }

    /* Marshal arguments per the qemu_ld_helpers signature. */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    if (opc & MO_SIGN) {
        /* Helpers return zero-extended; sign-extend here if required. */
        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
    } else {
        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
    }

    tcg_out_goto(s, lb->raddr);
    return true;
}
1473
/* Emit the slow path for a guest store that missed the TLB: patch the
   fast path's forward branch to here, call the helper with the data
   value, and jump back to the fast path. */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp size = opc & MO_SIZE;

    /* Resolve the conditional branch left by tcg_out_tlb_read. */
    if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
        return false;
    }

    /* Marshal arguments per the qemu_st_helpers signature. */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    tcg_out_goto(s, lb->raddr);
    return true;
}
1493
/* Record a slow-path entry for a qemu_ld/st, capturing everything the
   slow-path emitters above need to generate the helper call later. */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->type = ext;
    label->datalo_reg = data_reg;
    label->addrlo_reg = addr_reg;
    label->raddr = raddr;             /* fast-path continuation address */
    label->label_ptr[0] = label_ptr;  /* conditional branch to patch */
}
1508
/* The fast-path TLB code below addresses tlb_mask and tlb_table from a
   shared biased base register, so their relative layout and maximum
   offset from ENV are load-bearing. */
/* We expect tlb_mask to be before tlb_table. */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
                  offsetof(CPUArchState, tlb_mask));

/* We expect to use a 24-bit unsigned offset from ENV. */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
                  > 0xffffff);
1516
/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path. Generated code returns the host addend in X1,
   clobbers X0,X2,X3,TMP. */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    int mask_ofs = offsetof(CPUArchState, tlb_mask[mem_index]);
    int table_ofs = offsetof(CPUArchState, tlb_table[mem_index]);
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0, x3;
    TCGType mask_type;
    uint64_t compare_mask;

    /* When the offsets exceed the 12-bit ADDI immediate, bias base
       registers with the high parts, sharing one base when the two
       high parts coincide. */
    if (table_ofs > 0xfff) {
        int table_hi = table_ofs & ~0xfff;
        int mask_hi = mask_ofs & ~0xfff;

        table_base = TCG_REG_X1;
        if (mask_hi == table_hi) {
            mask_base = table_base;
        } else if (mask_hi) {
            mask_base = TCG_REG_X0;
            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
                         mask_base, TCG_AREG0, mask_hi);
        }
        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
                     table_base, TCG_AREG0, table_hi);
        mask_ofs -= mask_hi;
        table_ofs -= table_hi;
    }

    /* The mask load may be 32-bit if the page+tlb bits fit. */
    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
    tcg_out_ld(s, mask_type, TCG_REG_X0, mask_base, mask_ofs);
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, table_base, table_ofs);

    /* Extract the TLB index from the address into X0. */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_X0, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1. */
    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
               ? offsetof(CPUTLBEntry, addr_read)
               : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address. For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access. */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Store the page mask part of the address into X3. */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    *label_ptr = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}
1598
1599#endif /* CONFIG_SOFTMMU */
6a91c7c9 1600
/* Emit the guest load itself from [addr_r + off_r] into data_r,
   honoring the size, sign, and byte-swap bits of MEMOP.  EXT selects
   32- vs 64-bit destination for the sign-extending cases. */
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
                                   TCGReg data_r, TCGReg addr_r,
                                   TCGType otype, TCGReg off_r)
{
    const TCGMemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, addr_r, otype, off_r);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev16(s, data_r, data_r);
        }
        break;
    case MO_SW:
        if (bswap) {
            /* Load zero-extended, swap, then sign-extend from 16 bits. */
            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
            tcg_out_rev16(s, data_r, data_r);
            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                           data_r, addr_r, otype, off_r);
        }
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev32(s, data_r, data_r);
        }
        break;
    case MO_SL:
        if (bswap) {
            /* As MO_SW: zero-extend, swap, then sign-extend from 32 bits. */
            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
            tcg_out_rev32(s, data_r, data_r);
            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
        }
        break;
    case MO_Q:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev64(s, data_r, data_r);
        }
        break;
    default:
        tcg_abort();
    }
}
1656
/* Emit the guest store itself of data_r to [addr_r + off_r], honoring
   the size and byte-swap bits of MEMOP.  Swaps go through TMP so the
   source is preserved; swapping is skipped for XZR since zero is
   byte-order invariant. */
static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
                                   TCGReg data_r, TCGReg addr_r,
                                   TCGType otype, TCGReg off_r)
{
    const TCGMemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
        break;
    case MO_16:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev16(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
        break;
    case MO_32:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev32(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
        break;
    case MO_64:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev64(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
        break;
    default:
        tcg_abort();
    }
}
4a136e0a 1692
/* Top-level guest load.  Softmmu: TLB-check (addend returned in X1),
   then load from X1+addr; a slow-path record is queued for the miss
   case.  User-only: load relative to guest_base or directly. */
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi, TCGType ext)
{
    TCGMemOp memop = get_memop(oi);
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (USE_GUEST_BASE) {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}
1717
/* Top-level guest store; mirrors tcg_out_qemu_ld above. */
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi)
{
    TCGMemOp memop = get_memop(oi);
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
    tcg_out_qemu_st_direct(s, memop, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    /* The slow-path "ext" argument is whether the data is 64-bit. */
    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
                        data_reg, addr_reg, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (USE_GUEST_BASE) {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}
1742
8587c30c 1743static tcg_insn_unit *tb_ret_addr;
4a136e0a 1744
4a136e0a 1745static void tcg_out_op(TCGContext *s, TCGOpcode opc,
8d8db193
RH
1746 const TCGArg args[TCG_MAX_OP_ARGS],
1747 const int const_args[TCG_MAX_OP_ARGS])
4a136e0a 1748{
f0293414
RH
1749 /* 99% of the time, we can signal the use of extension registers
1750 by looking to see if the opcode handles 64-bit data. */
1751 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
4a136e0a 1752
8d8db193
RH
1753 /* Hoist the loads of the most common arguments. */
1754 TCGArg a0 = args[0];
1755 TCGArg a1 = args[1];
1756 TCGArg a2 = args[2];
1757 int c2 = const_args[2];
1758
04ce397b
RH
1759 /* Some operands are defined with "rZ" constraint, a register or
1760 the zero register. These need not actually test args[I] == 0. */
1761#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1762
4a136e0a
CF
1763 switch (opc) {
1764 case INDEX_op_exit_tb:
b19f0c2e
RH
1765 /* Reuse the zeroing that exists for goto_ptr. */
1766 if (a0 == 0) {
23b7aa1d 1767 tcg_out_goto_long(s, s->code_gen_epilogue);
b19f0c2e
RH
1768 } else {
1769 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
23b7aa1d 1770 tcg_out_goto_long(s, tb_ret_addr);
b19f0c2e 1771 }
4a136e0a
CF
1772 break;
1773
1774 case INDEX_op_goto_tb:
2acee8b2 1775 if (s->tb_jmp_insn_offset != NULL) {
a8583393 1776 /* TCG_TARGET_HAS_direct_jump */
2acee8b2
PK
1777 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1778 write can be used to patch the target address. */
1779 if ((uintptr_t)s->code_ptr & 7) {
1780 tcg_out32(s, NOP);
1781 }
1782 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1783 /* actual branch destination will be patched by
a8583393 1784 tb_target_set_jmp_target later. */
2acee8b2
PK
1785 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1786 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1787 } else {
a8583393 1788 /* !TCG_TARGET_HAS_direct_jump */
2acee8b2
PK
1789 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1790 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1791 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
b68686bd 1792 }
b68686bd 1793 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
9f754620 1794 set_jmp_reset_offset(s, a0);
4a136e0a
CF
1795 break;
1796
b19f0c2e
RH
1797 case INDEX_op_goto_ptr:
1798 tcg_out_insn(s, 3207, BR, a0);
1799 break;
1800
4a136e0a 1801 case INDEX_op_br:
bec16311 1802 tcg_out_goto_label(s, arg_label(a0));
4a136e0a
CF
1803 break;
1804
4a136e0a 1805 case INDEX_op_ld8u_i32:
4a136e0a 1806 case INDEX_op_ld8u_i64:
14e4c1e2 1807 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
dc73dfd4
RH
1808 break;
1809 case INDEX_op_ld8s_i32:
14e4c1e2 1810 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
dc73dfd4 1811 break;
4a136e0a 1812 case INDEX_op_ld8s_i64:
14e4c1e2 1813 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
dc73dfd4
RH
1814 break;
1815 case INDEX_op_ld16u_i32:
4a136e0a 1816 case INDEX_op_ld16u_i64:
14e4c1e2 1817 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
dc73dfd4
RH
1818 break;
1819 case INDEX_op_ld16s_i32:
14e4c1e2 1820 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
dc73dfd4 1821 break;
4a136e0a 1822 case INDEX_op_ld16s_i64:
14e4c1e2 1823 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
dc73dfd4
RH
1824 break;
1825 case INDEX_op_ld_i32:
4a136e0a 1826 case INDEX_op_ld32u_i64:
14e4c1e2 1827 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
dc73dfd4 1828 break;
4a136e0a 1829 case INDEX_op_ld32s_i64:
14e4c1e2 1830 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
e81864a1 1831 break;
dc73dfd4 1832 case INDEX_op_ld_i64:
14e4c1e2 1833 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
dc73dfd4
RH
1834 break;
1835
4a136e0a
CF
1836 case INDEX_op_st8_i32:
1837 case INDEX_op_st8_i64:
14e4c1e2 1838 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
dc73dfd4 1839 break;
4a136e0a
CF
1840 case INDEX_op_st16_i32:
1841 case INDEX_op_st16_i64:
14e4c1e2 1842 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
dc73dfd4
RH
1843 break;
1844 case INDEX_op_st_i32:
4a136e0a 1845 case INDEX_op_st32_i64:
14e4c1e2 1846 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
dc73dfd4
RH
1847 break;
1848 case INDEX_op_st_i64:
14e4c1e2 1849 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
4a136e0a
CF
1850 break;
1851
4a136e0a 1852 case INDEX_op_add_i32:
90f1cd91
RH
1853 a2 = (int32_t)a2;
1854 /* FALLTHRU */
1855 case INDEX_op_add_i64:
1856 if (c2) {
1857 tcg_out_addsubi(s, ext, a0, a1, a2);
1858 } else {
1859 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1860 }
4a136e0a
CF
1861 break;
1862
4a136e0a 1863 case INDEX_op_sub_i32:
90f1cd91
RH
1864 a2 = (int32_t)a2;
1865 /* FALLTHRU */
1866 case INDEX_op_sub_i64:
1867 if (c2) {
1868 tcg_out_addsubi(s, ext, a0, a1, -a2);
1869 } else {
1870 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1871 }
4a136e0a
CF
1872 break;
1873
14b155dd
RH
1874 case INDEX_op_neg_i64:
1875 case INDEX_op_neg_i32:
1876 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1877 break;
1878
4a136e0a 1879 case INDEX_op_and_i32:
e029f293
RH
1880 a2 = (int32_t)a2;
1881 /* FALLTHRU */
1882 case INDEX_op_and_i64:
1883 if (c2) {
1884 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1885 } else {
1886 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1887 }
4a136e0a
CF
1888 break;
1889
14b155dd
RH
1890 case INDEX_op_andc_i32:
1891 a2 = (int32_t)a2;
1892 /* FALLTHRU */
1893 case INDEX_op_andc_i64:
1894 if (c2) {
1895 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1896 } else {
1897 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1898 }
1899 break;
1900
4a136e0a 1901 case INDEX_op_or_i32:
e029f293
RH
1902 a2 = (int32_t)a2;
1903 /* FALLTHRU */
1904 case INDEX_op_or_i64:
1905 if (c2) {
1906 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1907 } else {
1908 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1909 }
4a136e0a
CF
1910 break;
1911
14b155dd
RH
1912 case INDEX_op_orc_i32:
1913 a2 = (int32_t)a2;
1914 /* FALLTHRU */
1915 case INDEX_op_orc_i64:
1916 if (c2) {
1917 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1918 } else {
1919 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1920 }
1921 break;
1922
4a136e0a 1923 case INDEX_op_xor_i32:
e029f293
RH
1924 a2 = (int32_t)a2;
1925 /* FALLTHRU */
1926 case INDEX_op_xor_i64:
1927 if (c2) {
1928 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1929 } else {
1930 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1931 }
4a136e0a
CF
1932 break;
1933
14b155dd
RH
1934 case INDEX_op_eqv_i32:
1935 a2 = (int32_t)a2;
1936 /* FALLTHRU */
1937 case INDEX_op_eqv_i64:
1938 if (c2) {
1939 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1940 } else {
1941 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1942 }
1943 break;
1944
1945 case INDEX_op_not_i64:
1946 case INDEX_op_not_i32:
1947 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1948 break;
1949
4a136e0a 1950 case INDEX_op_mul_i64:
4a136e0a 1951 case INDEX_op_mul_i32:
8678b71c
RH
1952 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1953 break;
1954
1955 case INDEX_op_div_i64:
1956 case INDEX_op_div_i32:
1957 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1958 break;
1959 case INDEX_op_divu_i64:
1960 case INDEX_op_divu_i32:
1961 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1962 break;
1963
1964 case INDEX_op_rem_i64:
1965 case INDEX_op_rem_i32:
1966 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1967 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1968 break;
1969 case INDEX_op_remu_i64:
1970 case INDEX_op_remu_i32:
1971 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1972 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
4a136e0a
CF
1973 break;
1974
1975 case INDEX_op_shl_i64:
4a136e0a 1976 case INDEX_op_shl_i32:
df9351e3 1977 if (c2) {
8d8db193 1978 tcg_out_shl(s, ext, a0, a1, a2);
df9351e3
RH
1979 } else {
1980 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
4a136e0a
CF
1981 }
1982 break;
1983
1984 case INDEX_op_shr_i64:
4a136e0a 1985 case INDEX_op_shr_i32:
df9351e3 1986 if (c2) {
8d8db193 1987 tcg_out_shr(s, ext, a0, a1, a2);
df9351e3
RH
1988 } else {
1989 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
4a136e0a
CF
1990 }
1991 break;
1992
1993 case INDEX_op_sar_i64:
4a136e0a 1994 case INDEX_op_sar_i32:
df9351e3 1995 if (c2) {
8d8db193 1996 tcg_out_sar(s, ext, a0, a1, a2);
df9351e3
RH
1997 } else {
1998 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
4a136e0a
CF
1999 }
2000 break;
2001
2002 case INDEX_op_rotr_i64:
4a136e0a 2003 case INDEX_op_rotr_i32:
df9351e3 2004 if (c2) {
8d8db193 2005 tcg_out_rotr(s, ext, a0, a1, a2);
df9351e3
RH
2006 } else {
2007 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
4a136e0a
CF
2008 }
2009 break;
2010
2011 case INDEX_op_rotl_i64:
df9351e3
RH
2012 case INDEX_op_rotl_i32:
2013 if (c2) {
8d8db193 2014 tcg_out_rotl(s, ext, a0, a1, a2);
4a136e0a 2015 } else {
50573c66 2016 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
df9351e3 2017 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
4a136e0a
CF
2018 }
2019 break;
2020
53c76c19
RH
2021 case INDEX_op_clz_i64:
2022 case INDEX_op_clz_i32:
2023 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2024 break;
2025 case INDEX_op_ctz_i64:
2026 case INDEX_op_ctz_i32:
2027 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2028 break;
2029
8d8db193 2030 case INDEX_op_brcond_i32:
90f1cd91
RH
2031 a1 = (int32_t)a1;
2032 /* FALLTHRU */
2033 case INDEX_op_brcond_i64:
bec16311 2034 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
4a136e0a
CF
2035 break;
2036
4a136e0a 2037 case INDEX_op_setcond_i32:
90f1cd91
RH
2038 a2 = (int32_t)a2;
2039 /* FALLTHRU */
2040 case INDEX_op_setcond_i64:
2041 tcg_out_cmp(s, ext, a1, a2, c2);
ed7a0aa8
RH
2042 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
2043 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2044 TCG_REG_XZR, tcg_invert_cond(args[3]));
4a136e0a
CF
2045 break;
2046
04ce397b
RH
2047 case INDEX_op_movcond_i32:
2048 a2 = (int32_t)a2;
2049 /* FALLTHRU */
2050 case INDEX_op_movcond_i64:
2051 tcg_out_cmp(s, ext, a1, a2, c2);
2052 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2053 break;
2054
de61d14f
RH
2055 case INDEX_op_qemu_ld_i32:
2056 case INDEX_op_qemu_ld_i64:
59227d5d 2057 tcg_out_qemu_ld(s, a0, a1, a2, ext);
4a136e0a 2058 break;
de61d14f
RH
2059 case INDEX_op_qemu_st_i32:
2060 case INDEX_op_qemu_st_i64:
59227d5d 2061 tcg_out_qemu_st(s, REG0(0), a1, a2);
4a136e0a
CF
2062 break;
2063
f0293414 2064 case INDEX_op_bswap64_i64:
edd8824c
RH
2065 tcg_out_rev64(s, a0, a1);
2066 break;
2067 case INDEX_op_bswap32_i64:
9c4a059d 2068 case INDEX_op_bswap32_i32:
edd8824c 2069 tcg_out_rev32(s, a0, a1);
9c4a059d
CF
2070 break;
2071 case INDEX_op_bswap16_i64:
2072 case INDEX_op_bswap16_i32:
edd8824c 2073 tcg_out_rev16(s, a0, a1);
9c4a059d
CF
2074 break;
2075
31f1275b 2076 case INDEX_op_ext8s_i64:
31f1275b 2077 case INDEX_op_ext8s_i32:
929f8b55 2078 tcg_out_sxt(s, ext, MO_8, a0, a1);
31f1275b
CF
2079 break;
2080 case INDEX_op_ext16s_i64:
31f1275b 2081 case INDEX_op_ext16s_i32:
929f8b55 2082 tcg_out_sxt(s, ext, MO_16, a0, a1);
31f1275b 2083 break;
4f2331e5 2084 case INDEX_op_ext_i32_i64:
31f1275b 2085 case INDEX_op_ext32s_i64:
929f8b55 2086 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
31f1275b
CF
2087 break;
2088 case INDEX_op_ext8u_i64:
2089 case INDEX_op_ext8u_i32:
929f8b55 2090 tcg_out_uxt(s, MO_8, a0, a1);
31f1275b
CF
2091 break;
2092 case INDEX_op_ext16u_i64:
2093 case INDEX_op_ext16u_i32:
929f8b55 2094 tcg_out_uxt(s, MO_16, a0, a1);
31f1275b 2095 break;
4f2331e5 2096 case INDEX_op_extu_i32_i64:
31f1275b 2097 case INDEX_op_ext32u_i64:
929f8b55 2098 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
31f1275b
CF
2099 break;
2100
b3c56df7
RH
2101 case INDEX_op_deposit_i64:
2102 case INDEX_op_deposit_i32:
2103 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2104 break;
2105
e2179f94
RH
2106 case INDEX_op_extract_i64:
2107 case INDEX_op_extract_i32:
2108 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2109 break;
2110
2111 case INDEX_op_sextract_i64:
2112 case INDEX_op_sextract_i32:
2113 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2114 break;
2115
464c2969
RH
2116 case INDEX_op_extract2_i64:
2117 case INDEX_op_extract2_i32:
2118 tcg_out_extr(s, ext, a0, a1, a2, args[3]);
2119 break;
2120
c6e929e7
RH
2121 case INDEX_op_add2_i32:
2122 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2123 (int32_t)args[4], args[5], const_args[4],
2124 const_args[5], false);
2125 break;
2126 case INDEX_op_add2_i64:
2127 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2128 args[5], const_args[4], const_args[5], false);
2129 break;
2130 case INDEX_op_sub2_i32:
2131 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2132 (int32_t)args[4], args[5], const_args[4],
2133 const_args[5], true);
2134 break;
2135 case INDEX_op_sub2_i64:
2136 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2137 args[5], const_args[4], const_args[5], true);
2138 break;
2139
1fcc9ddf
RH
2140 case INDEX_op_muluh_i64:
2141 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2142 break;
2143 case INDEX_op_mulsh_i64:
2144 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2145 break;
2146
c7a59c2a
PK
2147 case INDEX_op_mb:
2148 tcg_out_mb(s, a0);
2149 break;
2150
96d0ee7f 2151 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
a51a6b6a 2152 case INDEX_op_mov_i64:
96d0ee7f 2153 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
a51a6b6a 2154 case INDEX_op_movi_i64:
96d0ee7f 2155 case INDEX_op_call: /* Always emitted via tcg_out_call. */
4a136e0a 2156 default:
14e4c1e2 2157 g_assert_not_reached();
4a136e0a 2158 }
04ce397b
RH
2159
2160#undef REG0
4a136e0a
CF
2161}
2162
14e4c1e2
RH
2163static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2164 unsigned vecl, unsigned vece,
2165 const TCGArg *args, const int *const_args)
2166{
2167 static const AArch64Insn cmp_insn[16] = {
2168 [TCG_COND_EQ] = I3616_CMEQ,
2169 [TCG_COND_GT] = I3616_CMGT,
2170 [TCG_COND_GE] = I3616_CMGE,
2171 [TCG_COND_GTU] = I3616_CMHI,
2172 [TCG_COND_GEU] = I3616_CMHS,
2173 };
2174 static const AArch64Insn cmp0_insn[16] = {
2175 [TCG_COND_EQ] = I3617_CMEQ0,
2176 [TCG_COND_GT] = I3617_CMGT0,
2177 [TCG_COND_GE] = I3617_CMGE0,
2178 [TCG_COND_LT] = I3617_CMLT0,
2179 [TCG_COND_LE] = I3617_CMLE0,
2180 };
2181
2182 TCGType type = vecl + TCG_TYPE_V64;
2183 unsigned is_q = vecl;
2184 TCGArg a0, a1, a2;
2185
2186 a0 = args[0];
2187 a1 = args[1];
2188 a2 = args[2];
2189
2190 switch (opc) {
2191 case INDEX_op_ld_vec:
2192 tcg_out_ld(s, type, a0, a1, a2);
2193 break;
2194 case INDEX_op_st_vec:
2195 tcg_out_st(s, type, a0, a1, a2);
2196 break;
37ee55a0
RH
2197 case INDEX_op_dupm_vec:
2198 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2199 break;
14e4c1e2
RH
2200 case INDEX_op_add_vec:
2201 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2202 break;
2203 case INDEX_op_sub_vec:
2204 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2205 break;
2206 case INDEX_op_mul_vec:
2207 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2208 break;
2209 case INDEX_op_neg_vec:
2210 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2211 break;
a456394a
RH
2212 case INDEX_op_abs_vec:
2213 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2214 break;
14e4c1e2
RH
2215 case INDEX_op_and_vec:
2216 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2217 break;
2218 case INDEX_op_or_vec:
2219 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2220 break;
2221 case INDEX_op_xor_vec:
2222 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2223 break;
2224 case INDEX_op_andc_vec:
2225 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2226 break;
2227 case INDEX_op_orc_vec:
2228 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2229 break;
d32648d4
RH
2230 case INDEX_op_ssadd_vec:
2231 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2232 break;
2233 case INDEX_op_sssub_vec:
2234 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2235 break;
2236 case INDEX_op_usadd_vec:
2237 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2238 break;
2239 case INDEX_op_ussub_vec:
2240 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2241 break;
93f332a5
RH
2242 case INDEX_op_smax_vec:
2243 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2244 break;
2245 case INDEX_op_smin_vec:
2246 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2247 break;
2248 case INDEX_op_umax_vec:
2249 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2250 break;
2251 case INDEX_op_umin_vec:
2252 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2253 break;
14e4c1e2
RH
2254 case INDEX_op_not_vec:
2255 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2256 break;
14e4c1e2
RH
2257 case INDEX_op_shli_vec:
2258 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2259 break;
2260 case INDEX_op_shri_vec:
2261 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2262 break;
2263 case INDEX_op_sari_vec:
2264 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2265 break;
79525dfd
RH
2266 case INDEX_op_shlv_vec:
2267 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2268 break;
2269 case INDEX_op_aa64_sshl_vec:
2270 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2271 break;
14e4c1e2
RH
2272 case INDEX_op_cmp_vec:
2273 {
2274 TCGCond cond = args[3];
2275 AArch64Insn insn;
2276
2277 if (cond == TCG_COND_NE) {
2278 if (const_args[2]) {
2279 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2280 } else {
2281 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2282 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2283 }
2284 } else {
2285 if (const_args[2]) {
2286 insn = cmp0_insn[cond];
2287 if (insn) {
2288 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2289 break;
2290 }
2291 tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2292 a2 = TCG_VEC_TMP;
2293 }
2294 insn = cmp_insn[cond];
2295 if (insn == 0) {
2296 TCGArg t;
2297 t = a1, a1 = a2, a2 = t;
2298 cond = tcg_swap_cond(cond);
2299 insn = cmp_insn[cond];
2300 tcg_debug_assert(insn != 0);
2301 }
2302 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2303 }
2304 }
2305 break;
bab1671f
RH
2306
2307 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
2308 case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
2309 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
14e4c1e2
RH
2310 default:
2311 g_assert_not_reached();
2312 }
2313}
2314
2315int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2316{
2317 switch (opc) {
2318 case INDEX_op_add_vec:
2319 case INDEX_op_sub_vec:
14e4c1e2
RH
2320 case INDEX_op_and_vec:
2321 case INDEX_op_or_vec:
2322 case INDEX_op_xor_vec:
2323 case INDEX_op_andc_vec:
2324 case INDEX_op_orc_vec:
2325 case INDEX_op_neg_vec:
a456394a 2326 case INDEX_op_abs_vec:
14e4c1e2
RH
2327 case INDEX_op_not_vec:
2328 case INDEX_op_cmp_vec:
2329 case INDEX_op_shli_vec:
2330 case INDEX_op_shri_vec:
2331 case INDEX_op_sari_vec:
d32648d4
RH
2332 case INDEX_op_ssadd_vec:
2333 case INDEX_op_sssub_vec:
2334 case INDEX_op_usadd_vec:
2335 case INDEX_op_ussub_vec:
79525dfd 2336 case INDEX_op_shlv_vec:
14e4c1e2 2337 return 1;
79525dfd
RH
2338 case INDEX_op_shrv_vec:
2339 case INDEX_op_sarv_vec:
2340 return -1;
e65a5f22 2341 case INDEX_op_mul_vec:
a7b6d286
RH
2342 case INDEX_op_smax_vec:
2343 case INDEX_op_smin_vec:
2344 case INDEX_op_umax_vec:
2345 case INDEX_op_umin_vec:
e65a5f22 2346 return vece < MO_64;
14e4c1e2
RH
2347
2348 default:
2349 return 0;
2350 }
2351}
2352
2353void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2354 TCGArg a0, ...)
2355{
79525dfd
RH
2356 va_list va;
2357 TCGv_vec v0, v1, v2, t1;
2358
2359 va_start(va, a0);
2360 v0 = temp_tcgv_vec(arg_temp(a0));
2361 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2362 v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2363
2364 switch (opc) {
2365 case INDEX_op_shrv_vec:
2366 case INDEX_op_sarv_vec:
2367 /* Right shifts are negative left shifts for AArch64. */
2368 t1 = tcg_temp_new_vec(type);
2369 tcg_gen_neg_vec(vece, t1, v2);
2370 opc = (opc == INDEX_op_shrv_vec
2371 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2372 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2373 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2374 tcg_temp_free_vec(t1);
2375 break;
2376
2377 default:
2378 g_assert_not_reached();
2379 }
2380
2381 va_end(va);
14e4c1e2
RH
2382}
2383
f69d277e
RH
2384static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2385{
1897cc2e
RH
2386 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2387 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
14e4c1e2
RH
2388 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2389 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2390 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
1897cc2e
RH
2391 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2392 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2393 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2394 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2395 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
14e4c1e2
RH
2396 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2397 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
1897cc2e
RH
2398 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2399 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2400 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2401 static const TCGTargetOpDef r_r_rAL
2402 = { .args_ct_str = { "r", "r", "rAL" } };
2403 static const TCGTargetOpDef dep
2404 = { .args_ct_str = { "r", "0", "rZ" } };
464c2969
RH
2405 static const TCGTargetOpDef ext2
2406 = { .args_ct_str = { "r", "rZ", "rZ" } };
1897cc2e
RH
2407 static const TCGTargetOpDef movc
2408 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2409 static const TCGTargetOpDef add2
2410 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2411
2412 switch (op) {
2413 case INDEX_op_goto_ptr:
2414 return &r;
f69d277e 2415
1897cc2e
RH
2416 case INDEX_op_ld8u_i32:
2417 case INDEX_op_ld8s_i32:
2418 case INDEX_op_ld16u_i32:
2419 case INDEX_op_ld16s_i32:
2420 case INDEX_op_ld_i32:
2421 case INDEX_op_ld8u_i64:
2422 case INDEX_op_ld8s_i64:
2423 case INDEX_op_ld16u_i64:
2424 case INDEX_op_ld16s_i64:
2425 case INDEX_op_ld32u_i64:
2426 case INDEX_op_ld32s_i64:
2427 case INDEX_op_ld_i64:
2428 case INDEX_op_neg_i32:
2429 case INDEX_op_neg_i64:
2430 case INDEX_op_not_i32:
2431 case INDEX_op_not_i64:
2432 case INDEX_op_bswap16_i32:
2433 case INDEX_op_bswap32_i32:
2434 case INDEX_op_bswap16_i64:
2435 case INDEX_op_bswap32_i64:
2436 case INDEX_op_bswap64_i64:
2437 case INDEX_op_ext8s_i32:
2438 case INDEX_op_ext16s_i32:
2439 case INDEX_op_ext8u_i32:
2440 case INDEX_op_ext16u_i32:
2441 case INDEX_op_ext8s_i64:
2442 case INDEX_op_ext16s_i64:
2443 case INDEX_op_ext32s_i64:
2444 case INDEX_op_ext8u_i64:
2445 case INDEX_op_ext16u_i64:
2446 case INDEX_op_ext32u_i64:
2447 case INDEX_op_ext_i32_i64:
2448 case INDEX_op_extu_i32_i64:
2449 case INDEX_op_extract_i32:
2450 case INDEX_op_extract_i64:
2451 case INDEX_op_sextract_i32:
2452 case INDEX_op_sextract_i64:
2453 return &r_r;
2454
2455 case INDEX_op_st8_i32:
2456 case INDEX_op_st16_i32:
2457 case INDEX_op_st_i32:
2458 case INDEX_op_st8_i64:
2459 case INDEX_op_st16_i64:
2460 case INDEX_op_st32_i64:
2461 case INDEX_op_st_i64:
2462 return &rZ_r;
2463
2464 case INDEX_op_add_i32:
2465 case INDEX_op_add_i64:
2466 case INDEX_op_sub_i32:
2467 case INDEX_op_sub_i64:
2468 case INDEX_op_setcond_i32:
2469 case INDEX_op_setcond_i64:
2470 return &r_r_rA;
2471
2472 case INDEX_op_mul_i32:
2473 case INDEX_op_mul_i64:
2474 case INDEX_op_div_i32:
2475 case INDEX_op_div_i64:
2476 case INDEX_op_divu_i32:
2477 case INDEX_op_divu_i64:
2478 case INDEX_op_rem_i32:
2479 case INDEX_op_rem_i64:
2480 case INDEX_op_remu_i32:
2481 case INDEX_op_remu_i64:
2482 case INDEX_op_muluh_i64:
2483 case INDEX_op_mulsh_i64:
2484 return &r_r_r;
2485
2486 case INDEX_op_and_i32:
2487 case INDEX_op_and_i64:
2488 case INDEX_op_or_i32:
2489 case INDEX_op_or_i64:
2490 case INDEX_op_xor_i32:
2491 case INDEX_op_xor_i64:
2492 case INDEX_op_andc_i32:
2493 case INDEX_op_andc_i64:
2494 case INDEX_op_orc_i32:
2495 case INDEX_op_orc_i64:
2496 case INDEX_op_eqv_i32:
2497 case INDEX_op_eqv_i64:
2498 return &r_r_rL;
2499
2500 case INDEX_op_shl_i32:
2501 case INDEX_op_shr_i32:
2502 case INDEX_op_sar_i32:
2503 case INDEX_op_rotl_i32:
2504 case INDEX_op_rotr_i32:
2505 case INDEX_op_shl_i64:
2506 case INDEX_op_shr_i64:
2507 case INDEX_op_sar_i64:
2508 case INDEX_op_rotl_i64:
2509 case INDEX_op_rotr_i64:
2510 return &r_r_ri;
2511
2512 case INDEX_op_clz_i32:
2513 case INDEX_op_ctz_i32:
2514 case INDEX_op_clz_i64:
2515 case INDEX_op_ctz_i64:
2516 return &r_r_rAL;
2517
2518 case INDEX_op_brcond_i32:
2519 case INDEX_op_brcond_i64:
2520 return &r_rA;
2521
2522 case INDEX_op_movcond_i32:
2523 case INDEX_op_movcond_i64:
2524 return &movc;
2525
2526 case INDEX_op_qemu_ld_i32:
2527 case INDEX_op_qemu_ld_i64:
2528 return &r_l;
2529 case INDEX_op_qemu_st_i32:
2530 case INDEX_op_qemu_st_i64:
2531 return &lZ_l;
2532
2533 case INDEX_op_deposit_i32:
2534 case INDEX_op_deposit_i64:
2535 return &dep;
2536
464c2969
RH
2537 case INDEX_op_extract2_i32:
2538 case INDEX_op_extract2_i64:
2539 return &ext2;
2540
1897cc2e
RH
2541 case INDEX_op_add2_i32:
2542 case INDEX_op_add2_i64:
2543 case INDEX_op_sub2_i32:
2544 case INDEX_op_sub2_i64:
2545 return &add2;
2546
14e4c1e2
RH
2547 case INDEX_op_add_vec:
2548 case INDEX_op_sub_vec:
2549 case INDEX_op_mul_vec:
2550 case INDEX_op_and_vec:
2551 case INDEX_op_or_vec:
2552 case INDEX_op_xor_vec:
2553 case INDEX_op_andc_vec:
2554 case INDEX_op_orc_vec:
d32648d4
RH
2555 case INDEX_op_ssadd_vec:
2556 case INDEX_op_sssub_vec:
2557 case INDEX_op_usadd_vec:
2558 case INDEX_op_ussub_vec:
93f332a5
RH
2559 case INDEX_op_smax_vec:
2560 case INDEX_op_smin_vec:
2561 case INDEX_op_umax_vec:
2562 case INDEX_op_umin_vec:
79525dfd
RH
2563 case INDEX_op_shlv_vec:
2564 case INDEX_op_shrv_vec:
2565 case INDEX_op_sarv_vec:
2566 case INDEX_op_aa64_sshl_vec:
14e4c1e2
RH
2567 return &w_w_w;
2568 case INDEX_op_not_vec:
2569 case INDEX_op_neg_vec:
a456394a 2570 case INDEX_op_abs_vec:
14e4c1e2
RH
2571 case INDEX_op_shli_vec:
2572 case INDEX_op_shri_vec:
2573 case INDEX_op_sari_vec:
2574 return &w_w;
2575 case INDEX_op_ld_vec:
2576 case INDEX_op_st_vec:
37ee55a0 2577 case INDEX_op_dupm_vec:
14e4c1e2
RH
2578 return &w_r;
2579 case INDEX_op_dup_vec:
2580 return &w_wr;
2581 case INDEX_op_cmp_vec:
2582 return &w_w_wZ;
2583
1897cc2e
RH
2584 default:
2585 return NULL;
f69d277e 2586 }
f69d277e
RH
2587}
2588
4a136e0a
CF
2589static void tcg_target_init(TCGContext *s)
2590{
f46934df
RH
2591 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2592 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
14e4c1e2
RH
2593 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2594 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
f46934df 2595
14e4c1e2 2596 tcg_target_call_clobber_regs = -1ull;
f46934df
RH
2597 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2598 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2599 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2600 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2601 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2602 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2603 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2604 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2605 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2606 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2607 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
14e4c1e2
RH
2608 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2609 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2610 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2611 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2612 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2613 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2614 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2615 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
4a136e0a 2616
ccb1bb66 2617 s->reserved_regs = 0;
4a136e0a
CF
2618 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2619 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2620 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2621 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
14e4c1e2 2622 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
4a136e0a
CF
2623}
2624
38d195aa
RH
2625/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2626#define PUSH_SIZE ((30 - 19 + 1) * 8)
2627
2628#define FRAME_SIZE \
2629 ((PUSH_SIZE \
2630 + TCG_STATIC_CALL_ARGS_SIZE \
2631 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2632 + TCG_TARGET_STACK_ALIGN - 1) \
2633 & ~(TCG_TARGET_STACK_ALIGN - 1))
2634
2635/* We're expecting a 2 byte uleb128 encoded value. */
2636QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2637
2638/* We're expecting to use a single ADDI insn. */
2639QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2640
4a136e0a
CF
2641static void tcg_target_qemu_prologue(TCGContext *s)
2642{
4a136e0a
CF
2643 TCGReg r;
2644
95f72aa9
RH
2645 /* Push (FP, LR) and allocate space for all saved registers. */
2646 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
38d195aa 2647 TCG_REG_SP, -PUSH_SIZE, 1, 1);
4a136e0a 2648
d82b78e4 2649 /* Set up frame pointer for canonical unwinding. */
929f8b55 2650 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
4a136e0a 2651
d82b78e4 2652 /* Store callee-preserved regs x19..x28. */
4a136e0a 2653 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
95f72aa9
RH
2654 int ofs = (r - TCG_REG_X19 + 2) * 8;
2655 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
4a136e0a
CF
2656 }
2657
096c46c0
RH
2658 /* Make stack space for TCG locals. */
2659 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
38d195aa 2660 FRAME_SIZE - PUSH_SIZE);
096c46c0 2661
95f72aa9 2662 /* Inform TCG about how to find TCG locals with register, offset, size. */
4a136e0a
CF
2663 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2664 CPU_TEMP_BUF_NLONGS * sizeof(long));
2665
4cbea598 2666#if !defined(CONFIG_SOFTMMU)
352bcb0a 2667 if (USE_GUEST_BASE) {
b76f21a7 2668 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
6a91c7c9
JK
2669 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2670 }
2671#endif
2672
4a136e0a 2673 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
81d8a5ee 2674 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
4a136e0a 2675
b19f0c2e
RH
2676 /*
2677 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2678 * and fall through to the rest of the epilogue.
2679 */
2680 s->code_gen_epilogue = s->code_ptr;
2681 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2682
2683 /* TB epilogue */
4a136e0a
CF
2684 tb_ret_addr = s->code_ptr;
2685
096c46c0
RH
2686 /* Remove TCG locals stack space. */
2687 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
38d195aa 2688 FRAME_SIZE - PUSH_SIZE);
4a136e0a 2689
95f72aa9 2690 /* Restore registers x19..x28. */
4a136e0a 2691 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
95f72aa9
RH
2692 int ofs = (r - TCG_REG_X19 + 2) * 8;
2693 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
4a136e0a
CF
2694 }
2695
95f72aa9
RH
2696 /* Pop (FP, LR), restore SP to previous frame. */
2697 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
38d195aa 2698 TCG_REG_SP, PUSH_SIZE, 0, 1);
81d8a5ee 2699 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
4a136e0a 2700}
38d195aa 2701
55129955
RH
2702static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2703{
2704 int i;
2705 for (i = 0; i < count; ++i) {
2706 p[i] = NOP;
2707 }
2708}
2709
38d195aa 2710typedef struct {
3d9bddb3 2711 DebugFrameHeader h;
38d195aa
RH
2712 uint8_t fde_def_cfa[4];
2713 uint8_t fde_reg_ofs[24];
2714} DebugFrame;
2715
2716#define ELF_HOST_MACHINE EM_AARCH64
2717
3d9bddb3
RH
2718static const DebugFrame debug_frame = {
2719 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2720 .h.cie.id = -1,
2721 .h.cie.version = 1,
2722 .h.cie.code_align = 1,
2723 .h.cie.data_align = 0x78, /* sleb128 -8 */
2724 .h.cie.return_column = TCG_REG_LR,
38d195aa
RH
2725
2726 /* Total FDE size does not include the "len" member. */
3d9bddb3 2727 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
38d195aa
RH
2728
2729 .fde_def_cfa = {
2730 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2731 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2732 (FRAME_SIZE >> 7)
2733 },
2734 .fde_reg_ofs = {
2735 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2736 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2737 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2738 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2739 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2740 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2741 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2742 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2743 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2744 0x80 + 19, 10, /* DW_CFA_offset, x1p, -80 */
2745 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2746 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2747 }
2748};
2749
2750void tcg_register_jit(void *buf, size_t buf_size)
2751{
38d195aa
RH
2752 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2753}