1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "../tcg-pool.c.inc"
14 #include "qemu/bitops.h"
15
16 /* We're going to reuse TCGType when setting the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
27
28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
32 };
33 #endif /* CONFIG_DEBUG_TCG */
34
35 static const int tcg_target_reg_alloc_order[] = {
36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38 TCG_REG_X28, /* we will reserve this for guest_base if configured */
39
40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42 TCG_REG_X16, TCG_REG_X17,
43
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47 /* X18 reserved by system */
48 /* X19 reserved for AREG0 */
49 /* X29 reserved as fp */
50 /* X30 reserved as temporary */
51
52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54 /* V8 - V15 are call-saved, and skipped. */
55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
59 };
60
61 static const int tcg_target_call_iarg_regs[8] = {
62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
64 };
65 static const int tcg_target_call_oarg_regs[1] = {
66 TCG_REG_X0
67 };
68
69 #define TCG_REG_TMP TCG_REG_X30
70 #define TCG_VEC_TMP TCG_REG_V31
71
72 #ifndef CONFIG_SOFTMMU
73 /* Note that XZR cannot be encoded in the address base register slot,
74 as that actually encodes SP. So if we need to zero-extend the guest
75 address, via the address index register slot, we need to load even
76 a zero guest base into a register. */
77 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
78 #define TCG_REG_GUEST_BASE TCG_REG_X28
79 #endif
80
81 static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
82 {
83 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
84 ptrdiff_t offset = target - src_rx;
85
86 if (offset == sextract64(offset, 0, 26)) {
87 /* read instruction, mask away previous PC_REL26 parameter contents,
88 set the proper offset, then write back the instruction. */
89 *src_rw = deposit32(*src_rw, 0, 26, offset);
90 return true;
91 }
92 return false;
93 }
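/*
 * Note that the 26-bit field holds a word (instruction) offset, so a B or BL
 * patched this way can reach about +/-128 MiB from the branch itself;
 * anything farther must go through a register with BR/BLR.
 */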
94
95 static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
96 {
97 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
98 ptrdiff_t offset = target - src_rx;
99
100 if (offset == sextract64(offset, 0, 19)) {
101 *src_rw = deposit32(*src_rw, 5, 19, offset);
102 return true;
103 }
104 return false;
105 }
106
107 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
108 intptr_t value, intptr_t addend)
109 {
110 tcg_debug_assert(addend == 0);
111 switch (type) {
112 case R_AARCH64_JUMP26:
113 case R_AARCH64_CALL26:
114 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
115 case R_AARCH64_CONDBR19:
116 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
117 default:
118 g_assert_not_reached();
119 }
120 }
121
122 #define TCG_CT_CONST_AIMM 0x100
123 #define TCG_CT_CONST_LIMM 0x200
124 #define TCG_CT_CONST_ZERO 0x400
125 #define TCG_CT_CONST_MONE 0x800
126 #define TCG_CT_CONST_ORRI 0x1000
127 #define TCG_CT_CONST_ANDI 0x2000
128
129 /* parse target specific constraints */
130 static const char *target_parse_constraint(TCGArgConstraint *ct,
131 const char *ct_str, TCGType type)
132 {
133 switch (*ct_str++) {
134 case 'r': /* general registers */
135 ct->regs |= 0xffffffffu;
136 break;
137 case 'w': /* advsimd registers */
138 ct->regs |= 0xffffffff00000000ull;
139 break;
140 case 'l': /* qemu_ld / qemu_st address, data_reg */
141 ct->regs = 0xffffffffu;
142 #ifdef CONFIG_SOFTMMU
143 /* x0 and x1 will be overwritten when reading the tlb entry,
144 and x2 and x3 are used for helper args; better to avoid using them. */
145 tcg_regset_reset_reg(ct->regs, TCG_REG_X0);
146 tcg_regset_reset_reg(ct->regs, TCG_REG_X1);
147 tcg_regset_reset_reg(ct->regs, TCG_REG_X2);
148 tcg_regset_reset_reg(ct->regs, TCG_REG_X3);
149 #endif
150 break;
151 case 'A': /* Valid for arithmetic immediate (positive or negative). */
152 ct->ct |= TCG_CT_CONST_AIMM;
153 break;
154 case 'L': /* Valid for logical immediate. */
155 ct->ct |= TCG_CT_CONST_LIMM;
156 break;
157 case 'M': /* minus one */
158 ct->ct |= TCG_CT_CONST_MONE;
159 break;
160 case 'O': /* vector orr/bic immediate */
161 ct->ct |= TCG_CT_CONST_ORRI;
162 break;
163 case 'N': /* vector orr/bic immediate, inverted */
164 ct->ct |= TCG_CT_CONST_ANDI;
165 break;
166 case 'Z': /* zero */
167 ct->ct |= TCG_CT_CONST_ZERO;
168 break;
169 default:
170 return NULL;
171 }
172 return ct_str;
173 }
174
175 /* Match a constant valid for addition (12-bit, optionally shifted). */
176 static inline bool is_aimm(uint64_t val)
177 {
178 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
179 }
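/*
 * For example, 0xfff and 0xabc000 both qualify (low 12 bits, or 12 bits
 * shifted left by 12), while 0x1001 does not, since its set bits straddle
 * the two fields.
 */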
180
181 /* Match a constant valid for logical operations. */
182 static inline bool is_limm(uint64_t val)
183 {
184 /* Taking a simplified view of the logical immediates for now, ignoring
185 the replication that can happen across the field. Match bit patterns
186 of the forms
187 0....01....1
188 0..01..10..0
189 and their inverses. */
190
191 /* Make things easier below, by testing the form with msb clear. */
192 if ((int64_t)val < 0) {
193 val = ~val;
194 }
195 if (val == 0) {
196 return false;
197 }
198 val += val & -val;
199 return (val & (val - 1)) == 0;
200 }
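/*
 * A worked example of the test above: for val = 0x0ff0, val & -val is 0x010;
 * adding it carries across the run of ones, giving 0x1000, which shares no
 * bits with 0x0fff, so the immediate is accepted.  For val = 0x0f0f the
 * second run of ones survives the addition and the final test fails.
 */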
201
202 /* Return true if v16 is a valid 16-bit shifted immediate. */
203 static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
204 {
205 if (v16 == (v16 & 0xff)) {
206 *cmode = 0x8;
207 *imm8 = v16 & 0xff;
208 return true;
209 } else if (v16 == (v16 & 0xff00)) {
210 *cmode = 0xa;
211 *imm8 = v16 >> 8;
212 return true;
213 }
214 return false;
215 }
216
217 /* Return true if v32 is a valid 32-bit shifted immediate. */
218 static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
219 {
220 if (v32 == (v32 & 0xff)) {
221 *cmode = 0x0;
222 *imm8 = v32 & 0xff;
223 return true;
224 } else if (v32 == (v32 & 0xff00)) {
225 *cmode = 0x2;
226 *imm8 = (v32 >> 8) & 0xff;
227 return true;
228 } else if (v32 == (v32 & 0xff0000)) {
229 *cmode = 0x4;
230 *imm8 = (v32 >> 16) & 0xff;
231 return true;
232 } else if (v32 == (v32 & 0xff000000)) {
233 *cmode = 0x6;
234 *imm8 = v32 >> 24;
235 return true;
236 }
237 return false;
238 }
239
240 /* Return true if v32 is a valid 32-bit shifting ones immediate. */
241 static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
242 {
243 if ((v32 & 0xffff00ff) == 0xff) {
244 *cmode = 0xc;
245 *imm8 = (v32 >> 8) & 0xff;
246 return true;
247 } else if ((v32 & 0xff00ffff) == 0xffff) {
248 *cmode = 0xd;
249 *imm8 = (v32 >> 16) & 0xff;
250 return true;
251 }
252 return false;
253 }
254
255 /* Return true if v32 is a valid float32 immediate. */
256 static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
257 {
258 if (extract32(v32, 0, 19) == 0
259 && (extract32(v32, 25, 6) == 0x20
260 || extract32(v32, 25, 6) == 0x1f)) {
261 *cmode = 0xf;
262 *imm8 = (extract32(v32, 31, 1) << 7)
263 | (extract32(v32, 25, 1) << 6)
264 | extract32(v32, 19, 6);
265 return true;
266 }
267 return false;
268 }
269
270 /* Return true if v64 is a valid float64 immediate. */
271 static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
272 {
273 if (extract64(v64, 0, 48) == 0
274 && (extract64(v64, 54, 9) == 0x100
275 || extract64(v64, 54, 9) == 0x0ff)) {
276 *cmode = 0xf;
277 *imm8 = (extract64(v64, 63, 1) << 7)
278 | (extract64(v64, 54, 1) << 6)
279 | extract64(v64, 48, 6);
280 return true;
281 }
282 return false;
283 }
284
285 /*
286 * Return non-zero if v32 can be formed by MOVI+ORR.
287 * Place the parameters for MOVI in (cmode, imm8).
288 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
289 */
290 static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
291 {
292 int i;
293
294 for (i = 6; i > 0; i -= 2) {
295 /* Mask out one byte we can add with ORR. */
296 uint32_t tmp = v32 & ~(0xffu << (i * 4));
297 if (is_shimm32(tmp, cmode, imm8) ||
298 is_soimm32(tmp, cmode, imm8)) {
299 break;
300 }
301 }
302 return i;
303 }
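/*
 * E.g. v32 = 0x00ab00cd is not a single MOVI immediate, but with the byte at
 * bits 16..23 masked out the remainder 0x000000cd matches cmode 0x0, so the
 * caller can emit MOVI #0xcd followed by ORR #0xab, LSL #16 (cmode 4, the
 * value returned here).
 */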
304
305 /* Return true if V is a valid 16-bit or 32-bit shifted immediate. */
306 static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
307 {
308 if (v32 == deposit32(v32, 16, 16, v32)) {
309 return is_shimm16(v32, cmode, imm8);
310 } else {
311 return is_shimm32(v32, cmode, imm8);
312 }
313 }
314
315 static int tcg_target_const_match(tcg_target_long val, TCGType type,
316 const TCGArgConstraint *arg_ct)
317 {
318 int ct = arg_ct->ct;
319
320 if (ct & TCG_CT_CONST) {
321 return 1;
322 }
323 if (type == TCG_TYPE_I32) {
324 val = (int32_t)val;
325 }
326 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
327 return 1;
328 }
329 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
330 return 1;
331 }
332 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
333 return 1;
334 }
335 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
336 return 1;
337 }
338
339 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
340 case 0:
341 break;
342 case TCG_CT_CONST_ANDI:
343 val = ~val;
344 /* fallthru */
345 case TCG_CT_CONST_ORRI:
346 if (val == deposit64(val, 32, 32, val)) {
347 int cmode, imm8;
348 return is_shimm1632(val, &cmode, &imm8);
349 }
350 break;
351 default:
352 /* Both bits should not be set for the same insn. */
353 g_assert_not_reached();
354 }
355
356 return 0;
357 }
358
359 enum aarch64_cond_code {
360 COND_EQ = 0x0,
361 COND_NE = 0x1,
362 COND_CS = 0x2, /* Unsigned greater or equal */
363 COND_HS = COND_CS, /* ALIAS greater or equal */
364 COND_CC = 0x3, /* Unsigned less than */
365 COND_LO = COND_CC, /* ALIAS Lower */
366 COND_MI = 0x4, /* Negative */
367 COND_PL = 0x5, /* Zero or greater */
368 COND_VS = 0x6, /* Overflow */
369 COND_VC = 0x7, /* No overflow */
370 COND_HI = 0x8, /* Unsigned greater than */
371 COND_LS = 0x9, /* Unsigned less or equal */
372 COND_GE = 0xa,
373 COND_LT = 0xb,
374 COND_GT = 0xc,
375 COND_LE = 0xd,
376 COND_AL = 0xe,
377 COND_NV = 0xf, /* behaves like COND_AL here */
378 };
379
380 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
381 [TCG_COND_EQ] = COND_EQ,
382 [TCG_COND_NE] = COND_NE,
383 [TCG_COND_LT] = COND_LT,
384 [TCG_COND_GE] = COND_GE,
385 [TCG_COND_LE] = COND_LE,
386 [TCG_COND_GT] = COND_GT,
387 /* unsigned */
388 [TCG_COND_LTU] = COND_LO,
389 [TCG_COND_GTU] = COND_HI,
390 [TCG_COND_GEU] = COND_HS,
391 [TCG_COND_LEU] = COND_LS,
392 };
393
394 typedef enum {
395 LDST_ST = 0, /* store */
396 LDST_LD = 1, /* load */
397 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
398 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
399 } AArch64LdstType;
400
401 /* We encode the format of the insn into the beginning of the name, so that
402 we can have the preprocessor help "typecheck" the insn vs the output
403 function. Arm didn't provide us with nice names for the formats, so we
404 use the section number of the architecture reference manual in which the
405 instruction group is described. */
406 typedef enum {
407 /* Compare and branch (immediate). */
408 I3201_CBZ = 0x34000000,
409 I3201_CBNZ = 0x35000000,
410
411 /* Conditional branch (immediate). */
412 I3202_B_C = 0x54000000,
413
414 /* Unconditional branch (immediate). */
415 I3206_B = 0x14000000,
416 I3206_BL = 0x94000000,
417
418 /* Unconditional branch (register). */
419 I3207_BR = 0xd61f0000,
420 I3207_BLR = 0xd63f0000,
421 I3207_RET = 0xd65f0000,
422
423 /* AdvSIMD load/store single structure. */
424 I3303_LD1R = 0x0d40c000,
425
426 /* Load literal for loading the address at pc-relative offset */
427 I3305_LDR = 0x58000000,
428 I3305_LDR_v64 = 0x5c000000,
429 I3305_LDR_v128 = 0x9c000000,
430
431 /* Load/store register. Described here as 3.3.12, but the helper
432 that emits them can transform to 3.3.10 or 3.3.13. */
433 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
434 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
435 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
436 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
437
438 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
439 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
440 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
441 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
442
443 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
444 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
445
446 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
447 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
448 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
449
450 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
451 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
452
453 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
454 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
455
456 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
457 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
458
459 I3312_TO_I3310 = 0x00200800,
460 I3312_TO_I3313 = 0x01000000,
461
462 /* Load/store register pair instructions. */
463 I3314_LDP = 0x28400000,
464 I3314_STP = 0x28000000,
465
466 /* Add/subtract immediate instructions. */
467 I3401_ADDI = 0x11000000,
468 I3401_ADDSI = 0x31000000,
469 I3401_SUBI = 0x51000000,
470 I3401_SUBSI = 0x71000000,
471
472 /* Bitfield instructions. */
473 I3402_BFM = 0x33000000,
474 I3402_SBFM = 0x13000000,
475 I3402_UBFM = 0x53000000,
476
477 /* Extract instruction. */
478 I3403_EXTR = 0x13800000,
479
480 /* Logical immediate instructions. */
481 I3404_ANDI = 0x12000000,
482 I3404_ORRI = 0x32000000,
483 I3404_EORI = 0x52000000,
484
485 /* Move wide immediate instructions. */
486 I3405_MOVN = 0x12800000,
487 I3405_MOVZ = 0x52800000,
488 I3405_MOVK = 0x72800000,
489
490 /* PC relative addressing instructions. */
491 I3406_ADR = 0x10000000,
492 I3406_ADRP = 0x90000000,
493
494 /* Add/subtract shifted register instructions (without a shift). */
495 I3502_ADD = 0x0b000000,
496 I3502_ADDS = 0x2b000000,
497 I3502_SUB = 0x4b000000,
498 I3502_SUBS = 0x6b000000,
499
500 /* Add/subtract shifted register instructions (with a shift). */
501 I3502S_ADD_LSL = I3502_ADD,
502
503 /* Add/subtract with carry instructions. */
504 I3503_ADC = 0x1a000000,
505 I3503_SBC = 0x5a000000,
506
507 /* Conditional select instructions. */
508 I3506_CSEL = 0x1a800000,
509 I3506_CSINC = 0x1a800400,
510 I3506_CSINV = 0x5a800000,
511 I3506_CSNEG = 0x5a800400,
512
513 /* Data-processing (1 source) instructions. */
514 I3507_CLZ = 0x5ac01000,
515 I3507_RBIT = 0x5ac00000,
516 I3507_REV16 = 0x5ac00400,
517 I3507_REV32 = 0x5ac00800,
518 I3507_REV64 = 0x5ac00c00,
519
520 /* Data-processing (2 source) instructions. */
521 I3508_LSLV = 0x1ac02000,
522 I3508_LSRV = 0x1ac02400,
523 I3508_ASRV = 0x1ac02800,
524 I3508_RORV = 0x1ac02c00,
525 I3508_SMULH = 0x9b407c00,
526 I3508_UMULH = 0x9bc07c00,
527 I3508_UDIV = 0x1ac00800,
528 I3508_SDIV = 0x1ac00c00,
529
530 /* Data-processing (3 source) instructions. */
531 I3509_MADD = 0x1b000000,
532 I3509_MSUB = 0x1b008000,
533
534 /* Logical shifted register instructions (without a shift). */
535 I3510_AND = 0x0a000000,
536 I3510_BIC = 0x0a200000,
537 I3510_ORR = 0x2a000000,
538 I3510_ORN = 0x2a200000,
539 I3510_EOR = 0x4a000000,
540 I3510_EON = 0x4a200000,
541 I3510_ANDS = 0x6a000000,
542
543 /* Logical shifted register instructions (with a shift). */
544 I3502S_AND_LSR = I3510_AND | (1 << 22),
545
546 /* AdvSIMD copy */
547 I3605_DUP = 0x0e000400,
548 I3605_INS = 0x4e001c00,
549 I3605_UMOV = 0x0e003c00,
550
551 /* AdvSIMD modified immediate */
552 I3606_MOVI = 0x0f000400,
553 I3606_MVNI = 0x2f000400,
554 I3606_BIC = 0x2f001400,
555 I3606_ORR = 0x0f001400,
556
557 /* AdvSIMD shift by immediate */
558 I3614_SSHR = 0x0f000400,
559 I3614_SSRA = 0x0f001400,
560 I3614_SHL = 0x0f005400,
561 I3614_SLI = 0x2f005400,
562 I3614_USHR = 0x2f000400,
563 I3614_USRA = 0x2f001400,
564
565 /* AdvSIMD three same. */
566 I3616_ADD = 0x0e208400,
567 I3616_AND = 0x0e201c00,
568 I3616_BIC = 0x0e601c00,
569 I3616_BIF = 0x2ee01c00,
570 I3616_BIT = 0x2ea01c00,
571 I3616_BSL = 0x2e601c00,
572 I3616_EOR = 0x2e201c00,
573 I3616_MUL = 0x0e209c00,
574 I3616_ORR = 0x0ea01c00,
575 I3616_ORN = 0x0ee01c00,
576 I3616_SUB = 0x2e208400,
577 I3616_CMGT = 0x0e203400,
578 I3616_CMGE = 0x0e203c00,
579 I3616_CMTST = 0x0e208c00,
580 I3616_CMHI = 0x2e203400,
581 I3616_CMHS = 0x2e203c00,
582 I3616_CMEQ = 0x2e208c00,
583 I3616_SMAX = 0x0e206400,
584 I3616_SMIN = 0x0e206c00,
585 I3616_SSHL = 0x0e204400,
586 I3616_SQADD = 0x0e200c00,
587 I3616_SQSUB = 0x0e202c00,
588 I3616_UMAX = 0x2e206400,
589 I3616_UMIN = 0x2e206c00,
590 I3616_UQADD = 0x2e200c00,
591 I3616_UQSUB = 0x2e202c00,
592 I3616_USHL = 0x2e204400,
593
594 /* AdvSIMD two-reg misc. */
595 I3617_CMGT0 = 0x0e208800,
596 I3617_CMEQ0 = 0x0e209800,
597 I3617_CMLT0 = 0x0e20a800,
598 I3617_CMGE0 = 0x2e208800,
599 I3617_CMLE0 = 0x2e20a800,
600 I3617_NOT = 0x2e205800,
601 I3617_ABS = 0x0e20b800,
602 I3617_NEG = 0x2e20b800,
603
604 /* System instructions. */
605 NOP = 0xd503201f,
606 DMB_ISH = 0xd50338bf,
607 DMB_LD = 0x00000100,
608 DMB_ST = 0x00000200,
609 } AArch64Insn;
610
611 static inline uint32_t tcg_in32(TCGContext *s)
612 {
613 uint32_t v = *(uint32_t *)s->code_ptr;
614 return v;
615 }
616
617 /* Emit an opcode with "type-checking" of the format. */
618 #define tcg_out_insn(S, FMT, OP, ...) \
619 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
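/*
 * E.g. tcg_out_insn(s, 3401, ADDI, ext, rd, rn, imm) expands to
 * tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, imm).  Pairing an opcode
 * with the wrong format, say tcg_out_insn(s, 3401, ORR, ...), names the
 * nonexistent I3401_ORR and fails to compile.
 */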
620
621 static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
622 TCGReg rt, TCGReg rn, unsigned size)
623 {
624 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
625 }
626
627 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
628 int imm19, TCGReg rt)
629 {
630 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
631 }
632
633 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
634 TCGReg rt, int imm19)
635 {
636 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
637 }
638
639 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
640 TCGCond c, int imm19)
641 {
642 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
643 }
644
645 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
646 {
647 tcg_out32(s, insn | (imm26 & 0x03ffffff));
648 }
649
650 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
651 {
652 tcg_out32(s, insn | rn << 5);
653 }
654
655 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
656 TCGReg r1, TCGReg r2, TCGReg rn,
657 tcg_target_long ofs, bool pre, bool w)
658 {
659 insn |= 1u << 31; /* ext */
660 insn |= pre << 24;
661 insn |= w << 23;
662
663 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
664 insn |= (ofs & (0x7f << 3)) << (15 - 3);
665
666 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
667 }
668
669 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
670 TCGReg rd, TCGReg rn, uint64_t aimm)
671 {
672 if (aimm > 0xfff) {
673 tcg_debug_assert((aimm & 0xfff) == 0);
674 aimm >>= 12;
675 tcg_debug_assert(aimm <= 0xfff);
676 aimm |= 1 << 12; /* apply LSL 12 */
677 }
678 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
679 }
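/* E.g. aimm = 0x123000 is re-encoded above as #0x123 with the LSL 12 bit set. */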
680
681 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
682 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
683 that feed the DecodeBitMasks pseudo function. */
684 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
685 TCGReg rd, TCGReg rn, int n, int immr, int imms)
686 {
687 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
688 | rn << 5 | rd);
689 }
690
691 #define tcg_out_insn_3404 tcg_out_insn_3402
692
693 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
694 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
695 {
696 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
697 | rn << 5 | rd);
698 }
699
700 /* This function is used for the Move (wide immediate) instruction group.
701 Note that SHIFT is a full shift count, not the 2 bit HW field. */
702 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
703 TCGReg rd, uint16_t half, unsigned shift)
704 {
705 tcg_debug_assert((shift & ~0x30) == 0);
706 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
707 }
708
709 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
710 TCGReg rd, int64_t disp)
711 {
712 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
713 }
714
715 /* This function is for 3.5.2 (Add/subtract shifted register), for
716 the rare occasion when we actually want to supply a shift amount. */
717 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
718 TCGType ext, TCGReg rd, TCGReg rn,
719 TCGReg rm, int imm6)
720 {
721 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
722 }
723
724 /* This function is for 3.5.2 (Add/subtract shifted register),
725 and 3.5.10 (Logical shifted register), for the vast majority of cases
726 when we don't want to apply a shift. Thus it can also be used for
727 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
728 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
729 TCGReg rd, TCGReg rn, TCGReg rm)
730 {
731 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
732 }
733
734 #define tcg_out_insn_3503 tcg_out_insn_3502
735 #define tcg_out_insn_3508 tcg_out_insn_3502
736 #define tcg_out_insn_3510 tcg_out_insn_3502
737
738 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
739 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
740 {
741 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
742 | tcg_cond_to_aarch64[c] << 12);
743 }
744
745 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
746 TCGReg rd, TCGReg rn)
747 {
748 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
749 }
750
751 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
752 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
753 {
754 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
755 }
756
757 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
758 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
759 {
760 /* Note that bit 11 set means general register input. Therefore
761 we can handle both register sets with one function. */
762 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
763 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
764 }
765
766 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
767 TCGReg rd, bool op, int cmode, uint8_t imm8)
768 {
769 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
770 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
771 }
772
773 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
774 TCGReg rd, TCGReg rn, unsigned immhb)
775 {
776 tcg_out32(s, insn | q << 30 | immhb << 16
777 | (rn & 0x1f) << 5 | (rd & 0x1f));
778 }
779
780 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
781 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
782 {
783 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
784 | (rn & 0x1f) << 5 | (rd & 0x1f));
785 }
786
787 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
788 unsigned size, TCGReg rd, TCGReg rn)
789 {
790 tcg_out32(s, insn | q << 30 | (size << 22)
791 | (rn & 0x1f) << 5 | (rd & 0x1f));
792 }
793
794 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
795 TCGReg rd, TCGReg base, TCGType ext,
796 TCGReg regoff)
797 {
798 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
799 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
800 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
801 }
802
803 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
804 TCGReg rd, TCGReg rn, intptr_t offset)
805 {
806 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
807 }
808
809 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
810 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
811 {
812 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
813 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
814 | rn << 5 | (rd & 0x1f));
815 }
816
817 /* Register to register move using ORR (shifted register with no shift). */
818 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
819 {
820 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
821 }
822
823 /* Register to register move using ADDI (move to/from SP). */
824 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
825 {
826 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
827 }
828
829 /* This function is used for the Logical (immediate) instruction group.
830 The value of LIMM must satisfy IS_LIMM. See the comment above about
831 only supporting simplified logical immediates. */
832 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
833 TCGReg rd, TCGReg rn, uint64_t limm)
834 {
835 unsigned h, l, r, c;
836
837 tcg_debug_assert(is_limm(limm));
838
839 h = clz64(limm);
840 l = ctz64(limm);
841 if (l == 0) {
842 r = 0; /* form 0....01....1 */
843 c = ctz64(~limm) - 1;
844 if (h == 0) {
845 r = clz64(~limm); /* form 1..10..01..1 */
846 c += r;
847 }
848 } else {
849 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
850 c = r - h - 1;
851 }
852 if (ext == TCG_TYPE_I32) {
853 r &= 31;
854 c &= 31;
855 }
856
857 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
858 }
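/*
 * A worked example, assuming a 64-bit operation: limm = 0x0ff0 gives
 * h = 52 and l = 4, hence r = 60 and c = 7, i.e. a run of eight ones
 * rotated right by 60 bits -- which is exactly 0x0ff0 again, as required.
 */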
859
860 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
861 TCGReg rd, tcg_target_long v64)
862 {
863 bool q = type == TCG_TYPE_V128;
864 int cmode, imm8, i;
865
866 /* Test all bytes equal first. */
867 if (v64 == dup_const(MO_8, v64)) {
868 imm8 = (uint8_t)v64;
869 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
870 return;
871 }
872
873 /*
874 * Test all bytes 0x00 or 0xff second. This can match cases that
875 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
876 */
877 for (i = imm8 = 0; i < 8; i++) {
878 uint8_t byte = v64 >> (i * 8);
879 if (byte == 0xff) {
880 imm8 |= 1 << i;
881 } else if (byte != 0) {
882 goto fail_bytes;
883 }
884 }
885 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
886 return;
887 fail_bytes:
888
889 /*
890 * Tests for various replications. For each element width, if we
891 * cannot find an expansion there's no point checking a larger
892 * width because we already know by replication it cannot match.
893 */
894 if (v64 == dup_const(MO_16, v64)) {
895 uint16_t v16 = v64;
896
897 if (is_shimm16(v16, &cmode, &imm8)) {
898 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
899 return;
900 }
901 if (is_shimm16(~v16, &cmode, &imm8)) {
902 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
903 return;
904 }
905
906 /*
907 * Otherwise, all remaining constants can be loaded in two insns:
908 * rd = v16 & 0xff, rd |= v16 & 0xff00.
909 */
910 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
911 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
912 return;
913 } else if (v64 == dup_const(MO_32, v64)) {
914 uint32_t v32 = v64;
915 uint32_t n32 = ~v32;
916
917 if (is_shimm32(v32, &cmode, &imm8) ||
918 is_soimm32(v32, &cmode, &imm8) ||
919 is_fimm32(v32, &cmode, &imm8)) {
920 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
921 return;
922 }
923 if (is_shimm32(n32, &cmode, &imm8) ||
924 is_soimm32(n32, &cmode, &imm8)) {
925 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
926 return;
927 }
928
929 /*
930 * Restrict the set of constants to those we can load with
931 * two instructions. Others we load from the pool.
932 */
933 i = is_shimm32_pair(v32, &cmode, &imm8);
934 if (i) {
935 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
936 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
937 return;
938 }
939 i = is_shimm32_pair(n32, &cmode, &imm8);
940 if (i) {
941 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
942 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
943 return;
944 }
945 } else if (is_fimm64(v64, &cmode, &imm8)) {
946 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
947 return;
948 }
949
950 /*
951 * As a last resort, load from the constant pool. Sadly there
952 * is no LD1R (literal), so store the full 16-byte vector.
953 */
954 if (type == TCG_TYPE_V128) {
955 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
956 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
957 } else {
958 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
959 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
960 }
961 }
962
963 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
964 TCGReg rd, TCGReg rs)
965 {
966 int is_q = type - TCG_TYPE_V64;
967 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
968 return true;
969 }
970
971 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
972 TCGReg r, TCGReg base, intptr_t offset)
973 {
974 TCGReg temp = TCG_REG_TMP;
975
976 if (offset < -0xffffff || offset > 0xffffff) {
977 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
978 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
979 base = temp;
980 } else {
981 AArch64Insn add_insn = I3401_ADDI;
982
983 if (offset < 0) {
984 add_insn = I3401_SUBI;
985 offset = -offset;
986 }
987 if (offset & 0xfff000) {
988 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
989 base = temp;
990 }
991 if (offset & 0xfff) {
992 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
993 base = temp;
994 }
995 }
996 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
997 return true;
998 }
999
1000 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1001 tcg_target_long value)
1002 {
1003 tcg_target_long svalue = value;
1004 tcg_target_long ivalue = ~value;
1005 tcg_target_long t0, t1, t2;
1006 int s0, s1;
1007 AArch64Insn opc;
1008
1009 switch (type) {
1010 case TCG_TYPE_I32:
1011 case TCG_TYPE_I64:
1012 tcg_debug_assert(rd < 32);
1013 break;
1014
1015 case TCG_TYPE_V64:
1016 case TCG_TYPE_V128:
1017 tcg_debug_assert(rd >= 32);
1018 tcg_out_dupi_vec(s, type, rd, value);
1019 return;
1020
1021 default:
1022 g_assert_not_reached();
1023 }
1024
1025 /* For 32-bit values, discard potential garbage in value. For 64-bit
1026 values within [2**31, 2**32-1], we can create smaller sequences by
1027 interpreting this as a negative 32-bit number, while ensuring that
1028 the high 32 bits are cleared by setting SF=0. */
1029 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1030 svalue = (int32_t)value;
1031 value = (uint32_t)value;
1032 ivalue = (uint32_t)ivalue;
1033 type = TCG_TYPE_I32;
1034 }
1035
1036 /* Speed things up by handling the common case of small positive
1037 and negative values specially. */
1038 if ((value & ~0xffffull) == 0) {
1039 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1040 return;
1041 } else if ((ivalue & ~0xffffull) == 0) {
1042 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1043 return;
1044 }
1045
1046 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
1047 use the sign-extended value. That lets us match rotated values such
1048 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1049 if (is_limm(svalue)) {
1050 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1051 return;
1052 }
1053
1054 /* Look for host pointer values within 4G of the PC. This happens
1055 often when loading pointers to QEMU's own data structures. */
1056 if (type == TCG_TYPE_I64) {
1057 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1058 tcg_target_long disp = value - src_rx;
1059 if (disp == sextract64(disp, 0, 21)) {
1060 tcg_out_insn(s, 3406, ADR, rd, disp);
1061 return;
1062 }
1063 disp = (value >> 12) - (src_rx >> 12);
1064 if (disp == sextract64(disp, 0, 21)) {
1065 tcg_out_insn(s, 3406, ADRP, rd, disp);
1066 if (value & 0xfff) {
1067 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1068 }
1069 return;
1070 }
1071 }
1072
1073 /* Would it take fewer insns to begin with MOVN? */
1074 if (ctpop64(value) >= 32) {
1075 t0 = ivalue;
1076 opc = I3405_MOVN;
1077 } else {
1078 t0 = value;
1079 opc = I3405_MOVZ;
1080 }
1081 s0 = ctz64(t0) & (63 & -16);
1082 t1 = t0 & ~(0xffffUL << s0);
1083 s1 = ctz64(t1) & (63 & -16);
1084 t2 = t1 & ~(0xffffUL << s1);
1085 if (t2 == 0) {
1086 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1087 if (t1 != 0) {
1088 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1089 }
1090 return;
1091 }
1092
1093 /* For more than 2 insns, dump it into the constant pool. */
1094 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1095 tcg_out_insn(s, 3305, LDR, 0, rd);
1096 }
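/*
 * So, for instance, 0x0000123400005678 is emitted as MOVZ rd, #0x5678
 * followed by MOVK rd, #0x1234, LSL #32, while a value needing three or
 * more halfwords goes to the pool and is fetched with a single LDR (literal).
 */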
1097
1098 /* Define something more legible for general use. */
1099 #define tcg_out_ldst_r tcg_out_insn_3310
1100
1101 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1102 TCGReg rn, intptr_t offset, int lgsize)
1103 {
1104 /* If the offset is naturally aligned and in range, then we can
1105 use the scaled uimm12 encoding */
1106 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1107 uintptr_t scaled_uimm = offset >> lgsize;
1108 if (scaled_uimm <= 0xfff) {
1109 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1110 return;
1111 }
1112 }
1113
1114 /* Small signed offsets can use the unscaled encoding. */
1115 if (offset >= -256 && offset < 256) {
1116 tcg_out_insn_3312(s, insn, rd, rn, offset);
1117 return;
1118 }
1119
1120 /* Worst-case scenario, move offset to temp register, use reg offset. */
1121 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1122 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1123 }
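/*
 * For an 8-byte access (lgsize 3) this covers aligned offsets up to 32760
 * with one scaled LDR/STR, anything in [-256, 255] with the unscaled form,
 * and everything else with a movi into TMP plus a register-offset access.
 */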
1124
1125 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1126 {
1127 if (ret == arg) {
1128 return true;
1129 }
1130 switch (type) {
1131 case TCG_TYPE_I32:
1132 case TCG_TYPE_I64:
1133 if (ret < 32 && arg < 32) {
1134 tcg_out_movr(s, type, ret, arg);
1135 break;
1136 } else if (ret < 32) {
1137 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1138 break;
1139 } else if (arg < 32) {
1140 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1141 break;
1142 }
1143 /* FALLTHRU */
1144
1145 case TCG_TYPE_V64:
1146 tcg_debug_assert(ret >= 32 && arg >= 32);
1147 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1148 break;
1149 case TCG_TYPE_V128:
1150 tcg_debug_assert(ret >= 32 && arg >= 32);
1151 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1152 break;
1153
1154 default:
1155 g_assert_not_reached();
1156 }
1157 return true;
1158 }
1159
1160 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1161 TCGReg base, intptr_t ofs)
1162 {
1163 AArch64Insn insn;
1164 int lgsz;
1165
1166 switch (type) {
1167 case TCG_TYPE_I32:
1168 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1169 lgsz = 2;
1170 break;
1171 case TCG_TYPE_I64:
1172 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1173 lgsz = 3;
1174 break;
1175 case TCG_TYPE_V64:
1176 insn = I3312_LDRVD;
1177 lgsz = 3;
1178 break;
1179 case TCG_TYPE_V128:
1180 insn = I3312_LDRVQ;
1181 lgsz = 4;
1182 break;
1183 default:
1184 g_assert_not_reached();
1185 }
1186 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1187 }
1188
1189 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1190 TCGReg base, intptr_t ofs)
1191 {
1192 AArch64Insn insn;
1193 int lgsz;
1194
1195 switch (type) {
1196 case TCG_TYPE_I32:
1197 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1198 lgsz = 2;
1199 break;
1200 case TCG_TYPE_I64:
1201 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1202 lgsz = 3;
1203 break;
1204 case TCG_TYPE_V64:
1205 insn = I3312_STRVD;
1206 lgsz = 3;
1207 break;
1208 case TCG_TYPE_V128:
1209 insn = I3312_STRVQ;
1210 lgsz = 4;
1211 break;
1212 default:
1213 g_assert_not_reached();
1214 }
1215 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1216 }
1217
1218 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1219 TCGReg base, intptr_t ofs)
1220 {
1221 if (type <= TCG_TYPE_I64 && val == 0) {
1222 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1223 return true;
1224 }
1225 return false;
1226 }
1227
1228 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1229 TCGReg rn, unsigned int a, unsigned int b)
1230 {
1231 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1232 }
1233
1234 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1235 TCGReg rn, unsigned int a, unsigned int b)
1236 {
1237 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1238 }
1239
1240 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1241 TCGReg rn, unsigned int a, unsigned int b)
1242 {
1243 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1244 }
1245
1246 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1247 TCGReg rn, TCGReg rm, unsigned int a)
1248 {
1249 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1250 }
1251
1252 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1253 TCGReg rd, TCGReg rn, unsigned int m)
1254 {
1255 int bits = ext ? 64 : 32;
1256 int max = bits - 1;
1257 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1258 }
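/* E.g. a 32-bit shift left by 8 becomes UBFM wd, wn, #24, #23, the LSL alias. */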
1259
1260 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1261 TCGReg rd, TCGReg rn, unsigned int m)
1262 {
1263 int max = ext ? 63 : 31;
1264 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1265 }
1266
1267 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1268 TCGReg rd, TCGReg rn, unsigned int m)
1269 {
1270 int max = ext ? 63 : 31;
1271 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1272 }
1273
1274 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1275 TCGReg rd, TCGReg rn, unsigned int m)
1276 {
1277 int max = ext ? 63 : 31;
1278 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1279 }
1280
1281 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1282 TCGReg rd, TCGReg rn, unsigned int m)
1283 {
1284 int bits = ext ? 64 : 32;
1285 int max = bits - 1;
1286 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1287 }
1288
1289 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1290 TCGReg rn, unsigned lsb, unsigned width)
1291 {
1292 unsigned size = ext ? 64 : 32;
1293 unsigned a = (size - lsb) & (size - 1);
1294 unsigned b = width - 1;
1295 tcg_out_bfm(s, ext, rd, rn, a, b);
1296 }
1297
1298 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1299 tcg_target_long b, bool const_b)
1300 {
1301 if (const_b) {
1302 /* Using CMP or CMN aliases. */
1303 if (b >= 0) {
1304 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1305 } else {
1306 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1307 }
1308 } else {
1309 /* Using CMP alias SUBS wzr, Wn, Wm */
1310 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1311 }
1312 }
1313
1314 static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1315 {
1316 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1317 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1318 tcg_out_insn(s, 3206, B, offset);
1319 }
1320
1321 static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1322 {
1323 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1324 if (offset == sextract64(offset, 0, 26)) {
1325 tcg_out_insn(s, 3206, B, offset);
1326 } else {
1327 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1328 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1329 }
1330 }
1331
1332 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1333 {
1334 tcg_out_insn(s, 3207, BLR, reg);
1335 }
1336
1337 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
1338 {
1339 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1340 if (offset == sextract64(offset, 0, 26)) {
1341 tcg_out_insn(s, 3206, BL, offset);
1342 } else {
1343 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1344 tcg_out_callr(s, TCG_REG_TMP);
1345 }
1346 }
1347
1348 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
1349 uintptr_t jmp_rw, uintptr_t addr)
1350 {
1351 tcg_insn_unit i1, i2;
1352 TCGType rt = TCG_TYPE_I64;
1353 TCGReg rd = TCG_REG_TMP;
1354 uint64_t pair;
1355
1356 ptrdiff_t offset = addr - jmp_rx;
1357
1358 if (offset == sextract64(offset, 0, 26)) {
1359 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1360 i2 = NOP;
1361 } else {
1362 offset = (addr >> 12) - (jmp_rx >> 12);
1363
1364 /* patch ADRP */
1365 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1366 /* patch ADDI */
1367 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1368 }
1369 pair = (uint64_t)i2 << 32 | i1;
1370 qatomic_set((uint64_t *)jmp_rw, pair);
1371 flush_idcache_range(jmp_rx, jmp_rw, 8);
1372 }
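/*
 * Both sequences are two insns in an 8-byte-aligned slot (the goto_tb
 * expansion pads with a NOP to guarantee the alignment), so the single
 * 64-bit store above replaces the pair atomically and a concurrently
 * executing vCPU sees either the old target or the new one, never a mix.
 */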
1373
1374 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1375 {
1376 if (!l->has_value) {
1377 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1378 tcg_out_insn(s, 3206, B, 0);
1379 } else {
1380 tcg_out_goto(s, l->u.value_ptr);
1381 }
1382 }
1383
1384 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1385 TCGArg b, bool b_const, TCGLabel *l)
1386 {
1387 intptr_t offset;
1388 bool need_cmp;
1389
1390 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1391 need_cmp = false;
1392 } else {
1393 need_cmp = true;
1394 tcg_out_cmp(s, ext, a, b, b_const);
1395 }
1396
1397 if (!l->has_value) {
1398 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1399 offset = tcg_in32(s) >> 5;
1400 } else {
1401 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1402 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1403 }
1404
1405 if (need_cmp) {
1406 tcg_out_insn(s, 3202, B_C, c, offset);
1407 } else if (c == TCG_COND_EQ) {
1408 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1409 } else {
1410 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1411 }
1412 }
1413
1414 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1415 {
1416 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1417 }
1418
1419 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1420 {
1421 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1422 }
1423
1424 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1425 {
1426 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1427 }
1428
1429 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1430 TCGReg rd, TCGReg rn)
1431 {
1432 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1433 int bits = (8 << s_bits) - 1;
1434 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1435 }
1436
1437 static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1438 TCGReg rd, TCGReg rn)
1439 {
1440 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1441 int bits = (8 << s_bits) - 1;
1442 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1443 }
1444
1445 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1446 TCGReg rn, int64_t aimm)
1447 {
1448 if (aimm >= 0) {
1449 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1450 } else {
1451 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1452 }
1453 }
1454
1455 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1456 TCGReg rh, TCGReg al, TCGReg ah,
1457 tcg_target_long bl, tcg_target_long bh,
1458 bool const_bl, bool const_bh, bool sub)
1459 {
1460 TCGReg orig_rl = rl;
1461 AArch64Insn insn;
1462
1463 if (rl == ah || (!const_bh && rl == bh)) {
1464 rl = TCG_REG_TMP;
1465 }
1466
1467 if (const_bl) {
1468 insn = I3401_ADDSI;
1469 if ((bl < 0) ^ sub) {
1470 insn = I3401_SUBSI;
1471 bl = -bl;
1472 }
1473 if (unlikely(al == TCG_REG_XZR)) {
1474 /* ??? We want to allow al to be zero for the benefit of
1475 negation via subtraction. However, that leaves open the
1476 possibility of adding 0+const in the low part, and the
1477 immediate add instructions encode XSP not XZR. Don't try
1478 anything more elaborate here than loading another zero. */
1479 al = TCG_REG_TMP;
1480 tcg_out_movi(s, ext, al, 0);
1481 }
1482 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1483 } else {
1484 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1485 }
1486
1487 insn = I3503_ADC;
1488 if (const_bh) {
1489 /* Note that the only two constants we support are 0 and -1, and
1490 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1491 if ((bh != 0) ^ sub) {
1492 insn = I3503_SBC;
1493 }
1494 bh = TCG_REG_XZR;
1495 } else if (sub) {
1496 insn = I3503_SBC;
1497 }
1498 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1499
1500 tcg_out_mov(s, ext, orig_rl, rl);
1501 }
1502
1503 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1504 {
1505 static const uint32_t sync[] = {
1506 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1507 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1508 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1509 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1510 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1511 };
1512 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1513 }
1514
1515 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1516 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1517 {
1518 TCGReg a1 = a0;
1519 if (is_ctz) {
1520 a1 = TCG_REG_TMP;
1521 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1522 }
1523 if (const_b && b == (ext ? 64 : 32)) {
1524 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1525 } else {
1526 AArch64Insn sel = I3506_CSEL;
1527
1528 tcg_out_cmp(s, ext, a0, 0, 1);
1529 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1530
1531 if (const_b) {
1532 if (b == -1) {
1533 b = TCG_REG_XZR;
1534 sel = I3506_CSINV;
1535 } else if (b == 0) {
1536 b = TCG_REG_XZR;
1537 } else {
1538 tcg_out_movi(s, ext, d, b);
1539 b = d;
1540 }
1541 }
1542 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1543 }
1544 }
1545
1546 #ifdef CONFIG_SOFTMMU
1547 #include "../tcg-ldst.c.inc"
1548
1549 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1550 * TCGMemOpIdx oi, uintptr_t ra)
1551 */
1552 static void * const qemu_ld_helpers[16] = {
1553 [MO_UB] = helper_ret_ldub_mmu,
1554 [MO_LEUW] = helper_le_lduw_mmu,
1555 [MO_LEUL] = helper_le_ldul_mmu,
1556 [MO_LEQ] = helper_le_ldq_mmu,
1557 [MO_BEUW] = helper_be_lduw_mmu,
1558 [MO_BEUL] = helper_be_ldul_mmu,
1559 [MO_BEQ] = helper_be_ldq_mmu,
1560 };
1561
1562 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1563 * uintxx_t val, TCGMemOpIdx oi,
1564 * uintptr_t ra)
1565 */
1566 static void * const qemu_st_helpers[16] = {
1567 [MO_UB] = helper_ret_stb_mmu,
1568 [MO_LEUW] = helper_le_stw_mmu,
1569 [MO_LEUL] = helper_le_stl_mmu,
1570 [MO_LEQ] = helper_le_stq_mmu,
1571 [MO_BEUW] = helper_be_stw_mmu,
1572 [MO_BEUL] = helper_be_stl_mmu,
1573 [MO_BEQ] = helper_be_stq_mmu,
1574 };
1575
1576 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1577 {
1578 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1579 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1580 tcg_out_insn(s, 3406, ADR, rd, offset);
1581 }
1582
1583 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1584 {
1585 TCGMemOpIdx oi = lb->oi;
1586 MemOp opc = get_memop(oi);
1587 MemOp size = opc & MO_SIZE;
1588
1589 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1590 return false;
1591 }
1592
1593 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1594 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1595 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1596 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1597 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1598 if (opc & MO_SIGN) {
1599 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1600 } else {
1601 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1602 }
1603
1604 tcg_out_goto(s, lb->raddr);
1605 return true;
1606 }
1607
1608 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1609 {
1610 TCGMemOpIdx oi = lb->oi;
1611 MemOp opc = get_memop(oi);
1612 MemOp size = opc & MO_SIZE;
1613
1614 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1615 return false;
1616 }
1617
1618 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1619 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1620 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1621 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1622 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1623 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1624 tcg_out_goto(s, lb->raddr);
1625 return true;
1626 }
1627
1628 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1629 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1630 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1631 {
1632 TCGLabelQemuLdst *label = new_ldst_label(s);
1633
1634 label->is_ld = is_ld;
1635 label->oi = oi;
1636 label->type = ext;
1637 label->datalo_reg = data_reg;
1638 label->addrlo_reg = addr_reg;
1639 label->raddr = tcg_splitwx_to_rx(raddr);
1640 label->label_ptr[0] = label_ptr;
1641 }
1642
1643 /* We expect to use a 7-bit scaled negative offset from ENV. */
1644 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1645 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1646
1647 /* These offsets are built into the LDP below. */
1648 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1649 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1650
1651 /* Load and compare a TLB entry, emitting the conditional jump to the
1652 slow path for the failure case, which will be patched later when finalizing
1653 the slow path. Generated code returns the host addend in X1,
1654 clobbers X0,X2,X3,TMP. */
1655 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1656 tcg_insn_unit **label_ptr, int mem_index,
1657 bool is_read)
1658 {
1659 unsigned a_bits = get_alignment_bits(opc);
1660 unsigned s_bits = opc & MO_SIZE;
1661 unsigned a_mask = (1u << a_bits) - 1;
1662 unsigned s_mask = (1u << s_bits) - 1;
1663 TCGReg x3;
1664 TCGType mask_type;
1665 uint64_t compare_mask;
1666
1667 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1668 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1669
1670 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
1671 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1672 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1673
1674 /* Extract the TLB index from the address into X0. */
1675 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1676 TCG_REG_X0, TCG_REG_X0, addr_reg,
1677 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1678
1679 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
1680 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1681
1682 /* Load the tlb comparator into X0, and the fast path addend into X1. */
1683 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1684 ? offsetof(CPUTLBEntry, addr_read)
1685 : offsetof(CPUTLBEntry, addr_write));
1686 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1687 offsetof(CPUTLBEntry, addend));
1688
1689 /* For aligned accesses, we check the first byte and include the alignment
1690 bits within the address. For unaligned access, we check that we don't
1691 cross pages using the address of the last byte of the access. */
1692 if (a_bits >= s_bits) {
1693 x3 = addr_reg;
1694 } else {
1695 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1696 TCG_REG_X3, addr_reg, s_mask - a_mask);
1697 x3 = TCG_REG_X3;
1698 }
1699 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1700
1701 /* Store the page mask part of the address into X3. */
1702 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1703 TCG_REG_X3, x3, compare_mask);
1704
1705 /* Perform the address comparison. */
1706 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1707
1708 /* If not equal, we jump to the slow path. */
1709 *label_ptr = s->code_ptr;
1710 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1711 }
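/*
 * Roughly, the fast path built above is:
 *    ldp   x0, x1, [env + tlb mask/table offset]
 *    and   x0, x0, addr_reg, lsr #(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)
 *    add   x1, x1, x0
 *    ldr   x0, [x1, #offsetof(CPUTLBEntry, addr_read/addr_write)]
 *    ldr   x1, [x1, #offsetof(CPUTLBEntry, addend)]
 *    add   x3, addr_reg, #(s_mask - a_mask)     (only if a_bits < s_bits)
 *    and   x3, {x3 or addr_reg}, #(TARGET_PAGE_MASK | a_mask)
 *    cmp   x0, x3
 *    b.ne  slow_path                            (patched via the ldst label)
 */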
1712
1713 #endif /* CONFIG_SOFTMMU */
1714
1715 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1716 TCGReg data_r, TCGReg addr_r,
1717 TCGType otype, TCGReg off_r)
1718 {
1719 const MemOp bswap = memop & MO_BSWAP;
1720
1721 switch (memop & MO_SSIZE) {
1722 case MO_UB:
1723 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1724 break;
1725 case MO_SB:
1726 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1727 data_r, addr_r, otype, off_r);
1728 break;
1729 case MO_UW:
1730 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1731 if (bswap) {
1732 tcg_out_rev16(s, data_r, data_r);
1733 }
1734 break;
1735 case MO_SW:
1736 if (bswap) {
1737 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1738 tcg_out_rev16(s, data_r, data_r);
1739 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1740 } else {
1741 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1742 data_r, addr_r, otype, off_r);
1743 }
1744 break;
1745 case MO_UL:
1746 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1747 if (bswap) {
1748 tcg_out_rev32(s, data_r, data_r);
1749 }
1750 break;
1751 case MO_SL:
1752 if (bswap) {
1753 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1754 tcg_out_rev32(s, data_r, data_r);
1755 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1756 } else {
1757 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1758 }
1759 break;
1760 case MO_Q:
1761 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1762 if (bswap) {
1763 tcg_out_rev64(s, data_r, data_r);
1764 }
1765 break;
1766 default:
1767 tcg_abort();
1768 }
1769 }
1770
1771 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1772 TCGReg data_r, TCGReg addr_r,
1773 TCGType otype, TCGReg off_r)
1774 {
1775 const MemOp bswap = memop & MO_BSWAP;
1776
1777 switch (memop & MO_SIZE) {
1778 case MO_8:
1779 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1780 break;
1781 case MO_16:
1782 if (bswap && data_r != TCG_REG_XZR) {
1783 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1784 data_r = TCG_REG_TMP;
1785 }
1786 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1787 break;
1788 case MO_32:
1789 if (bswap && data_r != TCG_REG_XZR) {
1790 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1791 data_r = TCG_REG_TMP;
1792 }
1793 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1794 break;
1795 case MO_64:
1796 if (bswap && data_r != TCG_REG_XZR) {
1797 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1798 data_r = TCG_REG_TMP;
1799 }
1800 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1801 break;
1802 default:
1803 tcg_abort();
1804 }
1805 }
1806
1807 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1808 TCGMemOpIdx oi, TCGType ext)
1809 {
1810 MemOp memop = get_memop(oi);
1811 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1812 #ifdef CONFIG_SOFTMMU
1813 unsigned mem_index = get_mmuidx(oi);
1814 tcg_insn_unit *label_ptr;
1815
1816 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1817 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1818 TCG_REG_X1, otype, addr_reg);
1819 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1820 s->code_ptr, label_ptr);
1821 #else /* !CONFIG_SOFTMMU */
1822 if (USE_GUEST_BASE) {
1823 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1824 TCG_REG_GUEST_BASE, otype, addr_reg);
1825 } else {
1826 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1827 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1828 }
1829 #endif /* CONFIG_SOFTMMU */
1830 }
1831
1832 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1833 TCGMemOpIdx oi)
1834 {
1835 MemOp memop = get_memop(oi);
1836 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1837 #ifdef CONFIG_SOFTMMU
1838 unsigned mem_index = get_mmuidx(oi);
1839 tcg_insn_unit *label_ptr;
1840
1841 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1842 tcg_out_qemu_st_direct(s, memop, data_reg,
1843 TCG_REG_X1, otype, addr_reg);
1844 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1845 data_reg, addr_reg, s->code_ptr, label_ptr);
1846 #else /* !CONFIG_SOFTMMU */
1847 if (USE_GUEST_BASE) {
1848 tcg_out_qemu_st_direct(s, memop, data_reg,
1849 TCG_REG_GUEST_BASE, otype, addr_reg);
1850 } else {
1851 tcg_out_qemu_st_direct(s, memop, data_reg,
1852 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1853 }
1854 #endif /* CONFIG_SOFTMMU */
1855 }
1856
1857 static const tcg_insn_unit *tb_ret_addr;
1858
1859 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1860 const TCGArg args[TCG_MAX_OP_ARGS],
1861 const int const_args[TCG_MAX_OP_ARGS])
1862 {
1863 /* 99% of the time, we can signal the use of extension registers
1864 by looking to see if the opcode handles 64-bit data. */
1865 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1866
1867 /* Hoist the loads of the most common arguments. */
1868 TCGArg a0 = args[0];
1869 TCGArg a1 = args[1];
1870 TCGArg a2 = args[2];
1871 int c2 = const_args[2];
1872
1873 /* Some operands are defined with "rZ" constraint, a register or
1874 the zero register. These need not actually test args[I] == 0. */
1875 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1876
1877 switch (opc) {
1878 case INDEX_op_exit_tb:
1879 /* Reuse the zeroing that exists for goto_ptr. */
1880 if (a0 == 0) {
1881 tcg_out_goto_long(s, tcg_code_gen_epilogue);
1882 } else {
1883 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1884 tcg_out_goto_long(s, tb_ret_addr);
1885 }
1886 break;
1887
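/* Two forms of goto_tb are emitted.  With direct jumps, the aligned
   ADRP+ADD pair below is rewritten in place by tb_target_set_jmp_target
   once the destination is known; otherwise the destination is loaded
   from the tb_jmp_target_addr table and reached via BR. */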
1888 case INDEX_op_goto_tb:
1889 if (s->tb_jmp_insn_offset != NULL) {
1890 /* TCG_TARGET_HAS_direct_jump */
1891 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1892 write can be used to patch the target address. */
1893 if ((uintptr_t)s->code_ptr & 7) {
1894 tcg_out32(s, NOP);
1895 }
1896 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1897 /* The actual branch destination will be patched by
1898 tb_target_set_jmp_target later. */
1899 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1900 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1901 } else {
1902 /* !TCG_TARGET_HAS_direct_jump */
1903 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1904 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1905 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1906 }
1907 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1908 set_jmp_reset_offset(s, a0);
1909 break;
1910
1911 case INDEX_op_goto_ptr:
1912 tcg_out_insn(s, 3207, BR, a0);
1913 break;
1914
1915 case INDEX_op_br:
1916 tcg_out_goto_label(s, arg_label(a0));
1917 break;
1918
1919 case INDEX_op_ld8u_i32:
1920 case INDEX_op_ld8u_i64:
1921 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1922 break;
1923 case INDEX_op_ld8s_i32:
1924 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1925 break;
1926 case INDEX_op_ld8s_i64:
1927 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1928 break;
1929 case INDEX_op_ld16u_i32:
1930 case INDEX_op_ld16u_i64:
1931 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1932 break;
1933 case INDEX_op_ld16s_i32:
1934 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1935 break;
1936 case INDEX_op_ld16s_i64:
1937 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1938 break;
1939 case INDEX_op_ld_i32:
1940 case INDEX_op_ld32u_i64:
1941 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1942 break;
1943 case INDEX_op_ld32s_i64:
1944 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1945 break;
1946 case INDEX_op_ld_i64:
1947 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1948 break;
1949
1950 case INDEX_op_st8_i32:
1951 case INDEX_op_st8_i64:
1952 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1953 break;
1954 case INDEX_op_st16_i32:
1955 case INDEX_op_st16_i64:
1956 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1957 break;
1958 case INDEX_op_st_i32:
1959 case INDEX_op_st32_i64:
1960 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1961 break;
1962 case INDEX_op_st_i64:
1963 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1964 break;
1965
1966 case INDEX_op_add_i32:
1967 a2 = (int32_t)a2;
1968 /* FALLTHRU */
1969 case INDEX_op_add_i64:
1970 if (c2) {
1971 tcg_out_addsubi(s, ext, a0, a1, a2);
1972 } else {
1973 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1974 }
1975 break;
1976
1977 case INDEX_op_sub_i32:
1978 a2 = (int32_t)a2;
1979 /* FALLTHRU */
1980 case INDEX_op_sub_i64:
1981 if (c2) {
1982 tcg_out_addsubi(s, ext, a0, a1, -a2);
1983 } else {
1984 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1985 }
1986 break;
1987
1988 case INDEX_op_neg_i64:
1989 case INDEX_op_neg_i32:
1990 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1991 break;
1992
1993 case INDEX_op_and_i32:
1994 a2 = (int32_t)a2;
1995 /* FALLTHRU */
1996 case INDEX_op_and_i64:
1997 if (c2) {
1998 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1999 } else {
2000 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2001 }
2002 break;
2003
2004 case INDEX_op_andc_i32:
2005 a2 = (int32_t)a2;
2006 /* FALLTHRU */
2007 case INDEX_op_andc_i64:
2008 if (c2) {
2009 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2010 } else {
2011 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2012 }
2013 break;
2014
2015 case INDEX_op_or_i32:
2016 a2 = (int32_t)a2;
2017 /* FALLTHRU */
2018 case INDEX_op_or_i64:
2019 if (c2) {
2020 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2021 } else {
2022 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2023 }
2024 break;
2025
2026 case INDEX_op_orc_i32:
2027 a2 = (int32_t)a2;
2028 /* FALLTHRU */
2029 case INDEX_op_orc_i64:
2030 if (c2) {
2031 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2032 } else {
2033 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2034 }
2035 break;
2036
2037 case INDEX_op_xor_i32:
2038 a2 = (int32_t)a2;
2039 /* FALLTHRU */
2040 case INDEX_op_xor_i64:
2041 if (c2) {
2042 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2043 } else {
2044 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2045 }
2046 break;
2047
2048 case INDEX_op_eqv_i32:
2049 a2 = (int32_t)a2;
2050 /* FALLTHRU */
2051 case INDEX_op_eqv_i64:
2052 if (c2) {
2053 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2054 } else {
2055 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2056 }
2057 break;
2058
2059 case INDEX_op_not_i64:
2060 case INDEX_op_not_i32:
2061 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2062 break;
2063
2064 case INDEX_op_mul_i64:
2065 case INDEX_op_mul_i32:
2066 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2067 break;
2068
2069 case INDEX_op_div_i64:
2070 case INDEX_op_div_i32:
2071 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2072 break;
2073 case INDEX_op_divu_i64:
2074 case INDEX_op_divu_i32:
2075 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2076 break;
2077
2078 case INDEX_op_rem_i64:
2079 case INDEX_op_rem_i32:
2080 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2081 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2082 break;
2083 case INDEX_op_remu_i64:
2084 case INDEX_op_remu_i32:
2085 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2086 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2087 break;
2088
2089 case INDEX_op_shl_i64:
2090 case INDEX_op_shl_i32:
2091 if (c2) {
2092 tcg_out_shl(s, ext, a0, a1, a2);
2093 } else {
2094 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2095 }
2096 break;
2097
2098 case INDEX_op_shr_i64:
2099 case INDEX_op_shr_i32:
2100 if (c2) {
2101 tcg_out_shr(s, ext, a0, a1, a2);
2102 } else {
2103 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2104 }
2105 break;
2106
2107 case INDEX_op_sar_i64:
2108 case INDEX_op_sar_i32:
2109 if (c2) {
2110 tcg_out_sar(s, ext, a0, a1, a2);
2111 } else {
2112 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2113 }
2114 break;
2115
2116 case INDEX_op_rotr_i64:
2117 case INDEX_op_rotr_i32:
2118 if (c2) {
2119 tcg_out_rotr(s, ext, a0, a1, a2);
2120 } else {
2121 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2122 }
2123 break;
2124
2125 case INDEX_op_rotl_i64:
2126 case INDEX_op_rotl_i32:
2127 if (c2) {
2128 tcg_out_rotl(s, ext, a0, a1, a2);
2129 } else {
2130 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2131 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2132 }
2133 break;
2134
2135 case INDEX_op_clz_i64:
2136 case INDEX_op_clz_i32:
2137 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2138 break;
2139 case INDEX_op_ctz_i64:
2140 case INDEX_op_ctz_i32:
2141 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2142 break;
2143
2144 case INDEX_op_brcond_i32:
2145 a1 = (int32_t)a1;
2146 /* FALLTHRU */
2147 case INDEX_op_brcond_i64:
2148 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2149 break;
2150
2151 case INDEX_op_setcond_i32:
2152 a2 = (int32_t)a2;
2153 /* FALLTHRU */
2154 case INDEX_op_setcond_i64:
2155 tcg_out_cmp(s, ext, a1, a2, c2);
2156 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
2157 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2158 TCG_REG_XZR, tcg_invert_cond(args[3]));
2159 break;
2160
2161 case INDEX_op_movcond_i32:
2162 a2 = (int32_t)a2;
2163 /* FALLTHRU */
2164 case INDEX_op_movcond_i64:
2165 tcg_out_cmp(s, ext, a1, a2, c2);
2166 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2167 break;
2168
2169 case INDEX_op_qemu_ld_i32:
2170 case INDEX_op_qemu_ld_i64:
2171 tcg_out_qemu_ld(s, a0, a1, a2, ext);
2172 break;
2173 case INDEX_op_qemu_st_i32:
2174 case INDEX_op_qemu_st_i64:
2175 tcg_out_qemu_st(s, REG0(0), a1, a2);
2176 break;
2177
2178 case INDEX_op_bswap64_i64:
2179 tcg_out_rev64(s, a0, a1);
2180 break;
2181 case INDEX_op_bswap32_i64:
2182 case INDEX_op_bswap32_i32:
2183 tcg_out_rev32(s, a0, a1);
2184 break;
2185 case INDEX_op_bswap16_i64:
2186 case INDEX_op_bswap16_i32:
2187 tcg_out_rev16(s, a0, a1);
2188 break;
2189
2190 case INDEX_op_ext8s_i64:
2191 case INDEX_op_ext8s_i32:
2192 tcg_out_sxt(s, ext, MO_8, a0, a1);
2193 break;
2194 case INDEX_op_ext16s_i64:
2195 case INDEX_op_ext16s_i32:
2196 tcg_out_sxt(s, ext, MO_16, a0, a1);
2197 break;
2198 case INDEX_op_ext_i32_i64:
2199 case INDEX_op_ext32s_i64:
2200 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2201 break;
2202 case INDEX_op_ext8u_i64:
2203 case INDEX_op_ext8u_i32:
2204 tcg_out_uxt(s, MO_8, a0, a1);
2205 break;
2206 case INDEX_op_ext16u_i64:
2207 case INDEX_op_ext16u_i32:
2208 tcg_out_uxt(s, MO_16, a0, a1);
2209 break;
2210 case INDEX_op_extu_i32_i64:
2211 case INDEX_op_ext32u_i64:
2212 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2213 break;
2214
2215 case INDEX_op_deposit_i64:
2216 case INDEX_op_deposit_i32:
2217 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2218 break;
2219
2220 case INDEX_op_extract_i64:
2221 case INDEX_op_extract_i32:
2222 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2223 break;
2224
2225 case INDEX_op_sextract_i64:
2226 case INDEX_op_sextract_i32:
2227 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2228 break;
2229
2230 case INDEX_op_extract2_i64:
2231 case INDEX_op_extract2_i32:
2232 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2233 break;
2234
2235 case INDEX_op_add2_i32:
2236 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2237 (int32_t)args[4], args[5], const_args[4],
2238 const_args[5], false);
2239 break;
2240 case INDEX_op_add2_i64:
2241 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2242 args[5], const_args[4], const_args[5], false);
2243 break;
2244 case INDEX_op_sub2_i32:
2245 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2246 (int32_t)args[4], args[5], const_args[4],
2247 const_args[5], true);
2248 break;
2249 case INDEX_op_sub2_i64:
2250 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2251 args[5], const_args[4], const_args[5], true);
2252 break;
2253
2254 case INDEX_op_muluh_i64:
2255 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2256 break;
2257 case INDEX_op_mulsh_i64:
2258 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2259 break;
2260
2261 case INDEX_op_mb:
2262 tcg_out_mb(s, a0);
2263 break;
2264
2265 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2266 case INDEX_op_mov_i64:
2267 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2268 case INDEX_op_movi_i64:
2269 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2270 default:
2271 g_assert_not_reached();
2272 }
2273
2274 #undef REG0
2275 }
2276
2277 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2278 unsigned vecl, unsigned vece,
2279 const TCGArg *args, const int *const_args)
2280 {
2281 static const AArch64Insn cmp_insn[16] = {
2282 [TCG_COND_EQ] = I3616_CMEQ,
2283 [TCG_COND_GT] = I3616_CMGT,
2284 [TCG_COND_GE] = I3616_CMGE,
2285 [TCG_COND_GTU] = I3616_CMHI,
2286 [TCG_COND_GEU] = I3616_CMHS,
2287 };
2288 static const AArch64Insn cmp0_insn[16] = {
2289 [TCG_COND_EQ] = I3617_CMEQ0,
2290 [TCG_COND_GT] = I3617_CMGT0,
2291 [TCG_COND_GE] = I3617_CMGE0,
2292 [TCG_COND_LT] = I3617_CMLT0,
2293 [TCG_COND_LE] = I3617_CMLE0,
2294 };
2295
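/* vecl is 0 for TCG_TYPE_V64 and 1 for TCG_TYPE_V128, so it doubles as
   the AdvSIMD "Q" (128-bit) bit. */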
2296 TCGType type = vecl + TCG_TYPE_V64;
2297 unsigned is_q = vecl;
2298 TCGArg a0, a1, a2, a3;
2299 int cmode, imm8;
2300
2301 a0 = args[0];
2302 a1 = args[1];
2303 a2 = args[2];
2304
2305 switch (opc) {
2306 case INDEX_op_ld_vec:
2307 tcg_out_ld(s, type, a0, a1, a2);
2308 break;
2309 case INDEX_op_st_vec:
2310 tcg_out_st(s, type, a0, a1, a2);
2311 break;
2312 case INDEX_op_dupm_vec:
2313 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2314 break;
2315 case INDEX_op_add_vec:
2316 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2317 break;
2318 case INDEX_op_sub_vec:
2319 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2320 break;
2321 case INDEX_op_mul_vec:
2322 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2323 break;
2324 case INDEX_op_neg_vec:
2325 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2326 break;
2327 case INDEX_op_abs_vec:
2328 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2329 break;
2330 case INDEX_op_and_vec:
2331 if (const_args[2]) {
2332 is_shimm1632(~a2, &cmode, &imm8);
2333 if (a0 == a1) {
2334 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2335 return;
2336 }
2337 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2338 a2 = a0;
2339 }
2340 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2341 break;
2342 case INDEX_op_or_vec:
2343 if (const_args[2]) {
2344 is_shimm1632(a2, &cmode, &imm8);
2345 if (a0 == a1) {
2346 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2347 return;
2348 }
2349 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2350 a2 = a0;
2351 }
2352 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2353 break;
2354 case INDEX_op_andc_vec:
2355 if (const_args[2]) {
2356 is_shimm1632(a2, &cmode, &imm8);
2357 if (a0 == a1) {
2358 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2359 return;
2360 }
2361 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2362 a2 = a0;
2363 }
2364 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2365 break;
2366 case INDEX_op_orc_vec:
2367 if (const_args[2]) {
2368 is_shimm1632(~a2, &cmode, &imm8);
2369 if (a0 == a1) {
2370 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2371 return;
2372 }
2373 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2374 a2 = a0;
2375 }
2376 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2377 break;
2378 case INDEX_op_xor_vec:
2379 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2380 break;
2381 case INDEX_op_ssadd_vec:
2382 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2383 break;
2384 case INDEX_op_sssub_vec:
2385 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2386 break;
2387 case INDEX_op_usadd_vec:
2388 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2389 break;
2390 case INDEX_op_ussub_vec:
2391 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2392 break;
2393 case INDEX_op_smax_vec:
2394 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2395 break;
2396 case INDEX_op_smin_vec:
2397 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2398 break;
2399 case INDEX_op_umax_vec:
2400 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2401 break;
2402 case INDEX_op_umin_vec:
2403 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2404 break;
2405 case INDEX_op_not_vec:
2406 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2407 break;
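/* The AdvSIMD shift-by-immediate encoding folds the element size into
   the immediate: left shifts encode (8 << vece) + shift, right shifts
   encode (16 << vece) - shift, hence the arithmetic below. */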
2408 case INDEX_op_shli_vec:
2409 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2410 break;
2411 case INDEX_op_shri_vec:
2412 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2413 break;
2414 case INDEX_op_sari_vec:
2415 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2416 break;
2417 case INDEX_op_aa64_sli_vec:
2418 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2419 break;
2420 case INDEX_op_shlv_vec:
2421 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2422 break;
2423 case INDEX_op_aa64_sshl_vec:
2424 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2425 break;
2426 case INDEX_op_cmp_vec:
2427 {
2428 TCGCond cond = args[3];
2429 AArch64Insn insn;
2430
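/* The "wZ" constraint only admits the constant zero, so a constant
   comparison is a comparison against 0: NE becomes a test for nonzero
   via CMTST, and other conditions use the CM*0 forms when available. */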
2431 if (cond == TCG_COND_NE) {
2432 if (const_args[2]) {
2433 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2434 } else {
2435 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2436 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2437 }
2438 } else {
2439 if (const_args[2]) {
2440 insn = cmp0_insn[cond];
2441 if (insn) {
2442 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2443 break;
2444 }
2445 tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2446 a2 = TCG_VEC_TMP;
2447 }
2448 insn = cmp_insn[cond];
2449 if (insn == 0) {
2450 TCGArg t;
2451 t = a1, a1 = a2, a2 = t;
2452 cond = tcg_swap_cond(cond);
2453 insn = cmp_insn[cond];
2454 tcg_debug_assert(insn != 0);
2455 }
2456 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2457 }
2458 }
2459 break;
2460
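/* BSL, BIT and BIF compute the same bitwise select but differ in which
   operand must overlap the destination; pick whichever form matches an
   input that a0 already aliases, else move and use BSL. */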
2461 case INDEX_op_bitsel_vec:
2462 a3 = args[3];
2463 if (a0 == a3) {
2464 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2465 } else if (a0 == a2) {
2466 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2467 } else {
2468 if (a0 != a1) {
2469 tcg_out_mov(s, type, a0, a1);
2470 }
2471 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2472 }
2473 break;
2474
2475 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
2476 case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
2477 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
2478 default:
2479 g_assert_not_reached();
2480 }
2481 }
2482
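/* Report vector opcode support: 1 means emitted directly by
   tcg_out_vec_op, -1 means supported via expansion in
   tcg_expand_vec_op, 0 means unsupported. */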
2483 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2484 {
2485 switch (opc) {
2486 case INDEX_op_add_vec:
2487 case INDEX_op_sub_vec:
2488 case INDEX_op_and_vec:
2489 case INDEX_op_or_vec:
2490 case INDEX_op_xor_vec:
2491 case INDEX_op_andc_vec:
2492 case INDEX_op_orc_vec:
2493 case INDEX_op_neg_vec:
2494 case INDEX_op_abs_vec:
2495 case INDEX_op_not_vec:
2496 case INDEX_op_cmp_vec:
2497 case INDEX_op_shli_vec:
2498 case INDEX_op_shri_vec:
2499 case INDEX_op_sari_vec:
2500 case INDEX_op_ssadd_vec:
2501 case INDEX_op_sssub_vec:
2502 case INDEX_op_usadd_vec:
2503 case INDEX_op_ussub_vec:
2504 case INDEX_op_shlv_vec:
2505 case INDEX_op_bitsel_vec:
2506 return 1;
2507 case INDEX_op_rotli_vec:
2508 case INDEX_op_shrv_vec:
2509 case INDEX_op_sarv_vec:
2510 case INDEX_op_rotlv_vec:
2511 case INDEX_op_rotrv_vec:
2512 return -1;
2513 case INDEX_op_mul_vec:
2514 case INDEX_op_smax_vec:
2515 case INDEX_op_smin_vec:
2516 case INDEX_op_umax_vec:
2517 case INDEX_op_umin_vec:
2518 return vece < MO_64;
2519
2520 default:
2521 return 0;
2522 }
2523 }
2524
2525 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2526 TCGArg a0, ...)
2527 {
2528 va_list va;
2529 TCGv_vec v0, v1, v2, t1, t2;
2530 TCGArg a2;
2531
2532 va_start(va, a0);
2533 v0 = temp_tcgv_vec(arg_temp(a0));
2534 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2535 a2 = va_arg(va, TCGArg);
2536 v2 = temp_tcgv_vec(arg_temp(a2));
2537
2538 switch (opc) {
2539 case INDEX_op_rotli_vec:
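/* Rotate left by a2: shift the source right by (element size - a2)
   into a temp, then use SLI (shift left and insert) to merge in the
   bits of the source shifted left by a2. */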
2540 t1 = tcg_temp_new_vec(type);
2541 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2542 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2543 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2544 tcg_temp_free_vec(t1);
2545 break;
2546
2547 case INDEX_op_shrv_vec:
2548 case INDEX_op_sarv_vec:
2549 /* Right shifts are negative left shifts for AArch64. */
2550 t1 = tcg_temp_new_vec(type);
2551 tcg_gen_neg_vec(vece, t1, v2);
2552 opc = (opc == INDEX_op_shrv_vec
2553 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2554 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2555 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2556 tcg_temp_free_vec(t1);
2557 break;
2558
2559 case INDEX_op_rotlv_vec:
2560 t1 = tcg_temp_new_vec(type);
2561 tcg_gen_dupi_vec(vece, t1, 8 << vece);
2562 tcg_gen_sub_vec(vece, t1, v2, t1);
2563 /* Right shifts are negative left shifts for AArch64. */
2564 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2565 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2566 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2567 tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2568 tcg_gen_or_vec(vece, v0, v0, t1);
2569 tcg_temp_free_vec(t1);
2570 break;
2571
2572 case INDEX_op_rotrv_vec:
2573 t1 = tcg_temp_new_vec(type);
2574 t2 = tcg_temp_new_vec(type);
2575 tcg_gen_neg_vec(vece, t1, v2);
2576 tcg_gen_dupi_vec(vece, t2, 8 << vece);
2577 tcg_gen_add_vec(vece, t2, t1, t2);
2578 /* Right shifts are negative left shifts for AArch64. */
2579 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2580 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2581 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2582 tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2583 tcg_gen_or_vec(vece, v0, t1, t2);
2584 tcg_temp_free_vec(t1);
2585 tcg_temp_free_vec(t2);
2586 break;
2587
2588 default:
2589 g_assert_not_reached();
2590 }
2591
2592 va_end(va);
2593 }
2594
2595 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2596 {
2597 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2598 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2599 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2600 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2601 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2602 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2603 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2604 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2605 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2606 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2607 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2608 static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } };
2609 static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
2610 static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
2611 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2612 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2613 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2614 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2615 static const TCGTargetOpDef r_r_rAL
2616 = { .args_ct_str = { "r", "r", "rAL" } };
2617 static const TCGTargetOpDef dep
2618 = { .args_ct_str = { "r", "0", "rZ" } };
2619 static const TCGTargetOpDef ext2
2620 = { .args_ct_str = { "r", "rZ", "rZ" } };
2621 static const TCGTargetOpDef movc
2622 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2623 static const TCGTargetOpDef add2
2624 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2625 static const TCGTargetOpDef w_w_w_w
2626 = { .args_ct_str = { "w", "w", "w", "w" } };
2627
2628 switch (op) {
2629 case INDEX_op_goto_ptr:
2630 return &r;
2631
2632 case INDEX_op_ld8u_i32:
2633 case INDEX_op_ld8s_i32:
2634 case INDEX_op_ld16u_i32:
2635 case INDEX_op_ld16s_i32:
2636 case INDEX_op_ld_i32:
2637 case INDEX_op_ld8u_i64:
2638 case INDEX_op_ld8s_i64:
2639 case INDEX_op_ld16u_i64:
2640 case INDEX_op_ld16s_i64:
2641 case INDEX_op_ld32u_i64:
2642 case INDEX_op_ld32s_i64:
2643 case INDEX_op_ld_i64:
2644 case INDEX_op_neg_i32:
2645 case INDEX_op_neg_i64:
2646 case INDEX_op_not_i32:
2647 case INDEX_op_not_i64:
2648 case INDEX_op_bswap16_i32:
2649 case INDEX_op_bswap32_i32:
2650 case INDEX_op_bswap16_i64:
2651 case INDEX_op_bswap32_i64:
2652 case INDEX_op_bswap64_i64:
2653 case INDEX_op_ext8s_i32:
2654 case INDEX_op_ext16s_i32:
2655 case INDEX_op_ext8u_i32:
2656 case INDEX_op_ext16u_i32:
2657 case INDEX_op_ext8s_i64:
2658 case INDEX_op_ext16s_i64:
2659 case INDEX_op_ext32s_i64:
2660 case INDEX_op_ext8u_i64:
2661 case INDEX_op_ext16u_i64:
2662 case INDEX_op_ext32u_i64:
2663 case INDEX_op_ext_i32_i64:
2664 case INDEX_op_extu_i32_i64:
2665 case INDEX_op_extract_i32:
2666 case INDEX_op_extract_i64:
2667 case INDEX_op_sextract_i32:
2668 case INDEX_op_sextract_i64:
2669 return &r_r;
2670
2671 case INDEX_op_st8_i32:
2672 case INDEX_op_st16_i32:
2673 case INDEX_op_st_i32:
2674 case INDEX_op_st8_i64:
2675 case INDEX_op_st16_i64:
2676 case INDEX_op_st32_i64:
2677 case INDEX_op_st_i64:
2678 return &rZ_r;
2679
2680 case INDEX_op_add_i32:
2681 case INDEX_op_add_i64:
2682 case INDEX_op_sub_i32:
2683 case INDEX_op_sub_i64:
2684 case INDEX_op_setcond_i32:
2685 case INDEX_op_setcond_i64:
2686 return &r_r_rA;
2687
2688 case INDEX_op_mul_i32:
2689 case INDEX_op_mul_i64:
2690 case INDEX_op_div_i32:
2691 case INDEX_op_div_i64:
2692 case INDEX_op_divu_i32:
2693 case INDEX_op_divu_i64:
2694 case INDEX_op_rem_i32:
2695 case INDEX_op_rem_i64:
2696 case INDEX_op_remu_i32:
2697 case INDEX_op_remu_i64:
2698 case INDEX_op_muluh_i64:
2699 case INDEX_op_mulsh_i64:
2700 return &r_r_r;
2701
2702 case INDEX_op_and_i32:
2703 case INDEX_op_and_i64:
2704 case INDEX_op_or_i32:
2705 case INDEX_op_or_i64:
2706 case INDEX_op_xor_i32:
2707 case INDEX_op_xor_i64:
2708 case INDEX_op_andc_i32:
2709 case INDEX_op_andc_i64:
2710 case INDEX_op_orc_i32:
2711 case INDEX_op_orc_i64:
2712 case INDEX_op_eqv_i32:
2713 case INDEX_op_eqv_i64:
2714 return &r_r_rL;
2715
2716 case INDEX_op_shl_i32:
2717 case INDEX_op_shr_i32:
2718 case INDEX_op_sar_i32:
2719 case INDEX_op_rotl_i32:
2720 case INDEX_op_rotr_i32:
2721 case INDEX_op_shl_i64:
2722 case INDEX_op_shr_i64:
2723 case INDEX_op_sar_i64:
2724 case INDEX_op_rotl_i64:
2725 case INDEX_op_rotr_i64:
2726 return &r_r_ri;
2727
2728 case INDEX_op_clz_i32:
2729 case INDEX_op_ctz_i32:
2730 case INDEX_op_clz_i64:
2731 case INDEX_op_ctz_i64:
2732 return &r_r_rAL;
2733
2734 case INDEX_op_brcond_i32:
2735 case INDEX_op_brcond_i64:
2736 return &r_rA;
2737
2738 case INDEX_op_movcond_i32:
2739 case INDEX_op_movcond_i64:
2740 return &movc;
2741
2742 case INDEX_op_qemu_ld_i32:
2743 case INDEX_op_qemu_ld_i64:
2744 return &r_l;
2745 case INDEX_op_qemu_st_i32:
2746 case INDEX_op_qemu_st_i64:
2747 return &lZ_l;
2748
2749 case INDEX_op_deposit_i32:
2750 case INDEX_op_deposit_i64:
2751 return &dep;
2752
2753 case INDEX_op_extract2_i32:
2754 case INDEX_op_extract2_i64:
2755 return &ext2;
2756
2757 case INDEX_op_add2_i32:
2758 case INDEX_op_add2_i64:
2759 case INDEX_op_sub2_i32:
2760 case INDEX_op_sub2_i64:
2761 return &add2;
2762
2763 case INDEX_op_add_vec:
2764 case INDEX_op_sub_vec:
2765 case INDEX_op_mul_vec:
2766 case INDEX_op_xor_vec:
2767 case INDEX_op_ssadd_vec:
2768 case INDEX_op_sssub_vec:
2769 case INDEX_op_usadd_vec:
2770 case INDEX_op_ussub_vec:
2771 case INDEX_op_smax_vec:
2772 case INDEX_op_smin_vec:
2773 case INDEX_op_umax_vec:
2774 case INDEX_op_umin_vec:
2775 case INDEX_op_shlv_vec:
2776 case INDEX_op_shrv_vec:
2777 case INDEX_op_sarv_vec:
2778 case INDEX_op_aa64_sshl_vec:
2779 return &w_w_w;
2780 case INDEX_op_not_vec:
2781 case INDEX_op_neg_vec:
2782 case INDEX_op_abs_vec:
2783 case INDEX_op_shli_vec:
2784 case INDEX_op_shri_vec:
2785 case INDEX_op_sari_vec:
2786 return &w_w;
2787 case INDEX_op_ld_vec:
2788 case INDEX_op_st_vec:
2789 case INDEX_op_dupm_vec:
2790 return &w_r;
2791 case INDEX_op_dup_vec:
2792 return &w_wr;
2793 case INDEX_op_or_vec:
2794 case INDEX_op_andc_vec:
2795 return &w_w_wO;
2796 case INDEX_op_and_vec:
2797 case INDEX_op_orc_vec:
2798 return &w_w_wN;
2799 case INDEX_op_cmp_vec:
2800 return &w_w_wZ;
2801 case INDEX_op_bitsel_vec:
2802 return &w_w_w_w;
2803 case INDEX_op_aa64_sli_vec:
2804 return &w_0_w;
2805
2806 default:
2807 return NULL;
2808 }
2809 }
2810
2811 static void tcg_target_init(TCGContext *s)
2812 {
2813 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2814 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2815 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2816 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2817
2818 tcg_target_call_clobber_regs = -1ull;
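/* Per AAPCS64, X19..X28, the frame pointer X29, and the low halves of
   V8..V15 are preserved across calls; clear them from the clobber set. */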
2819 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2820 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2821 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2822 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2823 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2824 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2825 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2826 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2827 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2828 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2829 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2830 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2831 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2832 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2833 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2834 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2835 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2836 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2837 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2838
2839 s->reserved_regs = 0;
2840 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2841 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2842 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2843 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2844 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2845 }
2846
2847 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2848 #define PUSH_SIZE ((30 - 19 + 1) * 8)
2849
2850 #define FRAME_SIZE \
2851 ((PUSH_SIZE \
2852 + TCG_STATIC_CALL_ARGS_SIZE \
2853 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2854 + TCG_TARGET_STACK_ALIGN - 1) \
2855 & ~(TCG_TARGET_STACK_ALIGN - 1))
2856
2857 /* We're expecting a 2-byte uleb128 encoded value. */
2858 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2859
2860 /* We're expecting to use a single ADDI insn. */
2861 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2862
2863 static void tcg_target_qemu_prologue(TCGContext *s)
2864 {
2865 TCGReg r;
2866
2867 /* Push (FP, LR) and allocate space for all saved registers. */
2868 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2869 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2870
2871 /* Set up frame pointer for canonical unwinding. */
2872 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2873
2874 /* Store callee-preserved regs x19..x28. */
2875 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2876 int ofs = (r - TCG_REG_X19 + 2) * 8;
2877 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2878 }
2879
2880 /* Make stack space for TCG locals. */
2881 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2882 FRAME_SIZE - PUSH_SIZE);
2883
2884 /* Inform TCG about how to find TCG locals with register, offset, size. */
2885 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2886 CPU_TEMP_BUF_NLONGS * sizeof(long));
2887
2888 #if !defined(CONFIG_SOFTMMU)
2889 if (USE_GUEST_BASE) {
2890 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2891 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2892 }
2893 #endif
2894
2895 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2896 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2897
2898 /*
2899 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2900 * and fall through to the rest of the epilogue.
2901 */
2902 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2903 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2904
2905 /* TB epilogue */
2906 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2907
2908 /* Remove TCG locals stack space. */
2909 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2910 FRAME_SIZE - PUSH_SIZE);
2911
2912 /* Restore registers x19..x28. */
2913 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2914 int ofs = (r - TCG_REG_X19 + 2) * 8;
2915 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2916 }
2917
2918 /* Pop (FP, LR), restore SP to previous frame. */
2919 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2920 TCG_REG_SP, PUSH_SIZE, 0, 1);
2921 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2922 }
2923
2924 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2925 {
2926 int i;
2927 for (i = 0; i < count; ++i) {
2928 p[i] = NOP;
2929 }
2930 }
2931
2932 typedef struct {
2933 DebugFrameHeader h;
2934 uint8_t fde_def_cfa[4];
2935 uint8_t fde_reg_ofs[24];
2936 } DebugFrame;
2937
2938 #define ELF_HOST_MACHINE EM_AARCH64
2939
2940 static const DebugFrame debug_frame = {
2941 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2942 .h.cie.id = -1,
2943 .h.cie.version = 1,
2944 .h.cie.code_align = 1,
2945 .h.cie.data_align = 0x78, /* sleb128 -8 */
2946 .h.cie.return_column = TCG_REG_LR,
2947
2948 /* Total FDE size does not include the "len" member. */
2949 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2950
2951 .fde_def_cfa = {
2952 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2953 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2954 (FRAME_SIZE >> 7)
2955 },
2956 .fde_reg_ofs = {
2957 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2958 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2959 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2960 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2961 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2962 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2963 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2964 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2965 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2966 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2967 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2968 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2969 }
2970 };
2971
2972 void tcg_register_jit(const void *buf, size_t buf_size)
2973 {
2974 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2975 }