1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-pool.inc.c"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType in setting of the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
27
28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
32 };
33 #endif /* CONFIG_DEBUG_TCG */
34
35 static const int tcg_target_reg_alloc_order[] = {
36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38 TCG_REG_X28, /* we will reserve this for guest_base if configured */
39
40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42 TCG_REG_X16, TCG_REG_X17,
43
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47 /* X18 reserved by system */
48 /* X19 reserved for AREG0 */
49 /* X29 reserved as fp */
50 /* X30 reserved as temporary */
51
52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54 /* V8 - V15 are call-saved, and skipped. */
55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
59 };
60
61 static const int tcg_target_call_iarg_regs[8] = {
62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
64 };
65 static const int tcg_target_call_oarg_regs[1] = {
66 TCG_REG_X0
67 };
68
69 #define TCG_REG_TMP TCG_REG_X30
70 #define TCG_VEC_TMP TCG_REG_V31
71
72 #ifndef CONFIG_SOFTMMU
73 /* Note that XZR cannot be encoded in the address base register slot,
74 as that actually encodes SP. So if we need to zero-extend the guest
75 address, via the address index register slot, we need to load even
76 a zero guest base into a register. */
77 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
78 #define TCG_REG_GUEST_BASE TCG_REG_X28
79 #endif
80
81 static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
82 {
83 ptrdiff_t offset = target - code_ptr;
84 if (offset == sextract64(offset, 0, 26)) {
85 /* read instruction, mask away previous PC_REL26 parameter contents,
86 set the proper offset, then write back the instruction. */
87 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
88 return true;
89 }
90 return false;
91 }
92
93 static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
94 {
95 ptrdiff_t offset = target - code_ptr;
96 if (offset == sextract64(offset, 0, 19)) {
97 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
98 return true;
99 }
100 return false;
101 }
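/* Range note: with 4-byte tcg_insn_units, the 26-bit offset above reaches
   roughly +-128 MiB from the branch and the 19-bit offset roughly +-1 MiB,
   since both are counted in instructions rather than bytes. */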
102
103 static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
104 intptr_t value, intptr_t addend)
105 {
106 tcg_debug_assert(addend == 0);
107 switch (type) {
108 case R_AARCH64_JUMP26:
109 case R_AARCH64_CALL26:
110 return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
111 case R_AARCH64_CONDBR19:
112 return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
113 default:
114 g_assert_not_reached();
115 }
116 }
117
118 #define TCG_CT_CONST_AIMM 0x100
119 #define TCG_CT_CONST_LIMM 0x200
120 #define TCG_CT_CONST_ZERO 0x400
121 #define TCG_CT_CONST_MONE 0x800
122 #define TCG_CT_CONST_ORRI 0x1000
123 #define TCG_CT_CONST_ANDI 0x2000
124
125 /* parse target specific constraints */
126 static const char *target_parse_constraint(TCGArgConstraint *ct,
127 const char *ct_str, TCGType type)
128 {
129 switch (*ct_str++) {
130 case 'r': /* general registers */
131 ct->ct |= TCG_CT_REG;
132 ct->u.regs |= 0xffffffffu;
133 break;
134 case 'w': /* advsimd registers */
135 ct->ct |= TCG_CT_REG;
136 ct->u.regs |= 0xffffffff00000000ull;
137 break;
138 case 'l': /* qemu_ld / qemu_st address, data_reg */
139 ct->ct |= TCG_CT_REG;
140 ct->u.regs = 0xffffffffu;
141 #ifdef CONFIG_SOFTMMU
142 /* x0 and x1 will be overwritten when reading the tlb entry,
143 and x2 and x3 for helper args; better to avoid using them. */
144 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
145 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
146 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
147 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
148 #endif
149 break;
150 case 'A': /* Valid for arithmetic immediate (positive or negative). */
151 ct->ct |= TCG_CT_CONST_AIMM;
152 break;
153 case 'L': /* Valid for logical immediate. */
154 ct->ct |= TCG_CT_CONST_LIMM;
155 break;
156 case 'M': /* minus one */
157 ct->ct |= TCG_CT_CONST_MONE;
158 break;
159 case 'O': /* vector orr/bic immediate */
160 ct->ct |= TCG_CT_CONST_ORRI;
161 break;
162 case 'N': /* vector orr/bic immediate, inverted */
163 ct->ct |= TCG_CT_CONST_ANDI;
164 break;
165 case 'Z': /* zero */
166 ct->ct |= TCG_CT_CONST_ZERO;
167 break;
168 default:
169 return NULL;
170 }
171 return ct_str;
172 }
173
174 /* Match a constant valid for addition (12-bit, optionally shifted). */
175 static inline bool is_aimm(uint64_t val)
176 {
177 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
178 }
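/* E.g. 0x123 and 0x123000 are both valid aimm values (the latter via
   LSL #12), while 0x123456 is not, since it has bits set in both the
   low and the shifted 12-bit fields. */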
179
180 /* Match a constant valid for logical operations. */
181 static inline bool is_limm(uint64_t val)
182 {
183 /* Taking a simplified view of the logical immediates for now, ignoring
184 the replication that can happen across the field. Match bit patterns
185 of the forms
186 0....01....1
187 0..01..10..0
188 and their inverses. */
189
190 /* Make things easier below, by testing the form with msb clear. */
191 if ((int64_t)val < 0) {
192 val = ~val;
193 }
194 if (val == 0) {
195 return false;
196 }
197 val += val & -val;
198 return (val & (val - 1)) == 0;
199 }
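/* Worked example of the test above: 0x0ff0 has its msb clear; adding its
   lowest set bit (0x10) yields 0x1000, a power of two, so it matches.
   0x0f0f fails: 0x0f0f + 1 = 0x0f10, which still has several bits set. */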
200
201 /* Return true if v16 is a valid 16-bit shifted immediate. */
202 static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
203 {
204 if (v16 == (v16 & 0xff)) {
205 *cmode = 0x8;
206 *imm8 = v16 & 0xff;
207 return true;
208 } else if (v16 == (v16 & 0xff00)) {
209 *cmode = 0xa;
210 *imm8 = v16 >> 8;
211 return true;
212 }
213 return false;
214 }
215
216 /* Return true if v32 is a valid 32-bit shifted immediate. */
217 static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
218 {
219 if (v32 == (v32 & 0xff)) {
220 *cmode = 0x0;
221 *imm8 = v32 & 0xff;
222 return true;
223 } else if (v32 == (v32 & 0xff00)) {
224 *cmode = 0x2;
225 *imm8 = (v32 >> 8) & 0xff;
226 return true;
227 } else if (v32 == (v32 & 0xff0000)) {
228 *cmode = 0x4;
229 *imm8 = (v32 >> 16) & 0xff;
230 return true;
231 } else if (v32 == (v32 & 0xff000000)) {
232 *cmode = 0x6;
233 *imm8 = v32 >> 24;
234 return true;
235 }
236 return false;
237 }
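/* E.g. v32 == 0x0000ab00 matches with cmode 0x2 and imm8 0xab,
   i.e. the byte shifted left by 8. */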
238
239 /* Return true if v32 is a valid 32-bit shifting ones immediate. */
240 static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
241 {
242 if ((v32 & 0xffff00ff) == 0xff) {
243 *cmode = 0xc;
244 *imm8 = (v32 >> 8) & 0xff;
245 return true;
246 } else if ((v32 & 0xff00ffff) == 0xffff) {
247 *cmode = 0xd;
248 *imm8 = (v32 >> 16) & 0xff;
249 return true;
250 }
251 return false;
252 }
253
254 /* Return true if v32 is a valid float32 immediate. */
255 static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
256 {
257 if (extract32(v32, 0, 19) == 0
258 && (extract32(v32, 25, 6) == 0x20
259 || extract32(v32, 25, 6) == 0x1f)) {
260 *cmode = 0xf;
261 *imm8 = (extract32(v32, 31, 1) << 7)
262 | (extract32(v32, 25, 1) << 6)
263 | extract32(v32, 19, 6);
264 return true;
265 }
266 return false;
267 }
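/* E.g. 1.0f == 0x3f800000 qualifies: the low 19 bits are zero and
   bits [30:25] are 0x1f, giving imm8 == 0x70. */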
268
269 /* Return true if v64 is a valid float64 immediate. */
270 static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
271 {
272 if (extract64(v64, 0, 48) == 0
273 && (extract64(v64, 54, 9) == 0x100
274 || extract64(v64, 54, 9) == 0x0ff)) {
275 *cmode = 0xf;
276 *imm8 = (extract64(v64, 63, 1) << 7)
277 | (extract64(v64, 54, 1) << 6)
278 | extract64(v64, 48, 6);
279 return true;
280 }
281 return false;
282 }
283
284 /*
285 * Return non-zero if v32 can be formed by MOVI+ORR.
286 * Place the parameters for MOVI in (cmode, imm8).
287 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
288 */
289 static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
290 {
291 int i;
292
293 for (i = 6; i > 0; i -= 2) {
294 /* Mask out one byte we can add with ORR. */
295 uint32_t tmp = v32 & ~(0xffu << (i * 4));
296 if (is_shimm32(tmp, cmode, imm8) ||
297 is_soimm32(tmp, cmode, imm8)) {
298 break;
299 }
300 }
301 return i;
302 }
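/* E.g. v32 == 0x00ab00cd: masking out the byte at bits [23:16] (i == 4)
   leaves 0x000000cd, which is a valid MOVI (cmode 0x0, imm8 0xcd);
   the function returns 4, and the caller ORRs in 0xab with cmode 4. */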
303
304 /* Return true if V is a valid 16-bit or 32-bit shifted immediate. */
305 static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
306 {
307 if (v32 == deposit32(v32, 16, 16, v32)) {
308 return is_shimm16(v32, cmode, imm8);
309 } else {
310 return is_shimm32(v32, cmode, imm8);
311 }
312 }
313
314 static int tcg_target_const_match(tcg_target_long val, TCGType type,
315 const TCGArgConstraint *arg_ct)
316 {
317 int ct = arg_ct->ct;
318
319 if (ct & TCG_CT_CONST) {
320 return 1;
321 }
322 if (type == TCG_TYPE_I32) {
323 val = (int32_t)val;
324 }
325 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
326 return 1;
327 }
328 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
329 return 1;
330 }
331 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
332 return 1;
333 }
334 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
335 return 1;
336 }
337
338 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
339 case 0:
340 break;
341 case TCG_CT_CONST_ANDI:
342 val = ~val;
343 /* fallthru */
344 case TCG_CT_CONST_ORRI:
345 if (val == deposit64(val, 32, 32, val)) {
346 int cmode, imm8;
347 return is_shimm1632(val, &cmode, &imm8);
348 }
349 break;
350 default:
351 /* Both bits should not be set for the same insn. */
352 g_assert_not_reached();
353 }
354
355 return 0;
356 }
357
358 enum aarch64_cond_code {
359 COND_EQ = 0x0,
360 COND_NE = 0x1,
361 COND_CS = 0x2, /* Unsigned greater or equal */
362 COND_HS = COND_CS, /* ALIAS greater or equal */
363 COND_CC = 0x3, /* Unsigned less than */
364 COND_LO = COND_CC, /* ALIAS Lower */
365 COND_MI = 0x4, /* Negative */
366 COND_PL = 0x5, /* Zero or greater */
367 COND_VS = 0x6, /* Overflow */
368 COND_VC = 0x7, /* No overflow */
369 COND_HI = 0x8, /* Unsigned greater than */
370 COND_LS = 0x9, /* Unsigned less or equal */
371 COND_GE = 0xa,
372 COND_LT = 0xb,
373 COND_GT = 0xc,
374 COND_LE = 0xd,
375 COND_AL = 0xe,
376 COND_NV = 0xf, /* behaves like COND_AL here */
377 };
378
379 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
380 [TCG_COND_EQ] = COND_EQ,
381 [TCG_COND_NE] = COND_NE,
382 [TCG_COND_LT] = COND_LT,
383 [TCG_COND_GE] = COND_GE,
384 [TCG_COND_LE] = COND_LE,
385 [TCG_COND_GT] = COND_GT,
386 /* unsigned */
387 [TCG_COND_LTU] = COND_LO,
388 [TCG_COND_GTU] = COND_HI,
389 [TCG_COND_GEU] = COND_HS,
390 [TCG_COND_LEU] = COND_LS,
391 };
392
393 typedef enum {
394 LDST_ST = 0, /* store */
395 LDST_LD = 1, /* load */
396 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
397 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
398 } AArch64LdstType;
399
400 /* We encode the format of the insn into the beginning of the name, so that
401 we can have the preprocessor help "typecheck" the insn vs the output
402 function. Arm didn't provide us with nice names for the formats, so we
403 use the section number of the architecture reference manual in which the
404 instruction group is described. */
405 typedef enum {
406 /* Compare and branch (immediate). */
407 I3201_CBZ = 0x34000000,
408 I3201_CBNZ = 0x35000000,
409
410 /* Conditional branch (immediate). */
411 I3202_B_C = 0x54000000,
412
413 /* Unconditional branch (immediate). */
414 I3206_B = 0x14000000,
415 I3206_BL = 0x94000000,
416
417 /* Unconditional branch (register). */
418 I3207_BR = 0xd61f0000,
419 I3207_BLR = 0xd63f0000,
420 I3207_RET = 0xd65f0000,
421
422 /* AdvSIMD load/store single structure. */
423 I3303_LD1R = 0x0d40c000,
424
425 /* Load literal for loading the address at pc-relative offset */
426 I3305_LDR = 0x58000000,
427 I3305_LDR_v64 = 0x5c000000,
428 I3305_LDR_v128 = 0x9c000000,
429
430 /* Load/store register. Described here as 3.3.12, but the helper
431 that emits them can transform to 3.3.10 or 3.3.13. */
432 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
433 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
434 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
435 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
436
437 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
438 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
439 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
440 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
441
442 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
443 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
444
445 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
446 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
447 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
448
449 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
450 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
451
452 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
453 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
454
455 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
456 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
457
458 I3312_TO_I3310 = 0x00200800,
459 I3312_TO_I3313 = 0x01000000,
460
461 /* Load/store register pair instructions. */
462 I3314_LDP = 0x28400000,
463 I3314_STP = 0x28000000,
464
465 /* Add/subtract immediate instructions. */
466 I3401_ADDI = 0x11000000,
467 I3401_ADDSI = 0x31000000,
468 I3401_SUBI = 0x51000000,
469 I3401_SUBSI = 0x71000000,
470
471 /* Bitfield instructions. */
472 I3402_BFM = 0x33000000,
473 I3402_SBFM = 0x13000000,
474 I3402_UBFM = 0x53000000,
475
476 /* Extract instruction. */
477 I3403_EXTR = 0x13800000,
478
479 /* Logical immediate instructions. */
480 I3404_ANDI = 0x12000000,
481 I3404_ORRI = 0x32000000,
482 I3404_EORI = 0x52000000,
483
484 /* Move wide immediate instructions. */
485 I3405_MOVN = 0x12800000,
486 I3405_MOVZ = 0x52800000,
487 I3405_MOVK = 0x72800000,
488
489 /* PC relative addressing instructions. */
490 I3406_ADR = 0x10000000,
491 I3406_ADRP = 0x90000000,
492
493 /* Add/subtract shifted register instructions (without a shift). */
494 I3502_ADD = 0x0b000000,
495 I3502_ADDS = 0x2b000000,
496 I3502_SUB = 0x4b000000,
497 I3502_SUBS = 0x6b000000,
498
499 /* Add/subtract shifted register instructions (with a shift). */
500 I3502S_ADD_LSL = I3502_ADD,
501
502 /* Add/subtract with carry instructions. */
503 I3503_ADC = 0x1a000000,
504 I3503_SBC = 0x5a000000,
505
506 /* Conditional select instructions. */
507 I3506_CSEL = 0x1a800000,
508 I3506_CSINC = 0x1a800400,
509 I3506_CSINV = 0x5a800000,
510 I3506_CSNEG = 0x5a800400,
511
512 /* Data-processing (1 source) instructions. */
513 I3507_CLZ = 0x5ac01000,
514 I3507_RBIT = 0x5ac00000,
515 I3507_REV16 = 0x5ac00400,
516 I3507_REV32 = 0x5ac00800,
517 I3507_REV64 = 0x5ac00c00,
518
519 /* Data-processing (2 source) instructions. */
520 I3508_LSLV = 0x1ac02000,
521 I3508_LSRV = 0x1ac02400,
522 I3508_ASRV = 0x1ac02800,
523 I3508_RORV = 0x1ac02c00,
524 I3508_SMULH = 0x9b407c00,
525 I3508_UMULH = 0x9bc07c00,
526 I3508_UDIV = 0x1ac00800,
527 I3508_SDIV = 0x1ac00c00,
528
529 /* Data-processing (3 source) instructions. */
530 I3509_MADD = 0x1b000000,
531 I3509_MSUB = 0x1b008000,
532
533 /* Logical shifted register instructions (without a shift). */
534 I3510_AND = 0x0a000000,
535 I3510_BIC = 0x0a200000,
536 I3510_ORR = 0x2a000000,
537 I3510_ORN = 0x2a200000,
538 I3510_EOR = 0x4a000000,
539 I3510_EON = 0x4a200000,
540 I3510_ANDS = 0x6a000000,
541
542 /* Logical shifted register instructions (with a shift). */
543 I3502S_AND_LSR = I3510_AND | (1 << 22),
544
545 /* AdvSIMD copy */
546 I3605_DUP = 0x0e000400,
547 I3605_INS = 0x4e001c00,
548 I3605_UMOV = 0x0e003c00,
549
550 /* AdvSIMD modified immediate */
551 I3606_MOVI = 0x0f000400,
552 I3606_MVNI = 0x2f000400,
553 I3606_BIC = 0x2f001400,
554 I3606_ORR = 0x0f001400,
555
556 /* AdvSIMD shift by immediate */
557 I3614_SSHR = 0x0f000400,
558 I3614_SSRA = 0x0f001400,
559 I3614_SHL = 0x0f005400,
560 I3614_USHR = 0x2f000400,
561 I3614_USRA = 0x2f001400,
562
563 /* AdvSIMD three same. */
564 I3616_ADD = 0x0e208400,
565 I3616_AND = 0x0e201c00,
566 I3616_BIC = 0x0e601c00,
567 I3616_BIF = 0x2ee01c00,
568 I3616_BIT = 0x2ea01c00,
569 I3616_BSL = 0x2e601c00,
570 I3616_EOR = 0x2e201c00,
571 I3616_MUL = 0x0e209c00,
572 I3616_ORR = 0x0ea01c00,
573 I3616_ORN = 0x0ee01c00,
574 I3616_SUB = 0x2e208400,
575 I3616_CMGT = 0x0e203400,
576 I3616_CMGE = 0x0e203c00,
577 I3616_CMTST = 0x0e208c00,
578 I3616_CMHI = 0x2e203400,
579 I3616_CMHS = 0x2e203c00,
580 I3616_CMEQ = 0x2e208c00,
581 I3616_SMAX = 0x0e206400,
582 I3616_SMIN = 0x0e206c00,
583 I3616_SSHL = 0x0e204400,
584 I3616_SQADD = 0x0e200c00,
585 I3616_SQSUB = 0x0e202c00,
586 I3616_UMAX = 0x2e206400,
587 I3616_UMIN = 0x2e206c00,
588 I3616_UQADD = 0x2e200c00,
589 I3616_UQSUB = 0x2e202c00,
590 I3616_USHL = 0x2e204400,
591
592 /* AdvSIMD two-reg misc. */
593 I3617_CMGT0 = 0x0e208800,
594 I3617_CMEQ0 = 0x0e209800,
595 I3617_CMLT0 = 0x0e20a800,
596 I3617_CMGE0 = 0x2e208800,
597 I3617_CMLE0 = 0x2e20a800,
598 I3617_NOT = 0x2e205800,
599 I3617_ABS = 0x0e20b800,
600 I3617_NEG = 0x2e20b800,
601
602 /* System instructions. */
603 NOP = 0xd503201f,
604 DMB_ISH = 0xd50338bf,
605 DMB_LD = 0x00000100,
606 DMB_ST = 0x00000200,
607 } AArch64Insn;
608
609 static inline uint32_t tcg_in32(TCGContext *s)
610 {
611 uint32_t v = *(uint32_t *)s->code_ptr;
612 return v;
613 }
614
615 /* Emit an opcode with "type-checking" of the format. */
616 #define tcg_out_insn(S, FMT, OP, ...) \
617 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
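/* E.g. tcg_out_insn(s, 3401, ADDI, ...) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ...), so using an opcode with the
   wrong format fails to compile. */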
618
619 static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
620 TCGReg rt, TCGReg rn, unsigned size)
621 {
622 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
623 }
624
625 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
626 int imm19, TCGReg rt)
627 {
628 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
629 }
630
631 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
632 TCGReg rt, int imm19)
633 {
634 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
635 }
636
637 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
638 TCGCond c, int imm19)
639 {
640 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
641 }
642
643 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
644 {
645 tcg_out32(s, insn | (imm26 & 0x03ffffff));
646 }
647
648 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
649 {
650 tcg_out32(s, insn | rn << 5);
651 }
652
653 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
654 TCGReg r1, TCGReg r2, TCGReg rn,
655 tcg_target_long ofs, bool pre, bool w)
656 {
657 insn |= 1u << 31; /* ext */
658 insn |= pre << 24;
659 insn |= w << 23;
660
661 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
662 insn |= (ofs & (0x7f << 3)) << (15 - 3);
663
664 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
665 }
666
667 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
668 TCGReg rd, TCGReg rn, uint64_t aimm)
669 {
670 if (aimm > 0xfff) {
671 tcg_debug_assert((aimm & 0xfff) == 0);
672 aimm >>= 12;
673 tcg_debug_assert(aimm <= 0xfff);
674 aimm |= 1 << 12; /* apply LSL 12 */
675 }
676 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
677 }
678
679 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
680 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
681 that feed the DecodeBitMasks pseudo function. */
682 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
683 TCGReg rd, TCGReg rn, int n, int immr, int imms)
684 {
685 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
686 | rn << 5 | rd);
687 }
688
689 #define tcg_out_insn_3404 tcg_out_insn_3402
690
691 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
692 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
693 {
694 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
695 | rn << 5 | rd);
696 }
697
698 /* This function is used for the Move (wide immediate) instruction group.
699 Note that SHIFT is a full shift count, not the 2-bit HW field. */
700 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
701 TCGReg rd, uint16_t half, unsigned shift)
702 {
703 tcg_debug_assert((shift & ~0x30) == 0);
704 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
705 }
706
707 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
708 TCGReg rd, int64_t disp)
709 {
710 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
711 }
712
713 /* This function is for 3.5.2 (Add/subtract shifted register), for
714 the rare occasion when we actually want to supply a shift amount. */
715 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
716 TCGType ext, TCGReg rd, TCGReg rn,
717 TCGReg rm, int imm6)
718 {
719 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
720 }
721
722 /* This function is for 3.5.2 (Add/subtract shifted register),
723 and 3.5.10 (Logical shifted register), for the vast majority of cases
724 when we don't want to apply a shift. Thus it can also be used for
725 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
726 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
727 TCGReg rd, TCGReg rn, TCGReg rm)
728 {
729 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
730 }
731
732 #define tcg_out_insn_3503 tcg_out_insn_3502
733 #define tcg_out_insn_3508 tcg_out_insn_3502
734 #define tcg_out_insn_3510 tcg_out_insn_3502
735
736 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
737 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
738 {
739 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
740 | tcg_cond_to_aarch64[c] << 12);
741 }
742
743 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
744 TCGReg rd, TCGReg rn)
745 {
746 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
747 }
748
749 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
750 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
751 {
752 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
753 }
754
755 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
756 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
757 {
758 /* Note that bit 11 set means general register input. Therefore
759 we can handle both register sets with one function. */
760 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
761 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
762 }
763
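/* In the modified-immediate format emitted here, the 8-bit immediate is
   split across the instruction: bits abc land in [18:16] and defgh
   in [9:5]. */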
764 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
765 TCGReg rd, bool op, int cmode, uint8_t imm8)
766 {
767 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
768 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
769 }
770
771 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
772 TCGReg rd, TCGReg rn, unsigned immhb)
773 {
774 tcg_out32(s, insn | q << 30 | immhb << 16
775 | (rn & 0x1f) << 5 | (rd & 0x1f));
776 }
777
778 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
779 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
780 {
781 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
782 | (rn & 0x1f) << 5 | (rd & 0x1f));
783 }
784
785 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
786 unsigned size, TCGReg rd, TCGReg rn)
787 {
788 tcg_out32(s, insn | q << 30 | (size << 22)
789 | (rn & 0x1f) << 5 | (rd & 0x1f));
790 }
791
792 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
793 TCGReg rd, TCGReg base, TCGType ext,
794 TCGReg regoff)
795 {
796 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
797 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
798 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
799 }
800
801 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
802 TCGReg rd, TCGReg rn, intptr_t offset)
803 {
804 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
805 }
806
807 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
808 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
809 {
810 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
811 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
812 | rn << 5 | (rd & 0x1f));
813 }
814
815 /* Register to register move using ORR (shifted register with no shift). */
816 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
817 {
818 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
819 }
820
821 /* Register to register move using ADDI (move to/from SP). */
822 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
823 {
824 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
825 }
826
827 /* This function is used for the Logical (immediate) instruction group.
828 The value of LIMM must satisfy IS_LIMM. See the comment above about
829 only supporting simplified logical immediates. */
830 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
831 TCGReg rd, TCGReg rn, uint64_t limm)
832 {
833 unsigned h, l, r, c;
834
835 tcg_debug_assert(is_limm(limm));
836
837 h = clz64(limm);
838 l = ctz64(limm);
839 if (l == 0) {
840 r = 0; /* form 0....01....1 */
841 c = ctz64(~limm) - 1;
842 if (h == 0) {
843 r = clz64(~limm); /* form 1..10..01..1 */
844 c += r;
845 }
846 } else {
847 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
848 c = r - h - 1;
849 }
850 if (ext == TCG_TYPE_I32) {
851 r &= 31;
852 c &= 31;
853 }
854
855 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
856 }
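/* Worked example: limm == 0x0000000000ff0000 gives h == 40, l == 16,
   hence r == 48 and c == 7, i.e. an 8-bit run of ones rotated into
   bits [23:16] by DecodeBitMasks. */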
857
858 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
859 TCGReg rd, tcg_target_long v64)
860 {
861 bool q = type == TCG_TYPE_V128;
862 int cmode, imm8, i;
863
864 /* Test all bytes equal first. */
865 if (v64 == dup_const(MO_8, v64)) {
866 imm8 = (uint8_t)v64;
867 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
868 return;
869 }
870
871 /*
872 * Test all bytes 0x00 or 0xff second. This can match cases that
873 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
874 */
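/* E.g. v64 == 0x00ff0000ff0000ff sets imm8 bits 0, 3 and 6 (0x49),
   one bit per 0xff byte, counting from the least significant byte. */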
875 for (i = imm8 = 0; i < 8; i++) {
876 uint8_t byte = v64 >> (i * 8);
877 if (byte == 0xff) {
878 imm8 |= 1 << i;
879 } else if (byte != 0) {
880 goto fail_bytes;
881 }
882 }
883 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
884 return;
885 fail_bytes:
886
887 /*
888 * Tests for various replications. For each element width, if we
889 * cannot find an expansion there's no point checking a larger
890 * width because we already know by replication it cannot match.
891 */
892 if (v64 == dup_const(MO_16, v64)) {
893 uint16_t v16 = v64;
894
895 if (is_shimm16(v16, &cmode, &imm8)) {
896 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
897 return;
898 }
899 if (is_shimm16(~v16, &cmode, &imm8)) {
900 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
901 return;
902 }
903
904 /*
905 * Otherwise, all remaining constants can be loaded in two insns:
906 * rd = v16 & 0xff, rd |= v16 & 0xff00.
907 */
908 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
909 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
910 return;
911 } else if (v64 == dup_const(MO_32, v64)) {
912 uint32_t v32 = v64;
913 uint32_t n32 = ~v32;
914
915 if (is_shimm32(v32, &cmode, &imm8) ||
916 is_soimm32(v32, &cmode, &imm8) ||
917 is_fimm32(v32, &cmode, &imm8)) {
918 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
919 return;
920 }
921 if (is_shimm32(n32, &cmode, &imm8) ||
922 is_soimm32(n32, &cmode, &imm8)) {
923 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
924 return;
925 }
926
927 /*
928 * Restrict the set of constants to those we can load with
929 * two instructions. Others we load from the pool.
930 */
931 i = is_shimm32_pair(v32, &cmode, &imm8);
932 if (i) {
933 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
934 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
935 return;
936 }
937 i = is_shimm32_pair(n32, &cmode, &imm8);
938 if (i) {
939 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
940 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
941 return;
942 }
943 } else if (is_fimm64(v64, &cmode, &imm8)) {
944 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
945 return;
946 }
947
948 /*
949 * As a last resort, load from the constant pool. Sadly there
950 * is no LD1R (literal), so store the full 16-byte vector.
951 */
952 if (type == TCG_TYPE_V128) {
953 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
954 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
955 } else {
956 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
957 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
958 }
959 }
960
961 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
962 TCGReg rd, TCGReg rs)
963 {
964 int is_q = type - TCG_TYPE_V64;
965 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
966 return true;
967 }
968
969 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
970 TCGReg r, TCGReg base, intptr_t offset)
971 {
972 TCGReg temp = TCG_REG_TMP;
973
974 if (offset < -0xffffff || offset > 0xffffff) {
975 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
976 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
977 base = temp;
978 } else {
979 AArch64Insn add_insn = I3401_ADDI;
980
981 if (offset < 0) {
982 add_insn = I3401_SUBI;
983 offset = -offset;
984 }
985 if (offset & 0xfff000) {
986 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
987 base = temp;
988 }
989 if (offset & 0xfff) {
990 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
991 base = temp;
992 }
993 }
994 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
995 return true;
996 }
997
998 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
999 tcg_target_long value)
1000 {
1001 tcg_target_long svalue = value;
1002 tcg_target_long ivalue = ~value;
1003 tcg_target_long t0, t1, t2;
1004 int s0, s1;
1005 AArch64Insn opc;
1006
1007 switch (type) {
1008 case TCG_TYPE_I32:
1009 case TCG_TYPE_I64:
1010 tcg_debug_assert(rd < 32);
1011 break;
1012
1013 case TCG_TYPE_V64:
1014 case TCG_TYPE_V128:
1015 tcg_debug_assert(rd >= 32);
1016 tcg_out_dupi_vec(s, type, rd, value);
1017 return;
1018
1019 default:
1020 g_assert_not_reached();
1021 }
1022
1023 /* For 32-bit values, discard potential garbage in value. For 64-bit
1024 values within [2**31, 2**32-1], we can create smaller sequences by
1025 interpreting this as a negative 32-bit number, while ensuring that
1026 the high 32 bits are cleared by setting SF=0. */
1027 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1028 svalue = (int32_t)value;
1029 value = (uint32_t)value;
1030 ivalue = (uint32_t)ivalue;
1031 type = TCG_TYPE_I32;
1032 }
1033
1034 /* Speed things up by handling the common case of small positive
1035 and negative values specially. */
1036 if ((value & ~0xffffull) == 0) {
1037 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1038 return;
1039 } else if ((ivalue & ~0xffffull) == 0) {
1040 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1041 return;
1042 }
1043
1044 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
1045 use the sign-extended value. That lets us match rotated values such
1046 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1047 if (is_limm(svalue)) {
1048 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1049 return;
1050 }
1051
1052 /* Look for host pointer values within 4G of the PC. This happens
1053 often when loading pointers to QEMU's own data structures. */
1054 if (type == TCG_TYPE_I64) {
1055 tcg_target_long disp = value - (intptr_t)s->code_ptr;
1056 if (disp == sextract64(disp, 0, 21)) {
1057 tcg_out_insn(s, 3406, ADR, rd, disp);
1058 return;
1059 }
1060 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
1061 if (disp == sextract64(disp, 0, 21)) {
1062 tcg_out_insn(s, 3406, ADRP, rd, disp);
1063 if (value & 0xfff) {
1064 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1065 }
1066 return;
1067 }
1068 }
1069
1070 /* Would it take fewer insns to begin with MOVN? */
1071 if (ctpop64(value) >= 32) {
1072 t0 = ivalue;
1073 opc = I3405_MOVN;
1074 } else {
1075 t0 = value;
1076 opc = I3405_MOVZ;
1077 }
1078 s0 = ctz64(t0) & (63 & -16);
1079 t1 = t0 & ~(0xffffUL << s0);
1080 s1 = ctz64(t1) & (63 & -16);
1081 t2 = t1 & ~(0xffffUL << s1);
1082 if (t2 == 0) {
1083 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1084 if (t1 != 0) {
1085 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1086 }
1087 return;
1088 }
1089
1090 /* For more than 2 insns, dump it into the constant pool. */
1091 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1092 tcg_out_insn(s, 3305, LDR, 0, rd);
1093 }
1094
1095 /* Define something more legible for general use. */
1096 #define tcg_out_ldst_r tcg_out_insn_3310
1097
1098 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1099 TCGReg rn, intptr_t offset, int lgsize)
1100 {
1101 /* If the offset is naturally aligned and in range, then we can
1102 use the scaled uimm12 encoding */
1103 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1104 uintptr_t scaled_uimm = offset >> lgsize;
1105 if (scaled_uimm <= 0xfff) {
1106 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1107 return;
1108 }
1109 }
1110
1111 /* Small signed offsets can use the unscaled encoding. */
1112 if (offset >= -256 && offset < 256) {
1113 tcg_out_insn_3312(s, insn, rd, rn, offset);
1114 return;
1115 }
1116
1117 /* Worst-case scenario, move offset to temp register, use reg offset. */
1118 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1119 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1120 }
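/* E.g. an 8-byte load at offset 0x7ff8 uses the scaled form
   (0x7ff8 >> 3 == 0xfff), offset -8 uses the unscaled form, and
   offset 0x8000 is first moved into TCG_REG_TMP. */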
1121
1122 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1123 {
1124 if (ret == arg) {
1125 return true;
1126 }
1127 switch (type) {
1128 case TCG_TYPE_I32:
1129 case TCG_TYPE_I64:
1130 if (ret < 32 && arg < 32) {
1131 tcg_out_movr(s, type, ret, arg);
1132 break;
1133 } else if (ret < 32) {
1134 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1135 break;
1136 } else if (arg < 32) {
1137 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1138 break;
1139 }
1140 /* FALLTHRU */
1141
1142 case TCG_TYPE_V64:
1143 tcg_debug_assert(ret >= 32 && arg >= 32);
1144 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1145 break;
1146 case TCG_TYPE_V128:
1147 tcg_debug_assert(ret >= 32 && arg >= 32);
1148 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1149 break;
1150
1151 default:
1152 g_assert_not_reached();
1153 }
1154 return true;
1155 }
1156
1157 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1158 TCGReg base, intptr_t ofs)
1159 {
1160 AArch64Insn insn;
1161 int lgsz;
1162
1163 switch (type) {
1164 case TCG_TYPE_I32:
1165 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1166 lgsz = 2;
1167 break;
1168 case TCG_TYPE_I64:
1169 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1170 lgsz = 3;
1171 break;
1172 case TCG_TYPE_V64:
1173 insn = I3312_LDRVD;
1174 lgsz = 3;
1175 break;
1176 case TCG_TYPE_V128:
1177 insn = I3312_LDRVQ;
1178 lgsz = 4;
1179 break;
1180 default:
1181 g_assert_not_reached();
1182 }
1183 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1184 }
1185
1186 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1187 TCGReg base, intptr_t ofs)
1188 {
1189 AArch64Insn insn;
1190 int lgsz;
1191
1192 switch (type) {
1193 case TCG_TYPE_I32:
1194 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1195 lgsz = 2;
1196 break;
1197 case TCG_TYPE_I64:
1198 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1199 lgsz = 3;
1200 break;
1201 case TCG_TYPE_V64:
1202 insn = I3312_STRVD;
1203 lgsz = 3;
1204 break;
1205 case TCG_TYPE_V128:
1206 insn = I3312_STRVQ;
1207 lgsz = 4;
1208 break;
1209 default:
1210 g_assert_not_reached();
1211 }
1212 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1213 }
1214
1215 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1216 TCGReg base, intptr_t ofs)
1217 {
1218 if (type <= TCG_TYPE_I64 && val == 0) {
1219 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1220 return true;
1221 }
1222 return false;
1223 }
1224
1225 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1226 TCGReg rn, unsigned int a, unsigned int b)
1227 {
1228 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1229 }
1230
1231 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1232 TCGReg rn, unsigned int a, unsigned int b)
1233 {
1234 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1235 }
1236
1237 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1238 TCGReg rn, unsigned int a, unsigned int b)
1239 {
1240 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1241 }
1242
1243 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1244 TCGReg rn, TCGReg rm, unsigned int a)
1245 {
1246 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1247 }
1248
1249 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1250 TCGReg rd, TCGReg rn, unsigned int m)
1251 {
1252 int bits = ext ? 64 : 32;
1253 int max = bits - 1;
1254 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1255 }
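/* E.g. a 32-bit shift left by 3 becomes UBFM Wd, Wn, #29, #28,
   which is exactly the LSL #3 alias. */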
1256
1257 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1258 TCGReg rd, TCGReg rn, unsigned int m)
1259 {
1260 int max = ext ? 63 : 31;
1261 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1262 }
1263
1264 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1265 TCGReg rd, TCGReg rn, unsigned int m)
1266 {
1267 int max = ext ? 63 : 31;
1268 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1269 }
1270
1271 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1272 TCGReg rd, TCGReg rn, unsigned int m)
1273 {
1274 int max = ext ? 63 : 31;
1275 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1276 }
1277
1278 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1279 TCGReg rd, TCGReg rn, unsigned int m)
1280 {
1281 int bits = ext ? 64 : 32;
1282 int max = bits - 1;
1283 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1284 }
1285
1286 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1287 TCGReg rn, unsigned lsb, unsigned width)
1288 {
1289 unsigned size = ext ? 64 : 32;
1290 unsigned a = (size - lsb) & (size - 1);
1291 unsigned b = width - 1;
1292 tcg_out_bfm(s, ext, rd, rn, a, b);
1293 }
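/* E.g. a 32-bit deposit at lsb 8, width 8 becomes BFM Wd, Wn, #24, #7,
   i.e. the BFI Wd, Wn, #8, #8 alias. */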
1294
1295 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1296 tcg_target_long b, bool const_b)
1297 {
1298 if (const_b) {
1299 /* Using CMP or CMN aliases. */
1300 if (b >= 0) {
1301 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1302 } else {
1303 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1304 }
1305 } else {
1306 /* Using CMP alias SUBS wzr, Wn, Wm */
1307 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1308 }
1309 }
1310
1311 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
1312 {
1313 ptrdiff_t offset = target - s->code_ptr;
1314 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1315 tcg_out_insn(s, 3206, B, offset);
1316 }
1317
1318 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
1319 {
1320 ptrdiff_t offset = target - s->code_ptr;
1321 if (offset == sextract64(offset, 0, 26)) {
1322 tcg_out_insn(s, 3206, BL, offset);
1323 } else {
1324 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1325 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1326 }
1327 }
1328
1329 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1330 {
1331 tcg_out_insn(s, 3207, BLR, reg);
1332 }
1333
1334 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1335 {
1336 ptrdiff_t offset = target - s->code_ptr;
1337 if (offset == sextract64(offset, 0, 26)) {
1338 tcg_out_insn(s, 3206, BL, offset);
1339 } else {
1340 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1341 tcg_out_callr(s, TCG_REG_TMP);
1342 }
1343 }
1344
1345 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1346 uintptr_t addr)
1347 {
1348 tcg_insn_unit i1, i2;
1349 TCGType rt = TCG_TYPE_I64;
1350 TCGReg rd = TCG_REG_TMP;
1351 uint64_t pair;
1352
1353 ptrdiff_t offset = addr - jmp_addr;
1354
1355 if (offset == sextract64(offset, 0, 26)) {
1356 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1357 i2 = NOP;
1358 } else {
1359 offset = (addr >> 12) - (jmp_addr >> 12);
1360
1361 /* patch ADRP */
1362 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1363 /* patch ADDI */
1364 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1365 }
1366 pair = (uint64_t)i2 << 32 | i1;
1367 atomic_set((uint64_t *)jmp_addr, pair);
1368 flush_icache_range(jmp_addr, jmp_addr + 8);
1369 }
1370
1371 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1372 {
1373 if (!l->has_value) {
1374 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1375 tcg_out_insn(s, 3206, B, 0);
1376 } else {
1377 tcg_out_goto(s, l->u.value_ptr);
1378 }
1379 }
1380
1381 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1382 TCGArg b, bool b_const, TCGLabel *l)
1383 {
1384 intptr_t offset;
1385 bool need_cmp;
1386
1387 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1388 need_cmp = false;
1389 } else {
1390 need_cmp = true;
1391 tcg_out_cmp(s, ext, a, b, b_const);
1392 }
1393
1394 if (!l->has_value) {
1395 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1396 offset = tcg_in32(s) >> 5;
1397 } else {
1398 offset = l->u.value_ptr - s->code_ptr;
1399 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1400 }
1401
1402 if (need_cmp) {
1403 tcg_out_insn(s, 3202, B_C, c, offset);
1404 } else if (c == TCG_COND_EQ) {
1405 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1406 } else {
1407 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1408 }
1409 }
1410
1411 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1412 {
1413 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1414 }
1415
1416 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1417 {
1418 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1419 }
1420
1421 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1422 {
1423 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1424 }
1425
1426 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
1427 TCGReg rd, TCGReg rn)
1428 {
1429 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1430 int bits = (8 << s_bits) - 1;
1431 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1432 }
1433
1434 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
1435 TCGReg rd, TCGReg rn)
1436 {
1437 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1438 int bits = (8 << s_bits) - 1;
1439 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1440 }
1441
1442 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1443 TCGReg rn, int64_t aimm)
1444 {
1445 if (aimm >= 0) {
1446 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1447 } else {
1448 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1449 }
1450 }
1451
1452 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1453 TCGReg rh, TCGReg al, TCGReg ah,
1454 tcg_target_long bl, tcg_target_long bh,
1455 bool const_bl, bool const_bh, bool sub)
1456 {
1457 TCGReg orig_rl = rl;
1458 AArch64Insn insn;
1459
1460 if (rl == ah || (!const_bh && rl == bh)) {
1461 rl = TCG_REG_TMP;
1462 }
1463
1464 if (const_bl) {
1465 insn = I3401_ADDSI;
1466 if ((bl < 0) ^ sub) {
1467 insn = I3401_SUBSI;
1468 bl = -bl;
1469 }
1470 if (unlikely(al == TCG_REG_XZR)) {
1471 /* ??? We want to allow al to be zero for the benefit of
1472 negation via subtraction. However, that leaves open the
1473 possibility of adding 0+const in the low part, and the
1474 immediate add instructions encode XSP not XZR. Don't try
1475 anything more elaborate here than loading another zero. */
1476 al = TCG_REG_TMP;
1477 tcg_out_movi(s, ext, al, 0);
1478 }
1479 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1480 } else {
1481 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1482 }
1483
1484 insn = I3503_ADC;
1485 if (const_bh) {
1486 /* Note that the only two constants we support are 0 and -1, and
1487 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1488 if ((bh != 0) ^ sub) {
1489 insn = I3503_SBC;
1490 }
1491 bh = TCG_REG_XZR;
1492 } else if (sub) {
1493 insn = I3503_SBC;
1494 }
1495 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1496
1497 tcg_out_mov(s, ext, orig_rl, rl);
1498 }
1499
1500 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1501 {
1502 static const uint32_t sync[] = {
1503 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1504 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1505 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1506 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1507 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1508 };
1509 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1510 }
1511
1512 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1513 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1514 {
1515 TCGReg a1 = a0;
1516 if (is_ctz) {
1517 a1 = TCG_REG_TMP;
1518 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1519 }
1520 if (const_b && b == (ext ? 64 : 32)) {
1521 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1522 } else {
1523 AArch64Insn sel = I3506_CSEL;
1524
1525 tcg_out_cmp(s, ext, a0, 0, 1);
1526 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1527
1528 if (const_b) {
1529 if (b == -1) {
1530 b = TCG_REG_XZR;
1531 sel = I3506_CSINV;
1532 } else if (b == 0) {
1533 b = TCG_REG_XZR;
1534 } else {
1535 tcg_out_movi(s, ext, d, b);
1536 b = d;
1537 }
1538 }
1539 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1540 }
1541 }
1542
1543 #ifdef CONFIG_SOFTMMU
1544 #include "tcg-ldst.inc.c"
1545
1546 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1547 * TCGMemOpIdx oi, uintptr_t ra)
1548 */
1549 static void * const qemu_ld_helpers[16] = {
1550 [MO_UB] = helper_ret_ldub_mmu,
1551 [MO_LEUW] = helper_le_lduw_mmu,
1552 [MO_LEUL] = helper_le_ldul_mmu,
1553 [MO_LEQ] = helper_le_ldq_mmu,
1554 [MO_BEUW] = helper_be_lduw_mmu,
1555 [MO_BEUL] = helper_be_ldul_mmu,
1556 [MO_BEQ] = helper_be_ldq_mmu,
1557 };
1558
1559 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1560 * uintxx_t val, TCGMemOpIdx oi,
1561 * uintptr_t ra)
1562 */
1563 static void * const qemu_st_helpers[16] = {
1564 [MO_UB] = helper_ret_stb_mmu,
1565 [MO_LEUW] = helper_le_stw_mmu,
1566 [MO_LEUL] = helper_le_stl_mmu,
1567 [MO_LEQ] = helper_le_stq_mmu,
1568 [MO_BEUW] = helper_be_stw_mmu,
1569 [MO_BEUL] = helper_be_stl_mmu,
1570 [MO_BEQ] = helper_be_stq_mmu,
1571 };
1572
1573 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1574 {
1575 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1576 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1577 tcg_out_insn(s, 3406, ADR, rd, offset);
1578 }
1579
1580 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1581 {
1582 TCGMemOpIdx oi = lb->oi;
1583 TCGMemOp opc = get_memop(oi);
1584 TCGMemOp size = opc & MO_SIZE;
1585
1586 if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1587 return false;
1588 }
1589
1590 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1591 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1592 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1593 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1594 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1595 if (opc & MO_SIGN) {
1596 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1597 } else {
1598 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1599 }
1600
1601 tcg_out_goto(s, lb->raddr);
1602 return true;
1603 }
1604
1605 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1606 {
1607 TCGMemOpIdx oi = lb->oi;
1608 TCGMemOp opc = get_memop(oi);
1609 TCGMemOp size = opc & MO_SIZE;
1610
1611 if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1612 return false;
1613 }
1614
1615 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1616 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1617 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1618 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1619 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1620 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1621 tcg_out_goto(s, lb->raddr);
1622 return true;
1623 }
1624
1625 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1626 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1627 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1628 {
1629 TCGLabelQemuLdst *label = new_ldst_label(s);
1630
1631 label->is_ld = is_ld;
1632 label->oi = oi;
1633 label->type = ext;
1634 label->datalo_reg = data_reg;
1635 label->addrlo_reg = addr_reg;
1636 label->raddr = raddr;
1637 label->label_ptr[0] = label_ptr;
1638 }
1639
1640 /* We expect tlb_mask to be before tlb_table. */
1641 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
1642 offsetof(CPUArchState, tlb_mask));
1643
1644 /* We expect to use a 24-bit unsigned offset from ENV. */
1645 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
1646 > 0xffffff);
1647
1648 /* Load and compare a TLB entry, emitting the conditional jump to the
1649 slow path for the failure case, which will be patched later when finalizing
1650 the slow path. Generated code returns the host addend in X1,
1651 clobbers X0,X2,X3,TMP. */
1652 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1653 tcg_insn_unit **label_ptr, int mem_index,
1654 bool is_read)
1655 {
1656 int mask_ofs = offsetof(CPUArchState, tlb_mask[mem_index]);
1657 int table_ofs = offsetof(CPUArchState, tlb_table[mem_index]);
1658 unsigned a_bits = get_alignment_bits(opc);
1659 unsigned s_bits = opc & MO_SIZE;
1660 unsigned a_mask = (1u << a_bits) - 1;
1661 unsigned s_mask = (1u << s_bits) - 1;
1662 TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0, x3;
1663 TCGType mask_type;
1664 uint64_t compare_mask;
1665
1666 if (table_ofs > 0xfff) {
1667 int table_hi = table_ofs & ~0xfff;
1668 int mask_hi = mask_ofs & ~0xfff;
1669
1670 table_base = TCG_REG_X1;
1671 if (mask_hi == table_hi) {
1672 mask_base = table_base;
1673 } else if (mask_hi) {
1674 mask_base = TCG_REG_X0;
1675 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
1676 mask_base, TCG_AREG0, mask_hi);
1677 }
1678 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
1679 table_base, TCG_AREG0, table_hi);
1680 mask_ofs -= mask_hi;
1681 table_ofs -= table_hi;
1682 }
1683
1684 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1685 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1686
1687 /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
1688 tcg_out_ld(s, mask_type, TCG_REG_X0, mask_base, mask_ofs);
1689 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, table_base, table_ofs);
1690
1691 /* Extract the TLB index from the address into X0. */
1692 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1693 TCG_REG_X0, TCG_REG_X0, addr_reg,
1694 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1695
1696 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
1697 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1698
1699 /* Load the tlb comparator into X0, and the fast path addend into X1. */
1700 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1701 ? offsetof(CPUTLBEntry, addr_read)
1702 : offsetof(CPUTLBEntry, addr_write));
1703 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1704 offsetof(CPUTLBEntry, addend));
1705
1706 /* For aligned accesses, we check the first byte and include the alignment
1707 bits within the address. For unaligned access, we check that we don't
1708 cross pages using the address of the last byte of the access. */
1709 if (a_bits >= s_bits) {
1710 x3 = addr_reg;
1711 } else {
1712 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1713 TCG_REG_X3, addr_reg, s_mask - a_mask);
1714 x3 = TCG_REG_X3;
1715 }
1716 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
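/* E.g. for a 64-bit guest with 4 KiB pages and a 4-byte access that
   must be 4-byte aligned (a_bits == 2), compare_mask is
   0xfffffffffffff003; a misaligned address leaves low bits set and
   forces the comparison below to fail, taking the slow path. */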
1717
1718 /* Store the page mask part of the address into X3. */
1719 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1720 TCG_REG_X3, x3, compare_mask);
1721
1722 /* Perform the address comparison. */
1723 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1724
1725 /* If not equal, we jump to the slow path. */
1726 *label_ptr = s->code_ptr;
1727 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1728 }
1729
1730 #endif /* CONFIG_SOFTMMU */
1731
1732 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1733 TCGReg data_r, TCGReg addr_r,
1734 TCGType otype, TCGReg off_r)
1735 {
1736 const TCGMemOp bswap = memop & MO_BSWAP;
1737
1738 switch (memop & MO_SSIZE) {
1739 case MO_UB:
1740 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1741 break;
1742 case MO_SB:
1743 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1744 data_r, addr_r, otype, off_r);
1745 break;
1746 case MO_UW:
1747 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1748 if (bswap) {
1749 tcg_out_rev16(s, data_r, data_r);
1750 }
1751 break;
1752 case MO_SW:
1753 if (bswap) {
1754 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1755 tcg_out_rev16(s, data_r, data_r);
1756 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1757 } else {
1758 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1759 data_r, addr_r, otype, off_r);
1760 }
1761 break;
1762 case MO_UL:
1763 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1764 if (bswap) {
1765 tcg_out_rev32(s, data_r, data_r);
1766 }
1767 break;
1768 case MO_SL:
1769 if (bswap) {
1770 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1771 tcg_out_rev32(s, data_r, data_r);
1772 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1773 } else {
1774 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1775 }
1776 break;
1777 case MO_Q:
1778 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1779 if (bswap) {
1780 tcg_out_rev64(s, data_r, data_r);
1781 }
1782 break;
1783 default:
1784 tcg_abort();
1785 }
1786 }
1787
1788 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1789 TCGReg data_r, TCGReg addr_r,
1790 TCGType otype, TCGReg off_r)
1791 {
1792 const TCGMemOp bswap = memop & MO_BSWAP;
1793
1794 switch (memop & MO_SIZE) {
1795 case MO_8:
1796 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1797 break;
1798 case MO_16:
1799 if (bswap && data_r != TCG_REG_XZR) {
1800 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1801 data_r = TCG_REG_TMP;
1802 }
1803 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1804 break;
1805 case MO_32:
1806 if (bswap && data_r != TCG_REG_XZR) {
1807 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1808 data_r = TCG_REG_TMP;
1809 }
1810 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1811 break;
1812 case MO_64:
1813 if (bswap && data_r != TCG_REG_XZR) {
1814 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1815 data_r = TCG_REG_TMP;
1816 }
1817 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1818 break;
1819 default:
1820 tcg_abort();
1821 }
1822 }
1823
1824 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1825 TCGMemOpIdx oi, TCGType ext)
1826 {
1827 TCGMemOp memop = get_memop(oi);
1828 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1829 #ifdef CONFIG_SOFTMMU
1830 unsigned mem_index = get_mmuidx(oi);
1831 tcg_insn_unit *label_ptr;
1832
1833 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1834 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1835 TCG_REG_X1, otype, addr_reg);
1836 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1837 s->code_ptr, label_ptr);
1838 #else /* !CONFIG_SOFTMMU */
1839 if (USE_GUEST_BASE) {
1840 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1841 TCG_REG_GUEST_BASE, otype, addr_reg);
1842 } else {
1843 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1844 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1845 }
1846 #endif /* CONFIG_SOFTMMU */
1847 }
1848
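/* Emit a full guest store; the structure mirrors tcg_out_qemu_ld above. */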
1849 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1850 TCGMemOpIdx oi)
1851 {
1852 TCGMemOp memop = get_memop(oi);
1853 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1854 #ifdef CONFIG_SOFTMMU
1855 unsigned mem_index = get_mmuidx(oi);
1856 tcg_insn_unit *label_ptr;
1857
1858 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1859 tcg_out_qemu_st_direct(s, memop, data_reg,
1860 TCG_REG_X1, otype, addr_reg);
1861 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,

1862 data_reg, addr_reg, s->code_ptr, label_ptr);
1863 #else /* !CONFIG_SOFTMMU */
1864 if (USE_GUEST_BASE) {
1865 tcg_out_qemu_st_direct(s, memop, data_reg,
1866 TCG_REG_GUEST_BASE, otype, addr_reg);
1867 } else {
1868 tcg_out_qemu_st_direct(s, memop, data_reg,
1869 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1870 }
1871 #endif /* CONFIG_SOFTMMU */
1872 }
1873
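/* Entry point of the TB epilogue, set up in tcg_target_qemu_prologue;
   exit_tb with a non-zero argument branches here after loading X0. */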
1874 static tcg_insn_unit *tb_ret_addr;
1875
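/* Emit host code for one scalar TCG opcode.  Most integer operations share
   a single encoder for the 32- and 64-bit variants, selected by 'ext'. */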
1876 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1877 const TCGArg args[TCG_MAX_OP_ARGS],
1878 const int const_args[TCG_MAX_OP_ARGS])
1879 {
1880 /* 99% of the time, we can signal the use of extension registers
1881 by looking to see if the opcode handles 64-bit data. */
1882 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1883
1884 /* Hoist the loads of the most common arguments. */
1885 TCGArg a0 = args[0];
1886 TCGArg a1 = args[1];
1887 TCGArg a2 = args[2];
1888 int c2 = const_args[2];
1889
1890 /* Some operands are defined with the "rZ" constraint: a register or
1891 the zero register.  REG0 need not test args[I] == 0; const_args[I]
already tells us the operand is the constant zero. */
1892 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1893
1894 switch (opc) {
1895 case INDEX_op_exit_tb:
1896 /* Reuse the zeroing that exists for goto_ptr. */
1897 if (a0 == 0) {
1898 tcg_out_goto_long(s, s->code_gen_epilogue);
1899 } else {
1900 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1901 tcg_out_goto_long(s, tb_ret_addr);
1902 }
1903 break;
1904
1905 case INDEX_op_goto_tb:
1906 if (s->tb_jmp_insn_offset != NULL) {
1907 /* TCG_TARGET_HAS_direct_jump */
1908 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1909 write can be used to patch the target address. */
1910 if ((uintptr_t)s->code_ptr & 7) {
1911 tcg_out32(s, NOP);
1912 }
1913 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1914 /* The actual branch destination will be patched later by
1915 tb_target_set_jmp_target. */
1916 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1917 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1918 } else {
1919 /* !TCG_TARGET_HAS_direct_jump */
1920 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1921 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1922 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1923 }
1924 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1925 set_jmp_reset_offset(s, a0);
1926 break;
1927
1928 case INDEX_op_goto_ptr:
1929 tcg_out_insn(s, 3207, BR, a0);
1930 break;
1931
1932 case INDEX_op_br:
1933 tcg_out_goto_label(s, arg_label(a0));
1934 break;
1935
1936 case INDEX_op_ld8u_i32:
1937 case INDEX_op_ld8u_i64:
1938 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1939 break;
1940 case INDEX_op_ld8s_i32:
1941 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1942 break;
1943 case INDEX_op_ld8s_i64:
1944 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1945 break;
1946 case INDEX_op_ld16u_i32:
1947 case INDEX_op_ld16u_i64:
1948 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1949 break;
1950 case INDEX_op_ld16s_i32:
1951 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1952 break;
1953 case INDEX_op_ld16s_i64:
1954 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1955 break;
1956 case INDEX_op_ld_i32:
1957 case INDEX_op_ld32u_i64:
1958 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1959 break;
1960 case INDEX_op_ld32s_i64:
1961 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1962 break;
1963 case INDEX_op_ld_i64:
1964 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1965 break;
1966
1967 case INDEX_op_st8_i32:
1968 case INDEX_op_st8_i64:
1969 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1970 break;
1971 case INDEX_op_st16_i32:
1972 case INDEX_op_st16_i64:
1973 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1974 break;
1975 case INDEX_op_st_i32:
1976 case INDEX_op_st32_i64:
1977 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1978 break;
1979 case INDEX_op_st_i64:
1980 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1981 break;
1982
1983 case INDEX_op_add_i32:
1984 a2 = (int32_t)a2;
1985 /* FALLTHRU */
1986 case INDEX_op_add_i64:
1987 if (c2) {
1988 tcg_out_addsubi(s, ext, a0, a1, a2);
1989 } else {
1990 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1991 }
1992 break;
1993
1994 case INDEX_op_sub_i32:
1995 a2 = (int32_t)a2;
1996 /* FALLTHRU */
1997 case INDEX_op_sub_i64:
1998 if (c2) {
1999 tcg_out_addsubi(s, ext, a0, a1, -a2);
2000 } else {
2001 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2002 }
2003 break;
2004
2005 case INDEX_op_neg_i64:
2006 case INDEX_op_neg_i32:
2007 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2008 break;
2009
2010 case INDEX_op_and_i32:
2011 a2 = (int32_t)a2;
2012 /* FALLTHRU */
2013 case INDEX_op_and_i64:
2014 if (c2) {
2015 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2016 } else {
2017 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2018 }
2019 break;
2020
2021 case INDEX_op_andc_i32:
2022 a2 = (int32_t)a2;
2023 /* FALLTHRU */
2024 case INDEX_op_andc_i64:
2025 if (c2) {
2026 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2027 } else {
2028 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2029 }
2030 break;
2031
2032 case INDEX_op_or_i32:
2033 a2 = (int32_t)a2;
2034 /* FALLTHRU */
2035 case INDEX_op_or_i64:
2036 if (c2) {
2037 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2038 } else {
2039 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2040 }
2041 break;
2042
2043 case INDEX_op_orc_i32:
2044 a2 = (int32_t)a2;
2045 /* FALLTHRU */
2046 case INDEX_op_orc_i64:
2047 if (c2) {
2048 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2049 } else {
2050 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2051 }
2052 break;
2053
2054 case INDEX_op_xor_i32:
2055 a2 = (int32_t)a2;
2056 /* FALLTHRU */
2057 case INDEX_op_xor_i64:
2058 if (c2) {
2059 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2060 } else {
2061 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2062 }
2063 break;
2064
2065 case INDEX_op_eqv_i32:
2066 a2 = (int32_t)a2;
2067 /* FALLTHRU */
2068 case INDEX_op_eqv_i64:
2069 if (c2) {
2070 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2071 } else {
2072 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2073 }
2074 break;
2075
2076 case INDEX_op_not_i64:
2077 case INDEX_op_not_i32:
2078 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2079 break;
2080
2081 case INDEX_op_mul_i64:
2082 case INDEX_op_mul_i32:
2083 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2084 break;
2085
2086 case INDEX_op_div_i64:
2087 case INDEX_op_div_i32:
2088 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2089 break;
2090 case INDEX_op_divu_i64:
2091 case INDEX_op_divu_i32:
2092 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2093 break;
2094
2095 case INDEX_op_rem_i64:
2096 case INDEX_op_rem_i32:
2097 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2098 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2099 break;
2100 case INDEX_op_remu_i64:
2101 case INDEX_op_remu_i32:
2102 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2103 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2104 break;
2105
2106 case INDEX_op_shl_i64:
2107 case INDEX_op_shl_i32:
2108 if (c2) {
2109 tcg_out_shl(s, ext, a0, a1, a2);
2110 } else {
2111 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2112 }
2113 break;
2114
2115 case INDEX_op_shr_i64:
2116 case INDEX_op_shr_i32:
2117 if (c2) {
2118 tcg_out_shr(s, ext, a0, a1, a2);
2119 } else {
2120 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2121 }
2122 break;
2123
2124 case INDEX_op_sar_i64:
2125 case INDEX_op_sar_i32:
2126 if (c2) {
2127 tcg_out_sar(s, ext, a0, a1, a2);
2128 } else {
2129 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2130 }
2131 break;
2132
2133 case INDEX_op_rotr_i64:
2134 case INDEX_op_rotr_i32:
2135 if (c2) {
2136 tcg_out_rotr(s, ext, a0, a1, a2);
2137 } else {
2138 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2139 }
2140 break;
2141
2142 case INDEX_op_rotl_i64:
2143 case INDEX_op_rotl_i32:
2144 if (c2) {
2145 tcg_out_rotl(s, ext, a0, a1, a2);
2146 } else {
2147 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2148 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2149 }
2150 break;
2151
2152 case INDEX_op_clz_i64:
2153 case INDEX_op_clz_i32:
2154 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2155 break;
2156 case INDEX_op_ctz_i64:
2157 case INDEX_op_ctz_i32:
2158 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2159 break;
2160
2161 case INDEX_op_brcond_i32:
2162 a1 = (int32_t)a1;
2163 /* FALLTHRU */
2164 case INDEX_op_brcond_i64:
2165 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2166 break;
2167
2168 case INDEX_op_setcond_i32:
2169 a2 = (int32_t)a2;
2170 /* FALLTHRU */
2171 case INDEX_op_setcond_i64:
2172 tcg_out_cmp(s, ext, a1, a2, c2);
2173 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
2174 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2175 TCG_REG_XZR, tcg_invert_cond(args[3]));
2176 break;
2177
2178 case INDEX_op_movcond_i32:
2179 a2 = (int32_t)a2;
2180 /* FALLTHRU */
2181 case INDEX_op_movcond_i64:
2182 tcg_out_cmp(s, ext, a1, a2, c2);
2183 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2184 break;
2185
2186 case INDEX_op_qemu_ld_i32:
2187 case INDEX_op_qemu_ld_i64:
2188 tcg_out_qemu_ld(s, a0, a1, a2, ext);
2189 break;
2190 case INDEX_op_qemu_st_i32:
2191 case INDEX_op_qemu_st_i64:
2192 tcg_out_qemu_st(s, REG0(0), a1, a2);
2193 break;
2194
2195 case INDEX_op_bswap64_i64:
2196 tcg_out_rev64(s, a0, a1);
2197 break;
2198 case INDEX_op_bswap32_i64:
2199 case INDEX_op_bswap32_i32:
2200 tcg_out_rev32(s, a0, a1);
2201 break;
2202 case INDEX_op_bswap16_i64:
2203 case INDEX_op_bswap16_i32:
2204 tcg_out_rev16(s, a0, a1);
2205 break;
2206
2207 case INDEX_op_ext8s_i64:
2208 case INDEX_op_ext8s_i32:
2209 tcg_out_sxt(s, ext, MO_8, a0, a1);
2210 break;
2211 case INDEX_op_ext16s_i64:
2212 case INDEX_op_ext16s_i32:
2213 tcg_out_sxt(s, ext, MO_16, a0, a1);
2214 break;
2215 case INDEX_op_ext_i32_i64:
2216 case INDEX_op_ext32s_i64:
2217 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2218 break;
2219 case INDEX_op_ext8u_i64:
2220 case INDEX_op_ext8u_i32:
2221 tcg_out_uxt(s, MO_8, a0, a1);
2222 break;
2223 case INDEX_op_ext16u_i64:
2224 case INDEX_op_ext16u_i32:
2225 tcg_out_uxt(s, MO_16, a0, a1);
2226 break;
2227 case INDEX_op_extu_i32_i64:
2228 case INDEX_op_ext32u_i64:
2229 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2230 break;
2231
2232 case INDEX_op_deposit_i64:
2233 case INDEX_op_deposit_i32:
2234 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2235 break;
2236
2237 case INDEX_op_extract_i64:
2238 case INDEX_op_extract_i32:
2239 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2240 break;
2241
2242 case INDEX_op_sextract_i64:
2243 case INDEX_op_sextract_i32:
2244 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2245 break;
2246
2247 case INDEX_op_extract2_i64:
2248 case INDEX_op_extract2_i32:
2249 tcg_out_extr(s, ext, a0, a1, a2, args[3]);
2250 break;
2251
2252 case INDEX_op_add2_i32:
2253 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2254 (int32_t)args[4], args[5], const_args[4],
2255 const_args[5], false);
2256 break;
2257 case INDEX_op_add2_i64:
2258 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2259 args[5], const_args[4], const_args[5], false);
2260 break;
2261 case INDEX_op_sub2_i32:
2262 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2263 (int32_t)args[4], args[5], const_args[4],
2264 const_args[5], true);
2265 break;
2266 case INDEX_op_sub2_i64:
2267 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2268 args[5], const_args[4], const_args[5], true);
2269 break;
2270
2271 case INDEX_op_muluh_i64:
2272 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2273 break;
2274 case INDEX_op_mulsh_i64:
2275 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2276 break;
2277
2278 case INDEX_op_mb:
2279 tcg_out_mb(s, a0);
2280 break;
2281
2282 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2283 case INDEX_op_mov_i64:
2284 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2285 case INDEX_op_movi_i64:
2286 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2287 default:
2288 g_assert_not_reached();
2289 }
2290
2291 #undef REG0
2292 }
2293
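/* Emit host code for one vector TCG opcode using the AdvSIMD encodings;
   vecl selects the 64- vs 128-bit form (the Q bit) and vece gives the
   log2 of the element size. */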
2294 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2295 unsigned vecl, unsigned vece,
2296 const TCGArg *args, const int *const_args)
2297 {
2298 static const AArch64Insn cmp_insn[16] = {
2299 [TCG_COND_EQ] = I3616_CMEQ,
2300 [TCG_COND_GT] = I3616_CMGT,
2301 [TCG_COND_GE] = I3616_CMGE,
2302 [TCG_COND_GTU] = I3616_CMHI,
2303 [TCG_COND_GEU] = I3616_CMHS,
2304 };
2305 static const AArch64Insn cmp0_insn[16] = {
2306 [TCG_COND_EQ] = I3617_CMEQ0,
2307 [TCG_COND_GT] = I3617_CMGT0,
2308 [TCG_COND_GE] = I3617_CMGE0,
2309 [TCG_COND_LT] = I3617_CMLT0,
2310 [TCG_COND_LE] = I3617_CMLE0,
2311 };
2312
2313 TCGType type = vecl + TCG_TYPE_V64;
2314 unsigned is_q = vecl;
2315 TCGArg a0, a1, a2, a3;
2316 int cmode, imm8;
2317
2318 a0 = args[0];
2319 a1 = args[1];
2320 a2 = args[2];
2321
2322 switch (opc) {
2323 case INDEX_op_ld_vec:
2324 tcg_out_ld(s, type, a0, a1, a2);
2325 break;
2326 case INDEX_op_st_vec:
2327 tcg_out_st(s, type, a0, a1, a2);
2328 break;
2329 case INDEX_op_dupm_vec:
2330 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2331 break;
2332 case INDEX_op_add_vec:
2333 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2334 break;
2335 case INDEX_op_sub_vec:
2336 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2337 break;
2338 case INDEX_op_mul_vec:
2339 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2340 break;
2341 case INDEX_op_neg_vec:
2342 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2343 break;
2344 case INDEX_op_abs_vec:
2345 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2346 break;
2347 case INDEX_op_and_vec:
2348 if (const_args[2]) {
2349 is_shimm1632(~a2, &cmode, &imm8);
2350 if (a0 == a1) {
2351 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2352 return;
2353 }
2354 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2355 a2 = a0;
2356 }
2357 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2358 break;
2359 case INDEX_op_or_vec:
2360 if (const_args[2]) {
2361 is_shimm1632(a2, &cmode, &imm8);
2362 if (a0 == a1) {
2363 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2364 return;
2365 }
2366 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2367 a2 = a0;
2368 }
2369 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2370 break;
2371 case INDEX_op_andc_vec:
2372 if (const_args[2]) {
2373 is_shimm1632(a2, &cmode, &imm8);
2374 if (a0 == a1) {
2375 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2376 return;
2377 }
2378 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2379 a2 = a0;
2380 }
2381 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2382 break;
2383 case INDEX_op_orc_vec:
2384 if (const_args[2]) {
2385 is_shimm1632(~a2, &cmode, &imm8);
2386 if (a0 == a1) {
2387 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2388 return;
2389 }
2390 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2391 a2 = a0;
2392 }
2393 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2394 break;
2395 case INDEX_op_xor_vec:
2396 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2397 break;
2398 case INDEX_op_ssadd_vec:
2399 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2400 break;
2401 case INDEX_op_sssub_vec:
2402 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2403 break;
2404 case INDEX_op_usadd_vec:
2405 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2406 break;
2407 case INDEX_op_ussub_vec:
2408 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2409 break;
2410 case INDEX_op_smax_vec:
2411 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2412 break;
2413 case INDEX_op_smin_vec:
2414 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2415 break;
2416 case INDEX_op_umax_vec:
2417 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2418 break;
2419 case INDEX_op_umin_vec:
2420 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2421 break;
2422 case INDEX_op_not_vec:
2423 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2424 break;
2425 case INDEX_op_shli_vec:
2426 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2427 break;
2428 case INDEX_op_shri_vec:
2429 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2430 break;
2431 case INDEX_op_sari_vec:
2432 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2433 break;
2434 case INDEX_op_shlv_vec:
2435 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2436 break;
2437 case INDEX_op_aa64_sshl_vec:
2438 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2439 break;
2440 case INDEX_op_cmp_vec:
2441 {
2442 TCGCond cond = args[3];
2443 AArch64Insn insn;
2444
2445 if (cond == TCG_COND_NE) {
2446 if (const_args[2]) {
2447 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2448 } else {
2449 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2450 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2451 }
2452 } else {
2453 if (const_args[2]) {
2454 insn = cmp0_insn[cond];
2455 if (insn) {
2456 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2457 break;
2458 }
2459 tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2460 a2 = TCG_VEC_TMP;
2461 }
2462 insn = cmp_insn[cond];
2463 if (insn == 0) {
2464 TCGArg t;
2465 t = a1, a1 = a2, a2 = t;
2466 cond = tcg_swap_cond(cond);
2467 insn = cmp_insn[cond];
2468 tcg_debug_assert(insn != 0);
2469 }
2470 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2471 }
2472 }
2473 break;
2474
2475 case INDEX_op_bitsel_vec:
2476 a3 = args[3];
2477 if (a0 == a3) {
2478 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2479 } else if (a0 == a2) {
2480 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2481 } else {
2482 if (a0 != a1) {
2483 tcg_out_mov(s, type, a0, a1);
2484 }
2485 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2486 }
2487 break;
2488
2489 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
2490 case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
2491 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
2492 default:
2493 g_assert_not_reached();
2494 }
2495 }
2496
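/* Report how a vector opcode can be handled for a given element size:
   1 means emitted directly, -1 means supported via tcg_expand_vec_op,
   0 means unsupported. */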
2497 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2498 {
2499 switch (opc) {
2500 case INDEX_op_add_vec:
2501 case INDEX_op_sub_vec:
2502 case INDEX_op_and_vec:
2503 case INDEX_op_or_vec:
2504 case INDEX_op_xor_vec:
2505 case INDEX_op_andc_vec:
2506 case INDEX_op_orc_vec:
2507 case INDEX_op_neg_vec:
2508 case INDEX_op_abs_vec:
2509 case INDEX_op_not_vec:
2510 case INDEX_op_cmp_vec:
2511 case INDEX_op_shli_vec:
2512 case INDEX_op_shri_vec:
2513 case INDEX_op_sari_vec:
2514 case INDEX_op_ssadd_vec:
2515 case INDEX_op_sssub_vec:
2516 case INDEX_op_usadd_vec:
2517 case INDEX_op_ussub_vec:
2518 case INDEX_op_shlv_vec:
2519 case INDEX_op_bitsel_vec:
2520 return 1;
2521 case INDEX_op_shrv_vec:
2522 case INDEX_op_sarv_vec:
2523 return -1;
2524 case INDEX_op_mul_vec:
2525 case INDEX_op_smax_vec:
2526 case INDEX_op_smin_vec:
2527 case INDEX_op_umax_vec:
2528 case INDEX_op_umin_vec:
2529 return vece < MO_64;
2530
2531 default:
2532 return 0;
2533 }
2534 }
2535
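/* Expand the vector opcodes for which tcg_can_emit_vec_op returned -1.
   Only the variable right shifts need this: they are rewritten as left
   shifts by a negated count, since USHL/SSHL shift right for negative
   shift amounts. */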
2536 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2537 TCGArg a0, ...)
2538 {
2539 va_list va;
2540 TCGv_vec v0, v1, v2, t1;
2541
2542 va_start(va, a0);
2543 v0 = temp_tcgv_vec(arg_temp(a0));
2544 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2545 v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2546
2547 switch (opc) {
2548 case INDEX_op_shrv_vec:
2549 case INDEX_op_sarv_vec:
2550 /* Right shifts are negative left shifts for AArch64. */
2551 t1 = tcg_temp_new_vec(type);
2552 tcg_gen_neg_vec(vece, t1, v2);
2553 opc = (opc == INDEX_op_shrv_vec
2554 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2555 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2556 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2557 tcg_temp_free_vec(t1);
2558 break;
2559
2560 default:
2561 g_assert_not_reached();
2562 }
2563
2564 va_end(va);
2565 }
2566
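/* Map each opcode to its operand constraints.  'r' is a general register,
   'w' a vector register, 'l' a register usable as a qemu_ld/st address,
   and 'Z' additionally admits the constant zero; the other capital letters
   select the immediate classes accepted by target_parse_constraint
   (e.g. 'A' arithmetic and 'L' logical immediates). */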
2567 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2568 {
2569 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2570 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2571 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2572 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2573 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2574 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2575 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2576 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2577 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2578 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2579 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2580 static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
2581 static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
2582 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2583 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2584 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2585 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2586 static const TCGTargetOpDef r_r_rAL
2587 = { .args_ct_str = { "r", "r", "rAL" } };
2588 static const TCGTargetOpDef dep
2589 = { .args_ct_str = { "r", "0", "rZ" } };
2590 static const TCGTargetOpDef ext2
2591 = { .args_ct_str = { "r", "rZ", "rZ" } };
2592 static const TCGTargetOpDef movc
2593 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2594 static const TCGTargetOpDef add2
2595 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2596 static const TCGTargetOpDef w_w_w_w
2597 = { .args_ct_str = { "w", "w", "w", "w" } };
2598
2599 switch (op) {
2600 case INDEX_op_goto_ptr:
2601 return &r;
2602
2603 case INDEX_op_ld8u_i32:
2604 case INDEX_op_ld8s_i32:
2605 case INDEX_op_ld16u_i32:
2606 case INDEX_op_ld16s_i32:
2607 case INDEX_op_ld_i32:
2608 case INDEX_op_ld8u_i64:
2609 case INDEX_op_ld8s_i64:
2610 case INDEX_op_ld16u_i64:
2611 case INDEX_op_ld16s_i64:
2612 case INDEX_op_ld32u_i64:
2613 case INDEX_op_ld32s_i64:
2614 case INDEX_op_ld_i64:
2615 case INDEX_op_neg_i32:
2616 case INDEX_op_neg_i64:
2617 case INDEX_op_not_i32:
2618 case INDEX_op_not_i64:
2619 case INDEX_op_bswap16_i32:
2620 case INDEX_op_bswap32_i32:
2621 case INDEX_op_bswap16_i64:
2622 case INDEX_op_bswap32_i64:
2623 case INDEX_op_bswap64_i64:
2624 case INDEX_op_ext8s_i32:
2625 case INDEX_op_ext16s_i32:
2626 case INDEX_op_ext8u_i32:
2627 case INDEX_op_ext16u_i32:
2628 case INDEX_op_ext8s_i64:
2629 case INDEX_op_ext16s_i64:
2630 case INDEX_op_ext32s_i64:
2631 case INDEX_op_ext8u_i64:
2632 case INDEX_op_ext16u_i64:
2633 case INDEX_op_ext32u_i64:
2634 case INDEX_op_ext_i32_i64:
2635 case INDEX_op_extu_i32_i64:
2636 case INDEX_op_extract_i32:
2637 case INDEX_op_extract_i64:
2638 case INDEX_op_sextract_i32:
2639 case INDEX_op_sextract_i64:
2640 return &r_r;
2641
2642 case INDEX_op_st8_i32:
2643 case INDEX_op_st16_i32:
2644 case INDEX_op_st_i32:
2645 case INDEX_op_st8_i64:
2646 case INDEX_op_st16_i64:
2647 case INDEX_op_st32_i64:
2648 case INDEX_op_st_i64:
2649 return &rZ_r;
2650
2651 case INDEX_op_add_i32:
2652 case INDEX_op_add_i64:
2653 case INDEX_op_sub_i32:
2654 case INDEX_op_sub_i64:
2655 case INDEX_op_setcond_i32:
2656 case INDEX_op_setcond_i64:
2657 return &r_r_rA;
2658
2659 case INDEX_op_mul_i32:
2660 case INDEX_op_mul_i64:
2661 case INDEX_op_div_i32:
2662 case INDEX_op_div_i64:
2663 case INDEX_op_divu_i32:
2664 case INDEX_op_divu_i64:
2665 case INDEX_op_rem_i32:
2666 case INDEX_op_rem_i64:
2667 case INDEX_op_remu_i32:
2668 case INDEX_op_remu_i64:
2669 case INDEX_op_muluh_i64:
2670 case INDEX_op_mulsh_i64:
2671 return &r_r_r;
2672
2673 case INDEX_op_and_i32:
2674 case INDEX_op_and_i64:
2675 case INDEX_op_or_i32:
2676 case INDEX_op_or_i64:
2677 case INDEX_op_xor_i32:
2678 case INDEX_op_xor_i64:
2679 case INDEX_op_andc_i32:
2680 case INDEX_op_andc_i64:
2681 case INDEX_op_orc_i32:
2682 case INDEX_op_orc_i64:
2683 case INDEX_op_eqv_i32:
2684 case INDEX_op_eqv_i64:
2685 return &r_r_rL;
2686
2687 case INDEX_op_shl_i32:
2688 case INDEX_op_shr_i32:
2689 case INDEX_op_sar_i32:
2690 case INDEX_op_rotl_i32:
2691 case INDEX_op_rotr_i32:
2692 case INDEX_op_shl_i64:
2693 case INDEX_op_shr_i64:
2694 case INDEX_op_sar_i64:
2695 case INDEX_op_rotl_i64:
2696 case INDEX_op_rotr_i64:
2697 return &r_r_ri;
2698
2699 case INDEX_op_clz_i32:
2700 case INDEX_op_ctz_i32:
2701 case INDEX_op_clz_i64:
2702 case INDEX_op_ctz_i64:
2703 return &r_r_rAL;
2704
2705 case INDEX_op_brcond_i32:
2706 case INDEX_op_brcond_i64:
2707 return &r_rA;
2708
2709 case INDEX_op_movcond_i32:
2710 case INDEX_op_movcond_i64:
2711 return &movc;
2712
2713 case INDEX_op_qemu_ld_i32:
2714 case INDEX_op_qemu_ld_i64:
2715 return &r_l;
2716 case INDEX_op_qemu_st_i32:
2717 case INDEX_op_qemu_st_i64:
2718 return &lZ_l;
2719
2720 case INDEX_op_deposit_i32:
2721 case INDEX_op_deposit_i64:
2722 return &dep;
2723
2724 case INDEX_op_extract2_i32:
2725 case INDEX_op_extract2_i64:
2726 return &ext2;
2727
2728 case INDEX_op_add2_i32:
2729 case INDEX_op_add2_i64:
2730 case INDEX_op_sub2_i32:
2731 case INDEX_op_sub2_i64:
2732 return &add2;
2733
2734 case INDEX_op_add_vec:
2735 case INDEX_op_sub_vec:
2736 case INDEX_op_mul_vec:
2737 case INDEX_op_xor_vec:
2738 case INDEX_op_ssadd_vec:
2739 case INDEX_op_sssub_vec:
2740 case INDEX_op_usadd_vec:
2741 case INDEX_op_ussub_vec:
2742 case INDEX_op_smax_vec:
2743 case INDEX_op_smin_vec:
2744 case INDEX_op_umax_vec:
2745 case INDEX_op_umin_vec:
2746 case INDEX_op_shlv_vec:
2747 case INDEX_op_shrv_vec:
2748 case INDEX_op_sarv_vec:
2749 case INDEX_op_aa64_sshl_vec:
2750 return &w_w_w;
2751 case INDEX_op_not_vec:
2752 case INDEX_op_neg_vec:
2753 case INDEX_op_abs_vec:
2754 case INDEX_op_shli_vec:
2755 case INDEX_op_shri_vec:
2756 case INDEX_op_sari_vec:
2757 return &w_w;
2758 case INDEX_op_ld_vec:
2759 case INDEX_op_st_vec:
2760 case INDEX_op_dupm_vec:
2761 return &w_r;
2762 case INDEX_op_dup_vec:
2763 return &w_wr;
2764 case INDEX_op_or_vec:
2765 case INDEX_op_andc_vec:
2766 return &w_w_wO;
2767 case INDEX_op_and_vec:
2768 case INDEX_op_orc_vec:
2769 return &w_w_wN;
2770 case INDEX_op_cmp_vec:
2771 return &w_w_wZ;
2772 case INDEX_op_bitsel_vec:
2773 return &w_w_w_w;
2774
2775 default:
2776 return NULL;
2777 }
2778 }
2779
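/* Describe the register file to the allocator: register numbers 0-31 are
   the general registers (I32/I64), 32-63 the vector registers (V64/V128).
   Call-clobbered and reserved registers are then carved out below. */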
2780 static void tcg_target_init(TCGContext *s)
2781 {
2782 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2783 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2784 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2785 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2786
2787 tcg_target_call_clobber_regs = -1ull;
2788 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2789 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2790 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2791 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2792 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2793 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2794 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2795 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2796 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2797 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2798 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2799 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2800 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2801 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2802 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2803 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2804 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2805 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2806 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2807
2808 s->reserved_regs = 0;
2809 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2810 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2811 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2812 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2813 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2814 }
2815
2816 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2817 #define PUSH_SIZE ((30 - 19 + 1) * 8)
2818
2819 #define FRAME_SIZE \
2820 ((PUSH_SIZE \
2821 + TCG_STATIC_CALL_ARGS_SIZE \
2822 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2823 + TCG_TARGET_STACK_ALIGN - 1) \
2824 & ~(TCG_TARGET_STACK_ALIGN - 1))
2825
2826 /* We're expecting a 2 byte uleb128 encoded value. */
2827 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2828
2829 /* We're expecting to use a single ADDI insn. */
2830 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2831
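/* Frame layout: FP/LR are pushed at the bottom of the PUSH_SIZE area with
   x19..x28 stored in pairs above them, then SP is dropped by a further
   FRAME_SIZE - PUSH_SIZE bytes for outgoing stack arguments and the TCG
   temporary buffer. */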
2832 static void tcg_target_qemu_prologue(TCGContext *s)
2833 {
2834 TCGReg r;
2835
2836 /* Push (FP, LR) and allocate space for all saved registers. */
2837 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2838 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2839
2840 /* Set up frame pointer for canonical unwinding. */
2841 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2842
2843 /* Store callee-preserved regs x19..x28. */
2844 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2845 int ofs = (r - TCG_REG_X19 + 2) * 8;
2846 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2847 }
2848
2849 /* Make stack space for TCG locals. */
2850 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2851 FRAME_SIZE - PUSH_SIZE);
2852
2853 /* Inform TCG about how to find TCG locals with register, offset, size. */
2854 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2855 CPU_TEMP_BUF_NLONGS * sizeof(long));
2856
2857 #if !defined(CONFIG_SOFTMMU)
2858 if (USE_GUEST_BASE) {
2859 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2860 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2861 }
2862 #endif
2863
2864 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2865 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2866
2867 /*
2868 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2869 * and fall through to the rest of the epilogue.
2870 */
2871 s->code_gen_epilogue = s->code_ptr;
2872 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2873
2874 /* TB epilogue */
2875 tb_ret_addr = s->code_ptr;
2876
2877 /* Remove TCG locals stack space. */
2878 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2879 FRAME_SIZE - PUSH_SIZE);
2880
2881 /* Restore registers x19..x28. */
2882 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2883 int ofs = (r - TCG_REG_X19 + 2) * 8;
2884 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2885 }
2886
2887 /* Pop (FP, LR), restore SP to previous frame. */
2888 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2889 TCG_REG_SP, PUSH_SIZE, 0, 1);
2890 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2891 }
2892
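/* Pad a region of generated code with NOP instructions. */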
2893 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2894 {
2895 int i;
2896 for (i = 0; i < count; ++i) {
2897 p[i] = NOP;
2898 }
2899 }
2900
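/* Pre-built DWARF unwind information (one CIE and one FDE) matching the
   frame laid out by tcg_target_qemu_prologue; tcg_register_jit passes it
   to the host debugger hooks so that unwinding through generated code
   works. */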
2901 typedef struct {
2902 DebugFrameHeader h;
2903 uint8_t fde_def_cfa[4];
2904 uint8_t fde_reg_ofs[24];
2905 } DebugFrame;
2906
2907 #define ELF_HOST_MACHINE EM_AARCH64
2908
2909 static const DebugFrame debug_frame = {
2910 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2911 .h.cie.id = -1,
2912 .h.cie.version = 1,
2913 .h.cie.code_align = 1,
2914 .h.cie.data_align = 0x78, /* sleb128 -8 */
2915 .h.cie.return_column = TCG_REG_LR,
2916
2917 /* Total FDE size does not include the "len" member. */
2918 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2919
2920 .fde_def_cfa = {
2921 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2922 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2923 (FRAME_SIZE >> 7)
2924 },
2925 .fde_reg_ofs = {
2926 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2927 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2928 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2929 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2930 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2931 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2932 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2933 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2934 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2935 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2936 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2937 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2938 }
2939 };
2940
2941 void tcg_register_jit(void *buf, size_t buf_size)
2942 {
2943 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2944 }