1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-be-ldst.h"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType in setting of the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
27 };
28 #endif /* CONFIG_DEBUG_TCG */
29
30 static const int tcg_target_reg_alloc_order[] = {
31 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
32 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
33 TCG_REG_X28, /* we will reserve this for guest_base if configured */
34
35 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
36 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
37 TCG_REG_X16, TCG_REG_X17,
38
39 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
40 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
41
42 /* X18 reserved by system */
43 /* X19 reserved for AREG0 */
44 /* X29 reserved as fp */
45 /* X30 reserved as temporary */
46 };
47
48 static const int tcg_target_call_iarg_regs[8] = {
49 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
50 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
51 };
52 static const int tcg_target_call_oarg_regs[1] = {
53 TCG_REG_X0
54 };
55
56 #define TCG_REG_TMP TCG_REG_X30
57
58 #ifndef CONFIG_SOFTMMU
59 /* Note that XZR cannot be encoded in the address base register slot,
60 as that actually encodes SP. So if we need to zero-extend the guest
61 address, via the address index register slot, we need to load even
62 a zero guest base into a register. */
63 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
64 #define TCG_REG_GUEST_BASE TCG_REG_X28
65 #endif
66
67 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
68 {
69 ptrdiff_t offset = target - code_ptr;
70 tcg_debug_assert(offset == sextract64(offset, 0, 26));
71 /* read instruction, mask away previous PC_REL26 parameter contents,
72 set the proper offset, then write back the instruction. */
73 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
74 }
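/* The 26-bit field holds an offset in 32-bit instruction units, so a
   B or BL patched this way can reach roughly +/-128 MiB from the branch.  */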
75
76 static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr,
77 tcg_insn_unit *target)
78 {
79 ptrdiff_t offset = target - code_ptr;
80 tcg_insn_unit insn;
81 tcg_debug_assert(offset == sextract64(offset, 0, 26));
82 /* read instruction, mask away previous PC_REL26 parameter contents,
83 set the proper offset, then write back the instruction. */
84 insn = atomic_read(code_ptr);
85 atomic_set(code_ptr, deposit32(insn, 0, 26, offset));
86 }
87
88 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
89 {
90 ptrdiff_t offset = target - code_ptr;
91 tcg_debug_assert(offset == sextract64(offset, 0, 19));
92 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
93 }
94
95 static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
96 intptr_t value, intptr_t addend)
97 {
98 tcg_debug_assert(addend == 0);
99 switch (type) {
100 case R_AARCH64_JUMP26:
101 case R_AARCH64_CALL26:
102 reloc_pc26(code_ptr, (tcg_insn_unit *)value);
103 break;
104 case R_AARCH64_CONDBR19:
105 reloc_pc19(code_ptr, (tcg_insn_unit *)value);
106 break;
107 default:
108 tcg_abort();
109 }
110 }
111
112 #define TCG_CT_CONST_AIMM 0x100
113 #define TCG_CT_CONST_LIMM 0x200
114 #define TCG_CT_CONST_ZERO 0x400
115 #define TCG_CT_CONST_MONE 0x800
116
117 /* parse target specific constraints */
118 static const char *target_parse_constraint(TCGArgConstraint *ct,
119 const char *ct_str, TCGType type)
120 {
121 switch (*ct_str++) {
122 case 'r':
123 ct->ct |= TCG_CT_REG;
124 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
125 break;
126 case 'l': /* qemu_ld / qemu_st address, data_reg */
127 ct->ct |= TCG_CT_REG;
128 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
129 #ifdef CONFIG_SOFTMMU
130 /* x0 and x1 will be overwritten when reading the tlb entry,
131 and x2 and x3 are needed for helper args, so better to avoid using them. */
132 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
133 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
134 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
135 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
136 #endif
137 break;
138 case 'A': /* Valid for arithmetic immediate (positive or negative). */
139 ct->ct |= TCG_CT_CONST_AIMM;
140 break;
141 case 'L': /* Valid for logical immediate. */
142 ct->ct |= TCG_CT_CONST_LIMM;
143 break;
144 case 'M': /* minus one */
145 ct->ct |= TCG_CT_CONST_MONE;
146 break;
147 case 'Z': /* zero */
148 ct->ct |= TCG_CT_CONST_ZERO;
149 break;
150 default:
151 return NULL;
152 }
153 return ct_str;
154 }
155
156 static inline bool is_aimm(uint64_t val)
157 {
158 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
159 }
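/* For example, is_aimm accepts 0xabc (plain imm12) and 0xabc000 (imm12 with
   LSL #12), but rejects 0xabc00c, which must be materialized in a register.  */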
160
161 static inline bool is_limm(uint64_t val)
162 {
163 /* Taking a simplified view of the logical immediates for now, ignoring
164 the replication that can happen across the field. Match bit patterns
165 of the forms
166 0....01....1
167 0..01..10..0
168 and their inverses. */
169
170 /* Make things easier below, by testing the form with msb clear. */
171 if ((int64_t)val < 0) {
172 val = ~val;
173 }
174 if (val == 0) {
175 return false;
176 }
177 val += val & -val;
178 return (val & (val - 1)) == 0;
179 }
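/* Worked example (illustrative): val = 0x0ff0 has the form 0..01..10..0;
   val & -val = 0x0010, so adding it yields 0x1000, a power of two, and
   is_limm returns true.  val = 0x0f0f fails: 0x0f0f + 1 = 0x0f10 still has
   more than one bit set after the low run is cleared.  */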
180
181 static int tcg_target_const_match(tcg_target_long val, TCGType type,
182 const TCGArgConstraint *arg_ct)
183 {
184 int ct = arg_ct->ct;
185
186 if (ct & TCG_CT_CONST) {
187 return 1;
188 }
189 if (type == TCG_TYPE_I32) {
190 val = (int32_t)val;
191 }
192 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
193 return 1;
194 }
195 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
196 return 1;
197 }
198 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
199 return 1;
200 }
201 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
202 return 1;
203 }
204
205 return 0;
206 }
207
208 enum aarch64_cond_code {
209 COND_EQ = 0x0,
210 COND_NE = 0x1,
211 COND_CS = 0x2, /* Unsigned greater or equal */
212 COND_HS = COND_CS, /* ALIAS greater or equal */
213 COND_CC = 0x3, /* Unsigned less than */
214 COND_LO = COND_CC, /* ALIAS Lower */
215 COND_MI = 0x4, /* Negative */
216 COND_PL = 0x5, /* Zero or greater */
217 COND_VS = 0x6, /* Overflow */
218 COND_VC = 0x7, /* No overflow */
219 COND_HI = 0x8, /* Unsigned greater than */
220 COND_LS = 0x9, /* Unsigned less or equal */
221 COND_GE = 0xa,
222 COND_LT = 0xb,
223 COND_GT = 0xc,
224 COND_LE = 0xd,
225 COND_AL = 0xe,
226 COND_NV = 0xf, /* behaves like COND_AL here */
227 };
228
229 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
230 [TCG_COND_EQ] = COND_EQ,
231 [TCG_COND_NE] = COND_NE,
232 [TCG_COND_LT] = COND_LT,
233 [TCG_COND_GE] = COND_GE,
234 [TCG_COND_LE] = COND_LE,
235 [TCG_COND_GT] = COND_GT,
236 /* unsigned */
237 [TCG_COND_LTU] = COND_LO,
238 [TCG_COND_GTU] = COND_HI,
239 [TCG_COND_GEU] = COND_HS,
240 [TCG_COND_LEU] = COND_LS,
241 };
242
243 typedef enum {
244 LDST_ST = 0, /* store */
245 LDST_LD = 1, /* load */
246 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
247 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
248 } AArch64LdstType;
249
250 /* We encode the format of the insn into the beginning of the name, so that
251 we can have the preprocessor help "typecheck" the insn vs the output
252 function. Arm didn't provide us with nice names for the formats, so we
253 use the section number of the architecture reference manual in which the
254 instruction group is described. */
255 typedef enum {
256 /* Compare and branch (immediate). */
257 I3201_CBZ = 0x34000000,
258 I3201_CBNZ = 0x35000000,
259
260 /* Conditional branch (immediate). */
261 I3202_B_C = 0x54000000,
262
263 /* Unconditional branch (immediate). */
264 I3206_B = 0x14000000,
265 I3206_BL = 0x94000000,
266
267 /* Unconditional branch (register). */
268 I3207_BR = 0xd61f0000,
269 I3207_BLR = 0xd63f0000,
270 I3207_RET = 0xd65f0000,
271
272 /* Load literal for loading the address at pc-relative offset */
273 I3305_LDR = 0x58000000,
274 /* Load/store register. Described here as 3.3.12, but the helper
275 that emits them can transform to 3.3.10 or 3.3.13. */
276 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
277 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
278 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
279 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
280
281 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
282 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
283 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
284 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
285
286 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
287 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
288
289 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
290 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
291 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
292
293 I3312_TO_I3310 = 0x00200800,
294 I3312_TO_I3313 = 0x01000000,
295
296 /* Load/store register pair instructions. */
297 I3314_LDP = 0x28400000,
298 I3314_STP = 0x28000000,
299
300 /* Add/subtract immediate instructions. */
301 I3401_ADDI = 0x11000000,
302 I3401_ADDSI = 0x31000000,
303 I3401_SUBI = 0x51000000,
304 I3401_SUBSI = 0x71000000,
305
306 /* Bitfield instructions. */
307 I3402_BFM = 0x33000000,
308 I3402_SBFM = 0x13000000,
309 I3402_UBFM = 0x53000000,
310
311 /* Extract instruction. */
312 I3403_EXTR = 0x13800000,
313
314 /* Logical immediate instructions. */
315 I3404_ANDI = 0x12000000,
316 I3404_ORRI = 0x32000000,
317 I3404_EORI = 0x52000000,
318
319 /* Move wide immediate instructions. */
320 I3405_MOVN = 0x12800000,
321 I3405_MOVZ = 0x52800000,
322 I3405_MOVK = 0x72800000,
323
324 /* PC relative addressing instructions. */
325 I3406_ADR = 0x10000000,
326 I3406_ADRP = 0x90000000,
327
328 /* Add/subtract shifted register instructions (without a shift). */
329 I3502_ADD = 0x0b000000,
330 I3502_ADDS = 0x2b000000,
331 I3502_SUB = 0x4b000000,
332 I3502_SUBS = 0x6b000000,
333
334 /* Add/subtract shifted register instructions (with a shift). */
335 I3502S_ADD_LSL = I3502_ADD,
336
337 /* Add/subtract with carry instructions. */
338 I3503_ADC = 0x1a000000,
339 I3503_SBC = 0x5a000000,
340
341 /* Conditional select instructions. */
342 I3506_CSEL = 0x1a800000,
343 I3506_CSINC = 0x1a800400,
344 I3506_CSINV = 0x5a800000,
345 I3506_CSNEG = 0x5a800400,
346
347 /* Data-processing (1 source) instructions. */
348 I3507_CLZ = 0x5ac01000,
349 I3507_RBIT = 0x5ac00000,
350 I3507_REV16 = 0x5ac00400,
351 I3507_REV32 = 0x5ac00800,
352 I3507_REV64 = 0x5ac00c00,
353
354 /* Data-processing (2 source) instructions. */
355 I3508_LSLV = 0x1ac02000,
356 I3508_LSRV = 0x1ac02400,
357 I3508_ASRV = 0x1ac02800,
358 I3508_RORV = 0x1ac02c00,
359 I3508_SMULH = 0x9b407c00,
360 I3508_UMULH = 0x9bc07c00,
361 I3508_UDIV = 0x1ac00800,
362 I3508_SDIV = 0x1ac00c00,
363
364 /* Data-processing (3 source) instructions. */
365 I3509_MADD = 0x1b000000,
366 I3509_MSUB = 0x1b008000,
367
368 /* Logical shifted register instructions (without a shift). */
369 I3510_AND = 0x0a000000,
370 I3510_BIC = 0x0a200000,
371 I3510_ORR = 0x2a000000,
372 I3510_ORN = 0x2a200000,
373 I3510_EOR = 0x4a000000,
374 I3510_EON = 0x4a200000,
375 I3510_ANDS = 0x6a000000,
376
377 NOP = 0xd503201f,
378 /* System instructions. */
379 DMB_ISH = 0xd50338bf,
380 DMB_LD = 0x00000100,
381 DMB_ST = 0x00000200,
382 } AArch64Insn;
383
384 static inline uint32_t tcg_in32(TCGContext *s)
385 {
386 uint32_t v = *(uint32_t *)s->code_ptr;
387 return v;
388 }
389
390 /* Emit an opcode with "type-checking" of the format. */
391 #define tcg_out_insn(S, FMT, OP, ...) \
392 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
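/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm), so pairing an opcode
   with the wrong emitter format fails to compile.  */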
393
394 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
395 {
396 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
397 }
398
399 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
400 TCGReg rt, int imm19)
401 {
402 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
403 }
404
405 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
406 TCGCond c, int imm19)
407 {
408 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
409 }
410
411 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
412 {
413 tcg_out32(s, insn | (imm26 & 0x03ffffff));
414 }
415
416 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
417 {
418 tcg_out32(s, insn | rn << 5);
419 }
420
421 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
422 TCGReg r1, TCGReg r2, TCGReg rn,
423 tcg_target_long ofs, bool pre, bool w)
424 {
425 insn |= 1u << 31; /* ext */
426 insn |= pre << 24;
427 insn |= w << 23;
428
429 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
430 insn |= (ofs & (0x7f << 3)) << (15 - 3);
431
432 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
433 }
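/* The scaled imm7 field covers byte offsets in [-512, +504] in steps of 8;
   e.g. ofs = -16 is encoded as imm7 = 0x7e (i.e. -2 doublewords).  */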
434
435 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
436 TCGReg rd, TCGReg rn, uint64_t aimm)
437 {
438 if (aimm > 0xfff) {
439 tcg_debug_assert((aimm & 0xfff) == 0);
440 aimm >>= 12;
441 tcg_debug_assert(aimm <= 0xfff);
442 aimm |= 1 << 12; /* apply LSL 12 */
443 }
444 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
445 }
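/* E.g. aimm = 0x45000 is emitted as imm12 = 0x45 with the LSL #12 flag set;
   values that are not a possibly-shifted 12-bit immediate trip the asserts.  */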
446
447 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
448 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
449 that feed the DecodeBitMasks pseudo function. */
450 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
451 TCGReg rd, TCGReg rn, int n, int immr, int imms)
452 {
453 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
454 | rn << 5 | rd);
455 }
456
457 #define tcg_out_insn_3404 tcg_out_insn_3402
458
459 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
460 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
461 {
462 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
463 | rn << 5 | rd);
464 }
465
466 /* This function is used for the Move (wide immediate) instruction group.
467 Note that SHIFT is a full shift count, not the 2 bit HW field. */
468 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
469 TCGReg rd, uint16_t half, unsigned shift)
470 {
471 tcg_debug_assert((shift & ~0x30) == 0);
472 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
473 }
474
475 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
476 TCGReg rd, int64_t disp)
477 {
478 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
479 }
480
481 /* This function is for 3.5.2 (Add/Subtract shifted register), for the
482 rare occasion when we actually want to supply a shift amount. */
483 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
484 TCGType ext, TCGReg rd, TCGReg rn,
485 TCGReg rm, int imm6)
486 {
487 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
488 }
489
490 /* This function is for 3.5.2 (Add/subtract shifted register),
491 and 3.5.10 (Logical shifted register), for the vast majority of cases
492 when we don't want to apply a shift. Thus it can also be used for
493 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
494 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
495 TCGReg rd, TCGReg rn, TCGReg rm)
496 {
497 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
498 }
499
500 #define tcg_out_insn_3503 tcg_out_insn_3502
501 #define tcg_out_insn_3508 tcg_out_insn_3502
502 #define tcg_out_insn_3510 tcg_out_insn_3502
503
504 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
505 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
506 {
507 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
508 | tcg_cond_to_aarch64[c] << 12);
509 }
510
511 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
512 TCGReg rd, TCGReg rn)
513 {
514 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
515 }
516
517 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
518 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
519 {
520 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
521 }
522
523 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
524 TCGReg rd, TCGReg base, TCGType ext,
525 TCGReg regoff)
526 {
527 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
528 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
529 0x4000 | ext << 13 | base << 5 | rd);
530 }
531
532 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
533 TCGReg rd, TCGReg rn, intptr_t offset)
534 {
535 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
536 }
537
538 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
539 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
540 {
541 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
542 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
543 }
544
545 /* Register to register move using ORR (shifted register with no shift). */
546 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
547 {
548 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
549 }
550
551 /* Register to register move using ADDI (move to/from SP). */
552 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
553 {
554 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
555 }
556
557 /* This function is used for the Logical (immediate) instruction group.
558 The value of LIMM must satisfy IS_LIMM. See the comment above about
559 only supporting simplified logical immediates. */
560 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
561 TCGReg rd, TCGReg rn, uint64_t limm)
562 {
563 unsigned h, l, r, c;
564
565 tcg_debug_assert(is_limm(limm));
566
567 h = clz64(limm);
568 l = ctz64(limm);
569 if (l == 0) {
570 r = 0; /* form 0....01....1 */
571 c = ctz64(~limm) - 1;
572 if (h == 0) {
573 r = clz64(~limm); /* form 1..10..01..1 */
574 c += r;
575 }
576 } else {
577 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
578 c = r - h - 1;
579 }
580 if (ext == TCG_TYPE_I32) {
581 r &= 31;
582 c &= 31;
583 }
584
585 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
586 }
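/* Worked example (illustrative): limm = 0x0000000000ff0000 with ext ==
   TCG_TYPE_I64 gives h = 40 and l = 16, hence r = 48 and c = 7: an 8-bit
   run of ones rotated right by 48 within the 64-bit element, i.e. 0xff0000.  */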
587
588 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
589 tcg_target_long value)
590 {
591 int i, wantinv, shift;
592 tcg_target_long svalue = value;
593 tcg_target_long ivalue = ~value;
594
595 /* For 32-bit values, discard potential garbage in value. For 64-bit
596 values within [2**31, 2**32-1], we can create smaller sequences by
597 interpreting this as a negative 32-bit number, while ensuring that
598 the high 32 bits are cleared by setting SF=0. */
599 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
600 svalue = (int32_t)value;
601 value = (uint32_t)value;
602 ivalue = (uint32_t)ivalue;
603 type = TCG_TYPE_I32;
604 }
605
606 /* Speed things up by handling the common case of small positive
607 and negative values specially. */
608 if ((value & ~0xffffull) == 0) {
609 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
610 return;
611 } else if ((ivalue & ~0xffffull) == 0) {
612 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
613 return;
614 }
615
616 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
617 use the sign-extended value. That lets us match rotated values such
618 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
619 if (is_limm(svalue)) {
620 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
621 return;
622 }
623
624 /* Look for host pointer values within 4G of the PC. This happens
625 often when loading pointers to QEMU's own data structures. */
626 if (type == TCG_TYPE_I64) {
627 tcg_target_long disp = value - (intptr_t)s->code_ptr;
628 if (disp == sextract64(disp, 0, 21)) {
629 tcg_out_insn(s, 3406, ADR, rd, disp);
630 return;
631 }
632 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
633 if (disp == sextract64(disp, 0, 21)) {
634 tcg_out_insn(s, 3406, ADRP, rd, disp);
635 if (value & 0xfff) {
636 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
637 }
638 return;
639 }
640 }
641
642 /* Would it take fewer insns to begin with MOVN? For the value and its
643 inverse, count the number of 16-bit lanes that are 0. */
644 for (i = wantinv = 0; i < 64; i += 16) {
645 tcg_target_long mask = 0xffffull << i;
646 wantinv -= ((value & mask) == 0);
647 wantinv += ((ivalue & mask) == 0);
648 }
649
650 if (wantinv <= 0) {
651 /* Find the lowest lane that is not 0x0000. */
652 shift = ctz64(value) & (63 & -16);
653 tcg_out_insn(s, 3405, MOVZ, type, rd, value >> shift, shift);
654 /* Clear out the lane that we just set. */
655 value &= ~(0xffffUL << shift);
656 /* Iterate until all non-zero lanes have been processed. */
657 while (value) {
658 shift = ctz64(value) & (63 & -16);
659 tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
660 value &= ~(0xffffUL << shift);
661 }
662 } else {
663 /* Like above, but with the inverted value and MOVN to start. */
664 shift = ctz64(ivalue) & (63 & -16);
665 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue >> shift, shift);
666 ivalue &= ~(0xffffUL << shift);
667 while (ivalue) {
668 shift = ctz64(ivalue) & (63 & -16);
669 /* Provide MOVK with the non-inverted value. */
670 tcg_out_insn(s, 3405, MOVK, type, rd, ~(ivalue >> shift), shift);
671 ivalue &= ~(0xffffUL << shift);
672 }
673 }
674 }
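/* Worked example (illustrative): value = 0x12340000ffff5678 has one zero
   16-bit lane and so does its inverse, so wantinv == 0 and we emit
   MOVZ rd, #0x5678; MOVK rd, #0xffff, lsl #16; MOVK rd, #0x1234, lsl #48.  */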
675
676 /* Define something more legible for general use. */
677 #define tcg_out_ldst_r tcg_out_insn_3310
678
679 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
680 TCGReg rd, TCGReg rn, intptr_t offset)
681 {
682 TCGMemOp size = (uint32_t)insn >> 30;
683
684 /* If the offset is naturally aligned and in range, then we can
685 use the scaled uimm12 encoding */
686 if (offset >= 0 && !(offset & ((1 << size) - 1))) {
687 uintptr_t scaled_uimm = offset >> size;
688 if (scaled_uimm <= 0xfff) {
689 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
690 return;
691 }
692 }
693
694 /* Small signed offsets can use the unscaled encoding. */
695 if (offset >= -256 && offset < 256) {
696 tcg_out_insn_3312(s, insn, rd, rn, offset);
697 return;
698 }
699
700 /* Worst-case scenario, move offset to temp register, use reg offset. */
701 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
702 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
703 }
704
705 static inline void tcg_out_mov(TCGContext *s,
706 TCGType type, TCGReg ret, TCGReg arg)
707 {
708 if (ret != arg) {
709 tcg_out_movr(s, type, ret, arg);
710 }
711 }
712
713 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
714 TCGReg arg1, intptr_t arg2)
715 {
716 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
717 arg, arg1, arg2);
718 }
719
720 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
721 TCGReg arg1, intptr_t arg2)
722 {
723 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
724 arg, arg1, arg2);
725 }
726
727 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
728 TCGReg base, intptr_t ofs)
729 {
730 if (val == 0) {
731 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
732 return true;
733 }
734 return false;
735 }
736
737 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
738 TCGReg rn, unsigned int a, unsigned int b)
739 {
740 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
741 }
742
743 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
744 TCGReg rn, unsigned int a, unsigned int b)
745 {
746 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
747 }
748
749 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
750 TCGReg rn, unsigned int a, unsigned int b)
751 {
752 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
753 }
754
755 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
756 TCGReg rn, TCGReg rm, unsigned int a)
757 {
758 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
759 }
760
761 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
762 TCGReg rd, TCGReg rn, unsigned int m)
763 {
764 int bits = ext ? 64 : 32;
765 int max = bits - 1;
766 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
767 }
768
769 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
770 TCGReg rd, TCGReg rn, unsigned int m)
771 {
772 int max = ext ? 63 : 31;
773 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
774 }
775
776 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
777 TCGReg rd, TCGReg rn, unsigned int m)
778 {
779 int max = ext ? 63 : 31;
780 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
781 }
782
783 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
784 TCGReg rd, TCGReg rn, unsigned int m)
785 {
786 int max = ext ? 63 : 31;
787 tcg_out_extr(s, ext, rd, rn, rn, m & max);
788 }
789
790 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
791 TCGReg rd, TCGReg rn, unsigned int m)
792 {
793 int bits = ext ? 64 : 32;
794 int max = bits - 1;
795 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
796 }
797
798 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
799 TCGReg rn, unsigned lsb, unsigned width)
800 {
801 unsigned size = ext ? 64 : 32;
802 unsigned a = (size - lsb) & (size - 1);
803 unsigned b = width - 1;
804 tcg_out_bfm(s, ext, rd, rn, a, b);
805 }
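/* This is the BFI alias: e.g. a 32-bit deposit with lsb = 8, width = 8
   becomes BFM Wd, Wn, #24, #7, which is BFI Wd, Wn, #8, #8.  */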
806
807 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
808 tcg_target_long b, bool const_b)
809 {
810 if (const_b) {
811 /* Using CMP or CMN aliases. */
812 if (b >= 0) {
813 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
814 } else {
815 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
816 }
817 } else {
818 /* Using CMP alias SUBS wzr, Wn, Wm */
819 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
820 }
821 }
822
823 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
824 {
825 ptrdiff_t offset = target - s->code_ptr;
826 tcg_debug_assert(offset == sextract64(offset, 0, 26));
827 tcg_out_insn(s, 3206, B, offset);
828 }
829
830 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
831 {
832 ptrdiff_t offset = target - s->code_ptr;
833 if (offset == sextract64(offset, 0, 26)) {
834 tcg_out_insn(s, 3206, B, offset);
835 } else {
836 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
837 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
838 }
839 }
840
841 static inline void tcg_out_goto_noaddr(TCGContext *s)
842 {
843 /* We pay attention here to not modify the branch target by reading from
844 the buffer. This ensures that caches and memory are kept coherent during
845 retranslation. Mask away possible garbage in the high bits for the
846 first translation, while keeping the offset bits for retranslation. */
847 uint32_t old = tcg_in32(s);
848 tcg_out_insn(s, 3206, B, old);
849 }
850
851 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
852 {
853 /* See comments in tcg_out_goto_noaddr. */
854 uint32_t old = tcg_in32(s) >> 5;
855 tcg_out_insn(s, 3202, B_C, c, old);
856 }
857
858 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
859 {
860 tcg_out_insn(s, 3207, BLR, reg);
861 }
862
863 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
864 {
865 ptrdiff_t offset = target - s->code_ptr;
866 if (offset == sextract64(offset, 0, 26)) {
867 tcg_out_insn(s, 3206, BL, offset);
868 } else {
869 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
870 tcg_out_callr(s, TCG_REG_TMP);
871 }
872 }
873
874 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
875 uintptr_t addr)
876 {
877 tcg_insn_unit i1, i2;
878 TCGType rt = TCG_TYPE_I64;
879 TCGReg rd = TCG_REG_TMP;
880 uint64_t pair;
881
882 ptrdiff_t offset = addr - jmp_addr;
883
884 if (offset == sextract64(offset, 0, 26)) {
885 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
886 i2 = NOP;
887 } else {
888 offset = (addr >> 12) - (jmp_addr >> 12);
889
890 /* patch ADRP */
891 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
892 /* patch ADDI */
893 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
894 }
895 pair = (uint64_t)i2 << 32 | i1;
896 atomic_set((uint64_t *)jmp_addr, pair);
897 flush_icache_range(jmp_addr, jmp_addr + 8);
898 }
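/* Both instructions of the goto_tb slot are replaced by one aligned 64-bit
   store, so a concurrently executing vCPU sees either the old pair or the
   new pair, never a mix of the two.  */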
899
900 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
901 {
902 if (!l->has_value) {
903 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
904 tcg_out_goto_noaddr(s);
905 } else {
906 tcg_out_goto(s, l->u.value_ptr);
907 }
908 }
909
910 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
911 TCGArg b, bool b_const, TCGLabel *l)
912 {
913 intptr_t offset;
914 bool need_cmp;
915
916 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
917 need_cmp = false;
918 } else {
919 need_cmp = true;
920 tcg_out_cmp(s, ext, a, b, b_const);
921 }
922
923 if (!l->has_value) {
924 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
925 offset = tcg_in32(s) >> 5;
926 } else {
927 offset = l->u.value_ptr - s->code_ptr;
928 tcg_debug_assert(offset == sextract64(offset, 0, 19));
929 }
930
931 if (need_cmp) {
932 tcg_out_insn(s, 3202, B_C, c, offset);
933 } else if (c == TCG_COND_EQ) {
934 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
935 } else {
936 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
937 }
938 }
939
940 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
941 {
942 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
943 }
944
945 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
946 {
947 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
948 }
949
950 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
951 {
952 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
953 }
954
955 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
956 TCGReg rd, TCGReg rn)
957 {
958 /* Using ALIASes SXTB, SXTH, SXTW of SBFM Xd, Xn, #0, #7|15|31 */
959 int bits = (8 << s_bits) - 1;
960 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
961 }
962
963 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
964 TCGReg rd, TCGReg rn)
965 {
966 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
967 int bits = (8 << s_bits) - 1;
968 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
969 }
970
971 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
972 TCGReg rn, int64_t aimm)
973 {
974 if (aimm >= 0) {
975 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
976 } else {
977 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
978 }
979 }
980
981 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
982 TCGReg rh, TCGReg al, TCGReg ah,
983 tcg_target_long bl, tcg_target_long bh,
984 bool const_bl, bool const_bh, bool sub)
985 {
986 TCGReg orig_rl = rl;
987 AArch64Insn insn;
988
989 if (rl == ah || (!const_bh && rl == bh)) {
990 rl = TCG_REG_TMP;
991 }
992
993 if (const_bl) {
994 insn = I3401_ADDSI;
995 if ((bl < 0) ^ sub) {
996 insn = I3401_SUBSI;
997 bl = -bl;
998 }
999 if (unlikely(al == TCG_REG_XZR)) {
1000 /* ??? We want to allow al to be zero for the benefit of
1001 negation via subtraction. However, that leaves open the
1002 possibility of adding 0+const in the low part, and the
1003 immediate add instructions encode XSP not XZR. Don't try
1004 anything more elaborate here than loading another zero. */
1005 al = TCG_REG_TMP;
1006 tcg_out_movi(s, ext, al, 0);
1007 }
1008 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1009 } else {
1010 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1011 }
1012
1013 insn = I3503_ADC;
1014 if (const_bh) {
1015 /* Note that the only two constants we support are 0 and -1, and
1016 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1017 if ((bh != 0) ^ sub) {
1018 insn = I3503_SBC;
1019 }
1020 bh = TCG_REG_XZR;
1021 } else if (sub) {
1022 insn = I3503_SBC;
1023 }
1024 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1025
1026 tcg_out_mov(s, ext, orig_rl, rl);
1027 }
1028
1029 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1030 {
1031 static const uint32_t sync[] = {
1032 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1033 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1034 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1035 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1036 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1037 };
1038 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1039 }
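/* DMB ISHLD orders earlier loads against later loads and stores, so it
   covers both the LD_LD and LD_ST cases; everything else falls back to the
   full DMB ISH entry.  */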
1040
1041 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1042 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1043 {
1044 TCGReg a1 = a0;
1045 if (is_ctz) {
1046 a1 = TCG_REG_TMP;
1047 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1048 }
1049 if (const_b && b == (ext ? 64 : 32)) {
1050 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1051 } else {
1052 AArch64Insn sel = I3506_CSEL;
1053
1054 tcg_out_cmp(s, ext, a0, 0, 1);
1055 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1056
1057 if (const_b) {
1058 if (b == -1) {
1059 b = TCG_REG_XZR;
1060 sel = I3506_CSINV;
1061 } else if (b == 0) {
1062 b = TCG_REG_XZR;
1063 } else {
1064 tcg_out_movi(s, ext, d, b);
1065 b = d;
1066 }
1067 }
1068 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1069 }
1070 }
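/* ctz is computed as clz(rbit(x)); the CSEL/CSINV path substitutes the
   fallback value B when the input is zero, since CLZ of zero returns the
   operation width.  */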
1071
1072 #ifdef CONFIG_SOFTMMU
1073 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1074 * TCGMemOpIdx oi, uintptr_t ra)
1075 */
1076 static void * const qemu_ld_helpers[16] = {
1077 [MO_UB] = helper_ret_ldub_mmu,
1078 [MO_LEUW] = helper_le_lduw_mmu,
1079 [MO_LEUL] = helper_le_ldul_mmu,
1080 [MO_LEQ] = helper_le_ldq_mmu,
1081 [MO_BEUW] = helper_be_lduw_mmu,
1082 [MO_BEUL] = helper_be_ldul_mmu,
1083 [MO_BEQ] = helper_be_ldq_mmu,
1084 };
1085
1086 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1087 * uintxx_t val, TCGMemOpIdx oi,
1088 * uintptr_t ra)
1089 */
1090 static void * const qemu_st_helpers[16] = {
1091 [MO_UB] = helper_ret_stb_mmu,
1092 [MO_LEUW] = helper_le_stw_mmu,
1093 [MO_LEUL] = helper_le_stl_mmu,
1094 [MO_LEQ] = helper_le_stq_mmu,
1095 [MO_BEUW] = helper_be_stw_mmu,
1096 [MO_BEUL] = helper_be_stl_mmu,
1097 [MO_BEQ] = helper_be_stq_mmu,
1098 };
1099
1100 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1101 {
1102 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1103 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1104 tcg_out_insn(s, 3406, ADR, rd, offset);
1105 }
1106
1107 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1108 {
1109 TCGMemOpIdx oi = lb->oi;
1110 TCGMemOp opc = get_memop(oi);
1111 TCGMemOp size = opc & MO_SIZE;
1112
1113 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1114
1115 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1116 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1117 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1118 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1119 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1120 if (opc & MO_SIGN) {
1121 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1122 } else {
1123 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1124 }
1125
1126 tcg_out_goto(s, lb->raddr);
1127 }
1128
1129 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1130 {
1131 TCGMemOpIdx oi = lb->oi;
1132 TCGMemOp opc = get_memop(oi);
1133 TCGMemOp size = opc & MO_SIZE;
1134
1135 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1136
1137 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1138 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1139 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1140 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1141 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1142 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1143 tcg_out_goto(s, lb->raddr);
1144 }
1145
1146 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1147 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1148 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1149 {
1150 TCGLabelQemuLdst *label = new_ldst_label(s);
1151
1152 label->is_ld = is_ld;
1153 label->oi = oi;
1154 label->type = ext;
1155 label->datalo_reg = data_reg;
1156 label->addrlo_reg = addr_reg;
1157 label->raddr = raddr;
1158 label->label_ptr[0] = label_ptr;
1159 }
1160
1161 /* Load and compare a TLB entry, emitting the conditional jump to the
1162 slow path for the failure case, which will be patched later when finalizing
1163 the slow path. Generated code returns the host addend in X1,
1164 clobbers X0,X2,X3,TMP. */
1165 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1166 tcg_insn_unit **label_ptr, int mem_index,
1167 bool is_read)
1168 {
1169 int tlb_offset = is_read ?
1170 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1171 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1172 unsigned a_bits = get_alignment_bits(opc);
1173 unsigned s_bits = opc & MO_SIZE;
1174 unsigned a_mask = (1u << a_bits) - 1;
1175 unsigned s_mask = (1u << s_bits) - 1;
1176 TCGReg base = TCG_AREG0, x3;
1177 uint64_t tlb_mask;
1178
1179 /* For aligned accesses, we check the first byte and include the alignment
1180 bits within the address. For unaligned access, we check that we don't
1181 cross pages using the address of the last byte of the access. */
1182 if (a_bits >= s_bits) {
1183 x3 = addr_reg;
1184 } else {
1185 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1186 TCG_REG_X3, addr_reg, s_mask - a_mask);
1187 x3 = TCG_REG_X3;
1188 }
1189 tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1190
1191 /* Extract the TLB index from the address into X0.
1192 X0<CPU_TLB_BITS:0> =
1193 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1194 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1195 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1196
1197 /* Store the page mask part of the address into X3. */
1198 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1199 TCG_REG_X3, x3, tlb_mask);
1200
1201 /* Add any "high bits" from the tlb offset to the env address into X2,
1202 to take advantage of the LSL12 form of the ADDI instruction.
1203 X2 = env + (tlb_offset & 0xfff000) */
1204 if (tlb_offset & 0xfff000) {
1205 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1206 tlb_offset & 0xfff000);
1207 base = TCG_REG_X2;
1208 }
1209
1210 /* Merge the tlb index contribution into X2.
1211 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1212 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1213 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1214
1215 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1216 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1217 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1218 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
1219
1220 /* Load the tlb addend. Do that early to avoid stalling.
1221 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1222 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1223 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1224 (is_read ? offsetof(CPUTLBEntry, addr_read)
1225 : offsetof(CPUTLBEntry, addr_write)));
1226
1227 /* Perform the address comparison. */
1228 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1229
1230 /* If not equal, we jump to the slow path. */
1231 *label_ptr = s->code_ptr;
1232 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1233 }
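/* The emitted fast path is thus: UBFM (TLB index), ANDI (page/alignment
   bits), optional ADDI (high part of the TLB offset), ADD LSL (index scaled
   by the entry size), LDR (comparator into X0), LDR (addend into X1), CMP,
   and a B.cond NE to the slow path.  */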
1234
1235 #endif /* CONFIG_SOFTMMU */
1236
1237 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1238 TCGReg data_r, TCGReg addr_r,
1239 TCGType otype, TCGReg off_r)
1240 {
1241 const TCGMemOp bswap = memop & MO_BSWAP;
1242
1243 switch (memop & MO_SSIZE) {
1244 case MO_UB:
1245 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1246 break;
1247 case MO_SB:
1248 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1249 data_r, addr_r, otype, off_r);
1250 break;
1251 case MO_UW:
1252 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1253 if (bswap) {
1254 tcg_out_rev16(s, data_r, data_r);
1255 }
1256 break;
1257 case MO_SW:
1258 if (bswap) {
1259 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1260 tcg_out_rev16(s, data_r, data_r);
1261 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1262 } else {
1263 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1264 data_r, addr_r, otype, off_r);
1265 }
1266 break;
1267 case MO_UL:
1268 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1269 if (bswap) {
1270 tcg_out_rev32(s, data_r, data_r);
1271 }
1272 break;
1273 case MO_SL:
1274 if (bswap) {
1275 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1276 tcg_out_rev32(s, data_r, data_r);
1277 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1278 } else {
1279 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1280 }
1281 break;
1282 case MO_Q:
1283 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1284 if (bswap) {
1285 tcg_out_rev64(s, data_r, data_r);
1286 }
1287 break;
1288 default:
1289 tcg_abort();
1290 }
1291 }
1292
1293 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1294 TCGReg data_r, TCGReg addr_r,
1295 TCGType otype, TCGReg off_r)
1296 {
1297 const TCGMemOp bswap = memop & MO_BSWAP;
1298
1299 switch (memop & MO_SIZE) {
1300 case MO_8:
1301 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1302 break;
1303 case MO_16:
1304 if (bswap && data_r != TCG_REG_XZR) {
1305 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1306 data_r = TCG_REG_TMP;
1307 }
1308 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1309 break;
1310 case MO_32:
1311 if (bswap && data_r != TCG_REG_XZR) {
1312 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1313 data_r = TCG_REG_TMP;
1314 }
1315 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1316 break;
1317 case MO_64:
1318 if (bswap && data_r != TCG_REG_XZR) {
1319 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1320 data_r = TCG_REG_TMP;
1321 }
1322 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1323 break;
1324 default:
1325 tcg_abort();
1326 }
1327 }
1328
1329 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1330 TCGMemOpIdx oi, TCGType ext)
1331 {
1332 TCGMemOp memop = get_memop(oi);
1333 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1334 #ifdef CONFIG_SOFTMMU
1335 unsigned mem_index = get_mmuidx(oi);
1336 tcg_insn_unit *label_ptr;
1337
1338 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1339 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1340 TCG_REG_X1, otype, addr_reg);
1341 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1342 s->code_ptr, label_ptr);
1343 #else /* !CONFIG_SOFTMMU */
1344 if (USE_GUEST_BASE) {
1345 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1346 TCG_REG_GUEST_BASE, otype, addr_reg);
1347 } else {
1348 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1349 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1350 }
1351 #endif /* CONFIG_SOFTMMU */
1352 }
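/* With softmmu, the TLB fast path above leaves the host page addend in X1,
   so the direct load is issued as [X1 + guest address]; a TLB miss reaches
   the slow path through the label recorded by add_qemu_ldst_label.  */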
1353
1354 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1355 TCGMemOpIdx oi)
1356 {
1357 TCGMemOp memop = get_memop(oi);
1358 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1359 #ifdef CONFIG_SOFTMMU
1360 unsigned mem_index = get_mmuidx(oi);
1361 tcg_insn_unit *label_ptr;
1362
1363 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1364 tcg_out_qemu_st_direct(s, memop, data_reg,
1365 TCG_REG_X1, otype, addr_reg);
1366 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1367 data_reg, addr_reg, s->code_ptr, label_ptr);
1368 #else /* !CONFIG_SOFTMMU */
1369 if (USE_GUEST_BASE) {
1370 tcg_out_qemu_st_direct(s, memop, data_reg,
1371 TCG_REG_GUEST_BASE, otype, addr_reg);
1372 } else {
1373 tcg_out_qemu_st_direct(s, memop, data_reg,
1374 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1375 }
1376 #endif /* CONFIG_SOFTMMU */
1377 }
1378
1379 static tcg_insn_unit *tb_ret_addr;
1380
1381 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1382 const TCGArg args[TCG_MAX_OP_ARGS],
1383 const int const_args[TCG_MAX_OP_ARGS])
1384 {
1385 /* 99% of the time, we can signal the use of extension registers
1386 by looking to see if the opcode handles 64-bit data. */
1387 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1388
1389 /* Hoist the loads of the most common arguments. */
1390 TCGArg a0 = args[0];
1391 TCGArg a1 = args[1];
1392 TCGArg a2 = args[2];
1393 int c2 = const_args[2];
1394
1395 /* Some operands are defined with "rZ" constraint, a register or
1396 the zero register. These need not actually test args[I] == 0. */
1397 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1398
1399 switch (opc) {
1400 case INDEX_op_exit_tb:
1401 /* Reuse the zeroing that exists for goto_ptr. */
1402 if (a0 == 0) {
1403 tcg_out_goto_long(s, s->code_gen_epilogue);
1404 } else {
1405 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1406 tcg_out_goto_long(s, tb_ret_addr);
1407 }
1408 break;
1409
1410 case INDEX_op_goto_tb:
1411 if (s->tb_jmp_insn_offset != NULL) {
1412 /* TCG_TARGET_HAS_direct_jump */
1413 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1414 write can be used to patch the target address. */
1415 if ((uintptr_t)s->code_ptr & 7) {
1416 tcg_out32(s, NOP);
1417 }
1418 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1419 /* actual branch destination will be patched by
1420 tb_target_set_jmp_target later. */
1421 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1422 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1423 } else {
1424 /* !TCG_TARGET_HAS_direct_jump */
1425 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1426 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1427 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1428 }
1429 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1430 s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
1431 break;
1432
1433 case INDEX_op_goto_ptr:
1434 tcg_out_insn(s, 3207, BR, a0);
1435 break;
1436
1437 case INDEX_op_br:
1438 tcg_out_goto_label(s, arg_label(a0));
1439 break;
1440
1441 case INDEX_op_ld8u_i32:
1442 case INDEX_op_ld8u_i64:
1443 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
1444 break;
1445 case INDEX_op_ld8s_i32:
1446 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
1447 break;
1448 case INDEX_op_ld8s_i64:
1449 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
1450 break;
1451 case INDEX_op_ld16u_i32:
1452 case INDEX_op_ld16u_i64:
1453 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
1454 break;
1455 case INDEX_op_ld16s_i32:
1456 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
1457 break;
1458 case INDEX_op_ld16s_i64:
1459 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
1460 break;
1461 case INDEX_op_ld_i32:
1462 case INDEX_op_ld32u_i64:
1463 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
1464 break;
1465 case INDEX_op_ld32s_i64:
1466 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
1467 break;
1468 case INDEX_op_ld_i64:
1469 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
1470 break;
1471
1472 case INDEX_op_st8_i32:
1473 case INDEX_op_st8_i64:
1474 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
1475 break;
1476 case INDEX_op_st16_i32:
1477 case INDEX_op_st16_i64:
1478 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
1479 break;
1480 case INDEX_op_st_i32:
1481 case INDEX_op_st32_i64:
1482 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
1483 break;
1484 case INDEX_op_st_i64:
1485 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
1486 break;
1487
1488 case INDEX_op_add_i32:
1489 a2 = (int32_t)a2;
1490 /* FALLTHRU */
1491 case INDEX_op_add_i64:
1492 if (c2) {
1493 tcg_out_addsubi(s, ext, a0, a1, a2);
1494 } else {
1495 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1496 }
1497 break;
1498
1499 case INDEX_op_sub_i32:
1500 a2 = (int32_t)a2;
1501 /* FALLTHRU */
1502 case INDEX_op_sub_i64:
1503 if (c2) {
1504 tcg_out_addsubi(s, ext, a0, a1, -a2);
1505 } else {
1506 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1507 }
1508 break;
1509
1510 case INDEX_op_neg_i64:
1511 case INDEX_op_neg_i32:
1512 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1513 break;
1514
1515 case INDEX_op_and_i32:
1516 a2 = (int32_t)a2;
1517 /* FALLTHRU */
1518 case INDEX_op_and_i64:
1519 if (c2) {
1520 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1521 } else {
1522 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1523 }
1524 break;
1525
1526 case INDEX_op_andc_i32:
1527 a2 = (int32_t)a2;
1528 /* FALLTHRU */
1529 case INDEX_op_andc_i64:
1530 if (c2) {
1531 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1532 } else {
1533 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1534 }
1535 break;
1536
1537 case INDEX_op_or_i32:
1538 a2 = (int32_t)a2;
1539 /* FALLTHRU */
1540 case INDEX_op_or_i64:
1541 if (c2) {
1542 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1543 } else {
1544 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1545 }
1546 break;
1547
1548 case INDEX_op_orc_i32:
1549 a2 = (int32_t)a2;
1550 /* FALLTHRU */
1551 case INDEX_op_orc_i64:
1552 if (c2) {
1553 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1554 } else {
1555 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1556 }
1557 break;
1558
1559 case INDEX_op_xor_i32:
1560 a2 = (int32_t)a2;
1561 /* FALLTHRU */
1562 case INDEX_op_xor_i64:
1563 if (c2) {
1564 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1565 } else {
1566 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1567 }
1568 break;
1569
1570 case INDEX_op_eqv_i32:
1571 a2 = (int32_t)a2;
1572 /* FALLTHRU */
1573 case INDEX_op_eqv_i64:
1574 if (c2) {
1575 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1576 } else {
1577 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1578 }
1579 break;
1580
1581 case INDEX_op_not_i64:
1582 case INDEX_op_not_i32:
1583 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1584 break;
1585
1586 case INDEX_op_mul_i64:
1587 case INDEX_op_mul_i32:
1588 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1589 break;
1590
1591 case INDEX_op_div_i64:
1592 case INDEX_op_div_i32:
1593 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1594 break;
1595 case INDEX_op_divu_i64:
1596 case INDEX_op_divu_i32:
1597 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1598 break;
1599
1600 case INDEX_op_rem_i64:
1601 case INDEX_op_rem_i32:
1602 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1603 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1604 break;
1605 case INDEX_op_remu_i64:
1606 case INDEX_op_remu_i32:
1607 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1608 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1609 break;
1610
1611 case INDEX_op_shl_i64:
1612 case INDEX_op_shl_i32:
1613 if (c2) {
1614 tcg_out_shl(s, ext, a0, a1, a2);
1615 } else {
1616 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1617 }
1618 break;
1619
1620 case INDEX_op_shr_i64:
1621 case INDEX_op_shr_i32:
1622 if (c2) {
1623 tcg_out_shr(s, ext, a0, a1, a2);
1624 } else {
1625 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1626 }
1627 break;
1628
1629 case INDEX_op_sar_i64:
1630 case INDEX_op_sar_i32:
1631 if (c2) {
1632 tcg_out_sar(s, ext, a0, a1, a2);
1633 } else {
1634 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1635 }
1636 break;
1637
1638 case INDEX_op_rotr_i64:
1639 case INDEX_op_rotr_i32:
1640 if (c2) {
1641 tcg_out_rotr(s, ext, a0, a1, a2);
1642 } else {
1643 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1644 }
1645 break;
1646
1647 case INDEX_op_rotl_i64:
1648 case INDEX_op_rotl_i32:
1649 if (c2) {
1650 tcg_out_rotl(s, ext, a0, a1, a2);
1651 } else {
1652 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1653 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1654 }
1655 break;
1656
1657 case INDEX_op_clz_i64:
1658 case INDEX_op_clz_i32:
1659 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1660 break;
1661 case INDEX_op_ctz_i64:
1662 case INDEX_op_ctz_i32:
1663 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1664 break;
1665
1666 case INDEX_op_brcond_i32:
1667 a1 = (int32_t)a1;
1668 /* FALLTHRU */
1669 case INDEX_op_brcond_i64:
1670 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1671 break;
1672
1673 case INDEX_op_setcond_i32:
1674 a2 = (int32_t)a2;
1675 /* FALLTHRU */
1676 case INDEX_op_setcond_i64:
1677 tcg_out_cmp(s, ext, a1, a2, c2);
1678 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1679 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1680 TCG_REG_XZR, tcg_invert_cond(args[3]));
1681 break;
1682
1683 case INDEX_op_movcond_i32:
1684 a2 = (int32_t)a2;
1685 /* FALLTHRU */
1686 case INDEX_op_movcond_i64:
1687 tcg_out_cmp(s, ext, a1, a2, c2);
1688 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1689 break;
1690
1691 case INDEX_op_qemu_ld_i32:
1692 case INDEX_op_qemu_ld_i64:
1693 tcg_out_qemu_ld(s, a0, a1, a2, ext);
1694 break;
1695 case INDEX_op_qemu_st_i32:
1696 case INDEX_op_qemu_st_i64:
1697 tcg_out_qemu_st(s, REG0(0), a1, a2);
1698 break;
1699
1700 case INDEX_op_bswap64_i64:
1701 tcg_out_rev64(s, a0, a1);
1702 break;
1703 case INDEX_op_bswap32_i64:
1704 case INDEX_op_bswap32_i32:
1705 tcg_out_rev32(s, a0, a1);
1706 break;
1707 case INDEX_op_bswap16_i64:
1708 case INDEX_op_bswap16_i32:
1709 tcg_out_rev16(s, a0, a1);
1710 break;
1711
1712 case INDEX_op_ext8s_i64:
1713 case INDEX_op_ext8s_i32:
1714 tcg_out_sxt(s, ext, MO_8, a0, a1);
1715 break;
1716 case INDEX_op_ext16s_i64:
1717 case INDEX_op_ext16s_i32:
1718 tcg_out_sxt(s, ext, MO_16, a0, a1);
1719 break;
1720 case INDEX_op_ext_i32_i64:
1721 case INDEX_op_ext32s_i64:
1722 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1723 break;
1724 case INDEX_op_ext8u_i64:
1725 case INDEX_op_ext8u_i32:
1726 tcg_out_uxt(s, MO_8, a0, a1);
1727 break;
1728 case INDEX_op_ext16u_i64:
1729 case INDEX_op_ext16u_i32:
1730 tcg_out_uxt(s, MO_16, a0, a1);
1731 break;
1732 case INDEX_op_extu_i32_i64:
1733 case INDEX_op_ext32u_i64:
1734 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1735 break;
1736
1737 case INDEX_op_deposit_i64:
1738 case INDEX_op_deposit_i32:
1739 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1740 break;
1741
1742 case INDEX_op_extract_i64:
1743 case INDEX_op_extract_i32:
1744 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
1745 break;
1746
1747 case INDEX_op_sextract_i64:
1748 case INDEX_op_sextract_i32:
1749 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
1750 break;
1751
1752 case INDEX_op_add2_i32:
1753 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1754 (int32_t)args[4], args[5], const_args[4],
1755 const_args[5], false);
1756 break;
1757 case INDEX_op_add2_i64:
1758 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1759 args[5], const_args[4], const_args[5], false);
1760 break;
1761 case INDEX_op_sub2_i32:
1762 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1763 (int32_t)args[4], args[5], const_args[4],
1764 const_args[5], true);
1765 break;
1766 case INDEX_op_sub2_i64:
1767 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1768 args[5], const_args[4], const_args[5], true);
1769 break;
1770
1771 case INDEX_op_muluh_i64:
1772 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1773 break;
1774 case INDEX_op_mulsh_i64:
1775 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1776 break;
1777
1778 case INDEX_op_mb:
1779 tcg_out_mb(s, a0);
1780 break;
1781
1782 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1783 case INDEX_op_mov_i64:
1784 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1785 case INDEX_op_movi_i64:
1786 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1787 default:
1788 tcg_abort();
1789 }
1790
1791 #undef REG0
1792 }
1793
1794 static const TCGTargetOpDef aarch64_op_defs[] = {
1795 { INDEX_op_exit_tb, { } },
1796 { INDEX_op_goto_tb, { } },
1797 { INDEX_op_br, { } },
1798 { INDEX_op_goto_ptr, { "r" } },
1799
1800 { INDEX_op_ld8u_i32, { "r", "r" } },
1801 { INDEX_op_ld8s_i32, { "r", "r" } },
1802 { INDEX_op_ld16u_i32, { "r", "r" } },
1803 { INDEX_op_ld16s_i32, { "r", "r" } },
1804 { INDEX_op_ld_i32, { "r", "r" } },
1805 { INDEX_op_ld8u_i64, { "r", "r" } },
1806 { INDEX_op_ld8s_i64, { "r", "r" } },
1807 { INDEX_op_ld16u_i64, { "r", "r" } },
1808 { INDEX_op_ld16s_i64, { "r", "r" } },
1809 { INDEX_op_ld32u_i64, { "r", "r" } },
1810 { INDEX_op_ld32s_i64, { "r", "r" } },
1811 { INDEX_op_ld_i64, { "r", "r" } },
1812
1813 { INDEX_op_st8_i32, { "rZ", "r" } },
1814 { INDEX_op_st16_i32, { "rZ", "r" } },
1815 { INDEX_op_st_i32, { "rZ", "r" } },
1816 { INDEX_op_st8_i64, { "rZ", "r" } },
1817 { INDEX_op_st16_i64, { "rZ", "r" } },
1818 { INDEX_op_st32_i64, { "rZ", "r" } },
1819 { INDEX_op_st_i64, { "rZ", "r" } },
1820
1821 { INDEX_op_add_i32, { "r", "r", "rA" } },
1822 { INDEX_op_add_i64, { "r", "r", "rA" } },
1823 { INDEX_op_sub_i32, { "r", "r", "rA" } },
1824 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1825 { INDEX_op_mul_i32, { "r", "r", "r" } },
1826 { INDEX_op_mul_i64, { "r", "r", "r" } },
1827 { INDEX_op_div_i32, { "r", "r", "r" } },
1828 { INDEX_op_div_i64, { "r", "r", "r" } },
1829 { INDEX_op_divu_i32, { "r", "r", "r" } },
1830 { INDEX_op_divu_i64, { "r", "r", "r" } },
1831 { INDEX_op_rem_i32, { "r", "r", "r" } },
1832 { INDEX_op_rem_i64, { "r", "r", "r" } },
1833 { INDEX_op_remu_i32, { "r", "r", "r" } },
1834 { INDEX_op_remu_i64, { "r", "r", "r" } },
1835 { INDEX_op_and_i32, { "r", "r", "rL" } },
1836 { INDEX_op_and_i64, { "r", "r", "rL" } },
1837 { INDEX_op_or_i32, { "r", "r", "rL" } },
1838 { INDEX_op_or_i64, { "r", "r", "rL" } },
1839 { INDEX_op_xor_i32, { "r", "r", "rL" } },
1840 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1841 { INDEX_op_andc_i32, { "r", "r", "rL" } },
1842 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1843 { INDEX_op_orc_i32, { "r", "r", "rL" } },
1844 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1845 { INDEX_op_eqv_i32, { "r", "r", "rL" } },
1846 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1847
1848 { INDEX_op_neg_i32, { "r", "r" } },
1849 { INDEX_op_neg_i64, { "r", "r" } },
1850 { INDEX_op_not_i32, { "r", "r" } },
1851 { INDEX_op_not_i64, { "r", "r" } },
1852
1853 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1854 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1855 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1856 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1857 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1858 { INDEX_op_clz_i32, { "r", "r", "rAL" } },
1859 { INDEX_op_ctz_i32, { "r", "r", "rAL" } },
1860 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1861 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1862 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1863 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1864 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1865 { INDEX_op_clz_i64, { "r", "r", "rAL" } },
1866 { INDEX_op_ctz_i64, { "r", "r", "rAL" } },
1867
1868 { INDEX_op_brcond_i32, { "r", "rA" } },
1869 { INDEX_op_brcond_i64, { "r", "rA" } },
1870 { INDEX_op_setcond_i32, { "r", "r", "rA" } },
1871 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1872 { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } },
1873 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1874
1875 { INDEX_op_qemu_ld_i32, { "r", "l" } },
1876 { INDEX_op_qemu_ld_i64, { "r", "l" } },
1877 { INDEX_op_qemu_st_i32, { "lZ", "l" } },
1878 { INDEX_op_qemu_st_i64, { "lZ", "l" } },
1879
1880 { INDEX_op_bswap16_i32, { "r", "r" } },
1881 { INDEX_op_bswap32_i32, { "r", "r" } },
1882 { INDEX_op_bswap16_i64, { "r", "r" } },
1883 { INDEX_op_bswap32_i64, { "r", "r" } },
1884 { INDEX_op_bswap64_i64, { "r", "r" } },
1885
1886 { INDEX_op_ext8s_i32, { "r", "r" } },
1887 { INDEX_op_ext16s_i32, { "r", "r" } },
1888 { INDEX_op_ext8u_i32, { "r", "r" } },
1889 { INDEX_op_ext16u_i32, { "r", "r" } },
1890
1891 { INDEX_op_ext8s_i64, { "r", "r" } },
1892 { INDEX_op_ext16s_i64, { "r", "r" } },
1893 { INDEX_op_ext32s_i64, { "r", "r" } },
1894 { INDEX_op_ext8u_i64, { "r", "r" } },
1895 { INDEX_op_ext16u_i64, { "r", "r" } },
1896 { INDEX_op_ext32u_i64, { "r", "r" } },
1897 { INDEX_op_ext_i32_i64, { "r", "r" } },
1898 { INDEX_op_extu_i32_i64, { "r", "r" } },
1899
1900 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1901 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1902 { INDEX_op_extract_i32, { "r", "r" } },
1903 { INDEX_op_extract_i64, { "r", "r" } },
1904 { INDEX_op_sextract_i32, { "r", "r" } },
1905 { INDEX_op_sextract_i64, { "r", "r" } },
1906
1907 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1908 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1909 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1910 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1911
1912 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1913 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1914
1915 { INDEX_op_mb, { } },
1916 { -1 },
1917 };
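/* A summary of the constraint letters used above (see
   target_parse_constraint earlier in this file for the authoritative
   definitions): "r" is any general register; "l" the subset usable for
   qemu_ld/st operands, which under CONFIG_SOFTMMU avoids x0-x3 because
   the TLB lookup and helper call clobber them; "i" any immediate; "A" an
   arithmetic (add/sub) immediate; "L" a logical immediate; "M" the
   constant -1; "Z" the constant 0, mapped to XZR/WZR; and "0" a matching
   constraint tying the operand to output operand 0. */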
1918
1919 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
1920 {
1921 int i, n = ARRAY_SIZE(aarch64_op_defs);
1922
1923 for (i = 0; i < n; ++i) {
1924 if (aarch64_op_defs[i].op == op) {
1925 return &aarch64_op_defs[i];
1926 }
1927 }
1928 return NULL;
1929 }
1930
1931 static void tcg_target_init(TCGContext *s)
1932 {
1933 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1934 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1935
1936 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1937 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1938 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1939 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1940 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1941 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1942 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1943 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1944 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1945 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1946 (1 << TCG_REG_X18) | (1 << TCG_REG_X30));
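/* Per AAPCS64, x0-x7 are argument registers, x8 the indirect-result
   register, x9-x15 temporaries and x16/x17 the intra-procedure-call
   scratch registers, all caller-saved; x30 is the link register.  x18 is
   the platform register and is treated as clobbered here as well.  The
   callee-saved x19-x28 are preserved by the prologue below. */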
1947
1948 tcg_regset_clear(s->reserved_regs);
1949 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1950 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1951 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1952 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1953 }
1954
1955 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
1956 #define PUSH_SIZE ((30 - 19 + 1) * 8)
1957
1958 #define FRAME_SIZE \
1959 ((PUSH_SIZE \
1960 + TCG_STATIC_CALL_ARGS_SIZE \
1961 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
1962 + TCG_TARGET_STACK_ALIGN - 1) \
1963 & ~(TCG_TARGET_STACK_ALIGN - 1))
1964
1965 /* We're expecting a 2-byte uleb128 encoded value. */
1966 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
1967
1968 /* We're expecting to use a single ADDI insn. */
1969 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
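/* A worked example, assuming the usual values TCG_STATIC_CALL_ARGS_SIZE
   == 128, CPU_TEMP_BUF_NLONGS == 128 and TCG_TARGET_STACK_ALIGN == 16:
   PUSH_SIZE is (30 - 19 + 1) * 8 = 96 bytes, so FRAME_SIZE comes to
   (96 + 128 + 128 * 8 + 15) & ~15 = 1248.  That satisfies both asserts
   above: 1248 < 1 << 14 (two uleb128 bytes) and 1248 - 96 = 1152 <= 0xfff
   (a single ADDI/SUBI immediate). */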
1970
1971 static void tcg_target_qemu_prologue(TCGContext *s)
1972 {
1973 TCGReg r;
1974
1975 /* Push (FP, LR) and allocate space for all saved registers. */
1976 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
1977 TCG_REG_SP, -PUSH_SIZE, 1, 1);
1978
1979 /* Set up frame pointer for canonical unwinding. */
1980 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1981
1982 /* Store callee-preserved regs x19..x28. */
1983 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1984 int ofs = (r - TCG_REG_X19 + 2) * 8;
1985 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1986 }
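/* At this point the frame laid out by the STP above and this loop is:
   [sp +  0] fp, lr      [sp + 16] x19, x20    [sp + 32] x21, x22
   [sp + 48] x23, x24    [sp + 64] x25, x26    [sp + 80] x27, x28
   for PUSH_SIZE (96) bytes in total; the SUBI below then opens up the
   remaining FRAME_SIZE - PUSH_SIZE bytes for outgoing call arguments and
   the TCG temporary buffer. */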
1987
1988 /* Make stack space for TCG locals. */
1989 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1990 FRAME_SIZE - PUSH_SIZE);
1991
1992 /* Inform TCG about how to find TCG locals with register, offset, size. */
1993 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1994 CPU_TEMP_BUF_NLONGS * sizeof(long));
1995
1996 #if !defined(CONFIG_SOFTMMU)
1997 if (USE_GUEST_BASE) {
1998 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
1999 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2000 }
2001 #endif
2002
2003 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2004 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
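/* The prologue is entered as tcg_qemu_tb_exec(env, tb_ptr): env has just
   been moved into AREG0 and the BR above jumps straight into the
   translated block, so control returns here only through exit_tb or the
   goto_ptr path below. */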
2005
2006 /*
2007 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2008 * and fall through to the rest of the epilogue.
2009 */
2010 s->code_gen_epilogue = s->code_ptr;
2011 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2012
2013 /* TB epilogue */
2014 tb_ret_addr = s->code_ptr;
2015
2016 /* Remove TCG locals stack space. */
2017 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2018 FRAME_SIZE - PUSH_SIZE);
2019
2020 /* Restore registers x19..x28. */
2021 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2022 int ofs = (r - TCG_REG_X19 + 2) * 8;
2023 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2024 }
2025
2026 /* Pop (FP, LR), restore SP to previous frame. */
2027 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2028 TCG_REG_SP, PUSH_SIZE, 0, 1);
2029 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2030 }
2031
2032 typedef struct {
2033 DebugFrameHeader h;
2034 uint8_t fde_def_cfa[4];
2035 uint8_t fde_reg_ofs[24];
2036 } DebugFrame;
2037
2038 #define ELF_HOST_MACHINE EM_AARCH64
2039
2040 static const DebugFrame debug_frame = {
2041 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2042 .h.cie.id = -1,
2043 .h.cie.version = 1,
2044 .h.cie.code_align = 1,
2045 .h.cie.data_align = 0x78, /* sleb128 -8 */
2046 .h.cie.return_column = TCG_REG_LR,
2047
2048 /* Total FDE size does not include the "len" member. */
2049 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2050
2051 .fde_def_cfa = {
2052 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2053 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2054 (FRAME_SIZE >> 7)
2055 },
2056 .fde_reg_ofs = {
2057 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2058 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2059 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2060 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2061 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2062 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2063 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2064 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2065 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2066 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2067 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2068 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2069 }
2070 };
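/* Encoding notes for the frame description above: DW_CFA_def_cfa is
   opcode 12 and takes the CFA register plus a uleb128 offset, hence the
   two bytes encoding FRAME_SIZE (the build-time assert above guarantees
   it fits in 14 bits).  DW_CFA_offset is 0x80 | regno followed by a
   uleb128 factored offset, which is multiplied by data_align (-8); so
   "0x80 + 19, 10" records x19 at CFA - 80, matching the stack layout
   built by the prologue. */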
2071
2072 void tcg_register_jit(void *buf, size_t buf_size)
2073 {
2074 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2075 }