1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-pool.inc.c"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType in setting the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
27 };
28 #endif /* CONFIG_DEBUG_TCG */
29
30 static const int tcg_target_reg_alloc_order[] = {
31 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
32 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
33 TCG_REG_X28, /* we will reserve this for guest_base if configured */
34
35 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
36 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
37 TCG_REG_X16, TCG_REG_X17,
38
39 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
40 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
41
42 /* X18 reserved by system */
43 /* X19 reserved for AREG0 */
44 /* X29 reserved as fp */
45 /* X30 reserved as temporary */
46 };
47
48 static const int tcg_target_call_iarg_regs[8] = {
49 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
50 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
51 };
52 static const int tcg_target_call_oarg_regs[1] = {
53 TCG_REG_X0
54 };
55
56 #define TCG_REG_TMP TCG_REG_X30
57
58 #ifndef CONFIG_SOFTMMU
59 /* Note that XZR cannot be encoded in the address base register slot,
60 as that actually encodes SP. So if we need to zero-extend the guest
61 address, via the address index register slot, we need to load even
62 a zero guest base into a register. */
63 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
64 #define TCG_REG_GUEST_BASE TCG_REG_X28
65 #endif
66
67 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
68 {
69 ptrdiff_t offset = target - code_ptr;
70 tcg_debug_assert(offset == sextract64(offset, 0, 26));
71 /* read instruction, mask away previous PC_REL26 parameter contents,
72 set the proper offset, then write back the instruction. */
73 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
74 }
75
76 static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr,
77 tcg_insn_unit *target)
78 {
79 ptrdiff_t offset = target - code_ptr;
80 tcg_insn_unit insn;
81 tcg_debug_assert(offset == sextract64(offset, 0, 26));
82 /* read instruction, mask away previous PC_REL26 parameter contents,
83 set the proper offset, then write back the instruction. */
84 insn = atomic_read(code_ptr);
85 atomic_set(code_ptr, deposit32(insn, 0, 26, offset));
86 }
87
88 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
89 {
90 ptrdiff_t offset = target - code_ptr;
91 tcg_debug_assert(offset == sextract64(offset, 0, 19));
92 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
93 }
94
95 static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
96 intptr_t value, intptr_t addend)
97 {
98 tcg_debug_assert(addend == 0);
99 switch (type) {
100 case R_AARCH64_JUMP26:
101 case R_AARCH64_CALL26:
102 reloc_pc26(code_ptr, (tcg_insn_unit *)value);
103 break;
104 case R_AARCH64_CONDBR19:
105 reloc_pc19(code_ptr, (tcg_insn_unit *)value);
106 break;
107 default:
108 tcg_abort();
109 }
110 }
111
112 #define TCG_CT_CONST_AIMM 0x100
113 #define TCG_CT_CONST_LIMM 0x200
114 #define TCG_CT_CONST_ZERO 0x400
115 #define TCG_CT_CONST_MONE 0x800
116
117 /* parse target specific constraints */
118 static const char *target_parse_constraint(TCGArgConstraint *ct,
119 const char *ct_str, TCGType type)
120 {
121 switch (*ct_str++) {
122 case 'r':
123 ct->ct |= TCG_CT_REG;
124 ct->u.regs = 0xffffffffu;
125 break;
126 case 'l': /* qemu_ld / qemu_st address, data_reg */
127 ct->ct |= TCG_CT_REG;
128 ct->u.regs = 0xffffffffu;
129 #ifdef CONFIG_SOFTMMU
130 /* x0 and x1 will be overwritten when reading the tlb entry,
131 and x2 and x3 are used for helper args; better to avoid using them. */
132 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
133 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
134 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
135 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
136 #endif
137 break;
138 case 'A': /* Valid for arithmetic immediate (positive or negative). */
139 ct->ct |= TCG_CT_CONST_AIMM;
140 break;
141 case 'L': /* Valid for logical immediate. */
142 ct->ct |= TCG_CT_CONST_LIMM;
143 break;
144 case 'M': /* minus one */
145 ct->ct |= TCG_CT_CONST_MONE;
146 break;
147 case 'Z': /* zero */
148 ct->ct |= TCG_CT_CONST_ZERO;
149 break;
150 default:
151 return NULL;
152 }
153 return ct_str;
154 }
155
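/* An arithmetic immediate is an unsigned 12-bit value, optionally
   left-shifted by 12 bits.  */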
156 static inline bool is_aimm(uint64_t val)
157 {
158 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
159 }
160
161 static inline bool is_limm(uint64_t val)
162 {
163 /* Taking a simplified view of the logical immediates for now, ignoring
164 the replication that can happen across the field. Match bit patterns
165 of the forms
166 0....01....1
167 0..01..10..0
168 and their inverses. */
169
170 /* Make things easier below, by testing the form with msb clear. */
171 if ((int64_t)val < 0) {
172 val = ~val;
173 }
174 if (val == 0) {
175 return false;
176 }
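/* Add the lowest set bit to VAL: for the accepted forms this collapses the
   contiguous run of 1s, leaving at most one bit set, which the power-of-two
   test below detects.  */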
177 val += val & -val;
178 return (val & (val - 1)) == 0;
179 }
180
181 static int tcg_target_const_match(tcg_target_long val, TCGType type,
182 const TCGArgConstraint *arg_ct)
183 {
184 int ct = arg_ct->ct;
185
186 if (ct & TCG_CT_CONST) {
187 return 1;
188 }
189 if (type == TCG_TYPE_I32) {
190 val = (int32_t)val;
191 }
192 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
193 return 1;
194 }
195 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
196 return 1;
197 }
198 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
199 return 1;
200 }
201 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
202 return 1;
203 }
204
205 return 0;
206 }
207
208 enum aarch64_cond_code {
209 COND_EQ = 0x0,
210 COND_NE = 0x1,
211 COND_CS = 0x2, /* Unsigned greater or equal */
212 COND_HS = COND_CS, /* ALIAS greater or equal */
213 COND_CC = 0x3, /* Unsigned less than */
214 COND_LO = COND_CC, /* ALIAS Lower */
215 COND_MI = 0x4, /* Negative */
216 COND_PL = 0x5, /* Zero or greater */
217 COND_VS = 0x6, /* Overflow */
218 COND_VC = 0x7, /* No overflow */
219 COND_HI = 0x8, /* Unsigned greater than */
220 COND_LS = 0x9, /* Unsigned less or equal */
221 COND_GE = 0xa,
222 COND_LT = 0xb,
223 COND_GT = 0xc,
224 COND_LE = 0xd,
225 COND_AL = 0xe,
226 COND_NV = 0xf, /* behaves like COND_AL here */
227 };
228
229 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
230 [TCG_COND_EQ] = COND_EQ,
231 [TCG_COND_NE] = COND_NE,
232 [TCG_COND_LT] = COND_LT,
233 [TCG_COND_GE] = COND_GE,
234 [TCG_COND_LE] = COND_LE,
235 [TCG_COND_GT] = COND_GT,
236 /* unsigned */
237 [TCG_COND_LTU] = COND_LO,
238 [TCG_COND_GTU] = COND_HI,
239 [TCG_COND_GEU] = COND_HS,
240 [TCG_COND_LEU] = COND_LS,
241 };
242
243 typedef enum {
244 LDST_ST = 0, /* store */
245 LDST_LD = 1, /* load */
246 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
247 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
248 } AArch64LdstType;
249
250 /* We encode the format of the insn into the beginning of the name, so that
251 we can have the preprocessor help "typecheck" the insn vs the output
252 function. Arm didn't provide us with nice names for the formats, so we
253 use the section number of the architecture reference manual in which the
254 instruction group is described. */
255 typedef enum {
256 /* Compare and branch (immediate). */
257 I3201_CBZ = 0x34000000,
258 I3201_CBNZ = 0x35000000,
259
260 /* Conditional branch (immediate). */
261 I3202_B_C = 0x54000000,
262
263 /* Unconditional branch (immediate). */
264 I3206_B = 0x14000000,
265 I3206_BL = 0x94000000,
266
267 /* Unconditional branch (register). */
268 I3207_BR = 0xd61f0000,
269 I3207_BLR = 0xd63f0000,
270 I3207_RET = 0xd65f0000,
271
272 /* Load literal for loading the address at pc-relative offset */
273 I3305_LDR = 0x58000000,
274 /* Load/store register. Described here as 3.3.12, but the helper
275 that emits them can transform to 3.3.10 or 3.3.13. */
276 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
277 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
278 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
279 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
280
281 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
282 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
283 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
284 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
285
286 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
287 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
288
289 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
290 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
291 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
292
293 I3312_TO_I3310 = 0x00200800,
294 I3312_TO_I3313 = 0x01000000,
295
296 /* Load/store register pair instructions. */
297 I3314_LDP = 0x28400000,
298 I3314_STP = 0x28000000,
299
300 /* Add/subtract immediate instructions. */
301 I3401_ADDI = 0x11000000,
302 I3401_ADDSI = 0x31000000,
303 I3401_SUBI = 0x51000000,
304 I3401_SUBSI = 0x71000000,
305
306 /* Bitfield instructions. */
307 I3402_BFM = 0x33000000,
308 I3402_SBFM = 0x13000000,
309 I3402_UBFM = 0x53000000,
310
311 /* Extract instruction. */
312 I3403_EXTR = 0x13800000,
313
314 /* Logical immediate instructions. */
315 I3404_ANDI = 0x12000000,
316 I3404_ORRI = 0x32000000,
317 I3404_EORI = 0x52000000,
318
319 /* Move wide immediate instructions. */
320 I3405_MOVN = 0x12800000,
321 I3405_MOVZ = 0x52800000,
322 I3405_MOVK = 0x72800000,
323
324 /* PC relative addressing instructions. */
325 I3406_ADR = 0x10000000,
326 I3406_ADRP = 0x90000000,
327
328 /* Add/subtract shifted register instructions (without a shift). */
329 I3502_ADD = 0x0b000000,
330 I3502_ADDS = 0x2b000000,
331 I3502_SUB = 0x4b000000,
332 I3502_SUBS = 0x6b000000,
333
334 /* Add/subtract shifted register instructions (with a shift). */
335 I3502S_ADD_LSL = I3502_ADD,
336
337 /* Add/subtract with carry instructions. */
338 I3503_ADC = 0x1a000000,
339 I3503_SBC = 0x5a000000,
340
341 /* Conditional select instructions. */
342 I3506_CSEL = 0x1a800000,
343 I3506_CSINC = 0x1a800400,
344 I3506_CSINV = 0x5a800000,
345 I3506_CSNEG = 0x5a800400,
346
347 /* Data-processing (1 source) instructions. */
348 I3507_CLZ = 0x5ac01000,
349 I3507_RBIT = 0x5ac00000,
350 I3507_REV16 = 0x5ac00400,
351 I3507_REV32 = 0x5ac00800,
352 I3507_REV64 = 0x5ac00c00,
353
354 /* Data-processing (2 source) instructions. */
355 I3508_LSLV = 0x1ac02000,
356 I3508_LSRV = 0x1ac02400,
357 I3508_ASRV = 0x1ac02800,
358 I3508_RORV = 0x1ac02c00,
359 I3508_SMULH = 0x9b407c00,
360 I3508_UMULH = 0x9bc07c00,
361 I3508_UDIV = 0x1ac00800,
362 I3508_SDIV = 0x1ac00c00,
363
364 /* Data-processing (3 source) instructions. */
365 I3509_MADD = 0x1b000000,
366 I3509_MSUB = 0x1b008000,
367
368 /* Logical shifted register instructions (without a shift). */
369 I3510_AND = 0x0a000000,
370 I3510_BIC = 0x0a200000,
371 I3510_ORR = 0x2a000000,
372 I3510_ORN = 0x2a200000,
373 I3510_EOR = 0x4a000000,
374 I3510_EON = 0x4a200000,
375 I3510_ANDS = 0x6a000000,
376
377 NOP = 0xd503201f,
378 /* System instructions. */
379 DMB_ISH = 0xd50338bf,
380 DMB_LD = 0x00000100,
381 DMB_ST = 0x00000200,
382 } AArch64Insn;
383
384 static inline uint32_t tcg_in32(TCGContext *s)
385 {
386 uint32_t v = *(uint32_t *)s->code_ptr;
387 return v;
388 }
389
390 /* Emit an opcode with "type-checking" of the format. */
391 #define tcg_out_insn(S, FMT, OP, ...) \
392 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
393
394 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
395 {
396 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
397 }
398
399 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
400 TCGReg rt, int imm19)
401 {
402 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
403 }
404
405 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
406 TCGCond c, int imm19)
407 {
408 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
409 }
410
411 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
412 {
413 tcg_out32(s, insn | (imm26 & 0x03ffffff));
414 }
415
416 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
417 {
418 tcg_out32(s, insn | rn << 5);
419 }
420
421 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
422 TCGReg r1, TCGReg r2, TCGReg rn,
423 tcg_target_long ofs, bool pre, bool w)
424 {
425 insn |= 1u << 31; /* ext */
426 insn |= pre << 24;
427 insn |= w << 23;
428
429 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
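/* The scaled 7-bit immediate lives in bits [21:15]; OFS is a byte offset,
   already a multiple of 8, so move its bits [9:3] into place.  */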
430 insn |= (ofs & (0x7f << 3)) << (15 - 3);
431
432 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
433 }
434
435 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
436 TCGReg rd, TCGReg rn, uint64_t aimm)
437 {
438 if (aimm > 0xfff) {
439 tcg_debug_assert((aimm & 0xfff) == 0);
440 aimm >>= 12;
441 tcg_debug_assert(aimm <= 0xfff);
442 aimm |= 1 << 12; /* apply LSL 12 */
443 }
444 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
445 }
446
447 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
448 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
449 that feed the DecodeBitMasks pseudo function. */
450 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
451 TCGReg rd, TCGReg rn, int n, int immr, int imms)
452 {
453 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
454 | rn << 5 | rd);
455 }
456
457 #define tcg_out_insn_3404 tcg_out_insn_3402
458
459 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
460 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
461 {
462 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
463 | rn << 5 | rd);
464 }
465
466 /* This function is used for the Move (wide immediate) instruction group.
467 Note that SHIFT is a full shift count, not the 2 bit HW field. */
468 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
469 TCGReg rd, uint16_t half, unsigned shift)
470 {
471 tcg_debug_assert((shift & ~0x30) == 0);
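/* SHIFT is 0, 16, 32 or 48; the hw field wants SHIFT/16 in bits [22:21],
   i.e. SHIFT << (21 - 4).  */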
472 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
473 }
474
475 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
476 TCGReg rd, int64_t disp)
477 {
478 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
479 }
480
481 /* This function is for 3.5.2 (Add/Subtract shifted register), for
482 the rare occasion when we actually want to supply a shift amount. */
483 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
484 TCGType ext, TCGReg rd, TCGReg rn,
485 TCGReg rm, int imm6)
486 {
487 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
488 }
489
490 /* This function is for 3.5.2 (Add/subtract shifted register),
491 and 3.5.10 (Logical shifted register), for the vast majority of cases
492 when we don't want to apply a shift. Thus it can also be used for
493 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
494 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
495 TCGReg rd, TCGReg rn, TCGReg rm)
496 {
497 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
498 }
499
500 #define tcg_out_insn_3503 tcg_out_insn_3502
501 #define tcg_out_insn_3508 tcg_out_insn_3502
502 #define tcg_out_insn_3510 tcg_out_insn_3502
503
504 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
505 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
506 {
507 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
508 | tcg_cond_to_aarch64[c] << 12);
509 }
510
511 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
512 TCGReg rd, TCGReg rn)
513 {
514 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
515 }
516
517 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
518 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
519 {
520 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
521 }
522
523 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
524 TCGReg rd, TCGReg base, TCGType ext,
525 TCGReg regoff)
526 {
527 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
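/* The 0x4000 | ext << 13 term sets the option field to UXTW for a 32-bit
   index or LSL (UXTX) for a 64-bit index, with a zero shift amount.  */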
528 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
529 0x4000 | ext << 13 | base << 5 | rd);
530 }
531
532 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
533 TCGReg rd, TCGReg rn, intptr_t offset)
534 {
535 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
536 }
537
538 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
539 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
540 {
541 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
542 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
543 }
544
545 /* Register to register move using ORR (shifted register with no shift). */
546 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
547 {
548 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
549 }
550
551 /* Register to register move using ADDI (move to/from SP). */
552 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
553 {
554 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
555 }
556
557 /* This function is used for the Logical (immediate) instruction group.
558 The value of LIMM must satisfy IS_LIMM. See the comment above about
559 only supporting simplified logical immediates. */
560 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
561 TCGReg rd, TCGReg rn, uint64_t limm)
562 {
563 unsigned h, l, r, c;
564
565 tcg_debug_assert(is_limm(limm));
566
567 h = clz64(limm);
568 l = ctz64(limm);
569 if (l == 0) {
570 r = 0; /* form 0....01....1 */
571 c = ctz64(~limm) - 1;
572 if (h == 0) {
573 r = clz64(~limm); /* form 1..10..01..1 */
574 c += r;
575 }
576 } else {
577 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
578 c = r - h - 1;
579 }
580 if (ext == TCG_TYPE_I32) {
581 r &= 31;
582 c &= 31;
583 }
584
585 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
586 }
587
588 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
589 tcg_target_long value)
590 {
591 tcg_target_long svalue = value;
592 tcg_target_long ivalue = ~value;
593 tcg_target_long t0, t1, t2;
594 int s0, s1;
595 AArch64Insn opc;
596
597 /* For 32-bit values, discard potential garbage in value. For 64-bit
598 values within [2**31, 2**32-1], we can create smaller sequences by
599 interpreting this as a negative 32-bit number, while ensuring that
600 the high 32 bits are cleared by setting SF=0. */
601 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
602 svalue = (int32_t)value;
603 value = (uint32_t)value;
604 ivalue = (uint32_t)ivalue;
605 type = TCG_TYPE_I32;
606 }
607
608 /* Speed things up by handling the common case of small positive
609 and negative values specially. */
610 if ((value & ~0xffffull) == 0) {
611 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
612 return;
613 } else if ((ivalue & ~0xffffull) == 0) {
614 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
615 return;
616 }
617
618 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
619 use the sign-extended value. That lets us match rotated values such
620 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
621 if (is_limm(svalue)) {
622 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
623 return;
624 }
625
626 /* Look for host pointer values within 4G of the PC. This happens
627 often when loading pointers to QEMU's own data structures. */
628 if (type == TCG_TYPE_I64) {
629 tcg_target_long disp = value - (intptr_t)s->code_ptr;
630 if (disp == sextract64(disp, 0, 21)) {
631 tcg_out_insn(s, 3406, ADR, rd, disp);
632 return;
633 }
634 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
635 if (disp == sextract64(disp, 0, 21)) {
636 tcg_out_insn(s, 3406, ADRP, rd, disp);
637 if (value & 0xfff) {
638 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
639 }
640 return;
641 }
642 }
643
644 /* Would it take fewer insns to begin with MOVN? */
645 if (ctpop64(value) >= 32) {
646 t0 = ivalue;
647 opc = I3405_MOVN;
648 } else {
649 t0 = value;
650 opc = I3405_MOVZ;
651 }
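/* Locate the lowest non-zero 16-bit chunk (s0), clear it (t1), then the
   next non-zero chunk (s1).  If nothing remains, the value needs at most
   MOVZ/MOVN plus one MOVK.  */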
652 s0 = ctz64(t0) & (63 & -16);
653 t1 = t0 & ~(0xffffUL << s0);
654 s1 = ctz64(t1) & (63 & -16);
655 t2 = t1 & ~(0xffffUL << s1);
656 if (t2 == 0) {
657 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
658 if (t1 != 0) {
659 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
660 }
661 return;
662 }
663
664 /* For more than 2 insns, dump it into the constant pool. */
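/* LDR (literal) keeps its 19-bit offset in the same bit positions as a
   conditional branch, so the R_AARCH64_CONDBR19 relocation used for the
   pool entry patches it correctly.  */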
665 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
666 tcg_out_insn(s, 3305, LDR, 0, rd);
667 }
668
669 /* Define something more legible for general use. */
670 #define tcg_out_ldst_r tcg_out_insn_3310
671
672 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
673 TCGReg rd, TCGReg rn, intptr_t offset)
674 {
675 TCGMemOp size = (uint32_t)insn >> 30;
676
677 /* If the offset is naturally aligned and in range, then we can
678 use the scaled uimm12 encoding */
679 if (offset >= 0 && !(offset & ((1 << size) - 1))) {
680 uintptr_t scaled_uimm = offset >> size;
681 if (scaled_uimm <= 0xfff) {
682 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
683 return;
684 }
685 }
686
687 /* Small signed offsets can use the unscaled encoding. */
688 if (offset >= -256 && offset < 256) {
689 tcg_out_insn_3312(s, insn, rd, rn, offset);
690 return;
691 }
692
693 /* Worst-case scenario, move offset to temp register, use reg offset. */
694 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
695 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
696 }
697
698 static inline void tcg_out_mov(TCGContext *s,
699 TCGType type, TCGReg ret, TCGReg arg)
700 {
701 if (ret != arg) {
702 tcg_out_movr(s, type, ret, arg);
703 }
704 }
705
706 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
707 TCGReg arg1, intptr_t arg2)
708 {
709 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
710 arg, arg1, arg2);
711 }
712
713 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
714 TCGReg arg1, intptr_t arg2)
715 {
716 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
717 arg, arg1, arg2);
718 }
719
720 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
721 TCGReg base, intptr_t ofs)
722 {
723 if (val == 0) {
724 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
725 return true;
726 }
727 return false;
728 }
729
730 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
731 TCGReg rn, unsigned int a, unsigned int b)
732 {
733 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
734 }
735
736 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
737 TCGReg rn, unsigned int a, unsigned int b)
738 {
739 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
740 }
741
742 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
743 TCGReg rn, unsigned int a, unsigned int b)
744 {
745 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
746 }
747
748 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
749 TCGReg rn, TCGReg rm, unsigned int a)
750 {
751 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
752 }
753
754 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
755 TCGReg rd, TCGReg rn, unsigned int m)
756 {
757 int bits = ext ? 64 : 32;
758 int max = bits - 1;
759 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
760 }
761
762 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
763 TCGReg rd, TCGReg rn, unsigned int m)
764 {
765 int max = ext ? 63 : 31;
766 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
767 }
768
769 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
770 TCGReg rd, TCGReg rn, unsigned int m)
771 {
772 int max = ext ? 63 : 31;
773 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
774 }
775
776 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
777 TCGReg rd, TCGReg rn, unsigned int m)
778 {
779 int max = ext ? 63 : 31;
780 tcg_out_extr(s, ext, rd, rn, rn, m & max);
781 }
782
783 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
784 TCGReg rd, TCGReg rn, unsigned int m)
785 {
786 int bits = ext ? 64 : 32;
787 int max = bits - 1;
788 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
789 }
790
791 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
792 TCGReg rn, unsigned lsb, unsigned width)
793 {
794 unsigned size = ext ? 64 : 32;
795 unsigned a = (size - lsb) & (size - 1);
796 unsigned b = width - 1;
797 tcg_out_bfm(s, ext, rd, rn, a, b);
798 }
799
800 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
801 tcg_target_long b, bool const_b)
802 {
803 if (const_b) {
804 /* Using CMP or CMN aliases. */
805 if (b >= 0) {
806 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
807 } else {
808 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
809 }
810 } else {
811 /* Using CMP alias SUBS wzr, Wn, Wm */
812 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
813 }
814 }
815
816 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
817 {
818 ptrdiff_t offset = target - s->code_ptr;
819 tcg_debug_assert(offset == sextract64(offset, 0, 26));
820 tcg_out_insn(s, 3206, B, offset);
821 }
822
823 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
824 {
825 ptrdiff_t offset = target - s->code_ptr;
826 if (offset == sextract64(offset, 0, 26)) {
827 tcg_out_insn(s, 3206, BL, offset);
828 } else {
829 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
830 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
831 }
832 }
833
834 static inline void tcg_out_goto_noaddr(TCGContext *s)
835 {
836 /* We pay attention here to not modify the branch target by reading from
837 the buffer. This ensures that caches and memory are kept coherent during
838 retranslation. Mask away possible garbage in the high bits for the
839 first translation, while keeping the offset bits for retranslation. */
840 uint32_t old = tcg_in32(s);
841 tcg_out_insn(s, 3206, B, old);
842 }
843
844 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
845 {
846 /* See comments in tcg_out_goto_noaddr. */
847 uint32_t old = tcg_in32(s) >> 5;
848 tcg_out_insn(s, 3202, B_C, c, old);
849 }
850
851 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
852 {
853 tcg_out_insn(s, 3207, BLR, reg);
854 }
855
856 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
857 {
858 ptrdiff_t offset = target - s->code_ptr;
859 if (offset == sextract64(offset, 0, 26)) {
860 tcg_out_insn(s, 3206, BL, offset);
861 } else {
862 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
863 tcg_out_callr(s, TCG_REG_TMP);
864 }
865 }
866
867 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
868 uintptr_t addr)
869 {
870 tcg_insn_unit i1, i2;
871 TCGType rt = TCG_TYPE_I64;
872 TCGReg rd = TCG_REG_TMP;
873 uint64_t pair;
874
875 ptrdiff_t offset = addr - jmp_addr;
876
877 if (offset == sextract64(offset, 0, 26)) {
878 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
879 i2 = NOP;
880 } else {
881 offset = (addr >> 12) - (jmp_addr >> 12);
882
883 /* patch ADRP */
884 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
885 /* patch ADDI */
886 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
887 }
888 pair = (uint64_t)i2 << 32 | i1;
889 atomic_set((uint64_t *)jmp_addr, pair);
890 flush_icache_range(jmp_addr, jmp_addr + 8);
891 }
892
893 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
894 {
895 if (!l->has_value) {
896 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
897 tcg_out_goto_noaddr(s);
898 } else {
899 tcg_out_goto(s, l->u.value_ptr);
900 }
901 }
902
903 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
904 TCGArg b, bool b_const, TCGLabel *l)
905 {
906 intptr_t offset;
907 bool need_cmp;
908
909 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
910 need_cmp = false;
911 } else {
912 need_cmp = true;
913 tcg_out_cmp(s, ext, a, b, b_const);
914 }
915
916 if (!l->has_value) {
917 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
918 offset = tcg_in32(s) >> 5;
919 } else {
920 offset = l->u.value_ptr - s->code_ptr;
921 tcg_debug_assert(offset == sextract64(offset, 0, 19));
922 }
923
924 if (need_cmp) {
925 tcg_out_insn(s, 3202, B_C, c, offset);
926 } else if (c == TCG_COND_EQ) {
927 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
928 } else {
929 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
930 }
931 }
932
933 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
934 {
935 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
936 }
937
938 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
939 {
940 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
941 }
942
943 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
944 {
945 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
946 }
947
948 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
949 TCGReg rd, TCGReg rn)
950 {
951 /* Using ALIASes SXTB, SXTH, SXTW of SBFM Xd, Xn, #0, #7|15|31 */
952 int bits = (8 << s_bits) - 1;
953 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
954 }
955
956 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
957 TCGReg rd, TCGReg rn)
958 {
959 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
960 int bits = (8 << s_bits) - 1;
961 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
962 }
963
964 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
965 TCGReg rn, int64_t aimm)
966 {
967 if (aimm >= 0) {
968 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
969 } else {
970 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
971 }
972 }
973
974 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
975 TCGReg rh, TCGReg al, TCGReg ah,
976 tcg_target_long bl, tcg_target_long bh,
977 bool const_bl, bool const_bh, bool sub)
978 {
979 TCGReg orig_rl = rl;
980 AArch64Insn insn;
981
982 if (rl == ah || (!const_bh && rl == bh)) {
983 rl = TCG_REG_TMP;
984 }
985
986 if (const_bl) {
987 insn = I3401_ADDSI;
988 if ((bl < 0) ^ sub) {
989 insn = I3401_SUBSI;
990 bl = -bl;
991 }
992 if (unlikely(al == TCG_REG_XZR)) {
993 /* ??? We want to allow al to be zero for the benefit of
994 negation via subtraction. However, that leaves open the
995 possibility of adding 0+const in the low part, and the
996 immediate add instructions encode XSP not XZR. Don't try
997 anything more elaborate here than loading another zero. */
998 al = TCG_REG_TMP;
999 tcg_out_movi(s, ext, al, 0);
1000 }
1001 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1002 } else {
1003 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1004 }
1005
1006 insn = I3503_ADC;
1007 if (const_bh) {
1008 /* Note that the only two constants we support are 0 and -1, and
1009 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1010 if ((bh != 0) ^ sub) {
1011 insn = I3503_SBC;
1012 }
1013 bh = TCG_REG_XZR;
1014 } else if (sub) {
1015 insn = I3503_SBC;
1016 }
1017 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1018
1019 tcg_out_mov(s, ext, orig_rl, rl);
1020 }
1021
1022 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1023 {
1024 static const uint32_t sync[] = {
1025 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1026 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1027 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1028 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1029 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1030 };
1031 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1032 }
1033
1034 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1035 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1036 {
1037 TCGReg a1 = a0;
1038 if (is_ctz) {
1039 a1 = TCG_REG_TMP;
1040 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1041 }
1042 if (const_b && b == (ext ? 64 : 32)) {
1043 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1044 } else {
1045 AArch64Insn sel = I3506_CSEL;
1046
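/* Count into TMP, then select TMP when the input was non-zero and the
   fallback B otherwise.  */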
1047 tcg_out_cmp(s, ext, a0, 0, 1);
1048 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1049
1050 if (const_b) {
1051 if (b == -1) {
1052 b = TCG_REG_XZR;
1053 sel = I3506_CSINV;
1054 } else if (b == 0) {
1055 b = TCG_REG_XZR;
1056 } else {
1057 tcg_out_movi(s, ext, d, b);
1058 b = d;
1059 }
1060 }
1061 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1062 }
1063 }
1064
1065 #ifdef CONFIG_SOFTMMU
1066 #include "tcg-ldst.inc.c"
1067
1068 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1069 * TCGMemOpIdx oi, uintptr_t ra)
1070 */
1071 static void * const qemu_ld_helpers[16] = {
1072 [MO_UB] = helper_ret_ldub_mmu,
1073 [MO_LEUW] = helper_le_lduw_mmu,
1074 [MO_LEUL] = helper_le_ldul_mmu,
1075 [MO_LEQ] = helper_le_ldq_mmu,
1076 [MO_BEUW] = helper_be_lduw_mmu,
1077 [MO_BEUL] = helper_be_ldul_mmu,
1078 [MO_BEQ] = helper_be_ldq_mmu,
1079 };
1080
1081 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1082 * uintxx_t val, TCGMemOpIdx oi,
1083 * uintptr_t ra)
1084 */
1085 static void * const qemu_st_helpers[16] = {
1086 [MO_UB] = helper_ret_stb_mmu,
1087 [MO_LEUW] = helper_le_stw_mmu,
1088 [MO_LEUL] = helper_le_stl_mmu,
1089 [MO_LEQ] = helper_le_stq_mmu,
1090 [MO_BEUW] = helper_be_stw_mmu,
1091 [MO_BEUL] = helper_be_stl_mmu,
1092 [MO_BEQ] = helper_be_stq_mmu,
1093 };
1094
1095 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1096 {
1097 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1098 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1099 tcg_out_insn(s, 3406, ADR, rd, offset);
1100 }
1101
1102 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1103 {
1104 TCGMemOpIdx oi = lb->oi;
1105 TCGMemOp opc = get_memop(oi);
1106 TCGMemOp size = opc & MO_SIZE;
1107
1108 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1109
1110 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1111 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1112 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1113 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1114 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1115 if (opc & MO_SIGN) {
1116 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1117 } else {
1118 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1119 }
1120
1121 tcg_out_goto(s, lb->raddr);
1122 }
1123
1124 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1125 {
1126 TCGMemOpIdx oi = lb->oi;
1127 TCGMemOp opc = get_memop(oi);
1128 TCGMemOp size = opc & MO_SIZE;
1129
1130 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1131
1132 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1133 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1134 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1135 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1136 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1137 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1138 tcg_out_goto(s, lb->raddr);
1139 }
1140
1141 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1142 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1143 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1144 {
1145 TCGLabelQemuLdst *label = new_ldst_label(s);
1146
1147 label->is_ld = is_ld;
1148 label->oi = oi;
1149 label->type = ext;
1150 label->datalo_reg = data_reg;
1151 label->addrlo_reg = addr_reg;
1152 label->raddr = raddr;
1153 label->label_ptr[0] = label_ptr;
1154 }
1155
1156 /* Load and compare a TLB entry, emitting the conditional jump to the
1157 slow path for the failure case, which will be patched later when finalizing
1158 the slow path. Generated code returns the host addend in X1,
1159 clobbers X0,X2,X3,TMP. */
1160 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1161 tcg_insn_unit **label_ptr, int mem_index,
1162 bool is_read)
1163 {
1164 int tlb_offset = is_read ?
1165 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1166 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1167 unsigned a_bits = get_alignment_bits(opc);
1168 unsigned s_bits = opc & MO_SIZE;
1169 unsigned a_mask = (1u << a_bits) - 1;
1170 unsigned s_mask = (1u << s_bits) - 1;
1171 TCGReg base = TCG_AREG0, x3;
1172 uint64_t tlb_mask;
1173
1174 /* For aligned accesses, we check the first byte and include the alignment
1175 bits within the address. For unaligned access, we check that we don't
1176 cross pages using the address of the last byte of the access. */
1177 if (a_bits >= s_bits) {
1178 x3 = addr_reg;
1179 } else {
1180 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1181 TCG_REG_X3, addr_reg, s_mask - a_mask);
1182 x3 = TCG_REG_X3;
1183 }
1184 tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1185
1186 /* Extract the TLB index from the address into X0.
1187 X0<CPU_TLB_BITS:0> =
1188 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1189 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1190 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1191
1192 /* Store the page mask part of the address into X3. */
1193 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1194 TCG_REG_X3, x3, tlb_mask);
1195
1196 /* Add any "high bits" from the tlb offset to the env address into X2,
1197 to take advantage of the LSL12 form of the ADDI instruction.
1198 X2 = env + (tlb_offset & 0xfff000) */
1199 if (tlb_offset & 0xfff000) {
1200 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1201 tlb_offset & 0xfff000);
1202 base = TCG_REG_X2;
1203 }
1204
1205 /* Merge the tlb index contribution into X2.
1206 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1207 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1208 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1209
1210 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1211 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1212 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1213 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
1214
1215 /* Load the tlb addend. Do that early to avoid stalling.
1216 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1217 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1218 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1219 (is_read ? offsetof(CPUTLBEntry, addr_read)
1220 : offsetof(CPUTLBEntry, addr_write)));
1221
1222 /* Perform the address comparison. */
1223 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1224
1225 /* If not equal, we jump to the slow path. */
1226 *label_ptr = s->code_ptr;
1227 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1228 }
1229
1230 #endif /* CONFIG_SOFTMMU */
1231
1232 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1233 TCGReg data_r, TCGReg addr_r,
1234 TCGType otype, TCGReg off_r)
1235 {
1236 const TCGMemOp bswap = memop & MO_BSWAP;
1237
1238 switch (memop & MO_SSIZE) {
1239 case MO_UB:
1240 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1241 break;
1242 case MO_SB:
1243 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1244 data_r, addr_r, otype, off_r);
1245 break;
1246 case MO_UW:
1247 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1248 if (bswap) {
1249 tcg_out_rev16(s, data_r, data_r);
1250 }
1251 break;
1252 case MO_SW:
1253 if (bswap) {
1254 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1255 tcg_out_rev16(s, data_r, data_r);
1256 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1257 } else {
1258 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1259 data_r, addr_r, otype, off_r);
1260 }
1261 break;
1262 case MO_UL:
1263 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1264 if (bswap) {
1265 tcg_out_rev32(s, data_r, data_r);
1266 }
1267 break;
1268 case MO_SL:
1269 if (bswap) {
1270 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1271 tcg_out_rev32(s, data_r, data_r);
1272 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1273 } else {
1274 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1275 }
1276 break;
1277 case MO_Q:
1278 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1279 if (bswap) {
1280 tcg_out_rev64(s, data_r, data_r);
1281 }
1282 break;
1283 default:
1284 tcg_abort();
1285 }
1286 }
1287
1288 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1289 TCGReg data_r, TCGReg addr_r,
1290 TCGType otype, TCGReg off_r)
1291 {
1292 const TCGMemOp bswap = memop & MO_BSWAP;
1293
1294 switch (memop & MO_SIZE) {
1295 case MO_8:
1296 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1297 break;
1298 case MO_16:
1299 if (bswap && data_r != TCG_REG_XZR) {
1300 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1301 data_r = TCG_REG_TMP;
1302 }
1303 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1304 break;
1305 case MO_32:
1306 if (bswap && data_r != TCG_REG_XZR) {
1307 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1308 data_r = TCG_REG_TMP;
1309 }
1310 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1311 break;
1312 case MO_64:
1313 if (bswap && data_r != TCG_REG_XZR) {
1314 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1315 data_r = TCG_REG_TMP;
1316 }
1317 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1318 break;
1319 default:
1320 tcg_abort();
1321 }
1322 }
1323
1324 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1325 TCGMemOpIdx oi, TCGType ext)
1326 {
1327 TCGMemOp memop = get_memop(oi);
1328 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1329 #ifdef CONFIG_SOFTMMU
1330 unsigned mem_index = get_mmuidx(oi);
1331 tcg_insn_unit *label_ptr;
1332
1333 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1334 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1335 TCG_REG_X1, otype, addr_reg);
1336 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1337 s->code_ptr, label_ptr);
1338 #else /* !CONFIG_SOFTMMU */
1339 if (USE_GUEST_BASE) {
1340 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1341 TCG_REG_GUEST_BASE, otype, addr_reg);
1342 } else {
1343 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1344 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1345 }
1346 #endif /* CONFIG_SOFTMMU */
1347 }
1348
1349 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1350 TCGMemOpIdx oi)
1351 {
1352 TCGMemOp memop = get_memop(oi);
1353 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1354 #ifdef CONFIG_SOFTMMU
1355 unsigned mem_index = get_mmuidx(oi);
1356 tcg_insn_unit *label_ptr;
1357
1358 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1359 tcg_out_qemu_st_direct(s, memop, data_reg,
1360 TCG_REG_X1, otype, addr_reg);
1361 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1362 data_reg, addr_reg, s->code_ptr, label_ptr);
1363 #else /* !CONFIG_SOFTMMU */
1364 if (USE_GUEST_BASE) {
1365 tcg_out_qemu_st_direct(s, memop, data_reg,
1366 TCG_REG_GUEST_BASE, otype, addr_reg);
1367 } else {
1368 tcg_out_qemu_st_direct(s, memop, data_reg,
1369 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1370 }
1371 #endif /* CONFIG_SOFTMMU */
1372 }
1373
1374 static tcg_insn_unit *tb_ret_addr;
1375
1376 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1377 const TCGArg args[TCG_MAX_OP_ARGS],
1378 const int const_args[TCG_MAX_OP_ARGS])
1379 {
1380 /* 99% of the time, we can signal the use of extension registers
1381 by looking to see if the opcode handles 64-bit data. */
1382 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1383
1384 /* Hoist the loads of the most common arguments. */
1385 TCGArg a0 = args[0];
1386 TCGArg a1 = args[1];
1387 TCGArg a2 = args[2];
1388 int c2 = const_args[2];
1389
1390 /* Some operands are defined with "rZ" constraint, a register or
1391 the zero register. These need not actually test args[I] == 0. */
1392 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1393
1394 switch (opc) {
1395 case INDEX_op_exit_tb:
1396 /* Reuse the zeroing that exists for goto_ptr. */
1397 if (a0 == 0) {
1398 tcg_out_goto_long(s, s->code_gen_epilogue);
1399 } else {
1400 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1401 tcg_out_goto_long(s, tb_ret_addr);
1402 }
1403 break;
1404
1405 case INDEX_op_goto_tb:
1406 if (s->tb_jmp_insn_offset != NULL) {
1407 /* TCG_TARGET_HAS_direct_jump */
1408 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1409 write can be used to patch the target address. */
1410 if ((uintptr_t)s->code_ptr & 7) {
1411 tcg_out32(s, NOP);
1412 }
1413 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1414 /* actual branch destination will be patched by
1415 tb_target_set_jmp_target later. */
1416 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1417 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1418 } else {
1419 /* !TCG_TARGET_HAS_direct_jump */
1420 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1421 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1422 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1423 }
1424 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1425 s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
1426 break;
1427
1428 case INDEX_op_goto_ptr:
1429 tcg_out_insn(s, 3207, BR, a0);
1430 break;
1431
1432 case INDEX_op_br:
1433 tcg_out_goto_label(s, arg_label(a0));
1434 break;
1435
1436 case INDEX_op_ld8u_i32:
1437 case INDEX_op_ld8u_i64:
1438 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
1439 break;
1440 case INDEX_op_ld8s_i32:
1441 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
1442 break;
1443 case INDEX_op_ld8s_i64:
1444 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
1445 break;
1446 case INDEX_op_ld16u_i32:
1447 case INDEX_op_ld16u_i64:
1448 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
1449 break;
1450 case INDEX_op_ld16s_i32:
1451 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
1452 break;
1453 case INDEX_op_ld16s_i64:
1454 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
1455 break;
1456 case INDEX_op_ld_i32:
1457 case INDEX_op_ld32u_i64:
1458 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
1459 break;
1460 case INDEX_op_ld32s_i64:
1461 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
1462 break;
1463 case INDEX_op_ld_i64:
1464 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
1465 break;
1466
1467 case INDEX_op_st8_i32:
1468 case INDEX_op_st8_i64:
1469 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
1470 break;
1471 case INDEX_op_st16_i32:
1472 case INDEX_op_st16_i64:
1473 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
1474 break;
1475 case INDEX_op_st_i32:
1476 case INDEX_op_st32_i64:
1477 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
1478 break;
1479 case INDEX_op_st_i64:
1480 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
1481 break;
1482
1483 case INDEX_op_add_i32:
1484 a2 = (int32_t)a2;
1485 /* FALLTHRU */
1486 case INDEX_op_add_i64:
1487 if (c2) {
1488 tcg_out_addsubi(s, ext, a0, a1, a2);
1489 } else {
1490 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1491 }
1492 break;
1493
1494 case INDEX_op_sub_i32:
1495 a2 = (int32_t)a2;
1496 /* FALLTHRU */
1497 case INDEX_op_sub_i64:
1498 if (c2) {
1499 tcg_out_addsubi(s, ext, a0, a1, -a2);
1500 } else {
1501 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1502 }
1503 break;
1504
1505 case INDEX_op_neg_i64:
1506 case INDEX_op_neg_i32:
1507 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1508 break;
1509
1510 case INDEX_op_and_i32:
1511 a2 = (int32_t)a2;
1512 /* FALLTHRU */
1513 case INDEX_op_and_i64:
1514 if (c2) {
1515 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1516 } else {
1517 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1518 }
1519 break;
1520
1521 case INDEX_op_andc_i32:
1522 a2 = (int32_t)a2;
1523 /* FALLTHRU */
1524 case INDEX_op_andc_i64:
1525 if (c2) {
1526 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1527 } else {
1528 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1529 }
1530 break;
1531
1532 case INDEX_op_or_i32:
1533 a2 = (int32_t)a2;
1534 /* FALLTHRU */
1535 case INDEX_op_or_i64:
1536 if (c2) {
1537 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1538 } else {
1539 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1540 }
1541 break;
1542
1543 case INDEX_op_orc_i32:
1544 a2 = (int32_t)a2;
1545 /* FALLTHRU */
1546 case INDEX_op_orc_i64:
1547 if (c2) {
1548 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1549 } else {
1550 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1551 }
1552 break;
1553
1554 case INDEX_op_xor_i32:
1555 a2 = (int32_t)a2;
1556 /* FALLTHRU */
1557 case INDEX_op_xor_i64:
1558 if (c2) {
1559 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1560 } else {
1561 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1562 }
1563 break;
1564
1565 case INDEX_op_eqv_i32:
1566 a2 = (int32_t)a2;
1567 /* FALLTHRU */
1568 case INDEX_op_eqv_i64:
1569 if (c2) {
1570 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1571 } else {
1572 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1573 }
1574 break;
1575
1576 case INDEX_op_not_i64:
1577 case INDEX_op_not_i32:
1578 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1579 break;
1580
1581 case INDEX_op_mul_i64:
1582 case INDEX_op_mul_i32:
1583 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1584 break;
1585
1586 case INDEX_op_div_i64:
1587 case INDEX_op_div_i32:
1588 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1589 break;
1590 case INDEX_op_divu_i64:
1591 case INDEX_op_divu_i32:
1592 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1593 break;
1594
1595 case INDEX_op_rem_i64:
1596 case INDEX_op_rem_i32:
1597 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1598 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1599 break;
1600 case INDEX_op_remu_i64:
1601 case INDEX_op_remu_i32:
1602 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1603 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1604 break;
1605
1606 case INDEX_op_shl_i64:
1607 case INDEX_op_shl_i32:
1608 if (c2) {
1609 tcg_out_shl(s, ext, a0, a1, a2);
1610 } else {
1611 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1612 }
1613 break;
1614
1615 case INDEX_op_shr_i64:
1616 case INDEX_op_shr_i32:
1617 if (c2) {
1618 tcg_out_shr(s, ext, a0, a1, a2);
1619 } else {
1620 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1621 }
1622 break;
1623
1624 case INDEX_op_sar_i64:
1625 case INDEX_op_sar_i32:
1626 if (c2) {
1627 tcg_out_sar(s, ext, a0, a1, a2);
1628 } else {
1629 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1630 }
1631 break;
1632
1633 case INDEX_op_rotr_i64:
1634 case INDEX_op_rotr_i32:
1635 if (c2) {
1636 tcg_out_rotr(s, ext, a0, a1, a2);
1637 } else {
1638 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1639 }
1640 break;
1641
1642 case INDEX_op_rotl_i64:
1643 case INDEX_op_rotl_i32:
1644 if (c2) {
1645 tcg_out_rotl(s, ext, a0, a1, a2);
1646 } else {
1647 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1648 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1649 }
1650 break;
1651
1652 case INDEX_op_clz_i64:
1653 case INDEX_op_clz_i32:
1654 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1655 break;
1656 case INDEX_op_ctz_i64:
1657 case INDEX_op_ctz_i32:
1658 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1659 break;
1660
1661 case INDEX_op_brcond_i32:
1662 a1 = (int32_t)a1;
1663 /* FALLTHRU */
1664 case INDEX_op_brcond_i64:
1665 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1666 break;
1667
1668 case INDEX_op_setcond_i32:
1669 a2 = (int32_t)a2;
1670 /* FALLTHRU */
1671 case INDEX_op_setcond_i64:
1672 tcg_out_cmp(s, ext, a1, a2, c2);
1673 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1674 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1675 TCG_REG_XZR, tcg_invert_cond(args[3]));
1676 break;
1677
1678 case INDEX_op_movcond_i32:
1679 a2 = (int32_t)a2;
1680 /* FALLTHRU */
1681 case INDEX_op_movcond_i64:
1682 tcg_out_cmp(s, ext, a1, a2, c2);
1683 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1684 break;
1685
1686 case INDEX_op_qemu_ld_i32:
1687 case INDEX_op_qemu_ld_i64:
1688 tcg_out_qemu_ld(s, a0, a1, a2, ext);
1689 break;
1690 case INDEX_op_qemu_st_i32:
1691 case INDEX_op_qemu_st_i64:
1692 tcg_out_qemu_st(s, REG0(0), a1, a2);
1693 break;
1694
1695 case INDEX_op_bswap64_i64:
1696 tcg_out_rev64(s, a0, a1);
1697 break;
1698 case INDEX_op_bswap32_i64:
1699 case INDEX_op_bswap32_i32:
1700 tcg_out_rev32(s, a0, a1);
1701 break;
1702 case INDEX_op_bswap16_i64:
1703 case INDEX_op_bswap16_i32:
1704 tcg_out_rev16(s, a0, a1);
1705 break;
1706
1707 case INDEX_op_ext8s_i64:
1708 case INDEX_op_ext8s_i32:
1709 tcg_out_sxt(s, ext, MO_8, a0, a1);
1710 break;
1711 case INDEX_op_ext16s_i64:
1712 case INDEX_op_ext16s_i32:
1713 tcg_out_sxt(s, ext, MO_16, a0, a1);
1714 break;
1715 case INDEX_op_ext_i32_i64:
1716 case INDEX_op_ext32s_i64:
1717 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1718 break;
1719 case INDEX_op_ext8u_i64:
1720 case INDEX_op_ext8u_i32:
1721 tcg_out_uxt(s, MO_8, a0, a1);
1722 break;
1723 case INDEX_op_ext16u_i64:
1724 case INDEX_op_ext16u_i32:
1725 tcg_out_uxt(s, MO_16, a0, a1);
1726 break;
1727 case INDEX_op_extu_i32_i64:
1728 case INDEX_op_ext32u_i64:
1729 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1730 break;
1731
1732 case INDEX_op_deposit_i64:
1733 case INDEX_op_deposit_i32:
1734 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1735 break;
1736
1737 case INDEX_op_extract_i64:
1738 case INDEX_op_extract_i32:
1739 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
1740 break;
1741
1742 case INDEX_op_sextract_i64:
1743 case INDEX_op_sextract_i32:
1744 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
1745 break;
1746
1747 case INDEX_op_add2_i32:
1748 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1749 (int32_t)args[4], args[5], const_args[4],
1750 const_args[5], false);
1751 break;
1752 case INDEX_op_add2_i64:
1753 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1754 args[5], const_args[4], const_args[5], false);
1755 break;
1756 case INDEX_op_sub2_i32:
1757 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1758 (int32_t)args[4], args[5], const_args[4],
1759 const_args[5], true);
1760 break;
1761 case INDEX_op_sub2_i64:
1762 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1763 args[5], const_args[4], const_args[5], true);
1764 break;
1765
1766 case INDEX_op_muluh_i64:
1767 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1768 break;
1769 case INDEX_op_mulsh_i64:
1770 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1771 break;
1772
1773 case INDEX_op_mb:
1774 tcg_out_mb(s, a0);
1775 break;
1776
1777 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1778 case INDEX_op_mov_i64:
1779 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1780 case INDEX_op_movi_i64:
1781 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1782 default:
1783 tcg_abort();
1784 }
1785
1786 #undef REG0
1787 }
1788
1789 static const TCGTargetOpDef aarch64_op_defs[] = {
1790 { INDEX_op_exit_tb, { } },
1791 { INDEX_op_goto_tb, { } },
1792 { INDEX_op_br, { } },
1793 { INDEX_op_goto_ptr, { "r" } },
1794
1795 { INDEX_op_ld8u_i32, { "r", "r" } },
1796 { INDEX_op_ld8s_i32, { "r", "r" } },
1797 { INDEX_op_ld16u_i32, { "r", "r" } },
1798 { INDEX_op_ld16s_i32, { "r", "r" } },
1799 { INDEX_op_ld_i32, { "r", "r" } },
1800 { INDEX_op_ld8u_i64, { "r", "r" } },
1801 { INDEX_op_ld8s_i64, { "r", "r" } },
1802 { INDEX_op_ld16u_i64, { "r", "r" } },
1803 { INDEX_op_ld16s_i64, { "r", "r" } },
1804 { INDEX_op_ld32u_i64, { "r", "r" } },
1805 { INDEX_op_ld32s_i64, { "r", "r" } },
1806 { INDEX_op_ld_i64, { "r", "r" } },
1807
1808 { INDEX_op_st8_i32, { "rZ", "r" } },
1809 { INDEX_op_st16_i32, { "rZ", "r" } },
1810 { INDEX_op_st_i32, { "rZ", "r" } },
1811 { INDEX_op_st8_i64, { "rZ", "r" } },
1812 { INDEX_op_st16_i64, { "rZ", "r" } },
1813 { INDEX_op_st32_i64, { "rZ", "r" } },
1814 { INDEX_op_st_i64, { "rZ", "r" } },
1815
1816 { INDEX_op_add_i32, { "r", "r", "rA" } },
1817 { INDEX_op_add_i64, { "r", "r", "rA" } },
1818 { INDEX_op_sub_i32, { "r", "r", "rA" } },
1819 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1820 { INDEX_op_mul_i32, { "r", "r", "r" } },
1821 { INDEX_op_mul_i64, { "r", "r", "r" } },
1822 { INDEX_op_div_i32, { "r", "r", "r" } },
1823 { INDEX_op_div_i64, { "r", "r", "r" } },
1824 { INDEX_op_divu_i32, { "r", "r", "r" } },
1825 { INDEX_op_divu_i64, { "r", "r", "r" } },
1826 { INDEX_op_rem_i32, { "r", "r", "r" } },
1827 { INDEX_op_rem_i64, { "r", "r", "r" } },
1828 { INDEX_op_remu_i32, { "r", "r", "r" } },
1829 { INDEX_op_remu_i64, { "r", "r", "r" } },
1830 { INDEX_op_and_i32, { "r", "r", "rL" } },
1831 { INDEX_op_and_i64, { "r", "r", "rL" } },
1832 { INDEX_op_or_i32, { "r", "r", "rL" } },
1833 { INDEX_op_or_i64, { "r", "r", "rL" } },
1834 { INDEX_op_xor_i32, { "r", "r", "rL" } },
1835 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1836 { INDEX_op_andc_i32, { "r", "r", "rL" } },
1837 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1838 { INDEX_op_orc_i32, { "r", "r", "rL" } },
1839 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1840 { INDEX_op_eqv_i32, { "r", "r", "rL" } },
1841 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1842
1843 { INDEX_op_neg_i32, { "r", "r" } },
1844 { INDEX_op_neg_i64, { "r", "r" } },
1845 { INDEX_op_not_i32, { "r", "r" } },
1846 { INDEX_op_not_i64, { "r", "r" } },
1847
1848 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1849 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1850 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1851 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1852 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1853 { INDEX_op_clz_i32, { "r", "r", "rAL" } },
1854 { INDEX_op_ctz_i32, { "r", "r", "rAL" } },
1855 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1856 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1857 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1858 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1859 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1860 { INDEX_op_clz_i64, { "r", "r", "rAL" } },
1861 { INDEX_op_ctz_i64, { "r", "r", "rAL" } },
1862
1863 { INDEX_op_brcond_i32, { "r", "rA" } },
1864 { INDEX_op_brcond_i64, { "r", "rA" } },
1865 { INDEX_op_setcond_i32, { "r", "r", "rA" } },
1866 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1867 { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } },
1868 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1869
1870 { INDEX_op_qemu_ld_i32, { "r", "l" } },
1871 { INDEX_op_qemu_ld_i64, { "r", "l" } },
1872 { INDEX_op_qemu_st_i32, { "lZ", "l" } },
1873 { INDEX_op_qemu_st_i64, { "lZ", "l" } },
1874
1875 { INDEX_op_bswap16_i32, { "r", "r" } },
1876 { INDEX_op_bswap32_i32, { "r", "r" } },
1877 { INDEX_op_bswap16_i64, { "r", "r" } },
1878 { INDEX_op_bswap32_i64, { "r", "r" } },
1879 { INDEX_op_bswap64_i64, { "r", "r" } },
1880
1881 { INDEX_op_ext8s_i32, { "r", "r" } },
1882 { INDEX_op_ext16s_i32, { "r", "r" } },
1883 { INDEX_op_ext8u_i32, { "r", "r" } },
1884 { INDEX_op_ext16u_i32, { "r", "r" } },
1885
1886 { INDEX_op_ext8s_i64, { "r", "r" } },
1887 { INDEX_op_ext16s_i64, { "r", "r" } },
1888 { INDEX_op_ext32s_i64, { "r", "r" } },
1889 { INDEX_op_ext8u_i64, { "r", "r" } },
1890 { INDEX_op_ext16u_i64, { "r", "r" } },
1891 { INDEX_op_ext32u_i64, { "r", "r" } },
1892 { INDEX_op_ext_i32_i64, { "r", "r" } },
1893 { INDEX_op_extu_i32_i64, { "r", "r" } },
1894
1895 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1896 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1897 { INDEX_op_extract_i32, { "r", "r" } },
1898 { INDEX_op_extract_i64, { "r", "r" } },
1899 { INDEX_op_sextract_i32, { "r", "r" } },
1900 { INDEX_op_sextract_i64, { "r", "r" } },
1901
1902 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1903 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1904 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1905 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1906
1907 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1908 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1909
1910 { INDEX_op_mb, { } },
1911 { -1 },
1912 };
1913
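/* Look up the constraint entry for OP, returning NULL if the opcode
   is not implemented by this backend; a simple linear scan is adequate
   as this lookup is not performance critical.  */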
1914 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
1915 {
1916 int i, n = ARRAY_SIZE(aarch64_op_defs);
1917
1918 for (i = 0; i < n; ++i) {
1919 if (aarch64_op_defs[i].op == op) {
1920 return &aarch64_op_defs[i];
1921 }
1922 }
1923 return NULL;
1924 }
1925
1926 static void tcg_target_init(TCGContext *s)
1927 {
1928 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
1929 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
1930
1931 tcg_target_call_clobber_regs = 0xfffffffu;
1932 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
1933 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
1934 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
1935 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
1936 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
1937 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
1938 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
1939 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
1940 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
1941 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
1942 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
1943
1944 s->reserved_regs = 0;
1945 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1946 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1947 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1948 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1949 }
1950
1951 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
1952 #define PUSH_SIZE ((30 - 19 + 1) * 8)
1953
1954 #define FRAME_SIZE \
1955 ((PUSH_SIZE \
1956 + TCG_STATIC_CALL_ARGS_SIZE \
1957 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
1958 + TCG_TARGET_STACK_ALIGN - 1) \
1959 & ~(TCG_TARGET_STACK_ALIGN - 1))
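/* Worked example, assuming the usual TCG_STATIC_CALL_ARGS_SIZE of 128
   and CPU_TEMP_BUF_NLONGS of 128 on a 64-bit host: FRAME_SIZE is
   (96 + 128 + 1024 + 15) & ~15 == 1248, well within both of the
   build-time limits asserted below.  */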
1960
1961 /* We're expecting a 2-byte uleb128-encoded value. */
1962 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
1963
1964 /* We're expecting to use a single ADDI insn. */
1965 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
1966
1967 static void tcg_target_qemu_prologue(TCGContext *s)
1968 {
1969 TCGReg r;
1970
1971 /* Push (FP, LR) and allocate space for all saved registers. */
1972 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
1973 TCG_REG_SP, -PUSH_SIZE, 1, 1);
1974
1975 /* Set up frame pointer for canonical unwinding. */
1976 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1977
1978 /* Store callee-preserved regs x19..x28. */
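/* The (FP, LR) pair pushed above occupies the first 16 bytes of the
   frame, so the first callee-saved pair is stored at offset 16.  */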
1979 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1980 int ofs = (r - TCG_REG_X19 + 2) * 8;
1981 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1982 }
1983
1984 /* Make stack space for TCG locals. */
1985 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1986 FRAME_SIZE - PUSH_SIZE);
1987
1988 /* Tell TCG where to find its locals: base register, offset and size. */
1989 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1990 CPU_TEMP_BUF_NLONGS * sizeof(long));
1991
1992 #if !defined(CONFIG_SOFTMMU)
1993 if (USE_GUEST_BASE) {
1994 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
1995 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1996 }
1997 #endif
1998
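/* The prologue is entered with the CPU env pointer in the first
   argument register and the target address in the second: move env
   into AREG0 (x19) and branch to the translated code.  */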
1999 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2000 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2001
2002 /*
2003 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2004 * and fall through to the rest of the epilogue.
2005 */
2006 s->code_gen_epilogue = s->code_ptr;
2007 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2008
2009 /* TB epilogue */
2010 tb_ret_addr = s->code_ptr;
2011
2012 /* Remove TCG locals stack space. */
2013 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2014 FRAME_SIZE - PUSH_SIZE);
2015
2016 /* Restore registers x19..x28. */
2017 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2018 int ofs = (r - TCG_REG_X19 + 2) * 8;
2019 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2020 }
2021
2022 /* Pop (FP, LR), restore SP to previous frame. */
2023 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2024 TCG_REG_SP, PUSH_SIZE, 0, 1);
2025 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2026 }
2027
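/* Fill a range of generated-code memory with NOP instructions, e.g.
   when the common code needs to pad out the code buffer.  */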
2028 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2029 {
2030 int i;
2031 for (i = 0; i < count; ++i) {
2032 p[i] = NOP;
2033 }
2034 }
2035
2036 typedef struct {
2037 DebugFrameHeader h;
2038 uint8_t fde_def_cfa[4];
2039 uint8_t fde_reg_ofs[24];
2040 } DebugFrame;
2041
2042 #define ELF_HOST_MACHINE EM_AARCH64
2043
2044 static const DebugFrame debug_frame = {
2045 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2046 .h.cie.id = -1,
2047 .h.cie.version = 1,
2048 .h.cie.code_align = 1,
2049 .h.cie.data_align = 0x78, /* sleb128 -8 */
2050 .h.cie.return_column = TCG_REG_LR,
2051
2052 /* Total FDE size does not include the "len" member. */
2053 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2054
2055 .fde_def_cfa = {
2056 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2057 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2058 (FRAME_SIZE >> 7)
2059 },
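/* FRAME_SIZE is emitted above as a two-byte uleb128: the low 7 bits
   with the continuation bit set, then the remaining bits.  This is why
   the build-time assert above keeps FRAME_SIZE below 1 << 14.  */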
2060 .fde_reg_ofs = {
2061 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2062 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2063 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2064 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2065 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2066 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2067 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2068 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2069 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2070 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2071 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2072 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2073 }
2074 };
2075
2076 void tcg_register_jit(void *buf, size_t buf_size)
2077 {
2078 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2079 }