1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-be-ldst.h"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType to set the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
27 };
28 #endif /* CONFIG_DEBUG_TCG */
29
30 static const int tcg_target_reg_alloc_order[] = {
31 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
32 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
33 TCG_REG_X28, /* we will reserve this for guest_base if configured */
34
35 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
36 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
37 TCG_REG_X16, TCG_REG_X17,
38
39 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
40 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
41
42 /* X18 reserved by system */
43 /* X19 reserved for AREG0 */
44 /* X29 reserved as fp */
45 /* X30 reserved as temporary */
46 };
47
48 static const int tcg_target_call_iarg_regs[8] = {
49 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
50 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
51 };
52 static const int tcg_target_call_oarg_regs[1] = {
53 TCG_REG_X0
54 };
55
56 #define TCG_REG_TMP TCG_REG_X30
57
58 #ifndef CONFIG_SOFTMMU
59 /* Note that XZR cannot be encoded in the address base register slot,
60 as that actually encodes SP. So if we need to zero-extend the guest
61 address, via the address index register slot, we need to load even
62 a zero guest base into a register. */
63 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
64 #define TCG_REG_GUEST_BASE TCG_REG_X28
65 #endif
66
67 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
68 {
69 ptrdiff_t offset = target - code_ptr;
70 tcg_debug_assert(offset == sextract64(offset, 0, 26));
71 /* read instruction, mask away previous PC_REL26 parameter contents,
72 set the proper offset, then write back the instruction. */
73 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
74 }
75
76 static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr,
77 tcg_insn_unit *target)
78 {
79 ptrdiff_t offset = target - code_ptr;
80 tcg_insn_unit insn;
81 tcg_debug_assert(offset == sextract64(offset, 0, 26));
82 /* read instruction, mask away previous PC_REL26 parameter contents,
83 set the proper offset, then write back the instruction. */
84 insn = atomic_read(code_ptr);
85 atomic_set(code_ptr, deposit32(insn, 0, 26, offset));
86 }
87
88 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
89 {
90 ptrdiff_t offset = target - code_ptr;
91 tcg_debug_assert(offset == sextract64(offset, 0, 19));
92 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
93 }
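/* Informal note on the two fixups above: both patch a PC-relative branch
   immediate measured in 32-bit instruction units (tcg_insn_unit is 4 bytes
   here), so "target - code_ptr" is already correctly scaled.  B/BL carry a
   signed 26-bit field at bits [25:0] (roughly +/-128MB); B.cond and
   CBZ/CBNZ carry a signed 19-bit field at bits [23:5] (roughly +/-1MB). */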
94
95 static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
96 intptr_t value, intptr_t addend)
97 {
98 tcg_debug_assert(addend == 0);
99 switch (type) {
100 case R_AARCH64_JUMP26:
101 case R_AARCH64_CALL26:
102 reloc_pc26(code_ptr, (tcg_insn_unit *)value);
103 break;
104 case R_AARCH64_CONDBR19:
105 reloc_pc19(code_ptr, (tcg_insn_unit *)value);
106 break;
107 default:
108 tcg_abort();
109 }
110 }
111
112 #define TCG_CT_CONST_AIMM 0x100
113 #define TCG_CT_CONST_LIMM 0x200
114 #define TCG_CT_CONST_ZERO 0x400
115 #define TCG_CT_CONST_MONE 0x800
116
117 /* parse target specific constraints */
118 static const char *target_parse_constraint(TCGArgConstraint *ct,
119 const char *ct_str, TCGType type)
120 {
121 switch (*ct_str++) {
122 case 'r':
123 ct->ct |= TCG_CT_REG;
124 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
125 break;
126 case 'l': /* qemu_ld / qemu_st address, data_reg */
127 ct->ct |= TCG_CT_REG;
128 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
129 #ifdef CONFIG_SOFTMMU
130 /* x0 and x1 will be overwritten when reading the tlb entry,
131 and x2 and x3 for helper args; better to avoid using them. */
132 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
133 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
134 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
135 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
136 #endif
137 break;
138 case 'A': /* Valid for arithmetic immediate (positive or negative). */
139 ct->ct |= TCG_CT_CONST_AIMM;
140 break;
141 case 'L': /* Valid for logical immediate. */
142 ct->ct |= TCG_CT_CONST_LIMM;
143 break;
144 case 'M': /* minus one */
145 ct->ct |= TCG_CT_CONST_MONE;
146 break;
147 case 'Z': /* zero */
148 ct->ct |= TCG_CT_CONST_ZERO;
149 break;
150 default:
151 return NULL;
152 }
153 return ct_str;
154 }
155
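/* An AArch64 arithmetic (add/subtract) immediate is a 12-bit value,
   optionally shifted left by 12, which is what the two masks below test
   for: e.g. 0xabc and 0xabc000 are encodable, but 0xabc0 is not. */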
156 static inline bool is_aimm(uint64_t val)
157 {
158 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
159 }
160
161 static inline bool is_limm(uint64_t val)
162 {
163 /* Taking a simplified view of the logical immediates for now, ignoring
164 the replication that can happen across the field. Match bit patterns
165 of the forms
166 0....01....1
167 0..01..10..0
168 and their inverses. */
169
170 /* Make things easier below, by testing the form with msb clear. */
171 if ((int64_t)val < 0) {
172 val = ~val;
173 }
174 if (val == 0) {
175 return false;
176 }
177 val += val & -val;
178 return (val & (val - 1)) == 0;
179 }
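/* Worked example of the trick above: a single run of ones such as 0x0ff0
   has lowest set bit 0x010; adding it carries through the whole run,
   giving 0x1000, which passes the final power-of-two test.  A value with
   two separate runs, e.g. 0x0f0f, becomes 0x0f10 and fails it. */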
180
181 static int tcg_target_const_match(tcg_target_long val, TCGType type,
182 const TCGArgConstraint *arg_ct)
183 {
184 int ct = arg_ct->ct;
185
186 if (ct & TCG_CT_CONST) {
187 return 1;
188 }
189 if (type == TCG_TYPE_I32) {
190 val = (int32_t)val;
191 }
192 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
193 return 1;
194 }
195 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
196 return 1;
197 }
198 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
199 return 1;
200 }
201 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
202 return 1;
203 }
204
205 return 0;
206 }
207
208 enum aarch64_cond_code {
209 COND_EQ = 0x0,
210 COND_NE = 0x1,
211 COND_CS = 0x2, /* Unsigned greater or equal */
212 COND_HS = COND_CS, /* ALIAS greater or equal */
213 COND_CC = 0x3, /* Unsigned less than */
214 COND_LO = COND_CC, /* ALIAS Lower */
215 COND_MI = 0x4, /* Negative */
216 COND_PL = 0x5, /* Zero or greater */
217 COND_VS = 0x6, /* Overflow */
218 COND_VC = 0x7, /* No overflow */
219 COND_HI = 0x8, /* Unsigned greater than */
220 COND_LS = 0x9, /* Unsigned less or equal */
221 COND_GE = 0xa,
222 COND_LT = 0xb,
223 COND_GT = 0xc,
224 COND_LE = 0xd,
225 COND_AL = 0xe,
226 COND_NV = 0xf, /* behaves like COND_AL here */
227 };
228
229 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
230 [TCG_COND_EQ] = COND_EQ,
231 [TCG_COND_NE] = COND_NE,
232 [TCG_COND_LT] = COND_LT,
233 [TCG_COND_GE] = COND_GE,
234 [TCG_COND_LE] = COND_LE,
235 [TCG_COND_GT] = COND_GT,
236 /* unsigned */
237 [TCG_COND_LTU] = COND_LO,
238 [TCG_COND_GTU] = COND_HI,
239 [TCG_COND_GEU] = COND_HS,
240 [TCG_COND_LEU] = COND_LS,
241 };
242
243 typedef enum {
244 LDST_ST = 0, /* store */
245 LDST_LD = 1, /* load */
246 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
247 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
248 } AArch64LdstType;
249
250 /* We encode the format of the insn into the beginning of the name, so that
251 we can have the preprocessor help "typecheck" the insn vs the output
252 function. Arm didn't provide us with nice names for the formats, so we
253 use the section number of the architecture reference manual in which the
254 instruction group is described. */
255 typedef enum {
256 /* Compare and branch (immediate). */
257 I3201_CBZ = 0x34000000,
258 I3201_CBNZ = 0x35000000,
259
260 /* Conditional branch (immediate). */
261 I3202_B_C = 0x54000000,
262
263 /* Unconditional branch (immediate). */
264 I3206_B = 0x14000000,
265 I3206_BL = 0x94000000,
266
267 /* Unconditional branch (register). */
268 I3207_BR = 0xd61f0000,
269 I3207_BLR = 0xd63f0000,
270 I3207_RET = 0xd65f0000,
271
272 /* Load/store register. Described here as 3.3.12, but the helper
273 that emits them can transform to 3.3.10 or 3.3.13. */
274 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
275 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
276 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
277 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
278
279 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
280 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
281 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
282 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
283
284 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
285 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
286
287 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
288 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
289 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
290
291 I3312_TO_I3310 = 0x00200800,
292 I3312_TO_I3313 = 0x01000000,
293
294 /* Load/store register pair instructions. */
295 I3314_LDP = 0x28400000,
296 I3314_STP = 0x28000000,
297
298 /* Add/subtract immediate instructions. */
299 I3401_ADDI = 0x11000000,
300 I3401_ADDSI = 0x31000000,
301 I3401_SUBI = 0x51000000,
302 I3401_SUBSI = 0x71000000,
303
304 /* Bitfield instructions. */
305 I3402_BFM = 0x33000000,
306 I3402_SBFM = 0x13000000,
307 I3402_UBFM = 0x53000000,
308
309 /* Extract instruction. */
310 I3403_EXTR = 0x13800000,
311
312 /* Logical immediate instructions. */
313 I3404_ANDI = 0x12000000,
314 I3404_ORRI = 0x32000000,
315 I3404_EORI = 0x52000000,
316
317 /* Move wide immediate instructions. */
318 I3405_MOVN = 0x12800000,
319 I3405_MOVZ = 0x52800000,
320 I3405_MOVK = 0x72800000,
321
322 /* PC relative addressing instructions. */
323 I3406_ADR = 0x10000000,
324 I3406_ADRP = 0x90000000,
325
326 /* Add/subtract shifted register instructions (without a shift). */
327 I3502_ADD = 0x0b000000,
328 I3502_ADDS = 0x2b000000,
329 I3502_SUB = 0x4b000000,
330 I3502_SUBS = 0x6b000000,
331
332 /* Add/subtract shifted register instructions (with a shift). */
333 I3502S_ADD_LSL = I3502_ADD,
334
335 /* Add/subtract with carry instructions. */
336 I3503_ADC = 0x1a000000,
337 I3503_SBC = 0x5a000000,
338
339 /* Conditional select instructions. */
340 I3506_CSEL = 0x1a800000,
341 I3506_CSINC = 0x1a800400,
342 I3506_CSINV = 0x5a800000,
343 I3506_CSNEG = 0x5a800400,
344
345 /* Data-processing (1 source) instructions. */
346 I3507_CLZ = 0x5ac01000,
347 I3507_RBIT = 0x5ac00000,
348 I3507_REV16 = 0x5ac00400,
349 I3507_REV32 = 0x5ac00800,
350 I3507_REV64 = 0x5ac00c00,
351
352 /* Data-processing (2 source) instructions. */
353 I3508_LSLV = 0x1ac02000,
354 I3508_LSRV = 0x1ac02400,
355 I3508_ASRV = 0x1ac02800,
356 I3508_RORV = 0x1ac02c00,
357 I3508_SMULH = 0x9b407c00,
358 I3508_UMULH = 0x9bc07c00,
359 I3508_UDIV = 0x1ac00800,
360 I3508_SDIV = 0x1ac00c00,
361
362 /* Data-processing (3 source) instructions. */
363 I3509_MADD = 0x1b000000,
364 I3509_MSUB = 0x1b008000,
365
366 /* Logical shifted register instructions (without a shift). */
367 I3510_AND = 0x0a000000,
368 I3510_BIC = 0x0a200000,
369 I3510_ORR = 0x2a000000,
370 I3510_ORN = 0x2a200000,
371 I3510_EOR = 0x4a000000,
372 I3510_EON = 0x4a200000,
373 I3510_ANDS = 0x6a000000,
374
375 /* System instructions. */
376 DMB_ISH = 0xd50338bf,
377 DMB_LD = 0x00000100,
378 DMB_ST = 0x00000200,
379 } AArch64Insn;
380
381 static inline uint32_t tcg_in32(TCGContext *s)
382 {
383 uint32_t v = *(uint32_t *)s->code_ptr;
384 return v;
385 }
386
387 /* Emit an opcode with "type-checking" of the format. */
388 #define tcg_out_insn(S, FMT, OP, ...) \
389 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
390
391 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
392 TCGReg rt, int imm19)
393 {
394 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
395 }
396
397 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
398 TCGCond c, int imm19)
399 {
400 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
401 }
402
403 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
404 {
405 tcg_out32(s, insn | (imm26 & 0x03ffffff));
406 }
407
408 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
409 {
410 tcg_out32(s, insn | rn << 5);
411 }
412
413 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
414 TCGReg r1, TCGReg r2, TCGReg rn,
415 tcg_target_long ofs, bool pre, bool w)
416 {
417 insn |= 1u << 31; /* ext */
418 insn |= pre << 24;
419 insn |= w << 23;
420
421 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
422 insn |= (ofs & (0x7f << 3)) << (15 - 3);
423
424 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
425 }
426
427 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
428 TCGReg rd, TCGReg rn, uint64_t aimm)
429 {
430 if (aimm > 0xfff) {
431 tcg_debug_assert((aimm & 0xfff) == 0);
432 aimm >>= 12;
433 tcg_debug_assert(aimm <= 0xfff);
434 aimm |= 1 << 12; /* apply LSL 12 */
435 }
436 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
437 }
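/* E.g. an aimm of 0x123000 is encoded as the 12-bit immediate 0x123 with
   the LSL #12 bit set.  Values with bits in both halves (say 0x123456)
   trip the assertions above; callers are expected to pre-filter such
   constants with is_aimm() via the 'A' constraint. */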
438
439 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
440 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
441 that feed the DecodeBitMasks pseudo function. */
442 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
443 TCGReg rd, TCGReg rn, int n, int immr, int imms)
444 {
445 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
446 | rn << 5 | rd);
447 }
448
449 #define tcg_out_insn_3404 tcg_out_insn_3402
450
451 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
452 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
453 {
454 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
455 | rn << 5 | rd);
456 }
457
458 /* This function is used for the Move (wide immediate) instruction group.
459 Note that SHIFT is a full shift count, not the 2 bit HW field. */
460 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
461 TCGReg rd, uint16_t half, unsigned shift)
462 {
463 tcg_debug_assert((shift & ~0x30) == 0);
464 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
465 }
466
467 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
468 TCGReg rd, int64_t disp)
469 {
470 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
471 }
472
473 /* This function is for 3.5.2 (Add/subtract shifted register), for
474 the rare occasion when we actually want to supply a shift amount. */
475 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
476 TCGType ext, TCGReg rd, TCGReg rn,
477 TCGReg rm, int imm6)
478 {
479 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
480 }
481
482 /* This function is for 3.5.2 (Add/subtract shifted register),
483 and 3.5.10 (Logical shifted register), for the vast majority of cases
484 when we don't want to apply a shift. Thus it can also be used for
485 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
486 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
487 TCGReg rd, TCGReg rn, TCGReg rm)
488 {
489 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
490 }
491
492 #define tcg_out_insn_3503 tcg_out_insn_3502
493 #define tcg_out_insn_3508 tcg_out_insn_3502
494 #define tcg_out_insn_3510 tcg_out_insn_3502
495
496 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
497 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
498 {
499 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
500 | tcg_cond_to_aarch64[c] << 12);
501 }
502
503 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
504 TCGReg rd, TCGReg rn)
505 {
506 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
507 }
508
509 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
510 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
511 {
512 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
513 }
514
515 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
516 TCGReg rd, TCGReg base, TCGType ext,
517 TCGReg regoff)
518 {
519 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
520 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
521 0x4000 | ext << 13 | base << 5 | rd);
522 }
523
524 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
525 TCGReg rd, TCGReg rn, intptr_t offset)
526 {
527 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
528 }
529
530 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
531 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
532 {
533 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
534 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
535 }
536
537 /* Register to register move using ORR (shifted register with no shift). */
538 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
539 {
540 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
541 }
542
543 /* Register to register move using ADDI (move to/from SP). */
544 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
545 {
546 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
547 }
548
549 /* This function is used for the Logical (immediate) instruction group.
550 The value of LIMM must satisfy IS_LIMM. See the comment above about
551 only supporting simplified logical immediates. */
552 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
553 TCGReg rd, TCGReg rn, uint64_t limm)
554 {
555 unsigned h, l, r, c;
556
557 tcg_debug_assert(is_limm(limm));
558
559 h = clz64(limm);
560 l = ctz64(limm);
561 if (l == 0) {
562 r = 0; /* form 0....01....1 */
563 c = ctz64(~limm) - 1;
564 if (h == 0) {
565 r = clz64(~limm); /* form 1..10..01..1 */
566 c += r;
567 }
568 } else {
569 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
570 c = r - h - 1;
571 }
572 if (ext == TCG_TYPE_I32) {
573 r &= 31;
574 c &= 31;
575 }
576
577 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
578 }
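/* Worked example: limm = 0x0ff0 (64-bit) gives h = 52 and l = 4, hence
   r = 60 and c = 7, i.e. IMMR = 60, IMMS = 7: a run of c + 1 = 8 ones
   rotated right by 60 positions, which is exactly 0x0ff0 again. */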
579
580 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
581 tcg_target_long value)
582 {
583 int i, wantinv, shift;
584 tcg_target_long svalue = value;
585 tcg_target_long ivalue = ~value;
586
587 /* For 32-bit values, discard potential garbage in value. For 64-bit
588 values within [2**31, 2**32-1], we can create smaller sequences by
589 interpreting this as a negative 32-bit number, while ensuring that
590 the high 32 bits are cleared by setting SF=0. */
591 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
592 svalue = (int32_t)value;
593 value = (uint32_t)value;
594 ivalue = (uint32_t)ivalue;
595 type = TCG_TYPE_I32;
596 }
597
598 /* Speed things up by handling the common case of small positive
599 and negative values specially. */
600 if ((value & ~0xffffull) == 0) {
601 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
602 return;
603 } else if ((ivalue & ~0xffffull) == 0) {
604 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
605 return;
606 }
607
608 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
609 use the sign-extended value. That lets us match rotated values such
610 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
611 if (is_limm(svalue)) {
612 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
613 return;
614 }
615
616 /* Look for host pointer values within 4G of the PC. This happens
617 often when loading pointers to QEMU's own data structures. */
618 if (type == TCG_TYPE_I64) {
619 tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
620 if (disp == sextract64(disp, 0, 21)) {
621 tcg_out_insn(s, 3406, ADRP, rd, disp);
622 if (value & 0xfff) {
623 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
624 }
625 return;
626 }
627 }
628
629 /* Would it take fewer insns to begin with MOVN? For the value and its
630 inverse, count the number of 16-bit lanes that are 0. */
631 for (i = wantinv = 0; i < 64; i += 16) {
632 tcg_target_long mask = 0xffffull << i;
633 wantinv -= ((value & mask) == 0);
634 wantinv += ((ivalue & mask) == 0);
635 }
636
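    /* Illustration: for value 0xffffffff0000ffff the value itself has one
       zero lane but its inverse 0x00000000ffff0000 has three, so wantinv
       is positive and we emit a single MOVN x, #0xffff, lsl #16 instead
       of three MOVZ/MOVK instructions. */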
637 if (wantinv <= 0) {
638 /* Find the lowest lane that is not 0x0000. */
639 shift = ctz64(value) & (63 & -16);
640 tcg_out_insn(s, 3405, MOVZ, type, rd, value >> shift, shift);
641 /* Clear out the lane that we just set. */
642 value &= ~(0xffffUL << shift);
643 /* Iterate until all non-zero lanes have been processed. */
644 while (value) {
645 shift = ctz64(value) & (63 & -16);
646 tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
647 value &= ~(0xffffUL << shift);
648 }
649 } else {
650 /* Like above, but with the inverted value and MOVN to start. */
651 shift = ctz64(ivalue) & (63 & -16);
652 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue >> shift, shift);
653 ivalue &= ~(0xffffUL << shift);
654 while (ivalue) {
655 shift = ctz64(ivalue) & (63 & -16);
656 /* Provide MOVK with the non-inverted value. */
657 tcg_out_insn(s, 3405, MOVK, type, rd, ~(ivalue >> shift), shift);
658 ivalue &= ~(0xffffUL << shift);
659 }
660 }
661 }
662
663 /* Define something more legible for general use. */
664 #define tcg_out_ldst_r tcg_out_insn_3310
665
666 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
667 TCGReg rd, TCGReg rn, intptr_t offset)
668 {
669 TCGMemOp size = (uint32_t)insn >> 30;
670
671 /* If the offset is naturally aligned and in range, then we can
672 use the scaled uimm12 encoding */
673 if (offset >= 0 && !(offset & ((1 << size) - 1))) {
674 uintptr_t scaled_uimm = offset >> size;
675 if (scaled_uimm <= 0xfff) {
676 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
677 return;
678 }
679 }
680
681 /* Small signed offsets can use the unscaled encoding. */
682 if (offset >= -256 && offset < 256) {
683 tcg_out_insn_3312(s, insn, rd, rn, offset);
684 return;
685 }
686
687 /* Worst-case scenario, move offset to temp register, use reg offset. */
688 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
689 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
690 }
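/* For instance, an 8-byte LDRX with offset 0x7ff8 fits the scaled uimm12
   form (0x7ff8 >> 3 = 0xfff), offset -8 uses the unscaled signed form,
   and something like 0x10000 falls through to the movi + register-offset
   path above. */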
691
692 static inline void tcg_out_mov(TCGContext *s,
693 TCGType type, TCGReg ret, TCGReg arg)
694 {
695 if (ret != arg) {
696 tcg_out_movr(s, type, ret, arg);
697 }
698 }
699
700 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
701 TCGReg arg1, intptr_t arg2)
702 {
703 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
704 arg, arg1, arg2);
705 }
706
707 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
708 TCGReg arg1, intptr_t arg2)
709 {
710 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
711 arg, arg1, arg2);
712 }
713
714 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
715 TCGReg base, intptr_t ofs)
716 {
717 if (val == 0) {
718 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
719 return true;
720 }
721 return false;
722 }
723
724 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
725 TCGReg rn, unsigned int a, unsigned int b)
726 {
727 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
728 }
729
730 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
731 TCGReg rn, unsigned int a, unsigned int b)
732 {
733 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
734 }
735
736 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
737 TCGReg rn, unsigned int a, unsigned int b)
738 {
739 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
740 }
741
742 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
743 TCGReg rn, TCGReg rm, unsigned int a)
744 {
745 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
746 }
747
748 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
749 TCGReg rd, TCGReg rn, unsigned int m)
750 {
751 int bits = ext ? 64 : 32;
752 int max = bits - 1;
753 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
754 }
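/* E.g. a 32-bit shift left by 3 becomes UBFM Wd, Wn, #29, #28, which is
   the LSL #3 alias: immr = (32 - shift) % 32 and imms = 31 - shift. */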
755
756 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
757 TCGReg rd, TCGReg rn, unsigned int m)
758 {
759 int max = ext ? 63 : 31;
760 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
761 }
762
763 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
764 TCGReg rd, TCGReg rn, unsigned int m)
765 {
766 int max = ext ? 63 : 31;
767 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
768 }
769
770 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
771 TCGReg rd, TCGReg rn, unsigned int m)
772 {
773 int max = ext ? 63 : 31;
774 tcg_out_extr(s, ext, rd, rn, rn, m & max);
775 }
776
777 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
778 TCGReg rd, TCGReg rn, unsigned int m)
779 {
780 int bits = ext ? 64 : 32;
781 int max = bits - 1;
782 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
783 }
784
785 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
786 TCGReg rn, unsigned lsb, unsigned width)
787 {
788 unsigned size = ext ? 64 : 32;
789 unsigned a = (size - lsb) & (size - 1);
790 unsigned b = width - 1;
791 tcg_out_bfm(s, ext, rd, rn, a, b);
792 }
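/* E.g. a 32-bit deposit at lsb = 8, width = 8 yields BFM Wd, Wn, #24, #7,
   i.e. the BFI Wd, Wn, #8, #8 alias. */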
793
794 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
795 tcg_target_long b, bool const_b)
796 {
797 if (const_b) {
798 /* Using CMP or CMN aliases. */
799 if (b >= 0) {
800 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
801 } else {
802 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
803 }
804 } else {
805 /* Using CMP alias SUBS wzr, Wn, Wm */
806 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
807 }
808 }
809
810 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
811 {
812 ptrdiff_t offset = target - s->code_ptr;
813 tcg_debug_assert(offset == sextract64(offset, 0, 26));
814 tcg_out_insn(s, 3206, B, offset);
815 }
816
817 static inline void tcg_out_goto_noaddr(TCGContext *s)
818 {
819 /* We pay attention here not to modify the branch target by reading from
820 the buffer. This ensures that caches and memory are kept coherent during
821 retranslation. Mask away possible garbage in the high bits for the
822 first translation, while keeping the offset bits for retranslation. */
823 uint32_t old = tcg_in32(s);
824 tcg_out_insn(s, 3206, B, old);
825 }
826
827 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
828 {
829 /* See comments in tcg_out_goto_noaddr. */
830 uint32_t old = tcg_in32(s) >> 5;
831 tcg_out_insn(s, 3202, B_C, c, old);
832 }
833
834 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
835 {
836 tcg_out_insn(s, 3207, BLR, reg);
837 }
838
839 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
840 {
841 ptrdiff_t offset = target - s->code_ptr;
842 if (offset == sextract64(offset, 0, 26)) {
843 tcg_out_insn(s, 3206, BL, offset);
844 } else {
845 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
846 tcg_out_callr(s, TCG_REG_TMP);
847 }
848 }
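/* Direct BL reaches roughly +/-128MB (signed 26-bit offset in instruction
   units); anything further away is reached indirectly through TMP via BLR. */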
849
850 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
851 {
852 tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
853 tcg_insn_unit *target = (tcg_insn_unit *)addr;
854
855 reloc_pc26_atomic(code_ptr, target);
856 flush_icache_range(jmp_addr, jmp_addr + 4);
857 }
858
859 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
860 {
861 if (!l->has_value) {
862 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
863 tcg_out_goto_noaddr(s);
864 } else {
865 tcg_out_goto(s, l->u.value_ptr);
866 }
867 }
868
869 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
870 TCGArg b, bool b_const, TCGLabel *l)
871 {
872 intptr_t offset;
873 bool need_cmp;
874
875 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
876 need_cmp = false;
877 } else {
878 need_cmp = true;
879 tcg_out_cmp(s, ext, a, b, b_const);
880 }
881
882 if (!l->has_value) {
883 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
884 offset = tcg_in32(s) >> 5;
885 } else {
886 offset = l->u.value_ptr - s->code_ptr;
887 tcg_debug_assert(offset == sextract64(offset, 0, 19));
888 }
889
890 if (need_cmp) {
891 tcg_out_insn(s, 3202, B_C, c, offset);
892 } else if (c == TCG_COND_EQ) {
893 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
894 } else {
895 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
896 }
897 }
898
899 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
900 {
901 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
902 }
903
904 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
905 {
906 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
907 }
908
909 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
910 {
911 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
912 }
913
914 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
915 TCGReg rd, TCGReg rn)
916 {
917 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
918 int bits = (8 << s_bits) - 1;
919 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
920 }
921
922 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
923 TCGReg rd, TCGReg rn)
924 {
925 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
926 int bits = (8 << s_bits) - 1;
927 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
928 }
929
930 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
931 TCGReg rn, int64_t aimm)
932 {
933 if (aimm >= 0) {
934 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
935 } else {
936 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
937 }
938 }
939
940 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
941 TCGReg rh, TCGReg al, TCGReg ah,
942 tcg_target_long bl, tcg_target_long bh,
943 bool const_bl, bool const_bh, bool sub)
944 {
945 TCGReg orig_rl = rl;
946 AArch64Insn insn;
947
948 if (rl == ah || (!const_bh && rl == bh)) {
949 rl = TCG_REG_TMP;
950 }
951
952 if (const_bl) {
953 insn = I3401_ADDSI;
954 if ((bl < 0) ^ sub) {
955 insn = I3401_SUBSI;
956 bl = -bl;
957 }
958 if (unlikely(al == TCG_REG_XZR)) {
959 /* ??? We want to allow al to be zero for the benefit of
960 negation via subtraction. However, that leaves open the
961 possibility of adding 0+const in the low part, and the
962 immediate add instructions encode XSP not XZR. Don't try
963 anything more elaborate here than loading another zero. */
964 al = TCG_REG_TMP;
965 tcg_out_movi(s, ext, al, 0);
966 }
967 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
968 } else {
969 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
970 }
971
972 insn = I3503_ADC;
973 if (const_bh) {
974 /* Note that the only two constants we support are 0 and -1, and
975 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
976 if ((bh != 0) ^ sub) {
977 insn = I3503_SBC;
978 }
979 bh = TCG_REG_XZR;
980 } else if (sub) {
981 insn = I3503_SBC;
982 }
983 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
984
985 tcg_out_mov(s, ext, orig_rl, rl);
986 }
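/* In other words, the double-word operation is ADDS/SUBS on the low half
   followed by ADC/SBC on the high half; TMP stands in for the low result
   when writing it directly would clobber an input still needed for the
   high half. */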
987
988 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
989 {
990 static const uint32_t sync[] = {
991 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
992 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
993 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
994 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
995 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
996 };
997 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
998 }
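/* Informal note on the encoding: DMB_ISH above supplies only the
   inner-shareable domain bits of the CRm field, while DMB_LD / DMB_ST add
   the access-type bits, so ORing them yields DMB ISHLD, DMB ISHST, or the
   full DMB ISH used as the default. */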
999
1000 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1001 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1002 {
1003 TCGReg a1 = a0;
1004 if (is_ctz) {
1005 a1 = TCG_REG_TMP;
1006 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1007 }
1008 if (const_b && b == (ext ? 64 : 32)) {
1009 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1010 } else {
1011 AArch64Insn sel = I3506_CSEL;
1012
1013 tcg_out_cmp(s, ext, a0, 0, 1);
1014 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1015
1016 if (const_b) {
1017 if (b == -1) {
1018 b = TCG_REG_XZR;
1019 sel = I3506_CSINV;
1020 } else if (b == 0) {
1021 b = TCG_REG_XZR;
1022 } else {
1023 tcg_out_movi(s, ext, d, b);
1024 b = d;
1025 }
1026 }
1027 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1028 }
1029 }
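/* Summary: ctz is computed as clz(rbit(x)).  When the fallback value B is
   the operand width we can rely on CLZ's defined behaviour for a zero
   input; otherwise the original operand is compared with zero and CSEL
   (or CSINV for B == -1) picks between the count and the fallback. */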
1030
1031 #ifdef CONFIG_SOFTMMU
1032 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1033 * TCGMemOpIdx oi, uintptr_t ra)
1034 */
1035 static void * const qemu_ld_helpers[16] = {
1036 [MO_UB] = helper_ret_ldub_mmu,
1037 [MO_LEUW] = helper_le_lduw_mmu,
1038 [MO_LEUL] = helper_le_ldul_mmu,
1039 [MO_LEQ] = helper_le_ldq_mmu,
1040 [MO_BEUW] = helper_be_lduw_mmu,
1041 [MO_BEUL] = helper_be_ldul_mmu,
1042 [MO_BEQ] = helper_be_ldq_mmu,
1043 };
1044
1045 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1046 * uintxx_t val, TCGMemOpIdx oi,
1047 * uintptr_t ra)
1048 */
1049 static void * const qemu_st_helpers[16] = {
1050 [MO_UB] = helper_ret_stb_mmu,
1051 [MO_LEUW] = helper_le_stw_mmu,
1052 [MO_LEUL] = helper_le_stl_mmu,
1053 [MO_LEQ] = helper_le_stq_mmu,
1054 [MO_BEUW] = helper_be_stw_mmu,
1055 [MO_BEUL] = helper_be_stl_mmu,
1056 [MO_BEQ] = helper_be_stq_mmu,
1057 };
1058
1059 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1060 {
1061 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1062 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1063 tcg_out_insn(s, 3406, ADR, rd, offset);
1064 }
1065
1066 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1067 {
1068 TCGMemOpIdx oi = lb->oi;
1069 TCGMemOp opc = get_memop(oi);
1070 TCGMemOp size = opc & MO_SIZE;
1071
1072 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1073
1074 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1075 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1076 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1077 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1078 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1079 if (opc & MO_SIGN) {
1080 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1081 } else {
1082 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1083 }
1084
1085 tcg_out_goto(s, lb->raddr);
1086 }
1087
1088 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1089 {
1090 TCGMemOpIdx oi = lb->oi;
1091 TCGMemOp opc = get_memop(oi);
1092 TCGMemOp size = opc & MO_SIZE;
1093
1094 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1095
1096 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1097 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1098 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1099 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1100 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1101 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1102 tcg_out_goto(s, lb->raddr);
1103 }
1104
1105 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1106 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1107 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1108 {
1109 TCGLabelQemuLdst *label = new_ldst_label(s);
1110
1111 label->is_ld = is_ld;
1112 label->oi = oi;
1113 label->type = ext;
1114 label->datalo_reg = data_reg;
1115 label->addrlo_reg = addr_reg;
1116 label->raddr = raddr;
1117 label->label_ptr[0] = label_ptr;
1118 }
1119
1120 /* Load and compare a TLB entry, emitting the conditional jump to the
1121 slow path for the failure case, which will be patched later when finalizing
1122 the slow path. Generated code returns the host addend in X1,
1123 clobbers X0,X2,X3,TMP. */
1124 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1125 tcg_insn_unit **label_ptr, int mem_index,
1126 bool is_read)
1127 {
1128 int tlb_offset = is_read ?
1129 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1130 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1131 unsigned a_bits = get_alignment_bits(opc);
1132 unsigned s_bits = opc & MO_SIZE;
1133 unsigned a_mask = (1u << a_bits) - 1;
1134 unsigned s_mask = (1u << s_bits) - 1;
1135 TCGReg base = TCG_AREG0, x3;
1136 uint64_t tlb_mask;
1137
1138 /* For aligned accesses, we check the first byte and include the alignment
1139 bits within the address. For unaligned access, we check that we don't
1140 cross pages using the address of the last byte of the access. */
1141 if (a_bits >= s_bits) {
1142 x3 = addr_reg;
1143 } else {
1144 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1145 TCG_REG_X3, addr_reg, s_mask - a_mask);
1146 x3 = TCG_REG_X3;
1147 }
1148 tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1149
1150 /* Extract the TLB index from the address into X0.
1151 X0<CPU_TLB_BITS:0> =
1152 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1153 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1154 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1155
1156 /* Store the page mask part of the address into X3. */
1157 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1158 TCG_REG_X3, x3, tlb_mask);
1159
1160 /* Add any "high bits" from the tlb offset to the env address into X2,
1161 to take advantage of the LSL12 form of the ADDI instruction.
1162 X2 = env + (tlb_offset & 0xfff000) */
1163 if (tlb_offset & 0xfff000) {
1164 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1165 tlb_offset & 0xfff000);
1166 base = TCG_REG_X2;
1167 }
1168
1169 /* Merge the tlb index contribution into X2.
1170 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1171 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1172 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1173
1174 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1175 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1176 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1177 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
1178
1179 /* Load the tlb addend. Do that early to avoid stalling.
1180 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1181 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1182 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1183 (is_read ? offsetof(CPUTLBEntry, addr_read)
1184 : offsetof(CPUTLBEntry, addr_write)));
1185
1186 /* Perform the address comparison. */
1187 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1188
1189 /* If not equal, we jump to the slow path. */
1190 *label_ptr = s->code_ptr;
1191 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1192 }
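/* Sketch of the emitted fast path (modulo exact operand choices):
       ubfx x0, addr, #TARGET_PAGE_BITS, ...      extract the TLB index
       and  x3, addr[+align], #tlb_mask           page (and alignment) bits
       add  x2, env, #high bits of tlb_offset     (only if needed)
       add  x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
       ldr  x0, [x2, #low bits of tlb_offset]     tlb comparator
       ldr  x1, [x2, #... addend]                 host addend
       cmp  x0, x3
       b.ne slow_path
   leaving the host addend in x1 for the load/store fast path. */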
1193
1194 #endif /* CONFIG_SOFTMMU */
1195
1196 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1197 TCGReg data_r, TCGReg addr_r,
1198 TCGType otype, TCGReg off_r)
1199 {
1200 const TCGMemOp bswap = memop & MO_BSWAP;
1201
1202 switch (memop & MO_SSIZE) {
1203 case MO_UB:
1204 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1205 break;
1206 case MO_SB:
1207 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1208 data_r, addr_r, otype, off_r);
1209 break;
1210 case MO_UW:
1211 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1212 if (bswap) {
1213 tcg_out_rev16(s, data_r, data_r);
1214 }
1215 break;
1216 case MO_SW:
1217 if (bswap) {
1218 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1219 tcg_out_rev16(s, data_r, data_r);
1220 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1221 } else {
1222 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1223 data_r, addr_r, otype, off_r);
1224 }
1225 break;
1226 case MO_UL:
1227 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1228 if (bswap) {
1229 tcg_out_rev32(s, data_r, data_r);
1230 }
1231 break;
1232 case MO_SL:
1233 if (bswap) {
1234 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1235 tcg_out_rev32(s, data_r, data_r);
1236 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1237 } else {
1238 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1239 }
1240 break;
1241 case MO_Q:
1242 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1243 if (bswap) {
1244 tcg_out_rev64(s, data_r, data_r);
1245 }
1246 break;
1247 default:
1248 tcg_abort();
1249 }
1250 }
1251
1252 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1253 TCGReg data_r, TCGReg addr_r,
1254 TCGType otype, TCGReg off_r)
1255 {
1256 const TCGMemOp bswap = memop & MO_BSWAP;
1257
1258 switch (memop & MO_SIZE) {
1259 case MO_8:
1260 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1261 break;
1262 case MO_16:
1263 if (bswap && data_r != TCG_REG_XZR) {
1264 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1265 data_r = TCG_REG_TMP;
1266 }
1267 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1268 break;
1269 case MO_32:
1270 if (bswap && data_r != TCG_REG_XZR) {
1271 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1272 data_r = TCG_REG_TMP;
1273 }
1274 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1275 break;
1276 case MO_64:
1277 if (bswap && data_r != TCG_REG_XZR) {
1278 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1279 data_r = TCG_REG_TMP;
1280 }
1281 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1282 break;
1283 default:
1284 tcg_abort();
1285 }
1286 }
1287
1288 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1289 TCGMemOpIdx oi, TCGType ext)
1290 {
1291 TCGMemOp memop = get_memop(oi);
1292 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1293 #ifdef CONFIG_SOFTMMU
1294 unsigned mem_index = get_mmuidx(oi);
1295 tcg_insn_unit *label_ptr;
1296
1297 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1298 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1299 TCG_REG_X1, otype, addr_reg);
1300 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1301 s->code_ptr, label_ptr);
1302 #else /* !CONFIG_SOFTMMU */
1303 if (USE_GUEST_BASE) {
1304 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1305 TCG_REG_GUEST_BASE, otype, addr_reg);
1306 } else {
1307 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1308 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1309 }
1310 #endif /* CONFIG_SOFTMMU */
1311 }
1312
1313 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1314 TCGMemOpIdx oi)
1315 {
1316 TCGMemOp memop = get_memop(oi);
1317 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1318 #ifdef CONFIG_SOFTMMU
1319 unsigned mem_index = get_mmuidx(oi);
1320 tcg_insn_unit *label_ptr;
1321
1322 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1323 tcg_out_qemu_st_direct(s, memop, data_reg,
1324 TCG_REG_X1, otype, addr_reg);
1325 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1326 data_reg, addr_reg, s->code_ptr, label_ptr);
1327 #else /* !CONFIG_SOFTMMU */
1328 if (USE_GUEST_BASE) {
1329 tcg_out_qemu_st_direct(s, memop, data_reg,
1330 TCG_REG_GUEST_BASE, otype, addr_reg);
1331 } else {
1332 tcg_out_qemu_st_direct(s, memop, data_reg,
1333 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1334 }
1335 #endif /* CONFIG_SOFTMMU */
1336 }
1337
1338 static tcg_insn_unit *tb_ret_addr;
1339
1340 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1341 const TCGArg args[TCG_MAX_OP_ARGS],
1342 const int const_args[TCG_MAX_OP_ARGS])
1343 {
1344 /* 99% of the time, we can signal the use of extension registers
1345 by looking to see if the opcode handles 64-bit data. */
1346 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1347
1348 /* Hoist the loads of the most common arguments. */
1349 TCGArg a0 = args[0];
1350 TCGArg a1 = args[1];
1351 TCGArg a2 = args[2];
1352 int c2 = const_args[2];
1353
1354 /* Some operands are defined with "rZ" constraint, a register or
1355 the zero register. These need not actually test args[I] == 0. */
1356 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1357
1358 switch (opc) {
1359 case INDEX_op_exit_tb:
1360 /* Reuse the zeroing that exists for goto_ptr. */
1361 if (a0 == 0) {
1362 tcg_out_goto(s, s->code_gen_epilogue);
1363 } else {
1364 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1365 tcg_out_goto(s, tb_ret_addr);
1366 }
1367 break;
1368
1369 case INDEX_op_goto_tb:
1370 #ifndef USE_DIRECT_JUMP
1371 #error "USE_DIRECT_JUMP required for aarch64"
1372 #endif
1373 /* consistency for USE_DIRECT_JUMP */
1374 tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
1375 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1376 /* actual branch destination will be patched by
1377 aarch64_tb_set_jmp_target later, beware retranslation. */
1378 tcg_out_goto_noaddr(s);
1379 s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
1380 break;
1381
1382 case INDEX_op_goto_ptr:
1383 tcg_out_insn(s, 3207, BR, a0);
1384 break;
1385
1386 case INDEX_op_br:
1387 tcg_out_goto_label(s, arg_label(a0));
1388 break;
1389
1390 case INDEX_op_ld8u_i32:
1391 case INDEX_op_ld8u_i64:
1392 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
1393 break;
1394 case INDEX_op_ld8s_i32:
1395 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
1396 break;
1397 case INDEX_op_ld8s_i64:
1398 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
1399 break;
1400 case INDEX_op_ld16u_i32:
1401 case INDEX_op_ld16u_i64:
1402 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
1403 break;
1404 case INDEX_op_ld16s_i32:
1405 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
1406 break;
1407 case INDEX_op_ld16s_i64:
1408 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
1409 break;
1410 case INDEX_op_ld_i32:
1411 case INDEX_op_ld32u_i64:
1412 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
1413 break;
1414 case INDEX_op_ld32s_i64:
1415 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
1416 break;
1417 case INDEX_op_ld_i64:
1418 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
1419 break;
1420
1421 case INDEX_op_st8_i32:
1422 case INDEX_op_st8_i64:
1423 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
1424 break;
1425 case INDEX_op_st16_i32:
1426 case INDEX_op_st16_i64:
1427 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
1428 break;
1429 case INDEX_op_st_i32:
1430 case INDEX_op_st32_i64:
1431 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
1432 break;
1433 case INDEX_op_st_i64:
1434 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
1435 break;
1436
1437 case INDEX_op_add_i32:
1438 a2 = (int32_t)a2;
1439 /* FALLTHRU */
1440 case INDEX_op_add_i64:
1441 if (c2) {
1442 tcg_out_addsubi(s, ext, a0, a1, a2);
1443 } else {
1444 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1445 }
1446 break;
1447
1448 case INDEX_op_sub_i32:
1449 a2 = (int32_t)a2;
1450 /* FALLTHRU */
1451 case INDEX_op_sub_i64:
1452 if (c2) {
1453 tcg_out_addsubi(s, ext, a0, a1, -a2);
1454 } else {
1455 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1456 }
1457 break;
1458
1459 case INDEX_op_neg_i64:
1460 case INDEX_op_neg_i32:
1461 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1462 break;
1463
1464 case INDEX_op_and_i32:
1465 a2 = (int32_t)a2;
1466 /* FALLTHRU */
1467 case INDEX_op_and_i64:
1468 if (c2) {
1469 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1470 } else {
1471 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1472 }
1473 break;
1474
1475 case INDEX_op_andc_i32:
1476 a2 = (int32_t)a2;
1477 /* FALLTHRU */
1478 case INDEX_op_andc_i64:
1479 if (c2) {
1480 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1481 } else {
1482 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1483 }
1484 break;
1485
1486 case INDEX_op_or_i32:
1487 a2 = (int32_t)a2;
1488 /* FALLTHRU */
1489 case INDEX_op_or_i64:
1490 if (c2) {
1491 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1492 } else {
1493 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1494 }
1495 break;
1496
1497 case INDEX_op_orc_i32:
1498 a2 = (int32_t)a2;
1499 /* FALLTHRU */
1500 case INDEX_op_orc_i64:
1501 if (c2) {
1502 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1503 } else {
1504 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1505 }
1506 break;
1507
1508 case INDEX_op_xor_i32:
1509 a2 = (int32_t)a2;
1510 /* FALLTHRU */
1511 case INDEX_op_xor_i64:
1512 if (c2) {
1513 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1514 } else {
1515 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1516 }
1517 break;
1518
1519 case INDEX_op_eqv_i32:
1520 a2 = (int32_t)a2;
1521 /* FALLTHRU */
1522 case INDEX_op_eqv_i64:
1523 if (c2) {
1524 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1525 } else {
1526 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1527 }
1528 break;
1529
1530 case INDEX_op_not_i64:
1531 case INDEX_op_not_i32:
1532 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1533 break;
1534
1535 case INDEX_op_mul_i64:
1536 case INDEX_op_mul_i32:
1537 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1538 break;
1539
1540 case INDEX_op_div_i64:
1541 case INDEX_op_div_i32:
1542 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1543 break;
1544 case INDEX_op_divu_i64:
1545 case INDEX_op_divu_i32:
1546 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1547 break;
1548
1549 case INDEX_op_rem_i64:
1550 case INDEX_op_rem_i32:
1551 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1552 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1553 break;
1554 case INDEX_op_remu_i64:
1555 case INDEX_op_remu_i32:
1556 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1557 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1558 break;
1559
1560 case INDEX_op_shl_i64:
1561 case INDEX_op_shl_i32:
1562 if (c2) {
1563 tcg_out_shl(s, ext, a0, a1, a2);
1564 } else {
1565 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1566 }
1567 break;
1568
1569 case INDEX_op_shr_i64:
1570 case INDEX_op_shr_i32:
1571 if (c2) {
1572 tcg_out_shr(s, ext, a0, a1, a2);
1573 } else {
1574 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1575 }
1576 break;
1577
1578 case INDEX_op_sar_i64:
1579 case INDEX_op_sar_i32:
1580 if (c2) {
1581 tcg_out_sar(s, ext, a0, a1, a2);
1582 } else {
1583 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1584 }
1585 break;
1586
1587 case INDEX_op_rotr_i64:
1588 case INDEX_op_rotr_i32:
1589 if (c2) {
1590 tcg_out_rotr(s, ext, a0, a1, a2);
1591 } else {
1592 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1593 }
1594 break;
1595
1596 case INDEX_op_rotl_i64:
1597 case INDEX_op_rotl_i32:
1598 if (c2) {
1599 tcg_out_rotl(s, ext, a0, a1, a2);
1600 } else {
1601 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1602 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1603 }
1604 break;
1605
1606 case INDEX_op_clz_i64:
1607 case INDEX_op_clz_i32:
1608 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1609 break;
1610 case INDEX_op_ctz_i64:
1611 case INDEX_op_ctz_i32:
1612 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1613 break;
1614
1615 case INDEX_op_brcond_i32:
1616 a1 = (int32_t)a1;
1617 /* FALLTHRU */
1618 case INDEX_op_brcond_i64:
1619 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1620 break;
1621
1622 case INDEX_op_setcond_i32:
1623 a2 = (int32_t)a2;
1624 /* FALLTHRU */
1625 case INDEX_op_setcond_i64:
1626 tcg_out_cmp(s, ext, a1, a2, c2);
1627 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1628 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1629 TCG_REG_XZR, tcg_invert_cond(args[3]));
1630 break;
1631
1632 case INDEX_op_movcond_i32:
1633 a2 = (int32_t)a2;
1634 /* FALLTHRU */
1635 case INDEX_op_movcond_i64:
1636 tcg_out_cmp(s, ext, a1, a2, c2);
1637 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1638 break;
1639
1640 case INDEX_op_qemu_ld_i32:
1641 case INDEX_op_qemu_ld_i64:
1642 tcg_out_qemu_ld(s, a0, a1, a2, ext);
1643 break;
1644 case INDEX_op_qemu_st_i32:
1645 case INDEX_op_qemu_st_i64:
1646 tcg_out_qemu_st(s, REG0(0), a1, a2);
1647 break;
1648
1649 case INDEX_op_bswap64_i64:
1650 tcg_out_rev64(s, a0, a1);
1651 break;
1652 case INDEX_op_bswap32_i64:
1653 case INDEX_op_bswap32_i32:
1654 tcg_out_rev32(s, a0, a1);
1655 break;
1656 case INDEX_op_bswap16_i64:
1657 case INDEX_op_bswap16_i32:
1658 tcg_out_rev16(s, a0, a1);
1659 break;
1660
1661 case INDEX_op_ext8s_i64:
1662 case INDEX_op_ext8s_i32:
1663 tcg_out_sxt(s, ext, MO_8, a0, a1);
1664 break;
1665 case INDEX_op_ext16s_i64:
1666 case INDEX_op_ext16s_i32:
1667 tcg_out_sxt(s, ext, MO_16, a0, a1);
1668 break;
1669 case INDEX_op_ext_i32_i64:
1670 case INDEX_op_ext32s_i64:
1671 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1672 break;
1673 case INDEX_op_ext8u_i64:
1674 case INDEX_op_ext8u_i32:
1675 tcg_out_uxt(s, MO_8, a0, a1);
1676 break;
1677 case INDEX_op_ext16u_i64:
1678 case INDEX_op_ext16u_i32:
1679 tcg_out_uxt(s, MO_16, a0, a1);
1680 break;
1681 case INDEX_op_extu_i32_i64:
1682 case INDEX_op_ext32u_i64:
1683 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1684 break;
1685
1686 case INDEX_op_deposit_i64:
1687 case INDEX_op_deposit_i32:
1688 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1689 break;
1690
1691 case INDEX_op_extract_i64:
1692 case INDEX_op_extract_i32:
1693 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
1694 break;
1695
1696 case INDEX_op_sextract_i64:
1697 case INDEX_op_sextract_i32:
1698 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
1699 break;
1700
1701 case INDEX_op_add2_i32:
1702 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1703 (int32_t)args[4], args[5], const_args[4],
1704 const_args[5], false);
1705 break;
1706 case INDEX_op_add2_i64:
1707 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1708 args[5], const_args[4], const_args[5], false);
1709 break;
1710 case INDEX_op_sub2_i32:
1711 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1712 (int32_t)args[4], args[5], const_args[4],
1713 const_args[5], true);
1714 break;
1715 case INDEX_op_sub2_i64:
1716 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1717 args[5], const_args[4], const_args[5], true);
1718 break;
1719
1720 case INDEX_op_muluh_i64:
1721 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1722 break;
1723 case INDEX_op_mulsh_i64:
1724 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1725 break;
1726
1727 case INDEX_op_mb:
1728 tcg_out_mb(s, a0);
1729 break;
1730
1731 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1732 case INDEX_op_mov_i64:
1733 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1734 case INDEX_op_movi_i64:
1735 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1736 default:
1737 tcg_abort();
1738 }
1739
1740 #undef REG0
1741 }
1742
1743 static const TCGTargetOpDef aarch64_op_defs[] = {
1744 { INDEX_op_exit_tb, { } },
1745 { INDEX_op_goto_tb, { } },
1746 { INDEX_op_br, { } },
1747 { INDEX_op_goto_ptr, { "r" } },
1748
1749 { INDEX_op_ld8u_i32, { "r", "r" } },
1750 { INDEX_op_ld8s_i32, { "r", "r" } },
1751 { INDEX_op_ld16u_i32, { "r", "r" } },
1752 { INDEX_op_ld16s_i32, { "r", "r" } },
1753 { INDEX_op_ld_i32, { "r", "r" } },
1754 { INDEX_op_ld8u_i64, { "r", "r" } },
1755 { INDEX_op_ld8s_i64, { "r", "r" } },
1756 { INDEX_op_ld16u_i64, { "r", "r" } },
1757 { INDEX_op_ld16s_i64, { "r", "r" } },
1758 { INDEX_op_ld32u_i64, { "r", "r" } },
1759 { INDEX_op_ld32s_i64, { "r", "r" } },
1760 { INDEX_op_ld_i64, { "r", "r" } },
1761
1762 { INDEX_op_st8_i32, { "rZ", "r" } },
1763 { INDEX_op_st16_i32, { "rZ", "r" } },
1764 { INDEX_op_st_i32, { "rZ", "r" } },
1765 { INDEX_op_st8_i64, { "rZ", "r" } },
1766 { INDEX_op_st16_i64, { "rZ", "r" } },
1767 { INDEX_op_st32_i64, { "rZ", "r" } },
1768 { INDEX_op_st_i64, { "rZ", "r" } },
1769
1770 { INDEX_op_add_i32, { "r", "r", "rA" } },
1771 { INDEX_op_add_i64, { "r", "r", "rA" } },
1772 { INDEX_op_sub_i32, { "r", "r", "rA" } },
1773 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1774 { INDEX_op_mul_i32, { "r", "r", "r" } },
1775 { INDEX_op_mul_i64, { "r", "r", "r" } },
1776 { INDEX_op_div_i32, { "r", "r", "r" } },
1777 { INDEX_op_div_i64, { "r", "r", "r" } },
1778 { INDEX_op_divu_i32, { "r", "r", "r" } },
1779 { INDEX_op_divu_i64, { "r", "r", "r" } },
1780 { INDEX_op_rem_i32, { "r", "r", "r" } },
1781 { INDEX_op_rem_i64, { "r", "r", "r" } },
1782 { INDEX_op_remu_i32, { "r", "r", "r" } },
1783 { INDEX_op_remu_i64, { "r", "r", "r" } },
1784 { INDEX_op_and_i32, { "r", "r", "rL" } },
1785 { INDEX_op_and_i64, { "r", "r", "rL" } },
1786 { INDEX_op_or_i32, { "r", "r", "rL" } },
1787 { INDEX_op_or_i64, { "r", "r", "rL" } },
1788 { INDEX_op_xor_i32, { "r", "r", "rL" } },
1789 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1790 { INDEX_op_andc_i32, { "r", "r", "rL" } },
1791 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1792 { INDEX_op_orc_i32, { "r", "r", "rL" } },
1793 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1794 { INDEX_op_eqv_i32, { "r", "r", "rL" } },
1795 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1796
1797 { INDEX_op_neg_i32, { "r", "r" } },
1798 { INDEX_op_neg_i64, { "r", "r" } },
1799 { INDEX_op_not_i32, { "r", "r" } },
1800 { INDEX_op_not_i64, { "r", "r" } },
1801
1802 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1803 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1804 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1805 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1806 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1807 { INDEX_op_clz_i32, { "r", "r", "rAL" } },
1808 { INDEX_op_ctz_i32, { "r", "r", "rAL" } },
1809 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1810 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1811 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1812 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1813 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1814 { INDEX_op_clz_i64, { "r", "r", "rAL" } },
1815 { INDEX_op_ctz_i64, { "r", "r", "rAL" } },
1816
1817 { INDEX_op_brcond_i32, { "r", "rA" } },
1818 { INDEX_op_brcond_i64, { "r", "rA" } },
1819 { INDEX_op_setcond_i32, { "r", "r", "rA" } },
1820 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1821 { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } },
1822 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1823
1824 { INDEX_op_qemu_ld_i32, { "r", "l" } },
1825 { INDEX_op_qemu_ld_i64, { "r", "l" } },
1826 { INDEX_op_qemu_st_i32, { "lZ", "l" } },
1827 { INDEX_op_qemu_st_i64, { "lZ", "l" } },
1828
1829 { INDEX_op_bswap16_i32, { "r", "r" } },
1830 { INDEX_op_bswap32_i32, { "r", "r" } },
1831 { INDEX_op_bswap16_i64, { "r", "r" } },
1832 { INDEX_op_bswap32_i64, { "r", "r" } },
1833 { INDEX_op_bswap64_i64, { "r", "r" } },
1834
1835 { INDEX_op_ext8s_i32, { "r", "r" } },
1836 { INDEX_op_ext16s_i32, { "r", "r" } },
1837 { INDEX_op_ext8u_i32, { "r", "r" } },
1838 { INDEX_op_ext16u_i32, { "r", "r" } },
1839
1840 { INDEX_op_ext8s_i64, { "r", "r" } },
1841 { INDEX_op_ext16s_i64, { "r", "r" } },
1842 { INDEX_op_ext32s_i64, { "r", "r" } },
1843 { INDEX_op_ext8u_i64, { "r", "r" } },
1844 { INDEX_op_ext16u_i64, { "r", "r" } },
1845 { INDEX_op_ext32u_i64, { "r", "r" } },
1846 { INDEX_op_ext_i32_i64, { "r", "r" } },
1847 { INDEX_op_extu_i32_i64, { "r", "r" } },
1848
1849 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1850 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1851 { INDEX_op_extract_i32, { "r", "r" } },
1852 { INDEX_op_extract_i64, { "r", "r" } },
1853 { INDEX_op_sextract_i32, { "r", "r" } },
1854 { INDEX_op_sextract_i64, { "r", "r" } },
1855
1856 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1857 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1858 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1859 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1860
1861 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1862 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1863
1864 { INDEX_op_mb, { } },
1865 { -1 },
1866 };
1867
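/* A linear lookup is fine here: the table above is small and this is not
   on a hot path.  */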
1868 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
1869 {
1870 int i, n = ARRAY_SIZE(aarch64_op_defs);
1871
1872 for (i = 0; i < n; ++i) {
1873 if (aarch64_op_defs[i].op == op) {
1874 return &aarch64_op_defs[i];
1875 }
1876 }
1877 return NULL;
1878 }
1879
1880 static void tcg_target_init(TCGContext *s)
1881 {
1882 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1883 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1884
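    /* Per the AArch64 procedure call standard, x0-x18 and x30 (lr) are not
       preserved across calls; x19-x28, fp and sp are callee-saved and
       therefore omitted from the clobber set.  */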
1885 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1886 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1887 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1888 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1889 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1890 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1891 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1892 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1893 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1894 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1895 (1 << TCG_REG_X18) | (1 << TCG_REG_X30));
1896
1897 tcg_regset_clear(s->reserved_regs);
1898 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1899 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1900 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1901 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1902 }
1903
1904 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
1905 #define PUSH_SIZE ((30 - 19 + 1) * 8)
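/* That is (30 - 19 + 1) = 12 registers, 96 bytes.  */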
1906
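/* Saved-register area, outgoing stack arguments and the TCG temp buffer,
   rounded up to the stack alignment.  */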
1907 #define FRAME_SIZE \
1908 ((PUSH_SIZE \
1909 + TCG_STATIC_CALL_ARGS_SIZE \
1910 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
1911 + TCG_TARGET_STACK_ALIGN - 1) \
1912 & ~(TCG_TARGET_STACK_ALIGN - 1))
1913
1914 /* We're expecting a 2 byte uleb128 encoded value; two bytes carry 14 payload bits, hence the limit below. */
1915 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
1916
1917 /* We're expecting to use a single ADDI insn. */
1918 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
1919
1920 static void tcg_target_qemu_prologue(TCGContext *s)
1921 {
1922 TCGReg r;
1923
1924 /* Push (FP, LR) and allocate space for all saved registers. */
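    /* Emitted as "stp x29, x30, [sp, #-PUSH_SIZE]!" (pre-index with
       writeback), so SP is dropped by PUSH_SIZE in the same insn.  */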
1925 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
1926 TCG_REG_SP, -PUSH_SIZE, 1, 1);
1927
1928 /* Set up frame pointer for canonical unwinding. */
1929 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1930
1931 /* Store callee-preserved regs x19..x28. */
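    /* The pairs land above the (FP, LR) pair, at SP offsets 16 .. 80.  */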
1932 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1933 int ofs = (r - TCG_REG_X19 + 2) * 8;
1934 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1935 }
1936
1937 /* Make stack space for TCG locals. */
1938 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1939 FRAME_SIZE - PUSH_SIZE);
1940
1941 /* Tell TCG where to find its locals: base register, offset and size. */
1942 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1943 CPU_TEMP_BUF_NLONGS * sizeof(long));
1944
1945 #if !defined(CONFIG_SOFTMMU)
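    /* User-only emulation: keep guest_base in a dedicated, reserved register
       so guest addresses can be formed without reloading it.  */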
1946 if (USE_GUEST_BASE) {
1947 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
1948 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1949 }
1950 #endif
1951
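    /* The prologue is entered as tcg_qemu_tb_exec(env, tb_ptr): copy env into
       AREG0, then jump to the translated block whose address arrives in the
       second argument register.  */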
1952 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1953 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
1954
1955 /*
1956 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
1957 * and fall through to the rest of the epilogue.
1958 */
1959 s->code_gen_epilogue = s->code_ptr;
1960 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
1961
1962 /* TB epilogue */
1963 tb_ret_addr = s->code_ptr;
1964
1965 /* Remove TCG locals stack space. */
1966 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1967 FRAME_SIZE - PUSH_SIZE);
1968
1969 /* Restore registers x19..x28. */
1970 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1971 int ofs = (r - TCG_REG_X19 + 2) * 8;
1972 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1973 }
1974
1975 /* Pop (FP, LR), restore SP to previous frame. */
1976 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
1977 TCG_REG_SP, PUSH_SIZE, 0, 1);
1978 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
1979 }
1980
1981 typedef struct {
1982 DebugFrameHeader h;
1983 uint8_t fde_def_cfa[4];
1984 uint8_t fde_reg_ofs[24];
1985 } DebugFrame;
1986
1987 #define ELF_HOST_MACHINE EM_AARCH64
1988
1989 static const DebugFrame debug_frame = {
1990 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1991 .h.cie.id = -1,
1992 .h.cie.version = 1,
1993 .h.cie.code_align = 1,
1994 .h.cie.data_align = 0x78, /* sleb128 -8 */
1995 .h.cie.return_column = TCG_REG_LR,
1996
1997 /* Total FDE size does not include the "len" member. */
1998 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
1999
2000 .fde_def_cfa = {
2001 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2002 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2003 (FRAME_SIZE >> 7)
2004 },
2005 .fde_reg_ofs = {
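    /* Each pair below is DW_CFA_offset (0x80 | regno) followed by a uleb128
       factored offset: n means the register was saved at CFA + n * data_align,
       i.e. CFA - 8 * n given data_align = -8.  */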
2006 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2007 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2008 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2009 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2010 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2011 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2012 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2013 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2014 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2015 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2016 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2017 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2018 }
2019 };
2020
2021 void tcg_register_jit(void *buf, size_t buf_size)
2022 {
2023 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2024 }