1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-be-ldst.h"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType in setting of the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifndef NDEBUG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
27 };
28 #endif /* NDEBUG */
29
30 static const int tcg_target_reg_alloc_order[] = {
31 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
32 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
33 TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
34
35 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
36 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
37 TCG_REG_X16, TCG_REG_X17,
38
39 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
40 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
41
42 /* X18 reserved by system */
43 /* X19 reserved for AREG0 */
44 /* X29 reserved as fp */
45 /* X30 reserved as temporary */
46 };
47
48 static const int tcg_target_call_iarg_regs[8] = {
49 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
50 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
51 };
52 static const int tcg_target_call_oarg_regs[1] = {
53 TCG_REG_X0
54 };
55
56 #define TCG_REG_TMP TCG_REG_X30
57
58 #ifndef CONFIG_SOFTMMU
59 # ifdef CONFIG_USE_GUEST_BASE
60 # define TCG_REG_GUEST_BASE TCG_REG_X28
61 # else
62 # define TCG_REG_GUEST_BASE TCG_REG_XZR
63 # endif
64 #endif
65
66 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
67 {
68 ptrdiff_t offset = target - code_ptr;
69 assert(offset == sextract64(offset, 0, 26));
70 /* read instruction, mask away previous PC_REL26 parameter contents,
71 set the proper offset, then write back the instruction. */
72 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
73 }
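/* Note: tcg_insn_unit is 4 bytes on this target, so the 26-bit instruction
   offset above gives the +/-128MB reach of the AArch64 B and BL encodings
   that carry a PC_REL26 field; reloc_pc19 below correspondingly matches the
   +/-1MB reach of the conditional branches. */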
74
75 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
76 {
77 ptrdiff_t offset = target - code_ptr;
78 assert(offset == sextract64(offset, 0, 19));
79 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
80 }
81
82 static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
83 intptr_t value, intptr_t addend)
84 {
85 assert(addend == 0);
86 switch (type) {
87 case R_AARCH64_JUMP26:
88 case R_AARCH64_CALL26:
89 reloc_pc26(code_ptr, (tcg_insn_unit *)value);
90 break;
91 case R_AARCH64_CONDBR19:
92 reloc_pc19(code_ptr, (tcg_insn_unit *)value);
93 break;
94 default:
95 tcg_abort();
96 }
97 }
98
99 #define TCG_CT_CONST_AIMM 0x100
100 #define TCG_CT_CONST_LIMM 0x200
101 #define TCG_CT_CONST_ZERO 0x400
102 #define TCG_CT_CONST_MONE 0x800
103
104 /* parse target specific constraints */
105 static int target_parse_constraint(TCGArgConstraint *ct,
106 const char **pct_str)
107 {
108 const char *ct_str = *pct_str;
109
110 switch (ct_str[0]) {
111 case 'r':
112 ct->ct |= TCG_CT_REG;
113 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
114 break;
115 case 'l': /* qemu_ld / qemu_st address, data_reg */
116 ct->ct |= TCG_CT_REG;
117 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
118 #ifdef CONFIG_SOFTMMU
119         /* x0 and x1 will be overwritten when reading the tlb entry,
120            and x2 and x3 are needed for the helper args, so avoid using them. */
121 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
122 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
123 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
124 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
125 #endif
126 break;
127 case 'A': /* Valid for arithmetic immediate (positive or negative). */
128 ct->ct |= TCG_CT_CONST_AIMM;
129 break;
130 case 'L': /* Valid for logical immediate. */
131 ct->ct |= TCG_CT_CONST_LIMM;
132 break;
133 case 'M': /* minus one */
134 ct->ct |= TCG_CT_CONST_MONE;
135 break;
136 case 'Z': /* zero */
137 ct->ct |= TCG_CT_CONST_ZERO;
138 break;
139 default:
140 return -1;
141 }
142
143 ct_str++;
144 *pct_str = ct_str;
145 return 0;
146 }
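/* For example, the "rA" constraint used by the add/sub ops below accepts a
   register or any value for which is_aimm(val) or is_aimm(-val) holds, while
   "rZ" additionally lets a constant zero be encoded as the XZR register. */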
147
148 static inline bool is_aimm(uint64_t val)
149 {
150 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
151 }
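/* Illustration: an arithmetic immediate is a 12-bit value optionally shifted
   left by 12, so 0x123 and 0x123000 both qualify, while 0x123456 does not
   because it has bits set in both halves. */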
152
153 static inline bool is_limm(uint64_t val)
154 {
155 /* Taking a simplified view of the logical immediates for now, ignoring
156 the replication that can happen across the field. Match bit patterns
157 of the forms
158 0....01....1
159 0..01..10..0
160 and their inverses. */
161
162 /* Make things easier below, by testing the form with msb clear. */
163 if ((int64_t)val < 0) {
164 val = ~val;
165 }
166 if (val == 0) {
167 return false;
168 }
169 val += val & -val;
170 return (val & (val - 1)) == 0;
171 }
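/* Worked examples of the test above: 0x0ff0 has lowest set bit 0x10, and
   adding it yields 0x1000, a power of two, so the run of ones is contiguous
   and the value is accepted.  0x0f0f becomes 0x0f10 after the addition,
   which still has several bits set, so it is rejected. */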
172
173 static int tcg_target_const_match(tcg_target_long val, TCGType type,
174 const TCGArgConstraint *arg_ct)
175 {
176 int ct = arg_ct->ct;
177
178 if (ct & TCG_CT_CONST) {
179 return 1;
180 }
181 if (type == TCG_TYPE_I32) {
182 val = (int32_t)val;
183 }
184 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
185 return 1;
186 }
187 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
188 return 1;
189 }
190 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
191 return 1;
192 }
193 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
194 return 1;
195 }
196
197 return 0;
198 }
199
200 enum aarch64_cond_code {
201 COND_EQ = 0x0,
202 COND_NE = 0x1,
203 COND_CS = 0x2, /* Unsigned greater or equal */
204 COND_HS = COND_CS, /* ALIAS greater or equal */
205 COND_CC = 0x3, /* Unsigned less than */
206 COND_LO = COND_CC, /* ALIAS Lower */
207 COND_MI = 0x4, /* Negative */
208 COND_PL = 0x5, /* Zero or greater */
209 COND_VS = 0x6, /* Overflow */
210 COND_VC = 0x7, /* No overflow */
211 COND_HI = 0x8, /* Unsigned greater than */
212 COND_LS = 0x9, /* Unsigned less or equal */
213 COND_GE = 0xa,
214 COND_LT = 0xb,
215 COND_GT = 0xc,
216 COND_LE = 0xd,
217 COND_AL = 0xe,
218 COND_NV = 0xf, /* behaves like COND_AL here */
219 };
220
221 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
222 [TCG_COND_EQ] = COND_EQ,
223 [TCG_COND_NE] = COND_NE,
224 [TCG_COND_LT] = COND_LT,
225 [TCG_COND_GE] = COND_GE,
226 [TCG_COND_LE] = COND_LE,
227 [TCG_COND_GT] = COND_GT,
228 /* unsigned */
229 [TCG_COND_LTU] = COND_LO,
230 [TCG_COND_GTU] = COND_HI,
231 [TCG_COND_GEU] = COND_HS,
232 [TCG_COND_LEU] = COND_LS,
233 };
234
235 typedef enum {
236 LDST_ST = 0, /* store */
237 LDST_LD = 1, /* load */
238 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
239 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
240 } AArch64LdstType;
241
242 /* We encode the format of the insn into the beginning of the name, so that
243 we can have the preprocessor help "typecheck" the insn vs the output
244 function. Arm didn't provide us with nice names for the formats, so we
245 use the section number of the architecture reference manual in which the
246 instruction group is described. */
247 typedef enum {
248 /* Compare and branch (immediate). */
249 I3201_CBZ = 0x34000000,
250 I3201_CBNZ = 0x35000000,
251
252 /* Conditional branch (immediate). */
253 I3202_B_C = 0x54000000,
254
255 /* Unconditional branch (immediate). */
256 I3206_B = 0x14000000,
257 I3206_BL = 0x94000000,
258
259 /* Unconditional branch (register). */
260 I3207_BR = 0xd61f0000,
261 I3207_BLR = 0xd63f0000,
262 I3207_RET = 0xd65f0000,
263
264 /* Load/store register. Described here as 3.3.12, but the helper
265 that emits them can transform to 3.3.10 or 3.3.13. */
266 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
267 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
268 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
269 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
270
271 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
272 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
273 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
274 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
275
276 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
277 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
278
279 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
280 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
281 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
282
283 I3312_TO_I3310 = 0x00206800,
284 I3312_TO_I3313 = 0x01000000,
285
286 /* Load/store register pair instructions. */
287 I3314_LDP = 0x28400000,
288 I3314_STP = 0x28000000,
289
290 /* Add/subtract immediate instructions. */
291 I3401_ADDI = 0x11000000,
292 I3401_ADDSI = 0x31000000,
293 I3401_SUBI = 0x51000000,
294 I3401_SUBSI = 0x71000000,
295
296 /* Bitfield instructions. */
297 I3402_BFM = 0x33000000,
298 I3402_SBFM = 0x13000000,
299 I3402_UBFM = 0x53000000,
300
301 /* Extract instruction. */
302 I3403_EXTR = 0x13800000,
303
304 /* Logical immediate instructions. */
305 I3404_ANDI = 0x12000000,
306 I3404_ORRI = 0x32000000,
307 I3404_EORI = 0x52000000,
308
309 /* Move wide immediate instructions. */
310 I3405_MOVN = 0x12800000,
311 I3405_MOVZ = 0x52800000,
312 I3405_MOVK = 0x72800000,
313
314 /* PC relative addressing instructions. */
315 I3406_ADR = 0x10000000,
316 I3406_ADRP = 0x90000000,
317
318 /* Add/subtract shifted register instructions (without a shift). */
319 I3502_ADD = 0x0b000000,
320 I3502_ADDS = 0x2b000000,
321 I3502_SUB = 0x4b000000,
322 I3502_SUBS = 0x6b000000,
323
324 /* Add/subtract shifted register instructions (with a shift). */
325 I3502S_ADD_LSL = I3502_ADD,
326
327 /* Add/subtract with carry instructions. */
328 I3503_ADC = 0x1a000000,
329 I3503_SBC = 0x5a000000,
330
331 /* Conditional select instructions. */
332 I3506_CSEL = 0x1a800000,
333 I3506_CSINC = 0x1a800400,
334
335 /* Data-processing (1 source) instructions. */
336 I3507_REV16 = 0x5ac00400,
337 I3507_REV32 = 0x5ac00800,
338 I3507_REV64 = 0x5ac00c00,
339
340 /* Data-processing (2 source) instructions. */
341 I3508_LSLV = 0x1ac02000,
342 I3508_LSRV = 0x1ac02400,
343 I3508_ASRV = 0x1ac02800,
344 I3508_RORV = 0x1ac02c00,
345 I3508_SMULH = 0x9b407c00,
346 I3508_UMULH = 0x9bc07c00,
347 I3508_UDIV = 0x1ac00800,
348 I3508_SDIV = 0x1ac00c00,
349
350 /* Data-processing (3 source) instructions. */
351 I3509_MADD = 0x1b000000,
352 I3509_MSUB = 0x1b008000,
353
354 /* Logical shifted register instructions (without a shift). */
355 I3510_AND = 0x0a000000,
356 I3510_BIC = 0x0a200000,
357 I3510_ORR = 0x2a000000,
358 I3510_ORN = 0x2a200000,
359 I3510_EOR = 0x4a000000,
360 I3510_EON = 0x4a200000,
361 I3510_ANDS = 0x6a000000,
362 } AArch64Insn;
363
364 static inline uint32_t tcg_in32(TCGContext *s)
365 {
366 uint32_t v = *(uint32_t *)s->code_ptr;
367 return v;
368 }
369
370 /* Emit an opcode with "type-checking" of the format. */
371 #define tcg_out_insn(S, FMT, OP, ...) \
372 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
373
374 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
375 TCGReg rt, int imm19)
376 {
377 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
378 }
379
380 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
381 TCGCond c, int imm19)
382 {
383 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
384 }
385
386 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
387 {
388 tcg_out32(s, insn | (imm26 & 0x03ffffff));
389 }
390
391 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
392 {
393 tcg_out32(s, insn | rn << 5);
394 }
395
396 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
397 TCGReg r1, TCGReg r2, TCGReg rn,
398 tcg_target_long ofs, bool pre, bool w)
399 {
400 insn |= 1u << 31; /* ext */
401 insn |= pre << 24;
402 insn |= w << 23;
403
404 assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
405 insn |= (ofs & (0x7f << 3)) << (15 - 3);
406
407 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
408 }
409
410 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
411 TCGReg rd, TCGReg rn, uint64_t aimm)
412 {
413 if (aimm > 0xfff) {
414 assert((aimm & 0xfff) == 0);
415 aimm >>= 12;
416 assert(aimm <= 0xfff);
417 aimm |= 1 << 12; /* apply LSL 12 */
418 }
419 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
420 }
421
422 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
423 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
424 that feed the DecodeBitMasks pseudo function. */
425 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
426 TCGReg rd, TCGReg rn, int n, int immr, int imms)
427 {
428 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
429 | rn << 5 | rd);
430 }
431
432 #define tcg_out_insn_3404 tcg_out_insn_3402
433
434 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
435 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
436 {
437 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
438 | rn << 5 | rd);
439 }
440
441 /* This function is used for the Move (wide immediate) instruction group.
442 Note that SHIFT is a full shift count, not the 2 bit HW field. */
443 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
444 TCGReg rd, uint16_t half, unsigned shift)
445 {
446 assert((shift & ~0x30) == 0);
447 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
448 }
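/* The 2-bit "hw" field sits at bit 21, so shifting the full count left by
   (21 - 4) effectively divides it by 16 into place: e.g. shift = 48 encodes
   hw = 3, i.e. the 16-bit immediate lands in bits 48..63. */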
449
450 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
451 TCGReg rd, int64_t disp)
452 {
453 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
454 }
455
456 /* This function is for 3.5.2 (Add/subtract shifted register), for the
457    rare occasion when we actually want to supply a shift amount. */
458 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
459 TCGType ext, TCGReg rd, TCGReg rn,
460 TCGReg rm, int imm6)
461 {
462 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
463 }
464
465 /* This function is for 3.5.2 (Add/subtract shifted register),
466    and 3.5.10 (Logical shifted register), for the vast majority of cases
467 when we don't want to apply a shift. Thus it can also be used for
468 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
469 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
470 TCGReg rd, TCGReg rn, TCGReg rm)
471 {
472 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
473 }
474
475 #define tcg_out_insn_3503 tcg_out_insn_3502
476 #define tcg_out_insn_3508 tcg_out_insn_3502
477 #define tcg_out_insn_3510 tcg_out_insn_3502
478
479 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
480 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
481 {
482 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
483 | tcg_cond_to_aarch64[c] << 12);
484 }
485
486 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
487 TCGReg rd, TCGReg rn)
488 {
489 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
490 }
491
492 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
493 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
494 {
495 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
496 }
497
498 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
499 TCGReg rd, TCGReg base, TCGReg regoff)
500 {
501 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
502 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | base << 5 | rd);
503 }
504
505
506 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
507 TCGReg rd, TCGReg rn, intptr_t offset)
508 {
509 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
510 }
511
512 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
513 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
514 {
515 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
516 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
517 }
518
519 /* Register to register move using ORR (shifted register with no shift). */
520 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
521 {
522 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
523 }
524
525 /* Register to register move using ADDI (move to/from SP). */
526 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
527 {
528 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
529 }
530
531 /* This function is used for the Logical (immediate) instruction group.
532 The value of LIMM must satisfy IS_LIMM. See the comment above about
533 only supporting simplified logical immediates. */
534 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
535 TCGReg rd, TCGReg rn, uint64_t limm)
536 {
537 unsigned h, l, r, c;
538
539 assert(is_limm(limm));
540
541 h = clz64(limm);
542 l = ctz64(limm);
543 if (l == 0) {
544 r = 0; /* form 0....01....1 */
545 c = ctz64(~limm) - 1;
546 if (h == 0) {
547 r = clz64(~limm); /* form 1..10..01..1 */
548 c += r;
549 }
550 } else {
551 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
552 c = r - h - 1;
553 }
554 if (ext == TCG_TYPE_I32) {
555 r &= 31;
556 c &= 31;
557 }
558
559 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
560 }
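/* Worked example (illustrative): limm = 0x0ff0 gives h = clz64 = 52 and
   l = ctz64 = 4.  Since l != 0 we take r = 64 - 4 = 60 and c = 60 - 52 - 1
   = 7, i.e. "8 consecutive ones rotated right by 60", which DecodeBitMasks
   expands back to 0x0ff0. */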
561
562 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
563 tcg_target_long value)
564 {
565 AArch64Insn insn;
566 int i, wantinv, shift;
567 tcg_target_long svalue = value;
568 tcg_target_long ivalue = ~value;
569 tcg_target_long imask;
570
571 /* For 32-bit values, discard potential garbage in value. For 64-bit
572 values within [2**31, 2**32-1], we can create smaller sequences by
573 interpreting this as a negative 32-bit number, while ensuring that
574 the high 32 bits are cleared by setting SF=0. */
575 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
576 svalue = (int32_t)value;
577 value = (uint32_t)value;
578 ivalue = (uint32_t)ivalue;
579 type = TCG_TYPE_I32;
580 }
581
582 /* Speed things up by handling the common case of small positive
583 and negative values specially. */
584 if ((value & ~0xffffull) == 0) {
585 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
586 return;
587 } else if ((ivalue & ~0xffffull) == 0) {
588 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
589 return;
590 }
591
592     /* Check for logical immediates.  For the benefit of 32-bit quantities,
593 use the sign-extended value. That lets us match rotated values such
594 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
595 if (is_limm(svalue)) {
596 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
597 return;
598 }
599
600 /* Look for host pointer values within 4G of the PC. This happens
601 often when loading pointers to QEMU's own data structures. */
602 if (type == TCG_TYPE_I64) {
603 tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
604 if (disp == sextract64(disp, 0, 21)) {
605 tcg_out_insn(s, 3406, ADRP, rd, disp);
606 if (value & 0xfff) {
607 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
608 }
609 return;
610 }
611 }
612
613 /* Would it take fewer insns to begin with MOVN? For the value and its
614 inverse, count the number of 16-bit lanes that are 0. */
615 for (i = wantinv = imask = 0; i < 64; i += 16) {
616 tcg_target_long mask = 0xffffull << i;
617 if ((value & mask) == 0) {
618 wantinv -= 1;
619 }
620 if ((ivalue & mask) == 0) {
621 wantinv += 1;
622 imask |= mask;
623 }
624 }
625
626 /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */
627 insn = I3405_MOVZ;
628 if (wantinv > 0) {
629 value = ivalue;
630 insn = I3405_MOVN;
631 }
632
633 /* Find the lowest lane that is not 0x0000. */
634 shift = ctz64(value) & (63 & -16);
635 tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift);
636
637 if (wantinv > 0) {
638 /* Re-invert the value, so MOVK sees non-inverted bits. */
639 value = ~value;
640 /* Clear out all the 0xffff lanes. */
641 value ^= imask;
642 }
643 /* Clear out the lane that we just set. */
644 value &= ~(0xffffUL << shift);
645
646 /* Iterate until all lanes have been set, and thus cleared from VALUE. */
647 while (value) {
648 shift = ctz64(value) & (63 & -16);
649 tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
650 value &= ~(0xffffUL << shift);
651 }
652 }
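/* Worked example of the MOVN path (illustrative; assumes the constant is
   neither a logical immediate nor within ADRP range of the code pointer):
   value = 0xffffffff12345678 has no zero 16-bit lanes, while its inverse
   0x00000000edcba987 has two, so wantinv is positive and we emit
       movn x0, #0xa987           ; x0 = 0xffffffffffff5678
       movk x0, #0x1234, lsl #16  ; x0 = 0xffffffff12345678
   two instructions instead of the four MOVZ/MOVK needed without inversion. */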
653
654 /* Define something more legible for general use. */
655 #define tcg_out_ldst_r tcg_out_insn_3310
656
657 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
658 TCGReg rd, TCGReg rn, intptr_t offset)
659 {
660 TCGMemOp size = (uint32_t)insn >> 30;
661
662 /* If the offset is naturally aligned and in range, then we can
663        use the scaled uimm12 encoding. */
664 if (offset >= 0 && !(offset & ((1 << size) - 1))) {
665 uintptr_t scaled_uimm = offset >> size;
666 if (scaled_uimm <= 0xfff) {
667 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
668 return;
669 }
670 }
671
672 /* Small signed offsets can use the unscaled encoding. */
673 if (offset >= -256 && offset < 256) {
674 tcg_out_insn_3312(s, insn, rd, rn, offset);
675 return;
676 }
677
678 /* Worst-case scenario, move offset to temp register, use reg offset. */
679 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
680 tcg_out_ldst_r(s, insn, rd, rn, TCG_REG_TMP);
681 }
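/* Examples of the three forms selected above for a 64-bit load (size = 3),
   purely illustrative:
     offset 0x7ff8  : aligned and scaled index 0xfff fits, use LDR [Xn, #imm]
     offset -8      : small signed offset, use the unscaled (LDUR) form
     offset 0x10001 : unaligned and out of range, materialize the offset in
                      TCG_REG_TMP and use the register-offset form. */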
682
683 static inline void tcg_out_mov(TCGContext *s,
684 TCGType type, TCGReg ret, TCGReg arg)
685 {
686 if (ret != arg) {
687 tcg_out_movr(s, type, ret, arg);
688 }
689 }
690
691 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
692 TCGReg arg1, intptr_t arg2)
693 {
694 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
695 arg, arg1, arg2);
696 }
697
698 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
699 TCGReg arg1, intptr_t arg2)
700 {
701 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
702 arg, arg1, arg2);
703 }
704
705 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
706 TCGReg rn, unsigned int a, unsigned int b)
707 {
708 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
709 }
710
711 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
712 TCGReg rn, unsigned int a, unsigned int b)
713 {
714 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
715 }
716
717 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
718 TCGReg rn, unsigned int a, unsigned int b)
719 {
720 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
721 }
722
723 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
724 TCGReg rn, TCGReg rm, unsigned int a)
725 {
726 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
727 }
728
729 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
730 TCGReg rd, TCGReg rn, unsigned int m)
731 {
732 int bits = ext ? 64 : 32;
733 int max = bits - 1;
734 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
735 }
736
737 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
738 TCGReg rd, TCGReg rn, unsigned int m)
739 {
740 int max = ext ? 63 : 31;
741 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
742 }
743
744 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
745 TCGReg rd, TCGReg rn, unsigned int m)
746 {
747 int max = ext ? 63 : 31;
748 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
749 }
750
751 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
752 TCGReg rd, TCGReg rn, unsigned int m)
753 {
754 int max = ext ? 63 : 31;
755 tcg_out_extr(s, ext, rd, rn, rn, m & max);
756 }
757
758 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
759 TCGReg rd, TCGReg rn, unsigned int m)
760 {
761 int bits = ext ? 64 : 32;
762 int max = bits - 1;
763 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
764 }
765
766 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
767 TCGReg rn, unsigned lsb, unsigned width)
768 {
769 unsigned size = ext ? 64 : 32;
770 unsigned a = (size - lsb) & (size - 1);
771 unsigned b = width - 1;
772 tcg_out_bfm(s, ext, rd, rn, a, b);
773 }
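/* Example of the parameter mapping above: a 32-bit deposit at lsb = 8 with
   width = 8 gives a = (32 - 8) & 31 = 24 and b = 7, i.e. BFM Wd, Wn, #24, #7,
   which is the BFI Wd, Wn, #8, #8 alias inserting bits 0..7 of Wn into
   bits 8..15 of Wd. */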
774
775 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
776 tcg_target_long b, bool const_b)
777 {
778 if (const_b) {
779 /* Using CMP or CMN aliases. */
780 if (b >= 0) {
781 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
782 } else {
783 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
784 }
785 } else {
786 /* Using CMP alias SUBS wzr, Wn, Wm */
787 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
788 }
789 }
790
791 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
792 {
793 ptrdiff_t offset = target - s->code_ptr;
794 assert(offset == sextract64(offset, 0, 26));
795 tcg_out_insn(s, 3206, B, offset);
796 }
797
798 static inline void tcg_out_goto_noaddr(TCGContext *s)
799 {
800     /* We take care here not to modify the branch target by reading from
801        the buffer. This ensures that caches and memory are kept coherent during
802 retranslation. Mask away possible garbage in the high bits for the
803 first translation, while keeping the offset bits for retranslation. */
804 uint32_t old = tcg_in32(s);
805 tcg_out_insn(s, 3206, B, old);
806 }
807
808 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
809 {
810 /* See comments in tcg_out_goto_noaddr. */
811 uint32_t old = tcg_in32(s) >> 5;
812 tcg_out_insn(s, 3202, B_C, c, old);
813 }
814
815 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
816 {
817 tcg_out_insn(s, 3207, BLR, reg);
818 }
819
820 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
821 {
822 ptrdiff_t offset = target - s->code_ptr;
823 if (offset == sextract64(offset, 0, 26)) {
824 tcg_out_insn(s, 3206, BL, offset);
825 } else {
826 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
827 tcg_out_callr(s, TCG_REG_TMP);
828 }
829 }
830
831 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
832 {
833 tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
834 tcg_insn_unit *target = (tcg_insn_unit *)addr;
835
836 reloc_pc26(code_ptr, target);
837 flush_icache_range(jmp_addr, jmp_addr + 4);
838 }
839
840 static inline void tcg_out_goto_label(TCGContext *s, int label_index)
841 {
842 TCGLabel *l = &s->labels[label_index];
843
844 if (!l->has_value) {
845 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0);
846 tcg_out_goto_noaddr(s);
847 } else {
848 tcg_out_goto(s, l->u.value_ptr);
849 }
850 }
851
852 static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
853 TCGArg b, bool b_const, int label)
854 {
855 TCGLabel *l = &s->labels[label];
856 intptr_t offset;
857 bool need_cmp;
858
859 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
860 need_cmp = false;
861 } else {
862 need_cmp = true;
863 tcg_out_cmp(s, ext, a, b, b_const);
864 }
865
866 if (!l->has_value) {
867 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label, 0);
868 offset = tcg_in32(s) >> 5;
869 } else {
870 offset = l->u.value_ptr - s->code_ptr;
871 assert(offset == sextract64(offset, 0, 19));
872 }
873
874 if (need_cmp) {
875 tcg_out_insn(s, 3202, B_C, c, offset);
876 } else if (c == TCG_COND_EQ) {
877 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
878 } else {
879 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
880 }
881 }
882
883 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
884 {
885 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
886 }
887
888 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
889 {
890 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
891 }
892
893 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
894 {
895 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
896 }
897
898 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
899 TCGReg rd, TCGReg rn)
900 {
901 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
902 int bits = (8 << s_bits) - 1;
903 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
904 }
905
906 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
907 TCGReg rd, TCGReg rn)
908 {
909 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
910 int bits = (8 << s_bits) - 1;
911 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
912 }
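/* E.g. s_bits = MO_16 gives bits = 15, so this emits UBFM Wd, Wn, #0, #15,
   the UXTH alias.  The 32-bit form is always sufficient here because writing
   a W register clears the upper 32 bits of the X register. */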
913
914 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
915 TCGReg rn, int64_t aimm)
916 {
917 if (aimm >= 0) {
918 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
919 } else {
920 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
921 }
922 }
923
924 static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
925 TCGReg rh, TCGReg al, TCGReg ah,
926 tcg_target_long bl, tcg_target_long bh,
927 bool const_bl, bool const_bh, bool sub)
928 {
929 TCGReg orig_rl = rl;
930 AArch64Insn insn;
931
932 if (rl == ah || (!const_bh && rl == bh)) {
933 rl = TCG_REG_TMP;
934 }
935
936 if (const_bl) {
937 insn = I3401_ADDSI;
938 if ((bl < 0) ^ sub) {
939 insn = I3401_SUBSI;
940 bl = -bl;
941 }
942 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
943 } else {
944 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
945 }
946
947 insn = I3503_ADC;
948 if (const_bh) {
949 /* Note that the only two constants we support are 0 and -1, and
950 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
951 if ((bh != 0) ^ sub) {
952 insn = I3503_SBC;
953 }
954 bh = TCG_REG_XZR;
955 } else if (sub) {
956 insn = I3503_SBC;
957 }
958 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
959
960 tcg_out_mov(s, ext, orig_rl, rl);
961 }
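/* Sketch of the typical register-operand output (illustrative): add2 becomes
       adds xlo, xal, xbl        ; low halves, setting the carry flag
       adc  xhi, xah, xbh        ; high halves plus carry
   and sub2 uses SUBS/SBC instead; constant low parts fold into the ADDSI or
   SUBSI immediate forms handled above. */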
962
963 #ifdef CONFIG_SOFTMMU
964 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
965 * int mmu_idx, uintptr_t ra)
966 */
967 static void * const qemu_ld_helpers[16] = {
968 [MO_UB] = helper_ret_ldub_mmu,
969 [MO_LEUW] = helper_le_lduw_mmu,
970 [MO_LEUL] = helper_le_ldul_mmu,
971 [MO_LEQ] = helper_le_ldq_mmu,
972 [MO_BEUW] = helper_be_lduw_mmu,
973 [MO_BEUL] = helper_be_ldul_mmu,
974 [MO_BEQ] = helper_be_ldq_mmu,
975 };
976
977 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
978 * uintxx_t val, int mmu_idx, uintptr_t ra)
979 */
980 static void * const qemu_st_helpers[16] = {
981 [MO_UB] = helper_ret_stb_mmu,
982 [MO_LEUW] = helper_le_stw_mmu,
983 [MO_LEUL] = helper_le_stl_mmu,
984 [MO_LEQ] = helper_le_stq_mmu,
985 [MO_BEUW] = helper_be_stw_mmu,
986 [MO_BEUL] = helper_be_stl_mmu,
987 [MO_BEQ] = helper_be_stq_mmu,
988 };
989
990 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
991 {
992 ptrdiff_t offset = tcg_pcrel_diff(s, target);
993 assert(offset == sextract64(offset, 0, 21));
994 tcg_out_insn(s, 3406, ADR, rd, offset);
995 }
996
997 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
998 {
999 TCGMemOp opc = lb->opc;
1000 TCGMemOp size = opc & MO_SIZE;
1001
1002 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1003
1004 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
1005 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1006 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
1007 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1008 tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);
1009 if (opc & MO_SIGN) {
1010 tcg_out_sxt(s, TCG_TYPE_I64, size, lb->datalo_reg, TCG_REG_X0);
1011 } else {
1012 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1013 }
1014
1015 tcg_out_goto(s, lb->raddr);
1016 }
1017
1018 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1019 {
1020 TCGMemOp opc = lb->opc;
1021 TCGMemOp size = opc & MO_SIZE;
1022
1023 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1024
1025 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
1026 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1027 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1028 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
1029 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1030 tcg_out_call(s, qemu_st_helpers[opc]);
1031 tcg_out_goto(s, lb->raddr);
1032 }
1033
1034 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
1035 TCGReg data_reg, TCGReg addr_reg,
1036 int mem_index, tcg_insn_unit *raddr,
1037 tcg_insn_unit *label_ptr)
1038 {
1039 TCGLabelQemuLdst *label = new_ldst_label(s);
1040
1041 label->is_ld = is_ld;
1042 label->opc = opc;
1043 label->datalo_reg = data_reg;
1044 label->addrlo_reg = addr_reg;
1045 label->mem_index = mem_index;
1046 label->raddr = raddr;
1047 label->label_ptr[0] = label_ptr;
1048 }
1049
1050 /* Load and compare a TLB entry, emitting the conditional jump to the
1051 slow path for the failure case, which will be patched later when finalizing
1052 the slow path. Generated code returns the host addend in X1,
1053    clobbers X0, X2, X3 and TMP. */
1054 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits,
1055 tcg_insn_unit **label_ptr, int mem_index,
1056 bool is_read)
1057 {
1058 TCGReg base = TCG_AREG0;
1059 int tlb_offset = is_read ?
1060 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1061 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1062
1063 /* Extract the TLB index from the address into X0.
1064 X0<CPU_TLB_BITS:0> =
1065 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1066 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1067 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1068
1069 /* Store the page mask part of the address and the low s_bits into X3.
1070 Later this allows checking for equality and alignment at the same time.
1071 X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
1072 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
1073 addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
1074
1075 /* Add any "high bits" from the tlb offset to the env address into X2,
1076 to take advantage of the LSL12 form of the ADDI instruction.
1077 X2 = env + (tlb_offset & 0xfff000) */
1078 if (tlb_offset & 0xfff000) {
1079 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1080 tlb_offset & 0xfff000);
1081 base = TCG_REG_X2;
1082 }
1083
1084 /* Merge the tlb index contribution into X2.
1085 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1086 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1087 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1088
1089 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1090 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1091 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1092 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
1093
1094 /* Load the tlb addend. Do that early to avoid stalling.
1095 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1096 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1097 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1098 (is_read ? offsetof(CPUTLBEntry, addr_read)
1099 : offsetof(CPUTLBEntry, addr_write)));
1100
1101 /* Perform the address comparison. */
1102 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1103
1104 /* If not equal, we jump to the slow path. */
1105 *label_ptr = s->code_ptr;
1106 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1107 }
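/* The emitted sequence is roughly (illustrative, symbolic offsets):
       ubfm x0, addr, #TARGET_PAGE_BITS, #(TARGET_PAGE_BITS + CPU_TLB_BITS)
       and  x3, addr, #(TARGET_PAGE_MASK | ((1 << s_bits) - 1))
       add  x2, env, #(tlb_offset & 0xfff000)      ; only if non-zero
       add  x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
       ldr  x0, [x2, #(tlb_offset & 0xfff)]        ; tlb comparator
       ldr  x1, [x2, #(... + offsetof(addend))]    ; host addend
       cmp  x0, x3
       b.ne slow_path                              ; patched later */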
1108
1109 #endif /* CONFIG_SOFTMMU */
1110
1111 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop,
1112 TCGReg data_r, TCGReg addr_r, TCGReg off_r)
1113 {
1114 const TCGMemOp bswap = memop & MO_BSWAP;
1115
1116 switch (memop & MO_SSIZE) {
1117 case MO_UB:
1118 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, off_r);
1119 break;
1120 case MO_SB:
1121 tcg_out_ldst_r(s, I3312_LDRSBX, data_r, addr_r, off_r);
1122 break;
1123 case MO_UW:
1124 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, off_r);
1125 if (bswap) {
1126 tcg_out_rev16(s, data_r, data_r);
1127 }
1128 break;
1129 case MO_SW:
1130 if (bswap) {
1131 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, off_r);
1132 tcg_out_rev16(s, data_r, data_r);
1133 tcg_out_sxt(s, TCG_TYPE_I64, MO_16, data_r, data_r);
1134 } else {
1135 tcg_out_ldst_r(s, I3312_LDRSHX, data_r, addr_r, off_r);
1136 }
1137 break;
1138 case MO_UL:
1139 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, off_r);
1140 if (bswap) {
1141 tcg_out_rev32(s, data_r, data_r);
1142 }
1143 break;
1144 case MO_SL:
1145 if (bswap) {
1146 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, off_r);
1147 tcg_out_rev32(s, data_r, data_r);
1148 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1149 } else {
1150 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, off_r);
1151 }
1152 break;
1153 case MO_Q:
1154 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, off_r);
1155 if (bswap) {
1156 tcg_out_rev64(s, data_r, data_r);
1157 }
1158 break;
1159 default:
1160 tcg_abort();
1161 }
1162 }
1163
1164 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1165 TCGReg data_r, TCGReg addr_r, TCGReg off_r)
1166 {
1167 const TCGMemOp bswap = memop & MO_BSWAP;
1168
1169 switch (memop & MO_SIZE) {
1170 case MO_8:
1171 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, off_r);
1172 break;
1173 case MO_16:
1174 if (bswap && data_r != TCG_REG_XZR) {
1175 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1176 data_r = TCG_REG_TMP;
1177 }
1178 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, off_r);
1179 break;
1180 case MO_32:
1181 if (bswap && data_r != TCG_REG_XZR) {
1182 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1183 data_r = TCG_REG_TMP;
1184 }
1185 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, off_r);
1186 break;
1187 case MO_64:
1188 if (bswap && data_r != TCG_REG_XZR) {
1189 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1190 data_r = TCG_REG_TMP;
1191 }
1192 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, off_r);
1193 break;
1194 default:
1195 tcg_abort();
1196 }
1197 }
1198
1199 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1200 TCGMemOp memop, int mem_index)
1201 {
1202 #ifdef CONFIG_SOFTMMU
1203 TCGMemOp s_bits = memop & MO_SIZE;
1204 tcg_insn_unit *label_ptr;
1205
1206 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
1207 tcg_out_qemu_ld_direct(s, memop, data_reg, addr_reg, TCG_REG_X1);
1208 add_qemu_ldst_label(s, true, memop, data_reg, addr_reg,
1209 mem_index, s->code_ptr, label_ptr);
1210 #else /* !CONFIG_SOFTMMU */
1211 tcg_out_qemu_ld_direct(s, memop, data_reg, addr_reg,
1212 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1213 #endif /* CONFIG_SOFTMMU */
1214 }
1215
1216 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1217 TCGMemOp memop, int mem_index)
1218 {
1219 #ifdef CONFIG_SOFTMMU
1220 TCGMemOp s_bits = memop & MO_SIZE;
1221 tcg_insn_unit *label_ptr;
1222
1223 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
1224 tcg_out_qemu_st_direct(s, memop, data_reg, addr_reg, TCG_REG_X1);
1225 add_qemu_ldst_label(s, false, memop, data_reg, addr_reg,
1226 mem_index, s->code_ptr, label_ptr);
1227 #else /* !CONFIG_SOFTMMU */
1228 tcg_out_qemu_st_direct(s, memop, data_reg, addr_reg,
1229 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1230 #endif /* CONFIG_SOFTMMU */
1231 }
1232
1233 static tcg_insn_unit *tb_ret_addr;
1234
1235 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1236 const TCGArg args[TCG_MAX_OP_ARGS],
1237 const int const_args[TCG_MAX_OP_ARGS])
1238 {
1239 /* 99% of the time, we can signal the use of extension registers
1240 by looking to see if the opcode handles 64-bit data. */
1241 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1242
1243 /* Hoist the loads of the most common arguments. */
1244 TCGArg a0 = args[0];
1245 TCGArg a1 = args[1];
1246 TCGArg a2 = args[2];
1247 int c2 = const_args[2];
1248
1249 /* Some operands are defined with "rZ" constraint, a register or
1250 the zero register. These need not actually test args[I] == 0. */
1251 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1252
1253 switch (opc) {
1254 case INDEX_op_exit_tb:
1255 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1256 tcg_out_goto(s, tb_ret_addr);
1257 break;
1258
1259 case INDEX_op_goto_tb:
1260 #ifndef USE_DIRECT_JUMP
1261 #error "USE_DIRECT_JUMP required for aarch64"
1262 #endif
1263 assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
1264 s->tb_jmp_offset[a0] = tcg_current_code_size(s);
1265 /* actual branch destination will be patched by
1266 aarch64_tb_set_jmp_target later, beware retranslation. */
1267 tcg_out_goto_noaddr(s);
1268 s->tb_next_offset[a0] = tcg_current_code_size(s);
1269 break;
1270
1271 case INDEX_op_br:
1272 tcg_out_goto_label(s, a0);
1273 break;
1274
1275 case INDEX_op_ld8u_i32:
1276 case INDEX_op_ld8u_i64:
1277 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
1278 break;
1279 case INDEX_op_ld8s_i32:
1280 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
1281 break;
1282 case INDEX_op_ld8s_i64:
1283 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
1284 break;
1285 case INDEX_op_ld16u_i32:
1286 case INDEX_op_ld16u_i64:
1287 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
1288 break;
1289 case INDEX_op_ld16s_i32:
1290 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
1291 break;
1292 case INDEX_op_ld16s_i64:
1293 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
1294 break;
1295 case INDEX_op_ld_i32:
1296 case INDEX_op_ld32u_i64:
1297 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
1298 break;
1299 case INDEX_op_ld32s_i64:
1300 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
1301 break;
1302 case INDEX_op_ld_i64:
1303 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
1304 break;
1305
1306 case INDEX_op_st8_i32:
1307 case INDEX_op_st8_i64:
1308 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
1309 break;
1310 case INDEX_op_st16_i32:
1311 case INDEX_op_st16_i64:
1312 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
1313 break;
1314 case INDEX_op_st_i32:
1315 case INDEX_op_st32_i64:
1316 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
1317 break;
1318 case INDEX_op_st_i64:
1319 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
1320 break;
1321
1322 case INDEX_op_add_i32:
1323 a2 = (int32_t)a2;
1324 /* FALLTHRU */
1325 case INDEX_op_add_i64:
1326 if (c2) {
1327 tcg_out_addsubi(s, ext, a0, a1, a2);
1328 } else {
1329 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1330 }
1331 break;
1332
1333 case INDEX_op_sub_i32:
1334 a2 = (int32_t)a2;
1335 /* FALLTHRU */
1336 case INDEX_op_sub_i64:
1337 if (c2) {
1338 tcg_out_addsubi(s, ext, a0, a1, -a2);
1339 } else {
1340 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1341 }
1342 break;
1343
1344 case INDEX_op_neg_i64:
1345 case INDEX_op_neg_i32:
1346 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1347 break;
1348
1349 case INDEX_op_and_i32:
1350 a2 = (int32_t)a2;
1351 /* FALLTHRU */
1352 case INDEX_op_and_i64:
1353 if (c2) {
1354 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1355 } else {
1356 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1357 }
1358 break;
1359
1360 case INDEX_op_andc_i32:
1361 a2 = (int32_t)a2;
1362 /* FALLTHRU */
1363 case INDEX_op_andc_i64:
1364 if (c2) {
1365 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1366 } else {
1367 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1368 }
1369 break;
1370
1371 case INDEX_op_or_i32:
1372 a2 = (int32_t)a2;
1373 /* FALLTHRU */
1374 case INDEX_op_or_i64:
1375 if (c2) {
1376 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1377 } else {
1378 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1379 }
1380 break;
1381
1382 case INDEX_op_orc_i32:
1383 a2 = (int32_t)a2;
1384 /* FALLTHRU */
1385 case INDEX_op_orc_i64:
1386 if (c2) {
1387 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1388 } else {
1389 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1390 }
1391 break;
1392
1393 case INDEX_op_xor_i32:
1394 a2 = (int32_t)a2;
1395 /* FALLTHRU */
1396 case INDEX_op_xor_i64:
1397 if (c2) {
1398 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1399 } else {
1400 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1401 }
1402 break;
1403
1404 case INDEX_op_eqv_i32:
1405 a2 = (int32_t)a2;
1406 /* FALLTHRU */
1407 case INDEX_op_eqv_i64:
1408 if (c2) {
1409 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1410 } else {
1411 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1412 }
1413 break;
1414
1415 case INDEX_op_not_i64:
1416 case INDEX_op_not_i32:
1417 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1418 break;
1419
1420 case INDEX_op_mul_i64:
1421 case INDEX_op_mul_i32:
1422 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1423 break;
1424
1425 case INDEX_op_div_i64:
1426 case INDEX_op_div_i32:
1427 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1428 break;
1429 case INDEX_op_divu_i64:
1430 case INDEX_op_divu_i32:
1431 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1432 break;
1433
1434 case INDEX_op_rem_i64:
1435 case INDEX_op_rem_i32:
1436 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1437 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1438 break;
1439 case INDEX_op_remu_i64:
1440 case INDEX_op_remu_i32:
1441 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1442 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1443 break;
1444
1445 case INDEX_op_shl_i64:
1446 case INDEX_op_shl_i32:
1447 if (c2) {
1448 tcg_out_shl(s, ext, a0, a1, a2);
1449 } else {
1450 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1451 }
1452 break;
1453
1454 case INDEX_op_shr_i64:
1455 case INDEX_op_shr_i32:
1456 if (c2) {
1457 tcg_out_shr(s, ext, a0, a1, a2);
1458 } else {
1459 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1460 }
1461 break;
1462
1463 case INDEX_op_sar_i64:
1464 case INDEX_op_sar_i32:
1465 if (c2) {
1466 tcg_out_sar(s, ext, a0, a1, a2);
1467 } else {
1468 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1469 }
1470 break;
1471
1472 case INDEX_op_rotr_i64:
1473 case INDEX_op_rotr_i32:
1474 if (c2) {
1475 tcg_out_rotr(s, ext, a0, a1, a2);
1476 } else {
1477 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1478 }
1479 break;
1480
1481 case INDEX_op_rotl_i64:
1482 case INDEX_op_rotl_i32:
1483 if (c2) {
1484 tcg_out_rotl(s, ext, a0, a1, a2);
1485 } else {
1486 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1487 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1488 }
1489 break;
1490
1491 case INDEX_op_brcond_i32:
1492 a1 = (int32_t)a1;
1493 /* FALLTHRU */
1494 case INDEX_op_brcond_i64:
1495 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], args[3]);
1496 break;
1497
1498 case INDEX_op_setcond_i32:
1499 a2 = (int32_t)a2;
1500 /* FALLTHRU */
1501 case INDEX_op_setcond_i64:
1502 tcg_out_cmp(s, ext, a1, a2, c2);
1503 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1504 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1505 TCG_REG_XZR, tcg_invert_cond(args[3]));
1506 break;
1507
1508 case INDEX_op_movcond_i32:
1509 a2 = (int32_t)a2;
1510 /* FALLTHRU */
1511 case INDEX_op_movcond_i64:
1512 tcg_out_cmp(s, ext, a1, a2, c2);
1513 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1514 break;
1515
1516 case INDEX_op_qemu_ld_i32:
1517 case INDEX_op_qemu_ld_i64:
1518 tcg_out_qemu_ld(s, a0, a1, a2, args[3]);
1519 break;
1520 case INDEX_op_qemu_st_i32:
1521 case INDEX_op_qemu_st_i64:
1522 tcg_out_qemu_st(s, REG0(0), a1, a2, args[3]);
1523 break;
1524
1525 case INDEX_op_bswap64_i64:
1526 tcg_out_rev64(s, a0, a1);
1527 break;
1528 case INDEX_op_bswap32_i64:
1529 case INDEX_op_bswap32_i32:
1530 tcg_out_rev32(s, a0, a1);
1531 break;
1532 case INDEX_op_bswap16_i64:
1533 case INDEX_op_bswap16_i32:
1534 tcg_out_rev16(s, a0, a1);
1535 break;
1536
1537 case INDEX_op_ext8s_i64:
1538 case INDEX_op_ext8s_i32:
1539 tcg_out_sxt(s, ext, MO_8, a0, a1);
1540 break;
1541 case INDEX_op_ext16s_i64:
1542 case INDEX_op_ext16s_i32:
1543 tcg_out_sxt(s, ext, MO_16, a0, a1);
1544 break;
1545 case INDEX_op_ext32s_i64:
1546 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1547 break;
1548 case INDEX_op_ext8u_i64:
1549 case INDEX_op_ext8u_i32:
1550 tcg_out_uxt(s, MO_8, a0, a1);
1551 break;
1552 case INDEX_op_ext16u_i64:
1553 case INDEX_op_ext16u_i32:
1554 tcg_out_uxt(s, MO_16, a0, a1);
1555 break;
1556 case INDEX_op_ext32u_i64:
1557 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1558 break;
1559
1560 case INDEX_op_deposit_i64:
1561 case INDEX_op_deposit_i32:
1562 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1563 break;
1564
1565 case INDEX_op_add2_i32:
1566 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1567 (int32_t)args[4], args[5], const_args[4],
1568 const_args[5], false);
1569 break;
1570 case INDEX_op_add2_i64:
1571 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1572 args[5], const_args[4], const_args[5], false);
1573 break;
1574 case INDEX_op_sub2_i32:
1575 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1576 (int32_t)args[4], args[5], const_args[4],
1577 const_args[5], true);
1578 break;
1579 case INDEX_op_sub2_i64:
1580 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1581 args[5], const_args[4], const_args[5], true);
1582 break;
1583
1584 case INDEX_op_muluh_i64:
1585 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1586 break;
1587 case INDEX_op_mulsh_i64:
1588 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1589 break;
1590
1591 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1592 case INDEX_op_mov_i64:
1593 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1594 case INDEX_op_movi_i64:
1595 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1596 default:
1597 tcg_abort();
1598 }
1599
1600 #undef REG0
1601 }
1602
1603 static const TCGTargetOpDef aarch64_op_defs[] = {
1604 { INDEX_op_exit_tb, { } },
1605 { INDEX_op_goto_tb, { } },
1606 { INDEX_op_br, { } },
1607
1608 { INDEX_op_ld8u_i32, { "r", "r" } },
1609 { INDEX_op_ld8s_i32, { "r", "r" } },
1610 { INDEX_op_ld16u_i32, { "r", "r" } },
1611 { INDEX_op_ld16s_i32, { "r", "r" } },
1612 { INDEX_op_ld_i32, { "r", "r" } },
1613 { INDEX_op_ld8u_i64, { "r", "r" } },
1614 { INDEX_op_ld8s_i64, { "r", "r" } },
1615 { INDEX_op_ld16u_i64, { "r", "r" } },
1616 { INDEX_op_ld16s_i64, { "r", "r" } },
1617 { INDEX_op_ld32u_i64, { "r", "r" } },
1618 { INDEX_op_ld32s_i64, { "r", "r" } },
1619 { INDEX_op_ld_i64, { "r", "r" } },
1620
1621 { INDEX_op_st8_i32, { "rZ", "r" } },
1622 { INDEX_op_st16_i32, { "rZ", "r" } },
1623 { INDEX_op_st_i32, { "rZ", "r" } },
1624 { INDEX_op_st8_i64, { "rZ", "r" } },
1625 { INDEX_op_st16_i64, { "rZ", "r" } },
1626 { INDEX_op_st32_i64, { "rZ", "r" } },
1627 { INDEX_op_st_i64, { "rZ", "r" } },
1628
1629 { INDEX_op_add_i32, { "r", "r", "rA" } },
1630 { INDEX_op_add_i64, { "r", "r", "rA" } },
1631 { INDEX_op_sub_i32, { "r", "r", "rA" } },
1632 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1633 { INDEX_op_mul_i32, { "r", "r", "r" } },
1634 { INDEX_op_mul_i64, { "r", "r", "r" } },
1635 { INDEX_op_div_i32, { "r", "r", "r" } },
1636 { INDEX_op_div_i64, { "r", "r", "r" } },
1637 { INDEX_op_divu_i32, { "r", "r", "r" } },
1638 { INDEX_op_divu_i64, { "r", "r", "r" } },
1639 { INDEX_op_rem_i32, { "r", "r", "r" } },
1640 { INDEX_op_rem_i64, { "r", "r", "r" } },
1641 { INDEX_op_remu_i32, { "r", "r", "r" } },
1642 { INDEX_op_remu_i64, { "r", "r", "r" } },
1643 { INDEX_op_and_i32, { "r", "r", "rL" } },
1644 { INDEX_op_and_i64, { "r", "r", "rL" } },
1645 { INDEX_op_or_i32, { "r", "r", "rL" } },
1646 { INDEX_op_or_i64, { "r", "r", "rL" } },
1647 { INDEX_op_xor_i32, { "r", "r", "rL" } },
1648 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1649 { INDEX_op_andc_i32, { "r", "r", "rL" } },
1650 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1651 { INDEX_op_orc_i32, { "r", "r", "rL" } },
1652 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1653 { INDEX_op_eqv_i32, { "r", "r", "rL" } },
1654 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1655
1656 { INDEX_op_neg_i32, { "r", "r" } },
1657 { INDEX_op_neg_i64, { "r", "r" } },
1658 { INDEX_op_not_i32, { "r", "r" } },
1659 { INDEX_op_not_i64, { "r", "r" } },
1660
1661 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1662 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1663 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1664 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1665 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1666 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1667 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1668 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1669 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1670 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1671
1672 { INDEX_op_brcond_i32, { "r", "rA" } },
1673 { INDEX_op_brcond_i64, { "r", "rA" } },
1674 { INDEX_op_setcond_i32, { "r", "r", "rA" } },
1675 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1676 { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } },
1677 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1678
1679 { INDEX_op_qemu_ld_i32, { "r", "l" } },
1680 { INDEX_op_qemu_ld_i64, { "r", "l" } },
1681 { INDEX_op_qemu_st_i32, { "lZ", "l" } },
1682 { INDEX_op_qemu_st_i64, { "lZ", "l" } },
1683
1684 { INDEX_op_bswap16_i32, { "r", "r" } },
1685 { INDEX_op_bswap32_i32, { "r", "r" } },
1686 { INDEX_op_bswap16_i64, { "r", "r" } },
1687 { INDEX_op_bswap32_i64, { "r", "r" } },
1688 { INDEX_op_bswap64_i64, { "r", "r" } },
1689
1690 { INDEX_op_ext8s_i32, { "r", "r" } },
1691 { INDEX_op_ext16s_i32, { "r", "r" } },
1692 { INDEX_op_ext8u_i32, { "r", "r" } },
1693 { INDEX_op_ext16u_i32, { "r", "r" } },
1694
1695 { INDEX_op_ext8s_i64, { "r", "r" } },
1696 { INDEX_op_ext16s_i64, { "r", "r" } },
1697 { INDEX_op_ext32s_i64, { "r", "r" } },
1698 { INDEX_op_ext8u_i64, { "r", "r" } },
1699 { INDEX_op_ext16u_i64, { "r", "r" } },
1700 { INDEX_op_ext32u_i64, { "r", "r" } },
1701
1702 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1703 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1704
1705 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1706 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1707 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1708 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1709
1710 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1711 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1712
1713 { -1 },
1714 };
1715
1716 static void tcg_target_init(TCGContext *s)
1717 {
1718 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1719 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1720
1721 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1722 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1723 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1724 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1725 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1726 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1727 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1728 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1729 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1730 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1731 (1 << TCG_REG_X18) | (1 << TCG_REG_X30));
1732
1733 tcg_regset_clear(s->reserved_regs);
1734 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1735 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1736 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1737 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1738
1739 tcg_add_target_add_op_defs(aarch64_op_defs);
1740 }
1741
1742 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
1743 #define PUSH_SIZE ((30 - 19 + 1) * 8)
1744
1745 #define FRAME_SIZE \
1746 ((PUSH_SIZE \
1747 + TCG_STATIC_CALL_ARGS_SIZE \
1748 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
1749 + TCG_TARGET_STACK_ALIGN - 1) \
1750 & ~(TCG_TARGET_STACK_ALIGN - 1))
1751
1752 /* We're expecting a 2 byte uleb128 encoded value. */
1753 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
1754
1755 /* We're expecting to use a single ADDI insn. */
1756 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
1757
1758 static void tcg_target_qemu_prologue(TCGContext *s)
1759 {
1760 TCGReg r;
1761
1762 /* Push (FP, LR) and allocate space for all saved registers. */
1763 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
1764 TCG_REG_SP, -PUSH_SIZE, 1, 1);
1765
1766 /* Set up frame pointer for canonical unwinding. */
1767 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1768
1769 /* Store callee-preserved regs x19..x28. */
1770 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1771 int ofs = (r - TCG_REG_X19 + 2) * 8;
1772 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1773 }
1774
1775 /* Make stack space for TCG locals. */
1776 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1777 FRAME_SIZE - PUSH_SIZE);
1778
1779 /* Inform TCG about how to find TCG locals with register, offset, size. */
1780 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1781 CPU_TEMP_BUF_NLONGS * sizeof(long));
1782
1783 #if defined(CONFIG_USE_GUEST_BASE)
1784 if (GUEST_BASE) {
1785 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
1786 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1787 }
1788 #endif
1789
1790 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1791 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
1792
1793 tb_ret_addr = s->code_ptr;
1794
1795 /* Remove TCG locals stack space. */
1796 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1797 FRAME_SIZE - PUSH_SIZE);
1798
1799 /* Restore registers x19..x28. */
1800 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1801 int ofs = (r - TCG_REG_X19 + 2) * 8;
1802 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1803 }
1804
1805 /* Pop (FP, LR), restore SP to previous frame. */
1806 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
1807 TCG_REG_SP, PUSH_SIZE, 0, 1);
1808 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
1809 }
1810
1811 typedef struct {
1812 DebugFrameHeader h;
1813 uint8_t fde_def_cfa[4];
1814 uint8_t fde_reg_ofs[24];
1815 } DebugFrame;
1816
1817 #define ELF_HOST_MACHINE EM_AARCH64
1818
1819 static const DebugFrame debug_frame = {
1820 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1821 .h.cie.id = -1,
1822 .h.cie.version = 1,
1823 .h.cie.code_align = 1,
1824 .h.cie.data_align = 0x78, /* sleb128 -8 */
1825 .h.cie.return_column = TCG_REG_LR,
1826
1827 /* Total FDE size does not include the "len" member. */
1828 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
1829
1830 .fde_def_cfa = {
1831 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
1832 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
1833 (FRAME_SIZE >> 7)
1834 },
1835 .fde_reg_ofs = {
1836 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
1837 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
1838 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
1839 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
1840 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
1841 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
1842 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
1843 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
1844 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
1845         0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
1846 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
1847 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
1848 }
1849 };
1850
1851 void tcg_register_jit(void *buf, size_t buf_size)
1852 {
1853 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
1854 }