1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-be-ldst.h"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType when setting the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifndef NDEBUG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
27 };
28 #endif /* NDEBUG */
29
30 static const int tcg_target_reg_alloc_order[] = {
31 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
32 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
33 TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
34
35 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
36 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
37 TCG_REG_X16, TCG_REG_X17,
38
39 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
40 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
41
42 /* X18 reserved by system */
43 /* X19 reserved for AREG0 */
44 /* X29 reserved as fp */
45 /* X30 reserved as temporary */
46 };
47
48 static const int tcg_target_call_iarg_regs[8] = {
49 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
50 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
51 };
52 static const int tcg_target_call_oarg_regs[1] = {
53 TCG_REG_X0
54 };
55
56 #define TCG_REG_TMP TCG_REG_X30
57
58 #ifndef CONFIG_SOFTMMU
59 # ifdef CONFIG_USE_GUEST_BASE
60 # define TCG_REG_GUEST_BASE TCG_REG_X28
61 # else
62 # define TCG_REG_GUEST_BASE TCG_REG_XZR
63 # endif
64 #endif
65
66 static inline void reloc_pc26(void *code_ptr, intptr_t target)
67 {
68 intptr_t offset = (target - (intptr_t)code_ptr) / 4;
69 /* read instruction, mask away previous PC_REL26 parameter contents,
70 set the proper offset, then write back the instruction. */
71 uint32_t insn = *(uint32_t *)code_ptr;
72 insn = deposit32(insn, 0, 26, offset);
73 *(uint32_t *)code_ptr = insn;
74 }
75
76 static inline void reloc_pc19(void *code_ptr, intptr_t target)
77 {
78 intptr_t offset = (target - (intptr_t)code_ptr) / 4;
79 /* read instruction, mask away previous PC_REL19 parameter contents,
80 set the proper offset, then write back the instruction. */
81 uint32_t insn = *(uint32_t *)code_ptr;
82 insn = deposit32(insn, 5, 19, offset);
83 *(uint32_t *)code_ptr = insn;
84 }
85
86 static inline void patch_reloc(uint8_t *code_ptr, int type,
87 intptr_t value, intptr_t addend)
88 {
89 value += addend;
90
91 switch (type) {
92 case R_AARCH64_JUMP26:
93 case R_AARCH64_CALL26:
94 reloc_pc26(code_ptr, value);
95 break;
96 case R_AARCH64_CONDBR19:
97 reloc_pc19(code_ptr, value);
98 break;
99
100 default:
101 tcg_abort();
102 }
103 }
104
105 #define TCG_CT_CONST_IS32 0x100
106 #define TCG_CT_CONST_AIMM 0x200
107 #define TCG_CT_CONST_LIMM 0x400
108 #define TCG_CT_CONST_ZERO 0x800
109 #define TCG_CT_CONST_MONE 0x1000
110
111 /* parse target specific constraints */
112 static int target_parse_constraint(TCGArgConstraint *ct,
113 const char **pct_str)
114 {
115 const char *ct_str = *pct_str;
116
117 switch (ct_str[0]) {
118 case 'r':
119 ct->ct |= TCG_CT_REG;
120 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
121 break;
122 case 'l': /* qemu_ld / qemu_st address, data_reg */
123 ct->ct |= TCG_CT_REG;
124 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
125 #ifdef CONFIG_SOFTMMU
126 /* x0 and x1 will be overwritten when reading the tlb entry, and x2
127 and x3 are needed for the helper args; better to avoid using them. */
128 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
129 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
130 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
131 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
132 #endif
133 break;
134 case 'w': /* The operand should be considered 32-bit. */
135 ct->ct |= TCG_CT_CONST_IS32;
136 break;
137 case 'A': /* Valid for arithmetic immediate (positive or negative). */
138 ct->ct |= TCG_CT_CONST_AIMM;
139 break;
140 case 'L': /* Valid for logical immediate. */
141 ct->ct |= TCG_CT_CONST_LIMM;
142 break;
143 case 'M': /* minus one */
144 ct->ct |= TCG_CT_CONST_MONE;
145 break;
146 case 'Z': /* zero */
147 ct->ct |= TCG_CT_CONST_ZERO;
148 break;
149 default:
150 return -1;
151 }
152
153 ct_str++;
154 *pct_str = ct_str;
155 return 0;
156 }
157
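/* Match a value usable as an arithmetic immediate: a 12-bit unsigned
   value, optionally shifted left by 12 (the two forms accepted by the
   ADD/SUB immediate instructions). */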
158 static inline bool is_aimm(uint64_t val)
159 {
160 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
161 }
162
163 static inline bool is_limm(uint64_t val)
164 {
165 /* Taking a simplified view of the logical immediates for now, ignoring
166 the replication that can happen across the field. Match bit patterns
167 of the forms
168 0....01....1
169 0..01..10..0
170 and their inverses. */
171
172 /* Make things easier below, by testing the form with msb clear. */
173 if ((int64_t)val < 0) {
174 val = ~val;
175 }
176 if (val == 0) {
177 return false;
178 }
179 val += val & -val;
180 return (val & (val - 1)) == 0;
181 }
182
183 static int tcg_target_const_match(tcg_target_long val, TCGType type,
184 const TCGArgConstraint *arg_ct)
185 {
186 int ct = arg_ct->ct;
187
188 if (ct & TCG_CT_CONST) {
189 return 1;
190 }
191 if (ct & TCG_CT_CONST_IS32) {
192 val = (int32_t)val;
193 }
194 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
195 return 1;
196 }
197 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
198 return 1;
199 }
200 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
201 return 1;
202 }
203 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
204 return 1;
205 }
206
207 return 0;
208 }
209
210 enum aarch64_cond_code {
211 COND_EQ = 0x0,
212 COND_NE = 0x1,
213 COND_CS = 0x2, /* Unsigned greater or equal */
214 COND_HS = COND_CS, /* ALIAS greater or equal */
215 COND_CC = 0x3, /* Unsigned less than */
216 COND_LO = COND_CC, /* ALIAS Lower */
217 COND_MI = 0x4, /* Negative */
218 COND_PL = 0x5, /* Zero or greater */
219 COND_VS = 0x6, /* Overflow */
220 COND_VC = 0x7, /* No overflow */
221 COND_HI = 0x8, /* Unsigned greater than */
222 COND_LS = 0x9, /* Unsigned less or equal */
223 COND_GE = 0xa,
224 COND_LT = 0xb,
225 COND_GT = 0xc,
226 COND_LE = 0xd,
227 COND_AL = 0xe,
228 COND_NV = 0xf, /* behaves like COND_AL here */
229 };
230
231 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
232 [TCG_COND_EQ] = COND_EQ,
233 [TCG_COND_NE] = COND_NE,
234 [TCG_COND_LT] = COND_LT,
235 [TCG_COND_GE] = COND_GE,
236 [TCG_COND_LE] = COND_LE,
237 [TCG_COND_GT] = COND_GT,
238 /* unsigned */
239 [TCG_COND_LTU] = COND_LO,
240 [TCG_COND_GTU] = COND_HI,
241 [TCG_COND_GEU] = COND_HS,
242 [TCG_COND_LEU] = COND_LS,
243 };
244
245 typedef enum {
246 LDST_ST = 0, /* store */
247 LDST_LD = 1, /* load */
248 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
249 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
250 } AArch64LdstType;
251
252 /* We encode the format of the insn into the beginning of the name, so that
253 we can have the preprocessor help "typecheck" the insn vs the output
254 function. Arm didn't provide us with nice names for the formats, so we
255 use the section number of the architecture reference manual in which the
256 instruction group is described. */
257 typedef enum {
258 /* Compare and branch (immediate). */
259 I3201_CBZ = 0x34000000,
260 I3201_CBNZ = 0x35000000,
261
262 /* Conditional branch (immediate). */
263 I3202_B_C = 0x54000000,
264
265 /* Unconditional branch (immediate). */
266 I3206_B = 0x14000000,
267 I3206_BL = 0x94000000,
268
269 /* Unconditional branch (register). */
270 I3207_BR = 0xd61f0000,
271 I3207_BLR = 0xd63f0000,
272 I3207_RET = 0xd65f0000,
273
274 /* Load/store register. Described here as 3.3.12, but the helper
275 that emits them can transform to 3.3.10 or 3.3.13. */
276 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
277 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
278 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
279 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
280
281 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
282 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
283 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
284 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
285
286 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
287 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
288
289 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
290 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
291 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
292
293 I3312_TO_I3310 = 0x00206800,
294 I3312_TO_I3313 = 0x01000000,
295
296 /* Load/store register pair instructions. */
297 I3314_LDP = 0x28400000,
298 I3314_STP = 0x28000000,
299
300 /* Add/subtract immediate instructions. */
301 I3401_ADDI = 0x11000000,
302 I3401_ADDSI = 0x31000000,
303 I3401_SUBI = 0x51000000,
304 I3401_SUBSI = 0x71000000,
305
306 /* Bitfield instructions. */
307 I3402_BFM = 0x33000000,
308 I3402_SBFM = 0x13000000,
309 I3402_UBFM = 0x53000000,
310
311 /* Extract instruction. */
312 I3403_EXTR = 0x13800000,
313
314 /* Logical immediate instructions. */
315 I3404_ANDI = 0x12000000,
316 I3404_ORRI = 0x32000000,
317 I3404_EORI = 0x52000000,
318
319 /* Move wide immediate instructions. */
320 I3405_MOVN = 0x12800000,
321 I3405_MOVZ = 0x52800000,
322 I3405_MOVK = 0x72800000,
323
324 /* PC relative addressing instructions. */
325 I3406_ADR = 0x10000000,
326 I3406_ADRP = 0x90000000,
327
328 /* Add/subtract shifted register instructions (without a shift). */
329 I3502_ADD = 0x0b000000,
330 I3502_ADDS = 0x2b000000,
331 I3502_SUB = 0x4b000000,
332 I3502_SUBS = 0x6b000000,
333
334 /* Add/subtract shifted register instructions (with a shift). */
335 I3502S_ADD_LSL = I3502_ADD,
336
337 /* Add/subtract with carry instructions. */
338 I3503_ADC = 0x1a000000,
339 I3503_SBC = 0x5a000000,
340
341 /* Conditional select instructions. */
342 I3506_CSEL = 0x1a800000,
343 I3506_CSINC = 0x1a800400,
344
345 /* Data-processing (1 source) instructions. */
346 I3507_REV16 = 0x5ac00400,
347 I3507_REV32 = 0x5ac00800,
348 I3507_REV64 = 0x5ac00c00,
349
350 /* Data-processing (2 source) instructions. */
351 I3508_LSLV = 0x1ac02000,
352 I3508_LSRV = 0x1ac02400,
353 I3508_ASRV = 0x1ac02800,
354 I3508_RORV = 0x1ac02c00,
355 I3508_SMULH = 0x9b407c00,
356 I3508_UMULH = 0x9bc07c00,
357 I3508_UDIV = 0x1ac00800,
358 I3508_SDIV = 0x1ac00c00,
359
360 /* Data-processing (3 source) instructions. */
361 I3509_MADD = 0x1b000000,
362 I3509_MSUB = 0x1b008000,
363
364 /* Logical shifted register instructions (without a shift). */
365 I3510_AND = 0x0a000000,
366 I3510_BIC = 0x0a200000,
367 I3510_ORR = 0x2a000000,
368 I3510_ORN = 0x2a200000,
369 I3510_EOR = 0x4a000000,
370 I3510_EON = 0x4a200000,
371 I3510_ANDS = 0x6a000000,
372 } AArch64Insn;
373
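/* Read back the 32-bit instruction currently at the output pointer.
   Used when retranslating, so that previously written offset bits
   can be preserved. */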
374 static inline uint32_t tcg_in32(TCGContext *s)
375 {
376 uint32_t v = *(uint32_t *)s->code_ptr;
377 return v;
378 }
379
380 /* Emit an opcode with "type-checking" of the format. */
381 #define tcg_out_insn(S, FMT, OP, ...) \
382 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
383
384 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
385 TCGReg rt, int imm19)
386 {
387 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
388 }
389
390 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
391 TCGCond c, int imm19)
392 {
393 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
394 }
395
396 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
397 {
398 tcg_out32(s, insn | (imm26 & 0x03ffffff));
399 }
400
401 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
402 {
403 tcg_out32(s, insn | rn << 5);
404 }
405
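/* Emit a 64-bit LDP/STP with a signed 7-bit offset scaled by 8.  PRE and W
   select among the signed-offset, pre-index and post-index forms. */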
406 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
407 TCGReg r1, TCGReg r2, TCGReg rn,
408 tcg_target_long ofs, bool pre, bool w)
409 {
410 insn |= 1u << 31; /* ext */
411 insn |= pre << 24;
412 insn |= w << 23;
413
414 assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
415 insn |= (ofs & (0x7f << 3)) << (15 - 3);
416
417 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
418 }
419
420 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
421 TCGReg rd, TCGReg rn, uint64_t aimm)
422 {
423 if (aimm > 0xfff) {
424 assert((aimm & 0xfff) == 0);
425 aimm >>= 12;
426 assert(aimm <= 0xfff);
427 aimm |= 1 << 12; /* apply LSL 12 */
428 }
429 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
430 }
431
432 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
433 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
434 that feed the DecodeBitMasks pseudo function. */
435 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
436 TCGReg rd, TCGReg rn, int n, int immr, int imms)
437 {
438 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
439 | rn << 5 | rd);
440 }
441
442 #define tcg_out_insn_3404 tcg_out_insn_3402
443
444 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
445 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
446 {
447 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
448 | rn << 5 | rd);
449 }
450
451 /* This function is used for the Move (wide immediate) instruction group.
452 Note that SHIFT is a full shift count, not the 2 bit HW field. */
453 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
454 TCGReg rd, uint16_t half, unsigned shift)
455 {
456 assert((shift & ~0x30) == 0);
457 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
458 }
459
460 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
461 TCGReg rd, int64_t disp)
462 {
463 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
464 }
465
466 /* This function is for 3.5.2 (Add/Subtract shifted register), for
467 the rare occasion when we actually want to supply a shift amount. */
468 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
469 TCGType ext, TCGReg rd, TCGReg rn,
470 TCGReg rm, int imm6)
471 {
472 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
473 }
474
475 /* This function is for 3.5.2 (Add/subtract shifted register),
476 and 3.5.10 (Logical shifted register), for the vast majority of cases
477 when we don't want to apply a shift. Thus it can also be used for
478 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
479 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
480 TCGReg rd, TCGReg rn, TCGReg rm)
481 {
482 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
483 }
484
485 #define tcg_out_insn_3503 tcg_out_insn_3502
486 #define tcg_out_insn_3508 tcg_out_insn_3502
487 #define tcg_out_insn_3510 tcg_out_insn_3502
488
489 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
490 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
491 {
492 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
493 | tcg_cond_to_aarch64[c] << 12);
494 }
495
496 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
497 TCGReg rd, TCGReg rn)
498 {
499 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
500 }
501
502 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
503 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
504 {
505 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
506 }
507
508 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
509 TCGReg rd, TCGReg base, TCGReg regoff)
510 {
511 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
512 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | base << 5 | rd);
513 }
514
515
516 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
517 TCGReg rd, TCGReg rn, intptr_t offset)
518 {
519 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
520 }
521
522 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
523 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
524 {
525 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
526 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
527 }
528
529 /* Register to register move using ORR (shifted register with no shift). */
530 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
531 {
532 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
533 }
534
535 /* Register to register move using ADDI (move to/from SP). */
536 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
537 {
538 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
539 }
540
541 /* This function is used for the Logical (immediate) instruction group.
542 The value of LIMM must satisfy IS_LIMM. See the comment above about
543 only supporting simplified logical immediates. */
544 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
545 TCGReg rd, TCGReg rn, uint64_t limm)
546 {
547 unsigned h, l, r, c;
548
549 assert(is_limm(limm));
550
551 h = clz64(limm);
552 l = ctz64(limm);
553 if (l == 0) {
554 r = 0; /* form 0....01....1 */
555 c = ctz64(~limm) - 1;
556 if (h == 0) {
557 r = clz64(~limm); /* form 1..10..01..1 */
558 c += r;
559 }
560 } else {
561 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
562 c = r - h - 1;
563 }
564 if (ext == TCG_TYPE_I32) {
565 r &= 31;
566 c &= 31;
567 }
568
569 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
570 }
571
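/* Load an arbitrary constant into RD.  Handle the common single
   MOVZ/MOVN cases first, then logical immediates (ORR), then PC-relative
   ADRP+ADD, and finally build the value with MOVZ/MOVN plus a MOVK for
   each remaining 16-bit lane. */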
572 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
573 tcg_target_long value)
574 {
575 AArch64Insn insn;
576 int i, wantinv, shift;
577 tcg_target_long svalue = value;
578 tcg_target_long ivalue = ~value;
579 tcg_target_long imask;
580
581 /* For 32-bit values, discard potential garbage in value. For 64-bit
582 values within [2**31, 2**32-1], we can create smaller sequences by
583 interpreting this as a negative 32-bit number, while ensuring that
584 the high 32 bits are cleared by setting SF=0. */
585 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
586 svalue = (int32_t)value;
587 value = (uint32_t)value;
588 ivalue = (uint32_t)ivalue;
589 type = TCG_TYPE_I32;
590 }
591
592 /* Speed things up by handling the common case of small positive
593 and negative values specially. */
594 if ((value & ~0xffffull) == 0) {
595 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
596 return;
597 } else if ((ivalue & ~0xffffull) == 0) {
598 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
599 return;
600 }
601
602 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
603 use the sign-extended value. That lets us match rotated values such
604 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
605 if (is_limm(svalue)) {
606 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
607 return;
608 }
609
610 /* Look for host pointer values within 4G of the PC. This happens
611 often when loading pointers to QEMU's own data structures. */
612 if (type == TCG_TYPE_I64) {
613 tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
614 if (disp == sextract64(disp, 0, 21)) {
615 tcg_out_insn(s, 3406, ADRP, rd, disp);
616 if (value & 0xfff) {
617 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
618 }
619 return;
620 }
621 }
622
623 /* Would it take fewer insns to begin with MOVN? For the value and its
624 inverse, count the number of 16-bit lanes that are 0. */
625 for (i = wantinv = imask = 0; i < 64; i += 16) {
626 tcg_target_long mask = 0xffffull << i;
627 if ((value & mask) == 0) {
628 wantinv -= 1;
629 }
630 if ((ivalue & mask) == 0) {
631 wantinv += 1;
632 imask |= mask;
633 }
634 }
635
636 /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */
637 insn = I3405_MOVZ;
638 if (wantinv > 0) {
639 value = ivalue;
640 insn = I3405_MOVN;
641 }
642
643 /* Find the lowest lane that is not 0x0000. */
644 shift = ctz64(value) & (63 & -16);
645 tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift);
646
647 if (wantinv > 0) {
648 /* Re-invert the value, so MOVK sees non-inverted bits. */
649 value = ~value;
650 /* Clear out all the 0xffff lanes. */
651 value ^= imask;
652 }
653 /* Clear out the lane that we just set. */
654 value &= ~(0xffffUL << shift);
655
656 /* Iterate until all lanes have been set, and thus cleared from VALUE. */
657 while (value) {
658 shift = ctz64(value) & (63 & -16);
659 tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
660 value &= ~(0xffffUL << shift);
661 }
662 }
663
664 /* Define something more legible for general use. */
665 #define tcg_out_ldst_r tcg_out_insn_3310
666
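/* Emit a load/store with an arbitrary byte offset, choosing among the
   scaled unsigned-offset, unscaled signed-offset and register-offset
   addressing forms as the offset allows. */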
667 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
668 TCGReg rd, TCGReg rn, intptr_t offset)
669 {
670 TCGMemOp size = (uint32_t)insn >> 30;
671
672 /* If the offset is naturally aligned and in range, then we can
673 use the scaled uimm12 encoding */
674 if (offset >= 0 && !(offset & ((1 << size) - 1))) {
675 uintptr_t scaled_uimm = offset >> size;
676 if (scaled_uimm <= 0xfff) {
677 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
678 return;
679 }
680 }
681
682 /* Small signed offsets can use the unscaled encoding. */
683 if (offset >= -256 && offset < 256) {
684 tcg_out_insn_3312(s, insn, rd, rn, offset);
685 return;
686 }
687
688 /* Worst-case scenario, move offset to temp register, use reg offset. */
689 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
690 tcg_out_ldst_r(s, insn, rd, rn, TCG_REG_TMP);
691 }
692
693 static inline void tcg_out_mov(TCGContext *s,
694 TCGType type, TCGReg ret, TCGReg arg)
695 {
696 if (ret != arg) {
697 tcg_out_movr(s, type, ret, arg);
698 }
699 }
700
701 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
702 TCGReg arg1, intptr_t arg2)
703 {
704 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
705 arg, arg1, arg2);
706 }
707
708 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
709 TCGReg arg1, intptr_t arg2)
710 {
711 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
712 arg, arg1, arg2);
713 }
714
715 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
716 TCGReg rn, unsigned int a, unsigned int b)
717 {
718 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
719 }
720
721 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
722 TCGReg rn, unsigned int a, unsigned int b)
723 {
724 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
725 }
726
727 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
728 TCGReg rn, unsigned int a, unsigned int b)
729 {
730 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
731 }
732
733 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
734 TCGReg rn, TCGReg rm, unsigned int a)
735 {
736 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
737 }
738
739 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
740 TCGReg rd, TCGReg rn, unsigned int m)
741 {
742 int bits = ext ? 64 : 32;
743 int max = bits - 1;
744 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
745 }
746
747 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
748 TCGReg rd, TCGReg rn, unsigned int m)
749 {
750 int max = ext ? 63 : 31;
751 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
752 }
753
754 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
755 TCGReg rd, TCGReg rn, unsigned int m)
756 {
757 int max = ext ? 63 : 31;
758 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
759 }
760
761 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
762 TCGReg rd, TCGReg rn, unsigned int m)
763 {
764 int max = ext ? 63 : 31;
765 tcg_out_extr(s, ext, rd, rn, rn, m & max);
766 }
767
768 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
769 TCGReg rd, TCGReg rn, unsigned int m)
770 {
771 int bits = ext ? 64 : 32;
772 int max = bits - 1;
773 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
774 }
775
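/* Deposit WIDTH bits of RN into RD starting at bit LSB, i.e. the BFI
   alias of BFM. */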
776 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
777 TCGReg rn, unsigned lsb, unsigned width)
778 {
779 unsigned size = ext ? 64 : 32;
780 unsigned a = (size - lsb) & (size - 1);
781 unsigned b = width - 1;
782 tcg_out_bfm(s, ext, rd, rn, a, b);
783 }
784
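/* Set the flags for a comparison of A against B, where B is either a
   register or a constant (using the CMP/CMN immediate aliases). */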
785 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
786 tcg_target_long b, bool const_b)
787 {
788 if (const_b) {
789 /* Using CMP or CMN aliases. */
790 if (b >= 0) {
791 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
792 } else {
793 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
794 }
795 } else {
796 /* Using CMP alias SUBS wzr, Wn, Wm */
797 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
798 }
799 }
800
801 static inline void tcg_out_goto(TCGContext *s, intptr_t target)
802 {
803 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
804
805 if (offset < -0x02000000 || offset >= 0x02000000) {
806 /* out of 26bit range */
807 tcg_abort();
808 }
809
810 tcg_out_insn(s, 3206, B, offset);
811 }
812
813 static inline void tcg_out_goto_noaddr(TCGContext *s)
814 {
815 /* We pay attention here to not modify the branch target by reading from
816 the buffer. This ensures that caches and memory are kept coherent during
817 retranslation. Mask away possible garbage in the high bits for the
818 first translation, while keeping the offset bits for retranslation. */
819 uint32_t old = tcg_in32(s);
820 tcg_out_insn(s, 3206, B, old);
821 }
822
823 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
824 {
825 /* See comments in tcg_out_goto_noaddr. */
826 uint32_t old = tcg_in32(s) >> 5;
827 tcg_out_insn(s, 3202, B_C, c, old);
828 }
829
830 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
831 {
832 tcg_out_insn(s, 3207, BLR, reg);
833 }
834
835 static inline void tcg_out_call(TCGContext *s, intptr_t target)
836 {
837 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
838
839 if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit range */
840 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
841 tcg_out_callr(s, TCG_REG_TMP);
842 } else {
843 tcg_out_insn(s, 3206, BL, offset);
844 }
845 }
846
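/* Patch the direct branch emitted for goto_tb at JMP_ADDR so that it
   targets ADDR, then flush the icache for the modified word. */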
847 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
848 {
849 intptr_t target = addr;
850 intptr_t offset = (target - (intptr_t)jmp_addr) / 4;
851
852 if (offset < -0x02000000 || offset >= 0x02000000) {
853 /* out of 26bit range */
854 tcg_abort();
855 }
856
857 patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0);
858 flush_icache_range(jmp_addr, jmp_addr + 4);
859 }
860
861 static inline void tcg_out_goto_label(TCGContext *s, int label_index)
862 {
863 TCGLabel *l = &s->labels[label_index];
864
865 if (!l->has_value) {
866 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0);
867 tcg_out_goto_noaddr(s);
868 } else {
869 tcg_out_goto(s, l->u.value);
870 }
871 }
872
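/* Emit a conditional branch to LABEL: CBZ/CBNZ for EQ/NE comparisons
   against zero, otherwise CMP followed by B.cond. */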
873 static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
874 TCGArg b, bool b_const, int label)
875 {
876 TCGLabel *l = &s->labels[label];
877 intptr_t offset;
878 bool need_cmp;
879
880 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
881 need_cmp = false;
882 } else {
883 need_cmp = true;
884 tcg_out_cmp(s, ext, a, b, b_const);
885 }
886
887 if (!l->has_value) {
888 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label, 0);
889 offset = tcg_in32(s) >> 5;
890 } else {
891 offset = l->u.value - (uintptr_t)s->code_ptr;
892 offset >>= 2;
893 assert(offset >= -0x40000 && offset < 0x40000);
894 }
895
896 if (need_cmp) {
897 tcg_out_insn(s, 3202, B_C, c, offset);
898 } else if (c == TCG_COND_EQ) {
899 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
900 } else {
901 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
902 }
903 }
904
905 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
906 {
907 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
908 }
909
910 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
911 {
912 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
913 }
914
915 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
916 {
917 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
918 }
919
920 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
921 TCGReg rd, TCGReg rn)
922 {
923 /* Using ALIASes SXTB, SXTH, SXTW of SBFM Xd, Xn, #0, #7|15|31 */
924 int bits = (8 << s_bits) - 1;
925 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
926 }
927
928 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
929 TCGReg rd, TCGReg rn)
930 {
931 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
932 int bits = (8 << s_bits) - 1;
933 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
934 }
935
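/* Add or subtract an arithmetic immediate, selecting ADDI or SUBI so that
   the encoded immediate is always non-negative. */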
936 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
937 TCGReg rn, int64_t aimm)
938 {
939 if (aimm >= 0) {
940 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
941 } else {
942 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
943 }
944 }
945
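/* Expand a double-word add/sub: the low half with ADDS/SUBS (or their
   immediate forms), the high half with ADC/SBC.  TCG_REG_TMP is used when
   the low result would overwrite one of the high-half inputs. */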
946 static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
947 TCGReg rh, TCGReg al, TCGReg ah,
948 tcg_target_long bl, tcg_target_long bh,
949 bool const_bl, bool const_bh, bool sub)
950 {
951 TCGReg orig_rl = rl;
952 AArch64Insn insn;
953
954 if (rl == ah || (!const_bh && rl == bh)) {
955 rl = TCG_REG_TMP;
956 }
957
958 if (const_bl) {
959 insn = I3401_ADDSI;
960 if ((bl < 0) ^ sub) {
961 insn = I3401_SUBSI;
962 bl = -bl;
963 }
964 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
965 } else {
966 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
967 }
968
969 insn = I3503_ADC;
970 if (const_bh) {
971 /* Note that the only two constants we support are 0 and -1, and
972 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
973 if ((bh != 0) ^ sub) {
974 insn = I3503_SBC;
975 }
976 bh = TCG_REG_XZR;
977 } else if (sub) {
978 insn = I3503_SBC;
979 }
980 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
981
982 tcg_out_mov(s, ext, orig_rl, rl);
983 }
984
985 #ifdef CONFIG_SOFTMMU
986 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
987 * int mmu_idx, uintptr_t ra)
988 */
989 static const void * const qemu_ld_helpers[16] = {
990 [MO_UB] = helper_ret_ldub_mmu,
991 [MO_LEUW] = helper_le_lduw_mmu,
992 [MO_LEUL] = helper_le_ldul_mmu,
993 [MO_LEQ] = helper_le_ldq_mmu,
994 [MO_BEUW] = helper_be_lduw_mmu,
995 [MO_BEUL] = helper_be_ldul_mmu,
996 [MO_BEQ] = helper_be_ldq_mmu,
997 };
998
999 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1000 * uintxx_t val, int mmu_idx, uintptr_t ra)
1001 */
1002 static const void * const qemu_st_helpers[16] = {
1003 [MO_UB] = helper_ret_stb_mmu,
1004 [MO_LEUW] = helper_le_stw_mmu,
1005 [MO_LEUL] = helper_le_stl_mmu,
1006 [MO_LEQ] = helper_le_stq_mmu,
1007 [MO_BEUW] = helper_be_stw_mmu,
1008 [MO_BEUL] = helper_be_stl_mmu,
1009 [MO_BEQ] = helper_be_stq_mmu,
1010 };
1011
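/* Materialize a code address known to be within +/- 1MB of the current
   output pointer, using a single ADR. */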
1012 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, uintptr_t addr)
1013 {
1014 addr -= (uintptr_t)s->code_ptr;
1015 assert(addr == sextract64(addr, 0, 21));
1016 tcg_out_insn(s, 3406, ADR, rd, addr);
1017 }
1018
1019 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1020 {
1021 TCGMemOp opc = lb->opc;
1022 TCGMemOp size = opc & MO_SIZE;
1023
1024 reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
1025
1026 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
1027 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1028 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
1029 tcg_out_adr(s, TCG_REG_X3, (intptr_t)lb->raddr);
1030 tcg_out_call(s, (intptr_t)qemu_ld_helpers[opc & ~MO_SIGN]);
1031 if (opc & MO_SIGN) {
1032 tcg_out_sxt(s, TCG_TYPE_I64, size, lb->datalo_reg, TCG_REG_X0);
1033 } else {
1034 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1035 }
1036
1037 tcg_out_goto(s, (intptr_t)lb->raddr);
1038 }
1039
1040 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1041 {
1042 TCGMemOp opc = lb->opc;
1043 TCGMemOp size = opc & MO_SIZE;
1044
1045 reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
1046
1047 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
1048 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1049 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1050 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
1051 tcg_out_adr(s, TCG_REG_X4, (intptr_t)lb->raddr);
1052 tcg_out_call(s, (intptr_t)qemu_st_helpers[opc]);
1053 tcg_out_goto(s, (intptr_t)lb->raddr);
1054 }
1055
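/* Record the operands of a qemu_ld/st so that the out-of-line slow path
   can be emitted once the body of the TB has been generated. */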
1056 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
1057 TCGReg data_reg, TCGReg addr_reg,
1058 int mem_index,
1059 uint8_t *raddr, uint8_t *label_ptr)
1060 {
1061 TCGLabelQemuLdst *label = new_ldst_label(s);
1062
1063 label->is_ld = is_ld;
1064 label->opc = opc;
1065 label->datalo_reg = data_reg;
1066 label->addrlo_reg = addr_reg;
1067 label->mem_index = mem_index;
1068 label->raddr = raddr;
1069 label->label_ptr[0] = label_ptr;
1070 }
1071
1072 /* Load and compare a TLB entry, emitting the conditional jump to the
1073 slow path for the failure case, which will be patched later when finalizing
1074 the slow path. Generated code returns the host addend in X1,
1075 clobbers X0,X2,X3,TMP. */
1076 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits,
1077 uint8_t **label_ptr, int mem_index, bool is_read)
1078 {
1079 TCGReg base = TCG_AREG0;
1080 int tlb_offset = is_read ?
1081 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1082 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1083
1084 /* Extract the TLB index from the address into X0.
1085 X0<CPU_TLB_BITS:0> =
1086 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1087 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1088 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1089
1090 /* Store the page mask part of the address and the low s_bits into X3.
1091 Later this allows checking for equality and alignment at the same time.
1092 X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
1093 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
1094 addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
1095
1096 /* Add any "high bits" from the tlb offset to the env address into X2,
1097 to take advantage of the LSL12 form of the ADDI instruction.
1098 X2 = env + (tlb_offset & 0xfff000) */
1099 if (tlb_offset & 0xfff000) {
1100 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1101 tlb_offset & 0xfff000);
1102 base = TCG_REG_X2;
1103 }
1104
1105 /* Merge the tlb index contribution into X2.
1106 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1107 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1108 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1109
1110 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1111 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1112 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1113 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
1114
1115 /* Load the tlb addend. Do that early to avoid stalling.
1116 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1117 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1118 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1119 (is_read ? offsetof(CPUTLBEntry, addr_read)
1120 : offsetof(CPUTLBEntry, addr_write)));
1121
1122 /* Perform the address comparison. */
1123 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1124
1125 /* If not equal, we jump to the slow path. */
1126 *label_ptr = s->code_ptr;
1127 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1128 }
1129
1130 #endif /* CONFIG_SOFTMMU */
1131
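/* Emit the actual load for qemu_ld, using a register-offset addressing
   form; byte-swap the result when MO_BSWAP is requested. */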
1132 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop,
1133 TCGReg data_r, TCGReg addr_r, TCGReg off_r)
1134 {
1135 const TCGMemOp bswap = memop & MO_BSWAP;
1136
1137 switch (memop & MO_SSIZE) {
1138 case MO_UB:
1139 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, off_r);
1140 break;
1141 case MO_SB:
1142 tcg_out_ldst_r(s, I3312_LDRSBX, data_r, addr_r, off_r);
1143 break;
1144 case MO_UW:
1145 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, off_r);
1146 if (bswap) {
1147 tcg_out_rev16(s, data_r, data_r);
1148 }
1149 break;
1150 case MO_SW:
1151 if (bswap) {
1152 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, off_r);
1153 tcg_out_rev16(s, data_r, data_r);
1154 tcg_out_sxt(s, TCG_TYPE_I64, MO_16, data_r, data_r);
1155 } else {
1156 tcg_out_ldst_r(s, I3312_LDRSHX, data_r, addr_r, off_r);
1157 }
1158 break;
1159 case MO_UL:
1160 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, off_r);
1161 if (bswap) {
1162 tcg_out_rev32(s, data_r, data_r);
1163 }
1164 break;
1165 case MO_SL:
1166 if (bswap) {
1167 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, off_r);
1168 tcg_out_rev32(s, data_r, data_r);
1169 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1170 } else {
1171 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, off_r);
1172 }
1173 break;
1174 case MO_Q:
1175 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, off_r);
1176 if (bswap) {
1177 tcg_out_rev64(s, data_r, data_r);
1178 }
1179 break;
1180 default:
1181 tcg_abort();
1182 }
1183 }
1184
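/* Emit the actual store for qemu_st: byte-swap the data into TCG_REG_TMP
   first when MO_BSWAP is requested, then store with a register offset. */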
1185 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1186 TCGReg data_r, TCGReg addr_r, TCGReg off_r)
1187 {
1188 const TCGMemOp bswap = memop & MO_BSWAP;
1189
1190 switch (memop & MO_SIZE) {
1191 case MO_8:
1192 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, off_r);
1193 break;
1194 case MO_16:
1195 if (bswap && data_r != TCG_REG_XZR) {
1196 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1197 data_r = TCG_REG_TMP;
1198 }
1199 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, off_r);
1200 break;
1201 case MO_32:
1202 if (bswap && data_r != TCG_REG_XZR) {
1203 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1204 data_r = TCG_REG_TMP;
1205 }
1206 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, off_r);
1207 break;
1208 case MO_64:
1209 if (bswap && data_r != TCG_REG_XZR) {
1210 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1211 data_r = TCG_REG_TMP;
1212 }
1213 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, off_r);
1214 break;
1215 default:
1216 tcg_abort();
1217 }
1218 }
1219
1220 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1221 TCGMemOp memop, int mem_index)
1222 {
1223 #ifdef CONFIG_SOFTMMU
1224 TCGMemOp s_bits = memop & MO_SIZE;
1225 uint8_t *label_ptr;
1226
1227 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
1228 tcg_out_qemu_ld_direct(s, memop, data_reg, addr_reg, TCG_REG_X1);
1229 add_qemu_ldst_label(s, true, memop, data_reg, addr_reg,
1230 mem_index, s->code_ptr, label_ptr);
1231 #else /* !CONFIG_SOFTMMU */
1232 tcg_out_qemu_ld_direct(s, memop, data_reg, addr_reg,
1233 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1234 #endif /* CONFIG_SOFTMMU */
1235 }
1236
1237 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1238 TCGMemOp memop, int mem_index)
1239 {
1240 #ifdef CONFIG_SOFTMMU
1241 TCGMemOp s_bits = memop & MO_SIZE;
1242 uint8_t *label_ptr;
1243
1244 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
1245 tcg_out_qemu_st_direct(s, memop, data_reg, addr_reg, TCG_REG_X1);
1246 add_qemu_ldst_label(s, false, memop, data_reg, addr_reg,
1247 mem_index, s->code_ptr, label_ptr);
1248 #else /* !CONFIG_SOFTMMU */
1249 tcg_out_qemu_st_direct(s, memop, data_reg, addr_reg,
1250 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1251 #endif /* CONFIG_SOFTMMU */
1252 }
1253
1254 static uint8_t *tb_ret_addr;
1255
1256 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1257 const TCGArg args[TCG_MAX_OP_ARGS],
1258 const int const_args[TCG_MAX_OP_ARGS])
1259 {
1260 /* 99% of the time, we can signal the use of extension registers
1261 by looking to see if the opcode handles 64-bit data. */
1262 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1263
1264 /* Hoist the loads of the most common arguments. */
1265 TCGArg a0 = args[0];
1266 TCGArg a1 = args[1];
1267 TCGArg a2 = args[2];
1268 int c2 = const_args[2];
1269
1270 /* Some operands are defined with "rZ" constraint, a register or
1271 the zero register. These need not actually test args[I] == 0. */
1272 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1273
1274 switch (opc) {
1275 case INDEX_op_exit_tb:
1276 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1277 tcg_out_goto(s, (intptr_t)tb_ret_addr);
1278 break;
1279
1280 case INDEX_op_goto_tb:
1281 #ifndef USE_DIRECT_JUMP
1282 #error "USE_DIRECT_JUMP required for aarch64"
1283 #endif
1284 assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
1285 s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
1286 /* The actual branch destination will be patched by
1287 aarch64_tb_set_jmp_target later; beware retranslation. */
1288 tcg_out_goto_noaddr(s);
1289 s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
1290 break;
1291
1292 case INDEX_op_call:
1293 if (const_args[0]) {
1294 tcg_out_call(s, a0);
1295 } else {
1296 tcg_out_callr(s, a0);
1297 }
1298 break;
1299
1300 case INDEX_op_br:
1301 tcg_out_goto_label(s, a0);
1302 break;
1303
1304 case INDEX_op_ld8u_i32:
1305 case INDEX_op_ld8u_i64:
1306 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
1307 break;
1308 case INDEX_op_ld8s_i32:
1309 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
1310 break;
1311 case INDEX_op_ld8s_i64:
1312 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
1313 break;
1314 case INDEX_op_ld16u_i32:
1315 case INDEX_op_ld16u_i64:
1316 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
1317 break;
1318 case INDEX_op_ld16s_i32:
1319 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
1320 break;
1321 case INDEX_op_ld16s_i64:
1322 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
1323 break;
1324 case INDEX_op_ld_i32:
1325 case INDEX_op_ld32u_i64:
1326 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
1327 break;
1328 case INDEX_op_ld32s_i64:
1329 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
1330 break;
1331 case INDEX_op_ld_i64:
1332 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
1333 break;
1334
1335 case INDEX_op_st8_i32:
1336 case INDEX_op_st8_i64:
1337 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
1338 break;
1339 case INDEX_op_st16_i32:
1340 case INDEX_op_st16_i64:
1341 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
1342 break;
1343 case INDEX_op_st_i32:
1344 case INDEX_op_st32_i64:
1345 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
1346 break;
1347 case INDEX_op_st_i64:
1348 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
1349 break;
1350
1351 case INDEX_op_add_i32:
1352 a2 = (int32_t)a2;
1353 /* FALLTHRU */
1354 case INDEX_op_add_i64:
1355 if (c2) {
1356 tcg_out_addsubi(s, ext, a0, a1, a2);
1357 } else {
1358 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1359 }
1360 break;
1361
1362 case INDEX_op_sub_i32:
1363 a2 = (int32_t)a2;
1364 /* FALLTHRU */
1365 case INDEX_op_sub_i64:
1366 if (c2) {
1367 tcg_out_addsubi(s, ext, a0, a1, -a2);
1368 } else {
1369 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1370 }
1371 break;
1372
1373 case INDEX_op_neg_i64:
1374 case INDEX_op_neg_i32:
1375 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1376 break;
1377
1378 case INDEX_op_and_i32:
1379 a2 = (int32_t)a2;
1380 /* FALLTHRU */
1381 case INDEX_op_and_i64:
1382 if (c2) {
1383 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1384 } else {
1385 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1386 }
1387 break;
1388
1389 case INDEX_op_andc_i32:
1390 a2 = (int32_t)a2;
1391 /* FALLTHRU */
1392 case INDEX_op_andc_i64:
1393 if (c2) {
1394 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1395 } else {
1396 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1397 }
1398 break;
1399
1400 case INDEX_op_or_i32:
1401 a2 = (int32_t)a2;
1402 /* FALLTHRU */
1403 case INDEX_op_or_i64:
1404 if (c2) {
1405 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1406 } else {
1407 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1408 }
1409 break;
1410
1411 case INDEX_op_orc_i32:
1412 a2 = (int32_t)a2;
1413 /* FALLTHRU */
1414 case INDEX_op_orc_i64:
1415 if (c2) {
1416 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1417 } else {
1418 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1419 }
1420 break;
1421
1422 case INDEX_op_xor_i32:
1423 a2 = (int32_t)a2;
1424 /* FALLTHRU */
1425 case INDEX_op_xor_i64:
1426 if (c2) {
1427 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1428 } else {
1429 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1430 }
1431 break;
1432
1433 case INDEX_op_eqv_i32:
1434 a2 = (int32_t)a2;
1435 /* FALLTHRU */
1436 case INDEX_op_eqv_i64:
1437 if (c2) {
1438 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1439 } else {
1440 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1441 }
1442 break;
1443
1444 case INDEX_op_not_i64:
1445 case INDEX_op_not_i32:
1446 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1447 break;
1448
1449 case INDEX_op_mul_i64:
1450 case INDEX_op_mul_i32:
1451 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1452 break;
1453
1454 case INDEX_op_div_i64:
1455 case INDEX_op_div_i32:
1456 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1457 break;
1458 case INDEX_op_divu_i64:
1459 case INDEX_op_divu_i32:
1460 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1461 break;
1462
1463 case INDEX_op_rem_i64:
1464 case INDEX_op_rem_i32:
1465 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1466 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1467 break;
1468 case INDEX_op_remu_i64:
1469 case INDEX_op_remu_i32:
1470 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1471 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1472 break;
1473
1474 case INDEX_op_shl_i64:
1475 case INDEX_op_shl_i32:
1476 if (c2) {
1477 tcg_out_shl(s, ext, a0, a1, a2);
1478 } else {
1479 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1480 }
1481 break;
1482
1483 case INDEX_op_shr_i64:
1484 case INDEX_op_shr_i32:
1485 if (c2) {
1486 tcg_out_shr(s, ext, a0, a1, a2);
1487 } else {
1488 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1489 }
1490 break;
1491
1492 case INDEX_op_sar_i64:
1493 case INDEX_op_sar_i32:
1494 if (c2) {
1495 tcg_out_sar(s, ext, a0, a1, a2);
1496 } else {
1497 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1498 }
1499 break;
1500
1501 case INDEX_op_rotr_i64:
1502 case INDEX_op_rotr_i32:
1503 if (c2) {
1504 tcg_out_rotr(s, ext, a0, a1, a2);
1505 } else {
1506 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1507 }
1508 break;
1509
1510 case INDEX_op_rotl_i64:
1511 case INDEX_op_rotl_i32:
1512 if (c2) {
1513 tcg_out_rotl(s, ext, a0, a1, a2);
1514 } else {
1515 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1516 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1517 }
1518 break;
1519
1520 case INDEX_op_brcond_i32:
1521 a1 = (int32_t)a1;
1522 /* FALLTHRU */
1523 case INDEX_op_brcond_i64:
1524 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], args[3]);
1525 break;
1526
1527 case INDEX_op_setcond_i32:
1528 a2 = (int32_t)a2;
1529 /* FALLTHRU */
1530 case INDEX_op_setcond_i64:
1531 tcg_out_cmp(s, ext, a1, a2, c2);
1532 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1533 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1534 TCG_REG_XZR, tcg_invert_cond(args[3]));
1535 break;
1536
1537 case INDEX_op_movcond_i32:
1538 a2 = (int32_t)a2;
1539 /* FALLTHRU */
1540 case INDEX_op_movcond_i64:
1541 tcg_out_cmp(s, ext, a1, a2, c2);
1542 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1543 break;
1544
1545 case INDEX_op_qemu_ld_i32:
1546 case INDEX_op_qemu_ld_i64:
1547 tcg_out_qemu_ld(s, a0, a1, a2, args[3]);
1548 break;
1549 case INDEX_op_qemu_st_i32:
1550 case INDEX_op_qemu_st_i64:
1551 tcg_out_qemu_st(s, REG0(0), a1, a2, args[3]);
1552 break;
1553
1554 case INDEX_op_bswap64_i64:
1555 tcg_out_rev64(s, a0, a1);
1556 break;
1557 case INDEX_op_bswap32_i64:
1558 case INDEX_op_bswap32_i32:
1559 tcg_out_rev32(s, a0, a1);
1560 break;
1561 case INDEX_op_bswap16_i64:
1562 case INDEX_op_bswap16_i32:
1563 tcg_out_rev16(s, a0, a1);
1564 break;
1565
1566 case INDEX_op_ext8s_i64:
1567 case INDEX_op_ext8s_i32:
1568 tcg_out_sxt(s, ext, MO_8, a0, a1);
1569 break;
1570 case INDEX_op_ext16s_i64:
1571 case INDEX_op_ext16s_i32:
1572 tcg_out_sxt(s, ext, MO_16, a0, a1);
1573 break;
1574 case INDEX_op_ext32s_i64:
1575 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1576 break;
1577 case INDEX_op_ext8u_i64:
1578 case INDEX_op_ext8u_i32:
1579 tcg_out_uxt(s, MO_8, a0, a1);
1580 break;
1581 case INDEX_op_ext16u_i64:
1582 case INDEX_op_ext16u_i32:
1583 tcg_out_uxt(s, MO_16, a0, a1);
1584 break;
1585 case INDEX_op_ext32u_i64:
1586 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1587 break;
1588
1589 case INDEX_op_deposit_i64:
1590 case INDEX_op_deposit_i32:
1591 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1592 break;
1593
1594 case INDEX_op_add2_i32:
1595 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1596 (int32_t)args[4], args[5], const_args[4],
1597 const_args[5], false);
1598 break;
1599 case INDEX_op_add2_i64:
1600 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1601 args[5], const_args[4], const_args[5], false);
1602 break;
1603 case INDEX_op_sub2_i32:
1604 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1605 (int32_t)args[4], args[5], const_args[4],
1606 const_args[5], true);
1607 break;
1608 case INDEX_op_sub2_i64:
1609 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1610 args[5], const_args[4], const_args[5], true);
1611 break;
1612
1613 case INDEX_op_muluh_i64:
1614 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1615 break;
1616 case INDEX_op_mulsh_i64:
1617 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1618 break;
1619
1620 case INDEX_op_mov_i64:
1621 case INDEX_op_mov_i32:
1622 case INDEX_op_movi_i64:
1623 case INDEX_op_movi_i32:
1624 /* Always implemented with tcg_out_mov/i, never with tcg_out_op. */
1625 default:
1626 /* Opcode not implemented. */
1627 tcg_abort();
1628 }
1629
1630 #undef REG0
1631 }
1632
1633 static const TCGTargetOpDef aarch64_op_defs[] = {
1634 { INDEX_op_exit_tb, { } },
1635 { INDEX_op_goto_tb, { } },
1636 { INDEX_op_call, { "ri" } },
1637 { INDEX_op_br, { } },
1638
1639 { INDEX_op_mov_i32, { "r", "r" } },
1640 { INDEX_op_mov_i64, { "r", "r" } },
1641
1642 { INDEX_op_movi_i32, { "r" } },
1643 { INDEX_op_movi_i64, { "r" } },
1644
1645 { INDEX_op_ld8u_i32, { "r", "r" } },
1646 { INDEX_op_ld8s_i32, { "r", "r" } },
1647 { INDEX_op_ld16u_i32, { "r", "r" } },
1648 { INDEX_op_ld16s_i32, { "r", "r" } },
1649 { INDEX_op_ld_i32, { "r", "r" } },
1650 { INDEX_op_ld8u_i64, { "r", "r" } },
1651 { INDEX_op_ld8s_i64, { "r", "r" } },
1652 { INDEX_op_ld16u_i64, { "r", "r" } },
1653 { INDEX_op_ld16s_i64, { "r", "r" } },
1654 { INDEX_op_ld32u_i64, { "r", "r" } },
1655 { INDEX_op_ld32s_i64, { "r", "r" } },
1656 { INDEX_op_ld_i64, { "r", "r" } },
1657
1658 { INDEX_op_st8_i32, { "rZ", "r" } },
1659 { INDEX_op_st16_i32, { "rZ", "r" } },
1660 { INDEX_op_st_i32, { "rZ", "r" } },
1661 { INDEX_op_st8_i64, { "rZ", "r" } },
1662 { INDEX_op_st16_i64, { "rZ", "r" } },
1663 { INDEX_op_st32_i64, { "rZ", "r" } },
1664 { INDEX_op_st_i64, { "rZ", "r" } },
1665
1666 { INDEX_op_add_i32, { "r", "r", "rwA" } },
1667 { INDEX_op_add_i64, { "r", "r", "rA" } },
1668 { INDEX_op_sub_i32, { "r", "r", "rwA" } },
1669 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1670 { INDEX_op_mul_i32, { "r", "r", "r" } },
1671 { INDEX_op_mul_i64, { "r", "r", "r" } },
1672 { INDEX_op_div_i32, { "r", "r", "r" } },
1673 { INDEX_op_div_i64, { "r", "r", "r" } },
1674 { INDEX_op_divu_i32, { "r", "r", "r" } },
1675 { INDEX_op_divu_i64, { "r", "r", "r" } },
1676 { INDEX_op_rem_i32, { "r", "r", "r" } },
1677 { INDEX_op_rem_i64, { "r", "r", "r" } },
1678 { INDEX_op_remu_i32, { "r", "r", "r" } },
1679 { INDEX_op_remu_i64, { "r", "r", "r" } },
1680 { INDEX_op_and_i32, { "r", "r", "rwL" } },
1681 { INDEX_op_and_i64, { "r", "r", "rL" } },
1682 { INDEX_op_or_i32, { "r", "r", "rwL" } },
1683 { INDEX_op_or_i64, { "r", "r", "rL" } },
1684 { INDEX_op_xor_i32, { "r", "r", "rwL" } },
1685 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1686 { INDEX_op_andc_i32, { "r", "r", "rwL" } },
1687 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1688 { INDEX_op_orc_i32, { "r", "r", "rwL" } },
1689 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1690 { INDEX_op_eqv_i32, { "r", "r", "rwL" } },
1691 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1692
1693 { INDEX_op_neg_i32, { "r", "r" } },
1694 { INDEX_op_neg_i64, { "r", "r" } },
1695 { INDEX_op_not_i32, { "r", "r" } },
1696 { INDEX_op_not_i64, { "r", "r" } },
1697
1698 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1699 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1700 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1701 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1702 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1703 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1704 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1705 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1706 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1707 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1708
1709 { INDEX_op_brcond_i32, { "r", "rwA" } },
1710 { INDEX_op_brcond_i64, { "r", "rA" } },
1711 { INDEX_op_setcond_i32, { "r", "r", "rwA" } },
1712 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1713 { INDEX_op_movcond_i32, { "r", "r", "rwA", "rZ", "rZ" } },
1714 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1715
1716 { INDEX_op_qemu_ld_i32, { "r", "l" } },
1717 { INDEX_op_qemu_ld_i64, { "r", "l" } },
1718 { INDEX_op_qemu_st_i32, { "lZ", "l" } },
1719 { INDEX_op_qemu_st_i64, { "lZ", "l" } },
1720
1721 { INDEX_op_bswap16_i32, { "r", "r" } },
1722 { INDEX_op_bswap32_i32, { "r", "r" } },
1723 { INDEX_op_bswap16_i64, { "r", "r" } },
1724 { INDEX_op_bswap32_i64, { "r", "r" } },
1725 { INDEX_op_bswap64_i64, { "r", "r" } },
1726
1727 { INDEX_op_ext8s_i32, { "r", "r" } },
1728 { INDEX_op_ext16s_i32, { "r", "r" } },
1729 { INDEX_op_ext8u_i32, { "r", "r" } },
1730 { INDEX_op_ext16u_i32, { "r", "r" } },
1731
1732 { INDEX_op_ext8s_i64, { "r", "r" } },
1733 { INDEX_op_ext16s_i64, { "r", "r" } },
1734 { INDEX_op_ext32s_i64, { "r", "r" } },
1735 { INDEX_op_ext8u_i64, { "r", "r" } },
1736 { INDEX_op_ext16u_i64, { "r", "r" } },
1737 { INDEX_op_ext32u_i64, { "r", "r" } },
1738
1739 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1740 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1741
1742 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
1743 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1744 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
1745 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1746
1747 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1748 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1749
1750 { -1 },
1751 };
1752
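/* Tell the register allocator which registers are available, which are
   call-clobbered, and which are reserved for fixed uses. */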
1753 static void tcg_target_init(TCGContext *s)
1754 {
1755 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1756 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1757
1758 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1759 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1760 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1761 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1762 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1763 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1764 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1765 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1766 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1767 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1768 (1 << TCG_REG_X18) | (1 << TCG_REG_X30));
1769
1770 tcg_regset_clear(s->reserved_regs);
1771 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1772 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1773 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1774 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1775
1776 tcg_add_target_add_op_defs(aarch64_op_defs);
1777 }
1778
1779 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
1780 #define PUSH_SIZE ((30 - 19 + 1) * 8)
1781
1782 #define FRAME_SIZE \
1783 ((PUSH_SIZE \
1784 + TCG_STATIC_CALL_ARGS_SIZE \
1785 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
1786 + TCG_TARGET_STACK_ALIGN - 1) \
1787 & ~(TCG_TARGET_STACK_ALIGN - 1))
1788
1789 /* We're expecting a 2-byte uleb128 encoded value. */
1790 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
1791
1792 /* We're expecting to use a single ADDI insn. */
1793 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
1794
1795 static void tcg_target_qemu_prologue(TCGContext *s)
1796 {
1797 TCGReg r;
1798
1799 /* Push (FP, LR) and allocate space for all saved registers. */
1800 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
1801 TCG_REG_SP, -PUSH_SIZE, 1, 1);
1802
1803 /* Set up frame pointer for canonical unwinding. */
1804 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1805
1806 /* Store callee-preserved regs x19..x28. */
1807 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1808 int ofs = (r - TCG_REG_X19 + 2) * 8;
1809 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1810 }
1811
1812 /* Make stack space for TCG locals. */
1813 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1814 FRAME_SIZE - PUSH_SIZE);
1815
1816 /* Inform TCG about how to find TCG locals with register, offset, size. */
1817 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1818 CPU_TEMP_BUF_NLONGS * sizeof(long));
1819
1820 #if defined(CONFIG_USE_GUEST_BASE)
1821 if (GUEST_BASE) {
1822 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
1823 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1824 }
1825 #endif
1826
1827 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1828 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
1829
1830 tb_ret_addr = s->code_ptr;
1831
1832 /* Remove TCG locals stack space. */
1833 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1834 FRAME_SIZE - PUSH_SIZE);
1835
1836 /* Restore registers x19..x28. */
1837 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1838 int ofs = (r - TCG_REG_X19 + 2) * 8;
1839 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1840 }
1841
1842 /* Pop (FP, LR), restore SP to previous frame. */
1843 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
1844 TCG_REG_SP, PUSH_SIZE, 0, 1);
1845 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
1846 }
1847
1848 typedef struct {
1849 DebugFrameCIE cie;
1850 DebugFrameFDEHeader fde;
1851 uint8_t fde_def_cfa[4];
1852 uint8_t fde_reg_ofs[24];
1853 } DebugFrame;
1854
1855 #define ELF_HOST_MACHINE EM_AARCH64
1856
1857 static DebugFrame debug_frame = {
1858 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1859 .cie.id = -1,
1860 .cie.version = 1,
1861 .cie.code_align = 1,
1862 .cie.data_align = 0x78, /* sleb128 -8 */
1863 .cie.return_column = TCG_REG_LR,
1864
1865 /* Total FDE size does not include the "len" member. */
1866 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
1867
1868 .fde_def_cfa = {
1869 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
1870 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
1871 (FRAME_SIZE >> 7)
1872 },
1873 .fde_reg_ofs = {
1874 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
1875 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
1876 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
1877 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
1878 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
1879 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
1880 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
1881 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
1882 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
1883 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
1884 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
1885 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
1886 }
1887 };
1888
1889 void tcg_register_jit(void *buf, size_t buf_size)
1890 {
1891 debug_frame.fde.func_start = (intptr_t)buf;
1892 debug_frame.fde.func_len = buf_size;
1893
1894 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
1895 }