1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-be-ldst.h"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType when setting the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifndef NDEBUG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
27 };
28 #endif /* NDEBUG */
29
30 static const int tcg_target_reg_alloc_order[] = {
31 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
32 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
33 TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
34
35 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
36 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
37 TCG_REG_X16, TCG_REG_X17,
38
39 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
40 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
41
42 /* X18 reserved by system */
43 /* X19 reserved for AREG0 */
44 /* X29 reserved as fp */
45 /* X30 reserved as temporary */
46 };
47
48 static const int tcg_target_call_iarg_regs[8] = {
49 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
50 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
51 };
52 static const int tcg_target_call_oarg_regs[1] = {
53 TCG_REG_X0
54 };
55
56 #define TCG_REG_TMP TCG_REG_X30
57
58 #ifndef CONFIG_SOFTMMU
59 # ifdef CONFIG_USE_GUEST_BASE
60 # define TCG_REG_GUEST_BASE TCG_REG_X28
61 # else
62 # define TCG_REG_GUEST_BASE TCG_REG_XZR
63 # endif
64 #endif
65
66 static inline void reloc_pc26(void *code_ptr, intptr_t target)
67 {
68 intptr_t offset = (target - (intptr_t)code_ptr) / 4;
69 /* read instruction, mask away previous PC_REL26 parameter contents,
70 set the proper offset, then write back the instruction. */
71 uint32_t insn = *(uint32_t *)code_ptr;
72 insn = deposit32(insn, 0, 26, offset);
73 *(uint32_t *)code_ptr = insn;
74 }
75
76 static inline void reloc_pc19(void *code_ptr, intptr_t target)
77 {
78 intptr_t offset = (target - (intptr_t)code_ptr) / 4;
79 /* read instruction, mask away previous PC_REL19 parameter contents,
80 set the proper offset, then write back the instruction. */
81 uint32_t insn = *(uint32_t *)code_ptr;
82 insn = deposit32(insn, 5, 19, offset);
83 *(uint32_t *)code_ptr = insn;
84 }
85
86 static inline void patch_reloc(uint8_t *code_ptr, int type,
87 intptr_t value, intptr_t addend)
88 {
89 value += addend;
90
91 switch (type) {
92 case R_AARCH64_JUMP26:
93 case R_AARCH64_CALL26:
94 reloc_pc26(code_ptr, value);
95 break;
96 case R_AARCH64_CONDBR19:
97 reloc_pc19(code_ptr, value);
98 break;
99
100 default:
101 tcg_abort();
102 }
103 }
104
105 #define TCG_CT_CONST_IS32 0x100
106 #define TCG_CT_CONST_AIMM 0x200
107 #define TCG_CT_CONST_LIMM 0x400
108 #define TCG_CT_CONST_ZERO 0x800
109 #define TCG_CT_CONST_MONE 0x1000
110
111 /* parse target specific constraints */
112 static int target_parse_constraint(TCGArgConstraint *ct,
113 const char **pct_str)
114 {
115 const char *ct_str = *pct_str;
116
117 switch (ct_str[0]) {
118 case 'r':
119 ct->ct |= TCG_CT_REG;
120 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
121 break;
122 case 'l': /* qemu_ld / qemu_st address, data_reg */
123 ct->ct |= TCG_CT_REG;
124 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
125 #ifdef CONFIG_SOFTMMU
126 /* x0 and x1 will be overwritten when reading the tlb entry,
127 and x2 and x3 are needed for the helper args; better to avoid using them. */
128 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
129 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
130 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
131 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
132 #endif
133 break;
134 case 'w': /* The operand should be considered 32-bit. */
135 ct->ct |= TCG_CT_CONST_IS32;
136 break;
137 case 'A': /* Valid for arithmetic immediate (positive or negative). */
138 ct->ct |= TCG_CT_CONST_AIMM;
139 break;
140 case 'L': /* Valid for logical immediate. */
141 ct->ct |= TCG_CT_CONST_LIMM;
142 break;
143 case 'M': /* minus one */
144 ct->ct |= TCG_CT_CONST_MONE;
145 break;
146 case 'Z': /* zero */
147 ct->ct |= TCG_CT_CONST_ZERO;
148 break;
149 default:
150 return -1;
151 }
152
153 ct_str++;
154 *pct_str = ct_str;
155 return 0;
156 }
157
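/* An AArch64 arithmetic immediate is a 12-bit unsigned value, optionally
   shifted left by 12: e.g. 0x123 and 0x123000 are valid, 0x123456 is not. */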
158 static inline bool is_aimm(uint64_t val)
159 {
160 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
161 }
162
163 static inline bool is_limm(uint64_t val)
164 {
165 /* Taking a simplified view of the logical immediates for now, ignoring
166 the replication that can happen across the field. Match bit patterns
167 of the forms
168 0....01....1
169 0..01..10..0
170 and their inverses. */
171
172 /* Make things easier below, by testing the form with msb clear. */
173 if ((int64_t)val < 0) {
174 val = ~val;
175 }
176 if (val == 0) {
177 return false;
178 }
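    /* Adding the lowest set bit carries through a contiguous run of ones,
       leaving at most one bit set.  E.g. 0x0ff0 becomes 0x1000 and passes
       the power-of-two test below, while 0x0f0f becomes 0x0f10 and fails. */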
179 val += val & -val;
180 return (val & (val - 1)) == 0;
181 }
182
183 static int tcg_target_const_match(tcg_target_long val,
184 const TCGArgConstraint *arg_ct)
185 {
186 int ct = arg_ct->ct;
187
188 if (ct & TCG_CT_CONST) {
189 return 1;
190 }
191 if (ct & TCG_CT_CONST_IS32) {
192 val = (int32_t)val;
193 }
194 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
195 return 1;
196 }
197 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
198 return 1;
199 }
200 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
201 return 1;
202 }
203 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
204 return 1;
205 }
206
207 return 0;
208 }
209
210 enum aarch64_cond_code {
211 COND_EQ = 0x0,
212 COND_NE = 0x1,
213 COND_CS = 0x2, /* Unsigned greater or equal */
214 COND_HS = COND_CS, /* ALIAS greater or equal */
215 COND_CC = 0x3, /* Unsigned less than */
216 COND_LO = COND_CC, /* ALIAS Lower */
217 COND_MI = 0x4, /* Negative */
218 COND_PL = 0x5, /* Zero or greater */
219 COND_VS = 0x6, /* Overflow */
220 COND_VC = 0x7, /* No overflow */
221 COND_HI = 0x8, /* Unsigned greater than */
222 COND_LS = 0x9, /* Unsigned less or equal */
223 COND_GE = 0xa,
224 COND_LT = 0xb,
225 COND_GT = 0xc,
226 COND_LE = 0xd,
227 COND_AL = 0xe,
228 COND_NV = 0xf, /* behaves like COND_AL here */
229 };
230
231 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
232 [TCG_COND_EQ] = COND_EQ,
233 [TCG_COND_NE] = COND_NE,
234 [TCG_COND_LT] = COND_LT,
235 [TCG_COND_GE] = COND_GE,
236 [TCG_COND_LE] = COND_LE,
237 [TCG_COND_GT] = COND_GT,
238 /* unsigned */
239 [TCG_COND_LTU] = COND_LO,
240 [TCG_COND_GTU] = COND_HI,
241 [TCG_COND_GEU] = COND_HS,
242 [TCG_COND_LEU] = COND_LS,
243 };
244
245 /* opcodes for LDR / STR instructions with base + simm9 addressing */
246 enum aarch64_ldst_op_data { /* size of the data moved */
247 LDST_8 = 0x38,
248 LDST_16 = 0x78,
249 LDST_32 = 0xb8,
250 LDST_64 = 0xf8,
251 };
252 enum aarch64_ldst_op_type { /* type of operation */
253 LDST_ST = 0x0, /* store */
254 LDST_LD = 0x4, /* load */
255 LDST_LD_S_X = 0x8, /* load and sign-extend into Xt */
256 LDST_LD_S_W = 0xc, /* load and sign-extend into Wt */
257 };
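/* The two enums above combine into the top byte of the opcode: e.g. LDST_32
   with LDST_LD and a simm9 offset gives 0xb8400000 (LDUR Wt) in
   tcg_out_ldst_9 below, while the scaled uimm12 form in tcg_out_ldst_12
   ORs in bit 24, giving 0xb9400000 (LDR Wt, [Xn, #uimm12]). */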
258
259 /* We encode the format of the insn into the beginning of the name, so that
260 we can have the preprocessor help "typecheck" the insn vs the output
261 function. Arm didn't provide us with nice names for the formats, so we
262 use the section number of the architecture reference manual in which the
263 instruction group is described. */
264 typedef enum {
265 /* Compare and branch (immediate). */
266 I3201_CBZ = 0x34000000,
267 I3201_CBNZ = 0x35000000,
268
269 /* Conditional branch (immediate). */
270 I3202_B_C = 0x54000000,
271
272 /* Unconditional branch (immediate). */
273 I3206_B = 0x14000000,
274 I3206_BL = 0x94000000,
275
276 /* Unconditional branch (register). */
277 I3207_BR = 0xd61f0000,
278 I3207_BLR = 0xd63f0000,
279 I3207_RET = 0xd65f0000,
280
281 /* Load/store register pair instructions. */
282 I3314_LDP = 0x28400000,
283 I3314_STP = 0x28000000,
284
285 /* Add/subtract immediate instructions. */
286 I3401_ADDI = 0x11000000,
287 I3401_ADDSI = 0x31000000,
288 I3401_SUBI = 0x51000000,
289 I3401_SUBSI = 0x71000000,
290
291 /* Bitfield instructions. */
292 I3402_BFM = 0x33000000,
293 I3402_SBFM = 0x13000000,
294 I3402_UBFM = 0x53000000,
295
296 /* Extract instruction. */
297 I3403_EXTR = 0x13800000,
298
299 /* Logical immediate instructions. */
300 I3404_ANDI = 0x12000000,
301 I3404_ORRI = 0x32000000,
302 I3404_EORI = 0x52000000,
303
304 /* Move wide immediate instructions. */
305 I3405_MOVN = 0x12800000,
306 I3405_MOVZ = 0x52800000,
307 I3405_MOVK = 0x72800000,
308
309 /* PC relative addressing instructions. */
310 I3406_ADR = 0x10000000,
311 I3406_ADRP = 0x90000000,
312
313 /* Add/subtract shifted register instructions (without a shift). */
314 I3502_ADD = 0x0b000000,
315 I3502_ADDS = 0x2b000000,
316 I3502_SUB = 0x4b000000,
317 I3502_SUBS = 0x6b000000,
318
319 /* Add/subtract shifted register instructions (with a shift). */
320 I3502S_ADD_LSL = I3502_ADD,
321
322 /* Add/subtract with carry instructions. */
323 I3503_ADC = 0x1a000000,
324 I3503_SBC = 0x5a000000,
325
326 /* Conditional select instructions. */
327 I3506_CSEL = 0x1a800000,
328 I3506_CSINC = 0x1a800400,
329
330 /* Data-processing (2 source) instructions. */
331 I3508_LSLV = 0x1ac02000,
332 I3508_LSRV = 0x1ac02400,
333 I3508_ASRV = 0x1ac02800,
334 I3508_RORV = 0x1ac02c00,
335 I3508_SMULH = 0x9b407c00,
336 I3508_UMULH = 0x9bc07c00,
337 I3508_UDIV = 0x1ac00800,
338 I3508_SDIV = 0x1ac00c00,
339
340 /* Data-processing (3 source) instructions. */
341 I3509_MADD = 0x1b000000,
342 I3509_MSUB = 0x1b008000,
343
344 /* Logical shifted register instructions (without a shift). */
345 I3510_AND = 0x0a000000,
346 I3510_BIC = 0x0a200000,
347 I3510_ORR = 0x2a000000,
348 I3510_ORN = 0x2a200000,
349 I3510_EOR = 0x4a000000,
350 I3510_EON = 0x4a200000,
351 I3510_ANDS = 0x6a000000,
352 } AArch64Insn;
353
354 static inline enum aarch64_ldst_op_data
355 aarch64_ldst_get_data(TCGOpcode tcg_op)
356 {
357 switch (tcg_op) {
358 case INDEX_op_ld8u_i32:
359 case INDEX_op_ld8s_i32:
360 case INDEX_op_ld8u_i64:
361 case INDEX_op_ld8s_i64:
362 case INDEX_op_st8_i32:
363 case INDEX_op_st8_i64:
364 return LDST_8;
365
366 case INDEX_op_ld16u_i32:
367 case INDEX_op_ld16s_i32:
368 case INDEX_op_ld16u_i64:
369 case INDEX_op_ld16s_i64:
370 case INDEX_op_st16_i32:
371 case INDEX_op_st16_i64:
372 return LDST_16;
373
374 case INDEX_op_ld_i32:
375 case INDEX_op_st_i32:
376 case INDEX_op_ld32u_i64:
377 case INDEX_op_ld32s_i64:
378 case INDEX_op_st32_i64:
379 return LDST_32;
380
381 case INDEX_op_ld_i64:
382 case INDEX_op_st_i64:
383 return LDST_64;
384
385 default:
386 tcg_abort();
387 }
388 }
389
390 static inline enum aarch64_ldst_op_type
391 aarch64_ldst_get_type(TCGOpcode tcg_op)
392 {
393 switch (tcg_op) {
394 case INDEX_op_st8_i32:
395 case INDEX_op_st16_i32:
396 case INDEX_op_st8_i64:
397 case INDEX_op_st16_i64:
398 case INDEX_op_st_i32:
399 case INDEX_op_st32_i64:
400 case INDEX_op_st_i64:
401 return LDST_ST;
402
403 case INDEX_op_ld8u_i32:
404 case INDEX_op_ld16u_i32:
405 case INDEX_op_ld8u_i64:
406 case INDEX_op_ld16u_i64:
407 case INDEX_op_ld_i32:
408 case INDEX_op_ld32u_i64:
409 case INDEX_op_ld_i64:
410 return LDST_LD;
411
412 case INDEX_op_ld8s_i32:
413 case INDEX_op_ld16s_i32:
414 return LDST_LD_S_W;
415
416 case INDEX_op_ld8s_i64:
417 case INDEX_op_ld16s_i64:
418 case INDEX_op_ld32s_i64:
419 return LDST_LD_S_X;
420
421 default:
422 tcg_abort();
423 }
424 }
425
426 static inline uint32_t tcg_in32(TCGContext *s)
427 {
428 uint32_t v = *(uint32_t *)s->code_ptr;
429 return v;
430 }
431
432 /* Emit an opcode with "type-checking" of the format. */
433 #define tcg_out_insn(S, FMT, OP, ...) \
434 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
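/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, 0). */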
435
436 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
437 TCGReg rt, int imm19)
438 {
439 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
440 }
441
442 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
443 TCGCond c, int imm19)
444 {
445 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
446 }
447
448 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
449 {
450 tcg_out32(s, insn | (imm26 & 0x03ffffff));
451 }
452
453 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
454 {
455 tcg_out32(s, insn | rn << 5);
456 }
457
458 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
459 TCGReg r1, TCGReg r2, TCGReg rn,
460 tcg_target_long ofs, bool pre, bool w)
461 {
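    /* PRE and W select the addressing form: signed offset (pre=1, w=0),
       pre-index with writeback (pre=1, w=1), post-index (pre=0, w=1). */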
462 insn |= 1u << 31; /* ext */
463 insn |= pre << 24;
464 insn |= w << 23;
465
466 assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
467 insn |= (ofs & (0x7f << 3)) << (15 - 3);
468
469 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
470 }
471
472 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
473 TCGReg rd, TCGReg rn, uint64_t aimm)
474 {
475 if (aimm > 0xfff) {
476 assert((aimm & 0xfff) == 0);
477 aimm >>= 12;
478 assert(aimm <= 0xfff);
479 aimm |= 1 << 12; /* apply LSL 12 */
480 }
481 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
482 }
483
484 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
485 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
486 that feed the DecodeBitMasks pseudo function. */
487 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
488 TCGReg rd, TCGReg rn, int n, int immr, int imms)
489 {
490 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
491 | rn << 5 | rd);
492 }
493
494 #define tcg_out_insn_3404 tcg_out_insn_3402
495
496 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
497 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
498 {
499 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
500 | rn << 5 | rd);
501 }
502
503 /* This function is used for the Move (wide immediate) instruction group.
504 Note that SHIFT is a full shift count, not the 2 bit HW field. */
505 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
506 TCGReg rd, uint16_t half, unsigned shift)
507 {
508 assert((shift & ~0x30) == 0);
509 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
510 }
511
512 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
513 TCGReg rd, int64_t disp)
514 {
515 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
516 }
517
518 /* This function is for 3.5.2 (Add/subtract shifted register), for
519 the rare occasion when we actually want to supply a shift amount. */
520 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
521 TCGType ext, TCGReg rd, TCGReg rn,
522 TCGReg rm, int imm6)
523 {
524 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
525 }
526
527 /* This function is for 3.5.2 (Add/subtract shifted register),
528 and 3.5.10 (Logical shifted register), for the vast majority of cases
529 when we don't want to apply a shift. Thus it can also be used for
530 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
531 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
532 TCGReg rd, TCGReg rn, TCGReg rm)
533 {
534 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
535 }
536
537 #define tcg_out_insn_3503 tcg_out_insn_3502
538 #define tcg_out_insn_3508 tcg_out_insn_3502
539 #define tcg_out_insn_3510 tcg_out_insn_3502
540
541 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
542 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
543 {
544 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
545 | tcg_cond_to_aarch64[c] << 12);
546 }
547
548 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
549 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
550 {
551 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
552 }
553
554
555 static inline void tcg_out_ldst_9(TCGContext *s,
556 enum aarch64_ldst_op_data op_data,
557 enum aarch64_ldst_op_type op_type,
558 TCGReg rd, TCGReg rn, intptr_t offset)
559 {
560 /* use the 9-bit signed unscaled offset form (LDUR / STUR) with the base register */
561 tcg_out32(s, op_data << 24 | op_type << 20
562 | (offset & 0x1ff) << 12 | rn << 5 | rd);
563 }
564
565 /* tcg_out_ldst_12 expects a scaled unsigned immediate offset */
566 static inline void tcg_out_ldst_12(TCGContext *s,
567 enum aarch64_ldst_op_data op_data,
568 enum aarch64_ldst_op_type op_type,
569 TCGReg rd, TCGReg rn,
570 tcg_target_ulong scaled_uimm)
571 {
572 tcg_out32(s, (op_data | 1) << 24
573 | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
574 }
575
576 /* Register to register move using ORR (shifted register with no shift). */
577 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
578 {
579 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
580 }
581
582 /* Register to register move using ADDI (move to/from SP). */
583 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
584 {
585 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
586 }
587
588 /* This function is used for the Logical (immediate) instruction group.
589 The value of LIMM must satisfy IS_LIMM. See the comment above about
590 only supporting simplified logical immediates. */
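/* For example, limm = 0x00ffff00 gives l = 8, h = 40, hence r = 56 and
   c = 15: a run of 16 ones rotated right by 56 positions. */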
591 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
592 TCGReg rd, TCGReg rn, uint64_t limm)
593 {
594 unsigned h, l, r, c;
595
596 assert(is_limm(limm));
597
598 h = clz64(limm);
599 l = ctz64(limm);
600 if (l == 0) {
601 r = 0; /* form 0....01....1 */
602 c = ctz64(~limm) - 1;
603 if (h == 0) {
604 r = clz64(~limm); /* form 1..10..01..1 */
605 c += r;
606 }
607 } else {
608 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
609 c = r - h - 1;
610 }
611 if (ext == TCG_TYPE_I32) {
612 r &= 31;
613 c &= 31;
614 }
615
616 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
617 }
618
619 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
620 tcg_target_long value)
621 {
622 AArch64Insn insn;
623 int i, wantinv, shift;
624 tcg_target_long svalue = value;
625 tcg_target_long ivalue = ~value;
626 tcg_target_long imask;
627
628 /* For 32-bit values, discard potential garbage in value. For 64-bit
629 values within [2**31, 2**32-1], we can create smaller sequences by
630 interpreting this as a negative 32-bit number, while ensuring that
631 the high 32 bits are cleared by setting SF=0. */
632 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
633 svalue = (int32_t)value;
634 value = (uint32_t)value;
635 ivalue = (uint32_t)ivalue;
636 type = TCG_TYPE_I32;
637 }
638
639 /* Speed things up by handling the common case of small positive
640 and negative values specially. */
641 if ((value & ~0xffffull) == 0) {
642 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
643 return;
644 } else if ((ivalue & ~0xffffull) == 0) {
645 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
646 return;
647 }
648
649 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
650 use the sign-extended value. That lets us match rotated values such
651 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
652 if (is_limm(svalue)) {
653 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
654 return;
655 }
656
657 /* Look for host pointer values within 4G of the PC. This happens
658 often when loading pointers to QEMU's own data structures. */
659 if (type == TCG_TYPE_I64) {
660 tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
661 if (disp == sextract64(disp, 0, 21)) {
662 tcg_out_insn(s, 3406, ADRP, rd, disp);
663 if (value & 0xfff) {
664 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
665 }
666 return;
667 }
668 }
669
670 /* Would it take fewer insns to begin with MOVN? For the value and its
671 inverse, count the number of 16-bit lanes that are 0. */
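    /* E.g. 0xffffffff1234ffff has no zero lanes, but its inverse has three;
       a single MOVN then suffices where MOVZ would need three MOVKs. */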
672 for (i = wantinv = imask = 0; i < 64; i += 16) {
673 tcg_target_long mask = 0xffffull << i;
674 if ((value & mask) == 0) {
675 wantinv -= 1;
676 }
677 if ((ivalue & mask) == 0) {
678 wantinv += 1;
679 imask |= mask;
680 }
681 }
682
683 /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */
684 insn = I3405_MOVZ;
685 if (wantinv > 0) {
686 value = ivalue;
687 insn = I3405_MOVN;
688 }
689
690 /* Find the lowest lane that is not 0x0000. */
691 shift = ctz64(value) & (63 & -16);
692 tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift);
693
694 if (wantinv > 0) {
695 /* Re-invert the value, so MOVK sees non-inverted bits. */
696 value = ~value;
697 /* Clear out all the 0xffff lanes. */
698 value ^= imask;
699 }
700 /* Clear out the lane that we just set. */
701 value &= ~(0xffffUL << shift);
702
703 /* Iterate until all lanes have been set, and thus cleared from VALUE. */
704 while (value) {
705 shift = ctz64(value) & (63 & -16);
706 tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
707 value &= ~(0xffffUL << shift);
708 }
709 }
710
711 static inline void tcg_out_ldst_r(TCGContext *s,
712 enum aarch64_ldst_op_data op_data,
713 enum aarch64_ldst_op_type op_type,
714 TCGReg rd, TCGReg base, TCGReg regoff)
715 {
716 /* load/store between register and memory using base + 64-bit register offset */
717 /* e.g. LDR Wt, [Xn, Xm]: 0xb8600800|(regoff << 16)|(base << 5)|rd */
718 /* the 0x6000 is the "no extend" option field for the register offset */
719 tcg_out32(s, 0x00206800
720 | op_data << 24 | op_type << 20 | regoff << 16 | base << 5 | rd);
721 }
722
723 /* Emit a load/store with an arbitrary offset, picking the best available encoding. */
724 static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
725 enum aarch64_ldst_op_type type,
726 TCGReg rd, TCGReg rn, intptr_t offset)
727 {
728 if (offset >= -256 && offset < 256) {
729 tcg_out_ldst_9(s, data, type, rd, rn, offset);
730 return;
731 }
732
733 if (offset >= 256) {
734 /* if the offset is naturally aligned and in range,
735 then we can use the scaled uimm12 encoding */
736 unsigned int s_bits = data >> 6;
737 if (!(offset & ((1 << s_bits) - 1))) {
738 tcg_target_ulong scaled_uimm = offset >> s_bits;
739 if (scaled_uimm <= 0xfff) {
740 tcg_out_ldst_12(s, data, type, rd, rn, scaled_uimm);
741 return;
742 }
743 }
744 }
745
746 /* worst-case scenario, move offset to temp register, use reg offset */
747 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
748 tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP);
749 }
750
751 static inline void tcg_out_mov(TCGContext *s,
752 TCGType type, TCGReg ret, TCGReg arg)
753 {
754 if (ret != arg) {
755 tcg_out_movr(s, type, ret, arg);
756 }
757 }
758
759 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
760 TCGReg arg1, intptr_t arg2)
761 {
762 tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_LD,
763 arg, arg1, arg2);
764 }
765
766 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
767 TCGReg arg1, intptr_t arg2)
768 {
769 tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_ST,
770 arg, arg1, arg2);
771 }
772
773 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
774 TCGReg rn, unsigned int a, unsigned int b)
775 {
776 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
777 }
778
779 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
780 TCGReg rn, unsigned int a, unsigned int b)
781 {
782 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
783 }
784
785 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
786 TCGReg rn, unsigned int a, unsigned int b)
787 {
788 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
789 }
790
791 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
792 TCGReg rn, TCGReg rm, unsigned int a)
793 {
794 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
795 }
796
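/* LSL is an alias of UBFM: e.g. a left shift by 3 of a 32-bit value
   emits UBFM Wd, Wn, #29, #28. */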
797 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
798 TCGReg rd, TCGReg rn, unsigned int m)
799 {
800 int bits = ext ? 64 : 32;
801 int max = bits - 1;
802 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
803 }
804
805 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
806 TCGReg rd, TCGReg rn, unsigned int m)
807 {
808 int max = ext ? 63 : 31;
809 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
810 }
811
812 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
813 TCGReg rd, TCGReg rn, unsigned int m)
814 {
815 int max = ext ? 63 : 31;
816 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
817 }
818
819 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
820 TCGReg rd, TCGReg rn, unsigned int m)
821 {
822 int max = ext ? 63 : 31;
823 tcg_out_extr(s, ext, rd, rn, rn, m & max);
824 }
825
826 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
827 TCGReg rd, TCGReg rn, unsigned int m)
828 {
829 int bits = ext ? 64 : 32;
830 int max = bits - 1;
831 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
832 }
833
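/* Deposit WIDTH bits of RN into RD at position LSB, via the BFI alias of
   BFM: e.g. lsb = 8, width = 8 in 32 bits gives BFM Wd, Wn, #24, #7. */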
834 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
835 TCGReg rn, unsigned lsb, unsigned width)
836 {
837 unsigned size = ext ? 64 : 32;
838 unsigned a = (size - lsb) & (size - 1);
839 unsigned b = width - 1;
840 tcg_out_bfm(s, ext, rd, rn, a, b);
841 }
842
843 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
844 tcg_target_long b, bool const_b)
845 {
846 if (const_b) {
847 /* Using CMP or CMN aliases. */
848 if (b >= 0) {
849 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
850 } else {
851 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
852 }
853 } else {
854 /* Using CMP alias SUBS wzr, Wn, Wm */
855 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
856 }
857 }
858
859 static inline void tcg_out_goto(TCGContext *s, intptr_t target)
860 {
861 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
862
863 if (offset < -0x02000000 || offset >= 0x02000000) {
864 /* out of 26bit range */
865 tcg_abort();
866 }
867
868 tcg_out_insn(s, 3206, B, offset);
869 }
870
871 static inline void tcg_out_goto_noaddr(TCGContext *s)
872 {
873 /* We take care here not to modify the branch target by reading from
874 the buffer. This ensures that caches and memory are kept coherent during
875 retranslation. Mask away possible garbage in the high bits for the
876 first translation, while keeping the offset bits for retranslation. */
877 uint32_t old = tcg_in32(s);
878 tcg_out_insn(s, 3206, B, old);
879 }
880
881 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
882 {
883 /* See comments in tcg_out_goto_noaddr. */
884 uint32_t old = tcg_in32(s) >> 5;
885 tcg_out_insn(s, 3202, B_C, c, old);
886 }
887
888 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
889 {
890 tcg_out_insn(s, 3207, BLR, reg);
891 }
892
893 static inline void tcg_out_call(TCGContext *s, intptr_t target)
894 {
895 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
896
897 if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit range */
898 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
899 tcg_out_callr(s, TCG_REG_TMP);
900 } else {
901 tcg_out_insn(s, 3206, BL, offset);
902 }
903 }
904
905 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
906 {
907 intptr_t target = addr;
908 intptr_t offset = (target - (intptr_t)jmp_addr) / 4;
909
910 if (offset < -0x02000000 || offset >= 0x02000000) {
911 /* out of 26bit range */
912 tcg_abort();
913 }
914
915 patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0);
916 flush_icache_range(jmp_addr, jmp_addr + 4);
917 }
918
919 static inline void tcg_out_goto_label(TCGContext *s, int label_index)
920 {
921 TCGLabel *l = &s->labels[label_index];
922
923 if (!l->has_value) {
924 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0);
925 tcg_out_goto_noaddr(s);
926 } else {
927 tcg_out_goto(s, l->u.value);
928 }
929 }
930
931 static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
932 TCGArg b, bool b_const, int label)
933 {
934 TCGLabel *l = &s->labels[label];
935 intptr_t offset;
936 bool need_cmp;
937
938 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
939 need_cmp = false;
940 } else {
941 need_cmp = true;
942 tcg_out_cmp(s, ext, a, b, b_const);
943 }
944
945 if (!l->has_value) {
946 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label, 0);
947 offset = tcg_in32(s) >> 5;
948 } else {
949 offset = l->u.value - (uintptr_t)s->code_ptr;
950 offset >>= 2;
951 assert(offset >= -0x40000 && offset < 0x40000);
952 }
953
954 if (need_cmp) {
955 tcg_out_insn(s, 3202, B_C, c, offset);
956 } else if (c == TCG_COND_EQ) {
957 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
958 } else {
959 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
960 }
961 }
962
963 static inline void tcg_out_rev(TCGContext *s, TCGType ext,
964 TCGReg rd, TCGReg rm)
965 {
966 /* using REV 0x5ac00800 */
967 unsigned int base = ext ? 0xdac00c00 : 0x5ac00800;
968 tcg_out32(s, base | rm << 5 | rd);
969 }
970
971 static inline void tcg_out_rev16(TCGContext *s, TCGType ext,
972 TCGReg rd, TCGReg rm)
973 {
974 /* using REV16 0x5ac00400 */
975 unsigned int base = ext ? 0xdac00400 : 0x5ac00400;
976 tcg_out32(s, base | rm << 5 | rd);
977 }
978
979 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
980 TCGReg rd, TCGReg rn)
981 {
982 /* Using ALIASes SXTB, SXTH, SXTW of SBFM Xd, Xn, #0, #7|15|31 */
983 int bits = (8 << s_bits) - 1;
984 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
985 }
986
987 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
988 TCGReg rd, TCGReg rn)
989 {
990 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
991 int bits = (8 << s_bits) - 1;
992 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
993 }
994
995 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
996 TCGReg rn, int64_t aimm)
997 {
998 if (aimm >= 0) {
999 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1000 } else {
1001 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1002 }
1003 }
1004
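/* Emit a double-word add or subtract: the low half uses ADDS/SUBS (or the
   immediate forms) to set the carry, and the high half consumes it with
   ADC/SBC.  A temporary is used when the low result would clobber an input
   still needed for the high half. */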
1005 static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
1006 TCGReg rh, TCGReg al, TCGReg ah,
1007 tcg_target_long bl, tcg_target_long bh,
1008 bool const_bl, bool const_bh, bool sub)
1009 {
1010 TCGReg orig_rl = rl;
1011 AArch64Insn insn;
1012
1013 if (rl == ah || (!const_bh && rl == bh)) {
1014 rl = TCG_REG_TMP;
1015 }
1016
1017 if (const_bl) {
1018 insn = I3401_ADDSI;
1019 if ((bl < 0) ^ sub) {
1020 insn = I3401_SUBSI;
1021 bl = -bl;
1022 }
1023 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1024 } else {
1025 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1026 }
1027
1028 insn = I3503_ADC;
1029 if (const_bh) {
1030 /* Note that the only two constants we support are 0 and -1, and
1031 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1032 if ((bh != 0) ^ sub) {
1033 insn = I3503_SBC;
1034 }
1035 bh = TCG_REG_XZR;
1036 } else if (sub) {
1037 insn = I3503_SBC;
1038 }
1039 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1040
1041 if (rl != orig_rl) {
1042 tcg_out_movr(s, ext, orig_rl, rl);
1043 }
1044 }
1045
1046 #ifdef CONFIG_SOFTMMU
1047 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1048 * int mmu_idx, uintptr_t ra)
1049 */
1050 static const void * const qemu_ld_helpers[4] = {
1051 helper_ret_ldub_mmu,
1052 helper_ret_lduw_mmu,
1053 helper_ret_ldul_mmu,
1054 helper_ret_ldq_mmu,
1055 };
1056
1057 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1058 * uintxx_t val, int mmu_idx, uintptr_t ra)
1059 */
1060 static const void * const qemu_st_helpers[4] = {
1061 helper_ret_stb_mmu,
1062 helper_ret_stw_mmu,
1063 helper_ret_stl_mmu,
1064 helper_ret_stq_mmu,
1065 };
1066
1067 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, uintptr_t addr)
1068 {
1069 addr -= (uintptr_t)s->code_ptr;
1070 assert(addr == sextract64(addr, 0, 21));
1071 tcg_out_insn(s, 3406, ADR, rd, addr);
1072 }
1073
1074 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1075 {
1076 TCGMemOp opc = lb->opc;
1077 TCGMemOp size = opc & MO_SIZE;
1078
1079 reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
1080
1081 tcg_out_movr(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
1082 tcg_out_movr(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1083 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
1084 tcg_out_adr(s, TCG_REG_X3, (intptr_t)lb->raddr);
1085 tcg_out_call(s, (intptr_t)qemu_ld_helpers[size]);
1086 if (opc & MO_SIGN) {
1087 tcg_out_sxt(s, TCG_TYPE_I64, size, lb->datalo_reg, TCG_REG_X0);
1088 } else {
1089 tcg_out_movr(s, TCG_TYPE_I64, lb->datalo_reg, TCG_REG_X0);
1090 }
1091
1092 tcg_out_goto(s, (intptr_t)lb->raddr);
1093 }
1094
1095 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1096 {
1097 TCGMemOp size = lb->opc;
1098
1099 reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
1100
1101 tcg_out_movr(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
1102 tcg_out_movr(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1103 tcg_out_movr(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1104 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
1105 tcg_out_adr(s, TCG_REG_X4, (intptr_t)lb->raddr);
1106 tcg_out_call(s, (intptr_t)qemu_st_helpers[size]);
1107 tcg_out_goto(s, (intptr_t)lb->raddr);
1108 }
1109
1110 static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
1111 TCGReg data_reg, TCGReg addr_reg,
1112 int mem_index,
1113 uint8_t *raddr, uint8_t *label_ptr)
1114 {
1115 TCGLabelQemuLdst *label = new_ldst_label(s);
1116
1117 label->is_ld = is_ld;
1118 label->opc = opc;
1119 label->datalo_reg = data_reg;
1120 label->addrlo_reg = addr_reg;
1121 label->mem_index = mem_index;
1122 label->raddr = raddr;
1123 label->label_ptr[0] = label_ptr;
1124 }
1125
1126 /* Load and compare a TLB entry, emitting the conditional jump to the
1127 slow path for the failure case, which will be patched later when finalizing
1128 the slow path. Generated code returns the host addend in X1,
1129 clobbers X0,X2,X3,TMP. */
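/* The emitted fast path is roughly:
      ubfm  x0, addr, #TARGET_PAGE_BITS, #(TARGET_PAGE_BITS + CPU_TLB_BITS)
      and   x3, addr, #(TARGET_PAGE_MASK | align_mask)
      add   x2, env, #hi(tlb_offset)             (only if needed)
      add   x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
      ldr   x0, [x2, #lo(tlb_offset)]            (tlb comparator)
      ldr   x1, [x2, #lo(addend)]                (host addend)
      cmp   x0, x3
      b.ne  slow_path  */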
1130 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits,
1131 uint8_t **label_ptr, int mem_index, bool is_read)
1132 {
1133 TCGReg base = TCG_AREG0;
1134 int tlb_offset = is_read ?
1135 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1136 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1137
1138 /* Extract the TLB index from the address into X0.
1139 X0<CPU_TLB_BITS:0> =
1140 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1141 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1142 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1143
1144 /* Store the page mask part of the address and the low s_bits into X3.
1145 Later this allows checking for equality and alignment at the same time.
1146 X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
1147 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
1148 addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
1149
1150 /* Add any "high bits" from the tlb offset to the env address into X2,
1151 to take advantage of the LSL12 form of the ADDI instruction.
1152 X2 = env + (tlb_offset & 0xfff000) */
1153 if (tlb_offset & 0xfff000) {
1154 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1155 tlb_offset & 0xfff000);
1156 base = TCG_REG_X2;
1157 }
1158
1159 /* Merge the tlb index contribution into X2.
1160 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1161 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1162 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1163
1164 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1165 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1166 tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
1167 LDST_LD, TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
1168
1169 /* Load the tlb addend. Do that early to avoid stalling.
1170 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1171 tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2,
1172 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1173 (is_read ? offsetof(CPUTLBEntry, addr_read)
1174 : offsetof(CPUTLBEntry, addr_write)));
1175
1176 /* Perform the address comparison. */
1177 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1178
1179 /* If not equal, we jump to the slow path. */
1180 *label_ptr = s->code_ptr;
1181 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1182 }
1183
1184 #endif /* CONFIG_SOFTMMU */
1185
1186 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop,
1187 TCGReg data_r, TCGReg addr_r, TCGReg off_r)
1188 {
1189 const TCGMemOp bswap = memop & MO_BSWAP;
1190
1191 switch (memop & MO_SSIZE) {
1192 case MO_UB:
1193 tcg_out_ldst_r(s, LDST_8, LDST_LD, data_r, addr_r, off_r);
1194 break;
1195 case MO_SB:
1196 tcg_out_ldst_r(s, LDST_8, LDST_LD_S_X, data_r, addr_r, off_r);
1197 break;
1198 case MO_UW:
1199 tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
1200 if (bswap) {
1201 tcg_out_rev16(s, TCG_TYPE_I32, data_r, data_r);
1202 }
1203 break;
1204 case MO_SW:
1205 if (bswap) {
1206 tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
1207 tcg_out_rev16(s, TCG_TYPE_I32, data_r, data_r);
1208 tcg_out_sxt(s, TCG_TYPE_I64, MO_16, data_r, data_r);
1209 } else {
1210 tcg_out_ldst_r(s, LDST_16, LDST_LD_S_X, data_r, addr_r, off_r);
1211 }
1212 break;
1213 case MO_UL:
1214 tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
1215 if (bswap) {
1216 tcg_out_rev(s, TCG_TYPE_I32, data_r, data_r);
1217 }
1218 break;
1219 case MO_SL:
1220 if (bswap) {
1221 tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
1222 tcg_out_rev(s, TCG_TYPE_I32, data_r, data_r);
1223 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1224 } else {
1225 tcg_out_ldst_r(s, LDST_32, LDST_LD_S_X, data_r, addr_r, off_r);
1226 }
1227 break;
1228 case MO_Q:
1229 tcg_out_ldst_r(s, LDST_64, LDST_LD, data_r, addr_r, off_r);
1230 if (bswap) {
1231 tcg_out_rev(s, TCG_TYPE_I64, data_r, data_r);
1232 }
1233 break;
1234 default:
1235 tcg_abort();
1236 }
1237 }
1238
1239 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1240 TCGReg data_r, TCGReg addr_r, TCGReg off_r)
1241 {
1242 const TCGMemOp bswap = memop & MO_BSWAP;
1243
1244 switch (memop & MO_SIZE) {
1245 case MO_8:
1246 tcg_out_ldst_r(s, LDST_8, LDST_ST, data_r, addr_r, off_r);
1247 break;
1248 case MO_16:
1249 if (bswap) {
1250 tcg_out_rev16(s, TCG_TYPE_I32, TCG_REG_TMP, data_r);
1251 data_r = TCG_REG_TMP;
1252 }
1253 tcg_out_ldst_r(s, LDST_16, LDST_ST, data_r, addr_r, off_r);
1254 break;
1255 case MO_32:
1256 if (bswap) {
1257 tcg_out_rev(s, TCG_TYPE_I32, TCG_REG_TMP, data_r);
1258 data_r = TCG_REG_TMP;
1259 }
1260 tcg_out_ldst_r(s, LDST_32, LDST_ST, data_r, addr_r, off_r);
1261 break;
1262 case MO_64:
1263 if (bswap) {
1264 tcg_out_rev(s, TCG_TYPE_I64, TCG_REG_TMP, data_r);
1265 data_r = TCG_REG_TMP;
1266 }
1267 tcg_out_ldst_r(s, LDST_64, LDST_ST, data_r, addr_r, off_r);
1268 break;
1269 default:
1270 tcg_abort();
1271 }
1272 }
1273
1274 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGMemOp memop)
1275 {
1276 TCGReg addr_reg, data_reg;
1277 #ifdef CONFIG_SOFTMMU
1278 int mem_index;
1279 TCGMemOp s_bits;
1280 uint8_t *label_ptr;
1281 #endif
1282 data_reg = args[0];
1283 addr_reg = args[1];
1284
1285 #ifdef CONFIG_SOFTMMU
1286 mem_index = args[2];
1287 s_bits = memop & MO_SIZE;
1288 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
1289 tcg_out_qemu_ld_direct(s, memop, data_reg, addr_reg, TCG_REG_X1);
1290 add_qemu_ldst_label(s, 1, memop, data_reg, addr_reg,
1291 mem_index, s->code_ptr, label_ptr);
1292 #else /* !CONFIG_SOFTMMU */
1293 tcg_out_qemu_ld_direct(s, memop, data_reg, addr_reg,
1294 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1295 #endif /* CONFIG_SOFTMMU */
1296 }
1297
1298 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGMemOp memop)
1299 {
1300 TCGReg addr_reg, data_reg;
1301 #ifdef CONFIG_SOFTMMU
1302 int mem_index;
1303 TCGMemOp s_bits;
1304 uint8_t *label_ptr;
1305 #endif
1306 data_reg = args[0];
1307 addr_reg = args[1];
1308
1309 #ifdef CONFIG_SOFTMMU
1310 mem_index = args[2];
1311 s_bits = memop & MO_SIZE;
1312
1313 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
1314 tcg_out_qemu_st_direct(s, memop, data_reg, addr_reg, TCG_REG_X1);
1315 add_qemu_ldst_label(s, 0, memop, data_reg, addr_reg,
1316 mem_index, s->code_ptr, label_ptr);
1317 #else /* !CONFIG_SOFTMMU */
1318 tcg_out_qemu_st_direct(s, memop, data_reg, addr_reg,
1319 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1320 #endif /* CONFIG_SOFTMMU */
1321 }
1322
1323 static uint8_t *tb_ret_addr;
1324
1325 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1326 const TCGArg args[TCG_MAX_OP_ARGS],
1327 const int const_args[TCG_MAX_OP_ARGS])
1328 {
1329 /* 99% of the time, we can signal the use of extension registers
1330 by looking to see if the opcode handles 64-bit data. */
1331 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1332
1333 /* Hoist the loads of the most common arguments. */
1334 TCGArg a0 = args[0];
1335 TCGArg a1 = args[1];
1336 TCGArg a2 = args[2];
1337 int c2 = const_args[2];
1338
1339 /* Some operands are defined with "rZ" constraint, a register or
1340 the zero register. These need not actually test args[I] == 0. */
1341 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1342
1343 switch (opc) {
1344 case INDEX_op_exit_tb:
1345 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1346 tcg_out_goto(s, (intptr_t)tb_ret_addr);
1347 break;
1348
1349 case INDEX_op_goto_tb:
1350 #ifndef USE_DIRECT_JUMP
1351 #error "USE_DIRECT_JUMP required for aarch64"
1352 #endif
1353 assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
1354 s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
1355 /* actual branch destination will be patched by
1356 aarch64_tb_set_jmp_target later, beware retranslation. */
1357 tcg_out_goto_noaddr(s);
1358 s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
1359 break;
1360
1361 case INDEX_op_call:
1362 if (const_args[0]) {
1363 tcg_out_call(s, a0);
1364 } else {
1365 tcg_out_callr(s, a0);
1366 }
1367 break;
1368
1369 case INDEX_op_br:
1370 tcg_out_goto_label(s, a0);
1371 break;
1372
1373 case INDEX_op_ld_i32:
1374 case INDEX_op_ld_i64:
1375 case INDEX_op_st_i32:
1376 case INDEX_op_st_i64:
1377 case INDEX_op_ld8u_i32:
1378 case INDEX_op_ld8s_i32:
1379 case INDEX_op_ld16u_i32:
1380 case INDEX_op_ld16s_i32:
1381 case INDEX_op_ld8u_i64:
1382 case INDEX_op_ld8s_i64:
1383 case INDEX_op_ld16u_i64:
1384 case INDEX_op_ld16s_i64:
1385 case INDEX_op_ld32u_i64:
1386 case INDEX_op_ld32s_i64:
1387 case INDEX_op_st8_i32:
1388 case INDEX_op_st8_i64:
1389 case INDEX_op_st16_i32:
1390 case INDEX_op_st16_i64:
1391 case INDEX_op_st32_i64:
1392 tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
1393 a0, a1, a2);
1394 break;
1395
1396 case INDEX_op_add_i32:
1397 a2 = (int32_t)a2;
1398 /* FALLTHRU */
1399 case INDEX_op_add_i64:
1400 if (c2) {
1401 tcg_out_addsubi(s, ext, a0, a1, a2);
1402 } else {
1403 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1404 }
1405 break;
1406
1407 case INDEX_op_sub_i32:
1408 a2 = (int32_t)a2;
1409 /* FALLTHRU */
1410 case INDEX_op_sub_i64:
1411 if (c2) {
1412 tcg_out_addsubi(s, ext, a0, a1, -a2);
1413 } else {
1414 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1415 }
1416 break;
1417
1418 case INDEX_op_neg_i64:
1419 case INDEX_op_neg_i32:
1420 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1421 break;
1422
1423 case INDEX_op_and_i32:
1424 a2 = (int32_t)a2;
1425 /* FALLTHRU */
1426 case INDEX_op_and_i64:
1427 if (c2) {
1428 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1429 } else {
1430 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1431 }
1432 break;
1433
1434 case INDEX_op_andc_i32:
1435 a2 = (int32_t)a2;
1436 /* FALLTHRU */
1437 case INDEX_op_andc_i64:
1438 if (c2) {
1439 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1440 } else {
1441 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1442 }
1443 break;
1444
1445 case INDEX_op_or_i32:
1446 a2 = (int32_t)a2;
1447 /* FALLTHRU */
1448 case INDEX_op_or_i64:
1449 if (c2) {
1450 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1451 } else {
1452 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1453 }
1454 break;
1455
1456 case INDEX_op_orc_i32:
1457 a2 = (int32_t)a2;
1458 /* FALLTHRU */
1459 case INDEX_op_orc_i64:
1460 if (c2) {
1461 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1462 } else {
1463 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1464 }
1465 break;
1466
1467 case INDEX_op_xor_i32:
1468 a2 = (int32_t)a2;
1469 /* FALLTHRU */
1470 case INDEX_op_xor_i64:
1471 if (c2) {
1472 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1473 } else {
1474 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1475 }
1476 break;
1477
1478 case INDEX_op_eqv_i32:
1479 a2 = (int32_t)a2;
1480 /* FALLTHRU */
1481 case INDEX_op_eqv_i64:
1482 if (c2) {
1483 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1484 } else {
1485 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1486 }
1487 break;
1488
1489 case INDEX_op_not_i64:
1490 case INDEX_op_not_i32:
1491 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1492 break;
1493
1494 case INDEX_op_mul_i64:
1495 case INDEX_op_mul_i32:
1496 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1497 break;
1498
1499 case INDEX_op_div_i64:
1500 case INDEX_op_div_i32:
1501 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1502 break;
1503 case INDEX_op_divu_i64:
1504 case INDEX_op_divu_i32:
1505 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1506 break;
1507
1508 case INDEX_op_rem_i64:
1509 case INDEX_op_rem_i32:
1510 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1511 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1512 break;
1513 case INDEX_op_remu_i64:
1514 case INDEX_op_remu_i32:
1515 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1516 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1517 break;
1518
1519 case INDEX_op_shl_i64:
1520 case INDEX_op_shl_i32:
1521 if (c2) {
1522 tcg_out_shl(s, ext, a0, a1, a2);
1523 } else {
1524 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1525 }
1526 break;
1527
1528 case INDEX_op_shr_i64:
1529 case INDEX_op_shr_i32:
1530 if (c2) {
1531 tcg_out_shr(s, ext, a0, a1, a2);
1532 } else {
1533 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1534 }
1535 break;
1536
1537 case INDEX_op_sar_i64:
1538 case INDEX_op_sar_i32:
1539 if (c2) {
1540 tcg_out_sar(s, ext, a0, a1, a2);
1541 } else {
1542 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1543 }
1544 break;
1545
1546 case INDEX_op_rotr_i64:
1547 case INDEX_op_rotr_i32:
1548 if (c2) {
1549 tcg_out_rotr(s, ext, a0, a1, a2);
1550 } else {
1551 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1552 }
1553 break;
1554
1555 case INDEX_op_rotl_i64:
1556 case INDEX_op_rotl_i32:
1557 if (c2) {
1558 tcg_out_rotl(s, ext, a0, a1, a2);
1559 } else {
1560 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1561 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1562 }
1563 break;
1564
1565 case INDEX_op_brcond_i32:
1566 a1 = (int32_t)a1;
1567 /* FALLTHRU */
1568 case INDEX_op_brcond_i64:
1569 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], args[3]);
1570 break;
1571
1572 case INDEX_op_setcond_i32:
1573 a2 = (int32_t)a2;
1574 /* FALLTHRU */
1575 case INDEX_op_setcond_i64:
1576 tcg_out_cmp(s, ext, a1, a2, c2);
1577 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1578 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1579 TCG_REG_XZR, tcg_invert_cond(args[3]));
1580 break;
1581
1582 case INDEX_op_movcond_i32:
1583 a2 = (int32_t)a2;
1584 /* FALLTHRU */
1585 case INDEX_op_movcond_i64:
1586 tcg_out_cmp(s, ext, a1, a2, c2);
1587 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1588 break;
1589
1590 case INDEX_op_qemu_ld8u:
1591 tcg_out_qemu_ld(s, args, MO_UB);
1592 break;
1593 case INDEX_op_qemu_ld8s:
1594 tcg_out_qemu_ld(s, args, MO_SB);
1595 break;
1596 case INDEX_op_qemu_ld16u:
1597 tcg_out_qemu_ld(s, args, MO_TEUW);
1598 break;
1599 case INDEX_op_qemu_ld16s:
1600 tcg_out_qemu_ld(s, args, MO_TESW);
1601 break;
1602 case INDEX_op_qemu_ld32u:
1603 case INDEX_op_qemu_ld32:
1604 tcg_out_qemu_ld(s, args, MO_TEUL);
1605 break;
1606 case INDEX_op_qemu_ld32s:
1607 tcg_out_qemu_ld(s, args, MO_TESL);
1608 break;
1609 case INDEX_op_qemu_ld64:
1610 tcg_out_qemu_ld(s, args, MO_TEQ);
1611 break;
1612 case INDEX_op_qemu_st8:
1613 tcg_out_qemu_st(s, args, MO_UB);
1614 break;
1615 case INDEX_op_qemu_st16:
1616 tcg_out_qemu_st(s, args, MO_TEUW);
1617 break;
1618 case INDEX_op_qemu_st32:
1619 tcg_out_qemu_st(s, args, MO_TEUL);
1620 break;
1621 case INDEX_op_qemu_st64:
1622 tcg_out_qemu_st(s, args, MO_TEQ);
1623 break;
1624
1625 case INDEX_op_bswap32_i64:
1626 /* Despite the _i64, this is a 32-bit bswap. */
1627 ext = 0;
1628 /* FALLTHRU */
1629 case INDEX_op_bswap64_i64:
1630 case INDEX_op_bswap32_i32:
1631 tcg_out_rev(s, ext, a0, a1);
1632 break;
1633 case INDEX_op_bswap16_i64:
1634 case INDEX_op_bswap16_i32:
1635 tcg_out_rev16(s, TCG_TYPE_I32, a0, a1);
1636 break;
1637
1638 case INDEX_op_ext8s_i64:
1639 case INDEX_op_ext8s_i32:
1640 tcg_out_sxt(s, ext, MO_8, a0, a1);
1641 break;
1642 case INDEX_op_ext16s_i64:
1643 case INDEX_op_ext16s_i32:
1644 tcg_out_sxt(s, ext, MO_16, a0, a1);
1645 break;
1646 case INDEX_op_ext32s_i64:
1647 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1648 break;
1649 case INDEX_op_ext8u_i64:
1650 case INDEX_op_ext8u_i32:
1651 tcg_out_uxt(s, MO_8, a0, a1);
1652 break;
1653 case INDEX_op_ext16u_i64:
1654 case INDEX_op_ext16u_i32:
1655 tcg_out_uxt(s, MO_16, a0, a1);
1656 break;
1657 case INDEX_op_ext32u_i64:
1658 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1659 break;
1660
1661 case INDEX_op_deposit_i64:
1662 case INDEX_op_deposit_i32:
1663 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1664 break;
1665
1666 case INDEX_op_add2_i32:
1667 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1668 (int32_t)args[4], args[5], const_args[4],
1669 const_args[5], false);
1670 break;
1671 case INDEX_op_add2_i64:
1672 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1673 args[5], const_args[4], const_args[5], false);
1674 break;
1675 case INDEX_op_sub2_i32:
1676 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1677 (int32_t)args[4], args[5], const_args[4],
1678 const_args[5], true);
1679 break;
1680 case INDEX_op_sub2_i64:
1681 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1682 args[5], const_args[4], const_args[5], true);
1683 break;
1684
1685 case INDEX_op_muluh_i64:
1686 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1687 break;
1688 case INDEX_op_mulsh_i64:
1689 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1690 break;
1691
1692 case INDEX_op_mov_i64:
1693 case INDEX_op_mov_i32:
1694 case INDEX_op_movi_i64:
1695 case INDEX_op_movi_i32:
1696 /* Always implemented with tcg_out_mov/i, never with tcg_out_op. */
1697 default:
1698 /* Opcode not implemented. */
1699 tcg_abort();
1700 }
1701
1702 #undef REG0
1703 }
1704
1705 static const TCGTargetOpDef aarch64_op_defs[] = {
1706 { INDEX_op_exit_tb, { } },
1707 { INDEX_op_goto_tb, { } },
1708 { INDEX_op_call, { "ri" } },
1709 { INDEX_op_br, { } },
1710
1711 { INDEX_op_mov_i32, { "r", "r" } },
1712 { INDEX_op_mov_i64, { "r", "r" } },
1713
1714 { INDEX_op_movi_i32, { "r" } },
1715 { INDEX_op_movi_i64, { "r" } },
1716
1717 { INDEX_op_ld8u_i32, { "r", "r" } },
1718 { INDEX_op_ld8s_i32, { "r", "r" } },
1719 { INDEX_op_ld16u_i32, { "r", "r" } },
1720 { INDEX_op_ld16s_i32, { "r", "r" } },
1721 { INDEX_op_ld_i32, { "r", "r" } },
1722 { INDEX_op_ld8u_i64, { "r", "r" } },
1723 { INDEX_op_ld8s_i64, { "r", "r" } },
1724 { INDEX_op_ld16u_i64, { "r", "r" } },
1725 { INDEX_op_ld16s_i64, { "r", "r" } },
1726 { INDEX_op_ld32u_i64, { "r", "r" } },
1727 { INDEX_op_ld32s_i64, { "r", "r" } },
1728 { INDEX_op_ld_i64, { "r", "r" } },
1729
1730 { INDEX_op_st8_i32, { "r", "r" } },
1731 { INDEX_op_st16_i32, { "r", "r" } },
1732 { INDEX_op_st_i32, { "r", "r" } },
1733 { INDEX_op_st8_i64, { "r", "r" } },
1734 { INDEX_op_st16_i64, { "r", "r" } },
1735 { INDEX_op_st32_i64, { "r", "r" } },
1736 { INDEX_op_st_i64, { "r", "r" } },
1737
1738 { INDEX_op_add_i32, { "r", "r", "rwA" } },
1739 { INDEX_op_add_i64, { "r", "r", "rA" } },
1740 { INDEX_op_sub_i32, { "r", "r", "rwA" } },
1741 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1742 { INDEX_op_mul_i32, { "r", "r", "r" } },
1743 { INDEX_op_mul_i64, { "r", "r", "r" } },
1744 { INDEX_op_div_i32, { "r", "r", "r" } },
1745 { INDEX_op_div_i64, { "r", "r", "r" } },
1746 { INDEX_op_divu_i32, { "r", "r", "r" } },
1747 { INDEX_op_divu_i64, { "r", "r", "r" } },
1748 { INDEX_op_rem_i32, { "r", "r", "r" } },
1749 { INDEX_op_rem_i64, { "r", "r", "r" } },
1750 { INDEX_op_remu_i32, { "r", "r", "r" } },
1751 { INDEX_op_remu_i64, { "r", "r", "r" } },
1752 { INDEX_op_and_i32, { "r", "r", "rwL" } },
1753 { INDEX_op_and_i64, { "r", "r", "rL" } },
1754 { INDEX_op_or_i32, { "r", "r", "rwL" } },
1755 { INDEX_op_or_i64, { "r", "r", "rL" } },
1756 { INDEX_op_xor_i32, { "r", "r", "rwL" } },
1757 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1758 { INDEX_op_andc_i32, { "r", "r", "rwL" } },
1759 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1760 { INDEX_op_orc_i32, { "r", "r", "rwL" } },
1761 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1762 { INDEX_op_eqv_i32, { "r", "r", "rwL" } },
1763 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1764
1765 { INDEX_op_neg_i32, { "r", "r" } },
1766 { INDEX_op_neg_i64, { "r", "r" } },
1767 { INDEX_op_not_i32, { "r", "r" } },
1768 { INDEX_op_not_i64, { "r", "r" } },
1769
1770 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1771 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1772 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1773 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1774 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1775 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1776 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1777 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1778 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1779 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1780
1781 { INDEX_op_brcond_i32, { "r", "rwA" } },
1782 { INDEX_op_brcond_i64, { "r", "rA" } },
1783 { INDEX_op_setcond_i32, { "r", "r", "rwA" } },
1784 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1785 { INDEX_op_movcond_i32, { "r", "r", "rwA", "rZ", "rZ" } },
1786 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1787
1788 { INDEX_op_qemu_ld8u, { "r", "l" } },
1789 { INDEX_op_qemu_ld8s, { "r", "l" } },
1790 { INDEX_op_qemu_ld16u, { "r", "l" } },
1791 { INDEX_op_qemu_ld16s, { "r", "l" } },
1792 { INDEX_op_qemu_ld32u, { "r", "l" } },
1793 { INDEX_op_qemu_ld32s, { "r", "l" } },
1794
1795 { INDEX_op_qemu_ld32, { "r", "l" } },
1796 { INDEX_op_qemu_ld64, { "r", "l" } },
1797
1798 { INDEX_op_qemu_st8, { "l", "l" } },
1799 { INDEX_op_qemu_st16, { "l", "l" } },
1800 { INDEX_op_qemu_st32, { "l", "l" } },
1801 { INDEX_op_qemu_st64, { "l", "l" } },
1802
1803 { INDEX_op_bswap16_i32, { "r", "r" } },
1804 { INDEX_op_bswap32_i32, { "r", "r" } },
1805 { INDEX_op_bswap16_i64, { "r", "r" } },
1806 { INDEX_op_bswap32_i64, { "r", "r" } },
1807 { INDEX_op_bswap64_i64, { "r", "r" } },
1808
1809 { INDEX_op_ext8s_i32, { "r", "r" } },
1810 { INDEX_op_ext16s_i32, { "r", "r" } },
1811 { INDEX_op_ext8u_i32, { "r", "r" } },
1812 { INDEX_op_ext16u_i32, { "r", "r" } },
1813
1814 { INDEX_op_ext8s_i64, { "r", "r" } },
1815 { INDEX_op_ext16s_i64, { "r", "r" } },
1816 { INDEX_op_ext32s_i64, { "r", "r" } },
1817 { INDEX_op_ext8u_i64, { "r", "r" } },
1818 { INDEX_op_ext16u_i64, { "r", "r" } },
1819 { INDEX_op_ext32u_i64, { "r", "r" } },
1820
1821 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1822 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1823
1824 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
1825 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1826 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
1827 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1828
1829 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1830 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1831
1832 { -1 },
1833 };
1834
1835 static void tcg_target_init(TCGContext *s)
1836 {
1837 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1838 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1839
1840 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1841 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1842 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1843 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1844 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1845 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1846 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1847 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1848 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1849 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1850 (1 << TCG_REG_X18) | (1 << TCG_REG_X30));
1851
1852 tcg_regset_clear(s->reserved_regs);
1853 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1854 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1855 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1856 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1857
1858 tcg_add_target_add_op_defs(aarch64_op_defs);
1859 }
1860
1861 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
1862 #define PUSH_SIZE ((30 - 19 + 1) * 8)
1863
1864 #define FRAME_SIZE \
1865 ((PUSH_SIZE \
1866 + TCG_STATIC_CALL_ARGS_SIZE \
1867 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
1868 + TCG_TARGET_STACK_ALIGN - 1) \
1869 & ~(TCG_TARGET_STACK_ALIGN - 1))
1870
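/* Resulting frame layout, from low to high addresses:
     sp + 0                            outgoing helper call arguments
     sp + TCG_STATIC_CALL_ARGS_SIZE    CPU_TEMP_BUF for TCG locals
     sp + FRAME_SIZE - PUSH_SIZE       saved FP, LR, then x19..x28  */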
1871 /* We're expecting a 2 byte uleb128 encoded value. */
1872 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
1873
1874 /* We're expecting to use a single ADDI insn. */
1875 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
1876
1877 static void tcg_target_qemu_prologue(TCGContext *s)
1878 {
1879 TCGReg r;
1880
1881 /* Push (FP, LR) and allocate space for all saved registers. */
1882 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
1883 TCG_REG_SP, -PUSH_SIZE, 1, 1);
1884
1885 /* Set up frame pointer for canonical unwinding. */
1886 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1887
1888 /* Store callee-preserved regs x19..x28. */
1889 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1890 int ofs = (r - TCG_REG_X19 + 2) * 8;
1891 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1892 }
1893
1894 /* Make stack space for TCG locals. */
1895 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1896 FRAME_SIZE - PUSH_SIZE);
1897
1898 /* Inform TCG about how to find TCG locals with register, offset, size. */
1899 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1900 CPU_TEMP_BUF_NLONGS * sizeof(long));
1901
1902 #if defined(CONFIG_USE_GUEST_BASE)
1903 if (GUEST_BASE) {
1904 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
1905 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1906 }
1907 #endif
1908
1909 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1910 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
1911
1912 tb_ret_addr = s->code_ptr;
1913
1914 /* Remove TCG locals stack space. */
1915 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1916 FRAME_SIZE - PUSH_SIZE);
1917
1918 /* Restore registers x19..x28. */
1919 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1920 int ofs = (r - TCG_REG_X19 + 2) * 8;
1921 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1922 }
1923
1924 /* Pop (FP, LR), restore SP to previous frame. */
1925 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
1926 TCG_REG_SP, PUSH_SIZE, 0, 1);
1927 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
1928 }
1929
1930 typedef struct {
1931 DebugFrameCIE cie;
1932 DebugFrameFDEHeader fde;
1933 uint8_t fde_def_cfa[4];
1934 uint8_t fde_reg_ofs[24];
1935 } DebugFrame;
1936
1937 #define ELF_HOST_MACHINE EM_AARCH64
1938
1939 static DebugFrame debug_frame = {
1940 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1941 .cie.id = -1,
1942 .cie.version = 1,
1943 .cie.code_align = 1,
1944 .cie.data_align = 0x78, /* sleb128 -8 */
1945 .cie.return_column = TCG_REG_LR,
1946
1947 /* Total FDE size does not include the "len" member. */
1948 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
1949
1950 .fde_def_cfa = {
1951 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
1952 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
1953 (FRAME_SIZE >> 7)
1954 },
1955 .fde_reg_ofs = {
1956 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
1957 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
1958 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
1959 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
1960 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
1961 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
1962 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
1963 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
1964 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
965 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
1966 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
1967 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
1968 }
1969 };
1970
1971 void tcg_register_jit(void *buf, size_t buf_size)
1972 {
1973 debug_frame.fde.func_start = (intptr_t)buf;
1974 debug_frame.fde.func_len = buf_size;
1975
1976 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
1977 }