1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-be-ldst.h"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType to set the SF bit, which controls
17    the size of the operation performed.  If we know the values match, it
18    makes things much cleaner.  */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifndef NDEBUG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28",
27 "%fp", /* frame pointer */
28 "%lr", /* link register */
29 "%sp", /* stack pointer */
30 };
31 #endif /* NDEBUG */
32
33 #ifdef TARGET_WORDS_BIGENDIAN
34 #define TCG_LDST_BSWAP 1
35 #else
36 #define TCG_LDST_BSWAP 0
37 #endif
38
39 static const int tcg_target_reg_alloc_order[] = {
40 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
41 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
42 TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
43
44 TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, TCG_REG_X12,
45 TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
46 TCG_REG_X16, TCG_REG_X17,
47
48 TCG_REG_X18, TCG_REG_X19, /* will not use these, see tcg_target_init */
49
50 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
51 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
52
53 TCG_REG_X8, /* will not use, see tcg_target_init */
54 };
55
56 static const int tcg_target_call_iarg_regs[8] = {
57 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
58 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
59 };
60 static const int tcg_target_call_oarg_regs[1] = {
61 TCG_REG_X0
62 };
63
64 #define TCG_REG_TMP TCG_REG_X8
65
66 #ifndef CONFIG_SOFTMMU
67 # if defined(CONFIG_USE_GUEST_BASE)
68 # define TCG_REG_GUEST_BASE TCG_REG_X28
69 # else
70 # define TCG_REG_GUEST_BASE TCG_REG_XZR
71 # endif
72 #endif
73
74 static inline void reloc_pc26(void *code_ptr, intptr_t target)
75 {
76 intptr_t offset = (target - (intptr_t)code_ptr) / 4;
77 /* read instruction, mask away previous PC_REL26 parameter contents,
78 set the proper offset, then write back the instruction. */
79 uint32_t insn = *(uint32_t *)code_ptr;
80 insn = deposit32(insn, 0, 26, offset);
81 *(uint32_t *)code_ptr = insn;
82 }
83
84 static inline void reloc_pc19(void *code_ptr, intptr_t target)
85 {
86 intptr_t offset = (target - (intptr_t)code_ptr) / 4;
87 /* read instruction, mask away previous PC_REL19 parameter contents,
88 set the proper offset, then write back the instruction. */
89 uint32_t insn = *(uint32_t *)code_ptr;
90 insn = deposit32(insn, 5, 19, offset);
91 *(uint32_t *)code_ptr = insn;
92 }
93
94 static inline void patch_reloc(uint8_t *code_ptr, int type,
95 intptr_t value, intptr_t addend)
96 {
97 value += addend;
98
99 switch (type) {
100 case R_AARCH64_JUMP26:
101 case R_AARCH64_CALL26:
102 reloc_pc26(code_ptr, value);
103 break;
104 case R_AARCH64_CONDBR19:
105 reloc_pc19(code_ptr, value);
106 break;
107
108 default:
109 tcg_abort();
110 }
111 }
112
113 #define TCG_CT_CONST_IS32 0x100
114 #define TCG_CT_CONST_AIMM 0x200
115 #define TCG_CT_CONST_LIMM 0x400
116 #define TCG_CT_CONST_ZERO 0x800
117 #define TCG_CT_CONST_MONE 0x1000
118
119 /* parse target specific constraints */
120 static int target_parse_constraint(TCGArgConstraint *ct,
121 const char **pct_str)
122 {
123 const char *ct_str = *pct_str;
124
125 switch (ct_str[0]) {
126 case 'r':
127 ct->ct |= TCG_CT_REG;
128 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
129 break;
130 case 'l': /* qemu_ld / qemu_st address, data_reg */
131 ct->ct |= TCG_CT_REG;
132 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
133 #ifdef CONFIG_SOFTMMU
134 /* x0 and x1 will be overwritten when reading the tlb entry,
135    and x2 and x3 are needed for the helper args; better to avoid using them. */
136 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
137 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
138 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
139 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
140 #endif
141 break;
142 case 'w': /* The operand should be considered 32-bit. */
143 ct->ct |= TCG_CT_CONST_IS32;
144 break;
145 case 'A': /* Valid for arithmetic immediate (positive or negative). */
146 ct->ct |= TCG_CT_CONST_AIMM;
147 break;
148 case 'L': /* Valid for logical immediate. */
149 ct->ct |= TCG_CT_CONST_LIMM;
150 break;
151 case 'M': /* minus one */
152 ct->ct |= TCG_CT_CONST_MONE;
153 break;
154 case 'Z': /* zero */
155 ct->ct |= TCG_CT_CONST_ZERO;
156 break;
157 default:
158 return -1;
159 }
160
161 ct_str++;
162 *pct_str = ct_str;
163 return 0;
164 }
165
166 static inline bool is_aimm(uint64_t val)
167 {
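    /* An arithmetic immediate is an unsigned 12-bit value, optionally shifted
       left by 12: e.g. 0xfff and 0xfff000 both qualify, while 0x1001 does not. */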
168 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
169 }
170
171 static inline bool is_limm(uint64_t val)
172 {
173 /* Taking a simplified view of the logical immediates for now, ignoring
174 the replication that can happen across the field. Match bit patterns
175 of the forms
176 0....01....1
177 0..01..10..0
178 and their inverses. */
179
180 /* Make things easier below, by testing the form with msb clear. */
181 if ((int64_t)val < 0) {
182 val = ~val;
183 }
184 if (val == 0) {
185 return false;
186 }
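    /* Example: val = 0x0ff0 (form 0..01..10..0): val & -val = 0x0010, so the
       addition below yields 0x1000, a power of two, and the final test passes.
       val = 0x0f0f fails: adding its lowest bit gives 0x0f10, which still has
       several bits set. */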
187 val += val & -val;
188 return (val & (val - 1)) == 0;
189 }
190
191 static int tcg_target_const_match(tcg_target_long val,
192 const TCGArgConstraint *arg_ct)
193 {
194 int ct = arg_ct->ct;
195
196 if (ct & TCG_CT_CONST) {
197 return 1;
198 }
199 if (ct & TCG_CT_CONST_IS32) {
200 val = (int32_t)val;
201 }
202 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
203 return 1;
204 }
205 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
206 return 1;
207 }
208 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
209 return 1;
210 }
211 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
212 return 1;
213 }
214
215 return 0;
216 }
217
218 enum aarch64_cond_code {
219 COND_EQ = 0x0,
220 COND_NE = 0x1,
221 COND_CS = 0x2, /* Unsigned greater or equal */
222 COND_HS = COND_CS, /* ALIAS greater or equal */
223 COND_CC = 0x3, /* Unsigned less than */
224 COND_LO = COND_CC, /* ALIAS Lower */
225 COND_MI = 0x4, /* Negative */
226 COND_PL = 0x5, /* Zero or greater */
227 COND_VS = 0x6, /* Overflow */
228 COND_VC = 0x7, /* No overflow */
229 COND_HI = 0x8, /* Unsigned greater than */
230 COND_LS = 0x9, /* Unsigned less or equal */
231 COND_GE = 0xa,
232 COND_LT = 0xb,
233 COND_GT = 0xc,
234 COND_LE = 0xd,
235 COND_AL = 0xe,
236 COND_NV = 0xf, /* behaves like COND_AL here */
237 };
238
239 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
240 [TCG_COND_EQ] = COND_EQ,
241 [TCG_COND_NE] = COND_NE,
242 [TCG_COND_LT] = COND_LT,
243 [TCG_COND_GE] = COND_GE,
244 [TCG_COND_LE] = COND_LE,
245 [TCG_COND_GT] = COND_GT,
246 /* unsigned */
247 [TCG_COND_LTU] = COND_LO,
248 [TCG_COND_GTU] = COND_HI,
249 [TCG_COND_GEU] = COND_HS,
250 [TCG_COND_LEU] = COND_LS,
251 };
252
253 /* opcodes for LDR / STR instructions with base + simm9 addressing */
254 enum aarch64_ldst_op_data { /* size of the data moved */
255 LDST_8 = 0x38,
256 LDST_16 = 0x78,
257 LDST_32 = 0xb8,
258 LDST_64 = 0xf8,
259 };
260 enum aarch64_ldst_op_type { /* type of operation */
261 LDST_ST = 0x0, /* store */
262 LDST_LD = 0x4, /* load */
263 LDST_LD_S_X = 0x8, /* load and sign-extend into Xt */
264 LDST_LD_S_W = 0xc, /* load and sign-extend into Wt */
265 };
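/* e.g. combining LDST_32 with LDST_LD in tcg_out_ldst_9 below yields
   0xb8400000, the LDUR Wt, [Xn, #simm9] encoding. */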
266
267 /* We encode the format of the insn into the beginning of the name, so that
268 we can have the preprocessor help "typecheck" the insn vs the output
269 function. Arm didn't provide us with nice names for the formats, so we
270 use the section number of the architecture reference manual in which the
271 instruction group is described. */
272 typedef enum {
273 /* Conditional branch (immediate). */
274 I3202_B_C = 0x54000000,
275
276 /* Unconditional branch (immediate). */
277 I3206_B = 0x14000000,
278 I3206_BL = 0x94000000,
279
280 /* Unconditional branch (register). */
281 I3207_BR = 0xd61f0000,
282 I3207_BLR = 0xd63f0000,
283 I3207_RET = 0xd65f0000,
284
285 /* Add/subtract immediate instructions. */
286 I3401_ADDI = 0x11000000,
287 I3401_ADDSI = 0x31000000,
288 I3401_SUBI = 0x51000000,
289 I3401_SUBSI = 0x71000000,
290
291 /* Bitfield instructions. */
292 I3402_BFM = 0x33000000,
293 I3402_SBFM = 0x13000000,
294 I3402_UBFM = 0x53000000,
295
296 /* Extract instruction. */
297 I3403_EXTR = 0x13800000,
298
299 /* Logical immediate instructions. */
300 I3404_ANDI = 0x12000000,
301 I3404_ORRI = 0x32000000,
302 I3404_EORI = 0x52000000,
303
304 /* Move wide immediate instructions. */
305 I3405_MOVN = 0x12800000,
306 I3405_MOVZ = 0x52800000,
307 I3405_MOVK = 0x72800000,
308
309 /* PC relative addressing instructions. */
310 I3406_ADR = 0x10000000,
311 I3406_ADRP = 0x90000000,
312
313 /* Add/subtract shifted register instructions (without a shift). */
314 I3502_ADD = 0x0b000000,
315 I3502_ADDS = 0x2b000000,
316 I3502_SUB = 0x4b000000,
317 I3502_SUBS = 0x6b000000,
318
319 /* Add/subtract shifted register instructions (with a shift). */
320 I3502S_ADD_LSL = I3502_ADD,
321
322 /* Add/subtract with carry instructions. */
323 I3503_ADC = 0x1a000000,
324 I3503_SBC = 0x5a000000,
325
326 /* Conditional select instructions. */
327 I3506_CSEL = 0x1a800000,
328 I3506_CSINC = 0x1a800400,
329
330 /* Data-processing (2 source) instructions. */
331 I3508_LSLV = 0x1ac02000,
332 I3508_LSRV = 0x1ac02400,
333 I3508_ASRV = 0x1ac02800,
334 I3508_RORV = 0x1ac02c00,
335 I3508_SMULH = 0x9b407c00,
336 I3508_UMULH = 0x9bc07c00,
337 I3508_UDIV = 0x1ac00800,
338 I3508_SDIV = 0x1ac00c00,
339
340 /* Data-processing (3 source) instructions. */
341 I3509_MADD = 0x1b000000,
342 I3509_MSUB = 0x1b008000,
343
344 /* Logical shifted register instructions (without a shift). */
345 I3510_AND = 0x0a000000,
346 I3510_BIC = 0x0a200000,
347 I3510_ORR = 0x2a000000,
348 I3510_ORN = 0x2a200000,
349 I3510_EOR = 0x4a000000,
350 I3510_EON = 0x4a200000,
351 I3510_ANDS = 0x6a000000,
352 } AArch64Insn;
353
354 static inline enum aarch64_ldst_op_data
355 aarch64_ldst_get_data(TCGOpcode tcg_op)
356 {
357 switch (tcg_op) {
358 case INDEX_op_ld8u_i32:
359 case INDEX_op_ld8s_i32:
360 case INDEX_op_ld8u_i64:
361 case INDEX_op_ld8s_i64:
362 case INDEX_op_st8_i32:
363 case INDEX_op_st8_i64:
364 return LDST_8;
365
366 case INDEX_op_ld16u_i32:
367 case INDEX_op_ld16s_i32:
368 case INDEX_op_ld16u_i64:
369 case INDEX_op_ld16s_i64:
370 case INDEX_op_st16_i32:
371 case INDEX_op_st16_i64:
372 return LDST_16;
373
374 case INDEX_op_ld_i32:
375 case INDEX_op_st_i32:
376 case INDEX_op_ld32u_i64:
377 case INDEX_op_ld32s_i64:
378 case INDEX_op_st32_i64:
379 return LDST_32;
380
381 case INDEX_op_ld_i64:
382 case INDEX_op_st_i64:
383 return LDST_64;
384
385 default:
386 tcg_abort();
387 }
388 }
389
390 static inline enum aarch64_ldst_op_type
391 aarch64_ldst_get_type(TCGOpcode tcg_op)
392 {
393 switch (tcg_op) {
394 case INDEX_op_st8_i32:
395 case INDEX_op_st16_i32:
396 case INDEX_op_st8_i64:
397 case INDEX_op_st16_i64:
398 case INDEX_op_st_i32:
399 case INDEX_op_st32_i64:
400 case INDEX_op_st_i64:
401 return LDST_ST;
402
403 case INDEX_op_ld8u_i32:
404 case INDEX_op_ld16u_i32:
405 case INDEX_op_ld8u_i64:
406 case INDEX_op_ld16u_i64:
407 case INDEX_op_ld_i32:
408 case INDEX_op_ld32u_i64:
409 case INDEX_op_ld_i64:
410 return LDST_LD;
411
412 case INDEX_op_ld8s_i32:
413 case INDEX_op_ld16s_i32:
414 return LDST_LD_S_W;
415
416 case INDEX_op_ld8s_i64:
417 case INDEX_op_ld16s_i64:
418 case INDEX_op_ld32s_i64:
419 return LDST_LD_S_X;
420
421 default:
422 tcg_abort();
423 }
424 }
425
426 static inline uint32_t tcg_in32(TCGContext *s)
427 {
428 uint32_t v = *(uint32_t *)s->code_ptr;
429 return v;
430 }
431
432 /* Emit an opcode with "type-checking" of the format. */
433 #define tcg_out_insn(S, FMT, OP, ...) \
434 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
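/* e.g. tcg_out_insn(s, 3401, ADDI, ...) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ...); naming an opcode from the wrong
   group (say ADD, which is 3502) leaves an undefined I3401_ADD and fails
   to compile. */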
435
436 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
437 TCGCond c, int imm19)
438 {
439 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
440 }
441
442 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
443 {
444 tcg_out32(s, insn | (imm26 & 0x03ffffff));
445 }
446
447 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
448 {
449 tcg_out32(s, insn | rn << 5);
450 }
451
452 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
453 TCGReg rd, TCGReg rn, uint64_t aimm)
454 {
455 if (aimm > 0xfff) {
456 assert((aimm & 0xfff) == 0);
457 aimm >>= 12;
458 assert(aimm <= 0xfff);
459 aimm |= 1 << 12; /* apply LSL 12 */
460 }
461 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
462 }
463
464 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
465 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
466 that feed the DecodeBitMasks pseudo function. */
467 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
468 TCGReg rd, TCGReg rn, int n, int immr, int imms)
469 {
470 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
471 | rn << 5 | rd);
472 }
473
474 #define tcg_out_insn_3404 tcg_out_insn_3402
475
476 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
477 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
478 {
479 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
480 | rn << 5 | rd);
481 }
482
483 /* This function is used for the Move (wide immediate) instruction group.
484 Note that SHIFT is a full shift count, not the 2 bit HW field. */
485 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
486 TCGReg rd, uint16_t half, unsigned shift)
487 {
488 assert((shift & ~0x30) == 0);
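    /* shift is 0, 16, 32 or 48; the << (21 - 4) below places shift / 16
       into the 2-bit hw field at bit 21. */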
489 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
490 }
491
492 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
493 TCGReg rd, int64_t disp)
494 {
495 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
496 }
497
498 /* This function is for 3.5.2 (Add/subtract shifted register), for
499    the rare occasion when we actually want to supply a shift amount. */
500 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
501 TCGType ext, TCGReg rd, TCGReg rn,
502 TCGReg rm, int imm6)
503 {
504 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
505 }
506
507 /* This function is for 3.5.2 (Add/subtract shifted register),
508    and 3.5.10 (Logical shifted register), for the vast majority of cases
509 when we don't want to apply a shift. Thus it can also be used for
510 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
511 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
512 TCGReg rd, TCGReg rn, TCGReg rm)
513 {
514 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
515 }
516
517 #define tcg_out_insn_3503 tcg_out_insn_3502
518 #define tcg_out_insn_3508 tcg_out_insn_3502
519 #define tcg_out_insn_3510 tcg_out_insn_3502
520
521 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
522 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
523 {
524 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
525 | tcg_cond_to_aarch64[c] << 12);
526 }
527
528 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
529 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
530 {
531 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
532 }
533
534
535 static inline void tcg_out_ldst_9(TCGContext *s,
536 enum aarch64_ldst_op_data op_data,
537 enum aarch64_ldst_op_type op_type,
538 TCGReg rd, TCGReg rn, intptr_t offset)
539 {
540    /* use an LDUR-style encoding: base register plus a 9-bit signed unscaled offset */
541 tcg_out32(s, op_data << 24 | op_type << 20
542 | (offset & 0x1ff) << 12 | rn << 5 | rd);
543 }
544
545 /* tcg_out_ldst_12 expects a scaled unsigned immediate offset */
546 static inline void tcg_out_ldst_12(TCGContext *s,
547 enum aarch64_ldst_op_data op_data,
548 enum aarch64_ldst_op_type op_type,
549 TCGReg rd, TCGReg rn,
550 tcg_target_ulong scaled_uimm)
551 {
552 tcg_out32(s, (op_data | 1) << 24
553 | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
554 }
555
556 /* Register to register move using ORR (shifted register with no shift). */
557 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
558 {
559 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
560 }
561
562 /* Register to register move using ADDI (move to/from SP). */
563 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
564 {
565 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
566 }
567
568 /* This function is used for the Logical (immediate) instruction group.
569 The value of LIMM must satisfy IS_LIMM. See the comment above about
570 only supporting simplified logical immediates. */
571 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
572 TCGReg rd, TCGReg rn, uint64_t limm)
573 {
574 unsigned h, l, r, c;
575
576 assert(is_limm(limm));
577
578 h = clz64(limm);
579 l = ctz64(limm);
580 if (l == 0) {
581 r = 0; /* form 0....01....1 */
582 c = ctz64(~limm) - 1;
583 if (h == 0) {
584 r = clz64(~limm); /* form 1..10..01..1 */
585 c += r;
586 }
587 } else {
588 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
589 c = r - h - 1;
590 }
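    /* Worked example (sketch): limm = 0xff0 gives h = 52, l = 4, hence
       r = 60 and c = 7: an 8-bit run of ones rotated right by 60 positions,
       i.e. 0xff << 4. */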
591 if (ext == TCG_TYPE_I32) {
592 r &= 31;
593 c &= 31;
594 }
595
596 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
597 }
598
599 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
600 tcg_target_long value)
601 {
602 AArch64Insn insn;
603 int i, wantinv, shift;
604 tcg_target_long svalue = value;
605 tcg_target_long ivalue = ~value;
606 tcg_target_long imask;
607
608 /* For 32-bit values, discard potential garbage in value. For 64-bit
609 values within [2**31, 2**32-1], we can create smaller sequences by
610 interpreting this as a negative 32-bit number, while ensuring that
611 the high 32 bits are cleared by setting SF=0. */
612 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
613 svalue = (int32_t)value;
614 value = (uint32_t)value;
615 ivalue = (uint32_t)ivalue;
616 type = TCG_TYPE_I32;
617 }
618
619 /* Speed things up by handling the common case of small positive
620 and negative values specially. */
621 if ((value & ~0xffffull) == 0) {
622 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
623 return;
624 } else if ((ivalue & ~0xffffull) == 0) {
625 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
626 return;
627 }
628
629 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
630 use the sign-extended value. That lets us match rotated values such
631 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
632 if (is_limm(svalue)) {
633 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
634 return;
635 }
636
637 /* Look for host pointer values within 4G of the PC. This happens
638 often when loading pointers to QEMU's own data structures. */
639 if (type == TCG_TYPE_I64) {
640 tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
641 if (disp == sextract64(disp, 0, 21)) {
642 tcg_out_insn(s, 3406, ADRP, rd, disp);
643 if (value & 0xfff) {
644 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
645 }
646 return;
647 }
648 }
649
650 /* Would it take fewer insns to begin with MOVN? For the value and its
651 inverse, count the number of 16-bit lanes that are 0. */
652 for (i = wantinv = imask = 0; i < 64; i += 16) {
653 tcg_target_long mask = 0xffffull << i;
654 if ((value & mask) == 0) {
655 wantinv -= 1;
656 }
657 if ((ivalue & mask) == 0) {
658 wantinv += 1;
659 imask |= mask;
660 }
661 }
662
663 /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */
664 insn = I3405_MOVZ;
665 if (wantinv > 0) {
666 value = ivalue;
667 insn = I3405_MOVN;
668 }
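    /* Example (sketch): value = 0xffff1234ffff5678 has no zero lanes but its
       inverse has two, so we emit MOVN xd, #0xa987 here and a single
       MOVK xd, #0x1234, lsl #32 in the loop below. */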
669
670 /* Find the lowest lane that is not 0x0000. */
671 shift = ctz64(value) & (63 & -16);
672 tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift);
673
674 if (wantinv > 0) {
675 /* Re-invert the value, so MOVK sees non-inverted bits. */
676 value = ~value;
677 /* Clear out all the 0xffff lanes. */
678 value ^= imask;
679 }
680 /* Clear out the lane that we just set. */
681 value &= ~(0xffffUL << shift);
682
683 /* Iterate until all lanes have been set, and thus cleared from VALUE. */
684 while (value) {
685 shift = ctz64(value) & (63 & -16);
686 tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
687 value &= ~(0xffffUL << shift);
688 }
689 }
690
691 static inline void tcg_out_ldst_r(TCGContext *s,
692 enum aarch64_ldst_op_data op_data,
693 enum aarch64_ldst_op_type op_type,
694 TCGReg rd, TCGReg base, TCGReg regoff)
695 {
696    /* load/store between a register and memory, using base + 64-bit register offset */
697    /* e.g. STR Wt, [Xn, Xm]: 0xb8206800 | (regoff << 16) | (base << 5) | rd */
698    /* the 0x6000 is the "no extend" (LSL) option field */
699 tcg_out32(s, 0x00206800
700 | op_data << 24 | op_type << 20 | regoff << 16 | base << 5 | rd);
701 }
702
703 /* Dispatch a load/store with an arbitrary offset to the best available encoding. */
704 static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
705 enum aarch64_ldst_op_type type,
706 TCGReg rd, TCGReg rn, intptr_t offset)
707 {
708 if (offset >= -256 && offset < 256) {
709 tcg_out_ldst_9(s, data, type, rd, rn, offset);
710 return;
711 }
712
713 if (offset >= 256) {
714 /* if the offset is naturally aligned and in range,
715 then we can use the scaled uimm12 encoding */
716 unsigned int s_bits = data >> 6;
717 if (!(offset & ((1 << s_bits) - 1))) {
718 tcg_target_ulong scaled_uimm = offset >> s_bits;
719 if (scaled_uimm <= 0xfff) {
720 tcg_out_ldst_12(s, data, type, rd, rn, scaled_uimm);
721 return;
722 }
723 }
724 }
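    /* e.g. a 32-bit access at offset 0x1008: s_bits = 2, the offset is
       4-byte aligned and 0x1008 >> 2 = 0x402 fits in 12 bits, so the
       scaled uimm12 form applies. */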
725
726 /* worst-case scenario, move offset to temp register, use reg offset */
727 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
728 tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP);
729 }
730
731 static inline void tcg_out_mov(TCGContext *s,
732 TCGType type, TCGReg ret, TCGReg arg)
733 {
734 if (ret != arg) {
735 tcg_out_movr(s, type, ret, arg);
736 }
737 }
738
739 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
740 TCGReg arg1, intptr_t arg2)
741 {
742 tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_LD,
743 arg, arg1, arg2);
744 }
745
746 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
747 TCGReg arg1, intptr_t arg2)
748 {
749 tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_ST,
750 arg, arg1, arg2);
751 }
752
753 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
754 TCGReg rn, unsigned int a, unsigned int b)
755 {
756 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
757 }
758
759 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
760 TCGReg rn, unsigned int a, unsigned int b)
761 {
762 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
763 }
764
765 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
766 TCGReg rn, unsigned int a, unsigned int b)
767 {
768 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
769 }
770
771 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
772 TCGReg rn, TCGReg rm, unsigned int a)
773 {
774 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
775 }
776
777 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
778 TCGReg rd, TCGReg rn, unsigned int m)
779 {
780 int bits = ext ? 64 : 32;
781 int max = bits - 1;
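    /* LSL #m is the UBFM alias with immr = bits - m and imms = bits - 1 - m;
       e.g. a 32-bit shift left by 4 emits UBFM Wd, Wn, #28, #27. */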
782 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
783 }
784
785 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
786 TCGReg rd, TCGReg rn, unsigned int m)
787 {
788 int max = ext ? 63 : 31;
789 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
790 }
791
792 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
793 TCGReg rd, TCGReg rn, unsigned int m)
794 {
795 int max = ext ? 63 : 31;
796 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
797 }
798
799 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
800 TCGReg rd, TCGReg rn, unsigned int m)
801 {
802 int max = ext ? 63 : 31;
803 tcg_out_extr(s, ext, rd, rn, rn, m & max);
804 }
805
806 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
807 TCGReg rd, TCGReg rn, unsigned int m)
808 {
809 int bits = ext ? 64 : 32;
810 int max = bits - 1;
811 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
812 }
813
814 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
815 TCGReg rn, unsigned lsb, unsigned width)
816 {
817 unsigned size = ext ? 64 : 32;
818 unsigned a = (size - lsb) & (size - 1);
819 unsigned b = width - 1;
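    /* e.g. lsb = 8, width = 8 on a 32-bit deposit gives BFM Wd, Wn, #24, #7,
       inserting Wn<7:0> into Wd<15:8>. */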
820 tcg_out_bfm(s, ext, rd, rn, a, b);
821 }
822
823 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
824 tcg_target_long b, bool const_b)
825 {
826 if (const_b) {
827 /* Using CMP or CMN aliases. */
828 if (b >= 0) {
829 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
830 } else {
831 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
832 }
833 } else {
834 /* Using CMP alias SUBS wzr, Wn, Wm */
835 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
836 }
837 }
838
839 static inline void tcg_out_goto(TCGContext *s, intptr_t target)
840 {
841 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
842
843 if (offset < -0x02000000 || offset >= 0x02000000) {
844 /* out of 26bit range */
845 tcg_abort();
846 }
847
848 tcg_out_insn(s, 3206, B, offset);
849 }
850
851 static inline void tcg_out_goto_noaddr(TCGContext *s)
852 {
853 /* We pay attention here to not modify the branch target by reading from
854       the buffer. This ensures that caches and memory are kept coherent during
855 retranslation. Mask away possible garbage in the high bits for the
856 first translation, while keeping the offset bits for retranslation. */
857 uint32_t old = tcg_in32(s);
858 tcg_out_insn(s, 3206, B, old);
859 }
860
861 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
862 {
863 /* See comments in tcg_out_goto_noaddr. */
864 uint32_t old = tcg_in32(s) >> 5;
865 tcg_out_insn(s, 3202, B_C, c, old);
866 }
867
868 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
869 {
870 tcg_out_insn(s, 3207, BLR, reg);
871 }
872
873 static inline void tcg_out_call(TCGContext *s, intptr_t target)
874 {
875 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
876
877 if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit rng */
878 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
879 tcg_out_callr(s, TCG_REG_TMP);
880 } else {
881 tcg_out_insn(s, 3206, BL, offset);
882 }
883 }
884
885 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
886 {
887 intptr_t target = addr;
888 intptr_t offset = (target - (intptr_t)jmp_addr) / 4;
889
890 if (offset < -0x02000000 || offset >= 0x02000000) {
891 /* out of 26bit range */
892 tcg_abort();
893 }
894
895 patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0);
896 flush_icache_range(jmp_addr, jmp_addr + 4);
897 }
898
899 static inline void tcg_out_goto_label(TCGContext *s, int label_index)
900 {
901 TCGLabel *l = &s->labels[label_index];
902
903 if (!l->has_value) {
904 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0);
905 tcg_out_goto_noaddr(s);
906 } else {
907 tcg_out_goto(s, l->u.value);
908 }
909 }
910
911 static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
912 TCGArg b, bool b_const, int label)
913 {
914 TCGLabel *l = &s->labels[label];
915 intptr_t offset;
916
917 tcg_out_cmp(s, ext, a, b, b_const);
918
919 if (!l->has_value) {
920 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label, 0);
921 offset = tcg_in32(s) >> 5;
922 } else {
923 offset = l->u.value - (uintptr_t)s->code_ptr;
924 offset >>= 2;
925 assert(offset >= -0x40000 && offset < 0x40000);
926 }
927
928 tcg_out_insn(s, 3202, B_C, c, offset);
929 }
930
931 static inline void tcg_out_rev(TCGContext *s, TCGType ext,
932 TCGReg rd, TCGReg rm)
933 {
934 /* using REV 0x5ac00800 */
935 unsigned int base = ext ? 0xdac00c00 : 0x5ac00800;
936 tcg_out32(s, base | rm << 5 | rd);
937 }
938
939 static inline void tcg_out_rev16(TCGContext *s, TCGType ext,
940 TCGReg rd, TCGReg rm)
941 {
942 /* using REV16 0x5ac00400 */
943 unsigned int base = ext ? 0xdac00400 : 0x5ac00400;
944 tcg_out32(s, base | rm << 5 | rd);
945 }
946
947 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
948 TCGReg rd, TCGReg rn)
949 {
950 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
951 int bits = (8 << s_bits) - 1;
952 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
953 }
954
955 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
956 TCGReg rd, TCGReg rn)
957 {
958 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
959 int bits = (8 << s_bits) - 1;
960 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
961 }
962
963 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
964 TCGReg rn, int64_t aimm)
965 {
966 if (aimm >= 0) {
967 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
968 } else {
969 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
970 }
971 }
972
973 static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
974 TCGReg rh, TCGReg al, TCGReg ah,
975 tcg_target_long bl, tcg_target_long bh,
976 bool const_bl, bool const_bh, bool sub)
977 {
978 TCGReg orig_rl = rl;
979 AArch64Insn insn;
980
981 if (rl == ah || (!const_bh && rl == bh)) {
982 rl = TCG_REG_TMP;
983 }
984
985 if (const_bl) {
986 insn = I3401_ADDSI;
987 if ((bl < 0) ^ sub) {
988 insn = I3401_SUBSI;
989 bl = -bl;
990 }
991 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
992 } else {
993 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
994 }
995
996 insn = I3503_ADC;
997 if (const_bh) {
998 /* Note that the only two constants we support are 0 and -1, and
999 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1000 if ((bh != 0) ^ sub) {
1001 insn = I3503_SBC;
1002 }
1003 bh = TCG_REG_XZR;
1004 } else if (sub) {
1005 insn = I3503_SBC;
1006 }
1007 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1008
1009 if (rl != orig_rl) {
1010 tcg_out_movr(s, ext, orig_rl, rl);
1011 }
1012 }
1013
1014 #ifdef CONFIG_SOFTMMU
1015 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1016 * int mmu_idx, uintptr_t ra)
1017 */
1018 static const void * const qemu_ld_helpers[4] = {
1019 helper_ret_ldub_mmu,
1020 helper_ret_lduw_mmu,
1021 helper_ret_ldul_mmu,
1022 helper_ret_ldq_mmu,
1023 };
1024
1025 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1026 * uintxx_t val, int mmu_idx, uintptr_t ra)
1027 */
1028 static const void * const qemu_st_helpers[4] = {
1029 helper_ret_stb_mmu,
1030 helper_ret_stw_mmu,
1031 helper_ret_stl_mmu,
1032 helper_ret_stq_mmu,
1033 };
1034
1035 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1036 {
1037 TCGMemOp opc = lb->opc;
1038 TCGMemOp size = opc & MO_SIZE;
1039
1040 reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
1041
1042 tcg_out_movr(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
1043 tcg_out_movr(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1044 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
1045 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X3, (intptr_t)lb->raddr);
1046 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)qemu_ld_helpers[size]);
1047 tcg_out_callr(s, TCG_REG_TMP);
1048 if (opc & MO_SIGN) {
1049 tcg_out_sxt(s, TCG_TYPE_I64, size, lb->datalo_reg, TCG_REG_X0);
1050 } else {
1051 tcg_out_movr(s, TCG_TYPE_I64, lb->datalo_reg, TCG_REG_X0);
1052 }
1053
1054 tcg_out_goto(s, (intptr_t)lb->raddr);
1055 }
1056
1057 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1058 {
1059 TCGMemOp size = lb->opc;
1060
1061 reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
1062
1063 tcg_out_movr(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
1064 tcg_out_movr(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1065 tcg_out_movr(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1066 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
1067 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X4, (intptr_t)lb->raddr);
1068 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)qemu_st_helpers[size]);
1069 tcg_out_callr(s, TCG_REG_TMP);
1070 tcg_out_goto(s, (intptr_t)lb->raddr);
1071 }
1072
1073 static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
1074 TCGReg data_reg, TCGReg addr_reg,
1075 int mem_index,
1076 uint8_t *raddr, uint8_t *label_ptr)
1077 {
1078 TCGLabelQemuLdst *label = new_ldst_label(s);
1079
1080 label->is_ld = is_ld;
1081 label->opc = opc;
1082 label->datalo_reg = data_reg;
1083 label->addrlo_reg = addr_reg;
1084 label->mem_index = mem_index;
1085 label->raddr = raddr;
1086 label->label_ptr[0] = label_ptr;
1087 }
1088
1089 /* Load and compare a TLB entry, emitting the conditional jump to the
1090 slow path for the failure case, which will be patched later when finalizing
1091 the slow path. Generated code returns the host addend in X1,
1092 clobbers X0,X2,X3,TMP. */
1093 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
1094 int s_bits, uint8_t **label_ptr, int mem_index, int is_read)
1095 {
1096 TCGReg base = TCG_AREG0;
1097 int tlb_offset = is_read ?
1098 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1099 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1100 /* Extract the TLB index from the address into X0.
1101 X0<CPU_TLB_BITS:0> =
1102 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1103 tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg,
1104 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1105 /* Store the page mask part of the address and the low s_bits into X3.
1106 Later this allows checking for equality and alignment at the same time.
1107 X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
1108 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
1109 addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
1110 /* Add any "high bits" from the tlb offset to the env address into X2,
1111 to take advantage of the LSL12 form of the ADDI instruction.
1112 X2 = env + (tlb_offset & 0xfff000) */
1113 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1114 tlb_offset & 0xfff000);
1115 /* Merge the tlb index contribution into X2.
1116 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1117 tcg_out_insn(s, 3502S, ADD_LSL, 1, TCG_REG_X2, TCG_REG_X2,
1118 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1119 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1120 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1121 tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
1122 LDST_LD, TCG_REG_X0, TCG_REG_X2,
1123 (tlb_offset & 0xfff));
1124 /* Load the tlb addend. Do that early to avoid stalling.
1125 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1126 tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2,
1127 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1128 (is_read ? offsetof(CPUTLBEntry, addr_read)
1129 : offsetof(CPUTLBEntry, addr_write)));
1130 /* Perform the address comparison. */
1131 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1132 *label_ptr = s->code_ptr;
1133 /* If not equal, we jump to the slow path. */
1134 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1135 }
1136
1137 #endif /* CONFIG_SOFTMMU */
1138
1139 static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r,
1140 TCGReg addr_r, TCGReg off_r)
1141 {
1142 switch (opc) {
1143 case 0:
1144 tcg_out_ldst_r(s, LDST_8, LDST_LD, data_r, addr_r, off_r);
1145 break;
1146 case 0 | 4:
1147 tcg_out_ldst_r(s, LDST_8, LDST_LD_S_X, data_r, addr_r, off_r);
1148 break;
1149 case 1:
1150 tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
1151 if (TCG_LDST_BSWAP) {
1152 tcg_out_rev16(s, TCG_TYPE_I32, data_r, data_r);
1153 }
1154 break;
1155 case 1 | 4:
1156 if (TCG_LDST_BSWAP) {
1157 tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
1158 tcg_out_rev16(s, TCG_TYPE_I32, data_r, data_r);
1159 tcg_out_sxt(s, TCG_TYPE_I64, MO_16, data_r, data_r);
1160 } else {
1161 tcg_out_ldst_r(s, LDST_16, LDST_LD_S_X, data_r, addr_r, off_r);
1162 }
1163 break;
1164 case 2:
1165 tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
1166 if (TCG_LDST_BSWAP) {
1167 tcg_out_rev(s, TCG_TYPE_I32, data_r, data_r);
1168 }
1169 break;
1170 case 2 | 4:
1171 if (TCG_LDST_BSWAP) {
1172 tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
1173 tcg_out_rev(s, TCG_TYPE_I32, data_r, data_r);
1174 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1175 } else {
1176 tcg_out_ldst_r(s, LDST_32, LDST_LD_S_X, data_r, addr_r, off_r);
1177 }
1178 break;
1179 case 3:
1180 tcg_out_ldst_r(s, LDST_64, LDST_LD, data_r, addr_r, off_r);
1181 if (TCG_LDST_BSWAP) {
1182 tcg_out_rev(s, TCG_TYPE_I64, data_r, data_r);
1183 }
1184 break;
1185 default:
1186 tcg_abort();
1187 }
1188 }
1189
1190 static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data_r,
1191 TCGReg addr_r, TCGReg off_r)
1192 {
1193 switch (opc) {
1194 case 0:
1195 tcg_out_ldst_r(s, LDST_8, LDST_ST, data_r, addr_r, off_r);
1196 break;
1197 case 1:
1198 if (TCG_LDST_BSWAP) {
1199 tcg_out_rev16(s, TCG_TYPE_I32, TCG_REG_TMP, data_r);
1200 tcg_out_ldst_r(s, LDST_16, LDST_ST, TCG_REG_TMP, addr_r, off_r);
1201 } else {
1202 tcg_out_ldst_r(s, LDST_16, LDST_ST, data_r, addr_r, off_r);
1203 }
1204 break;
1205 case 2:
1206 if (TCG_LDST_BSWAP) {
1207 tcg_out_rev(s, TCG_TYPE_I32, TCG_REG_TMP, data_r);
1208 tcg_out_ldst_r(s, LDST_32, LDST_ST, TCG_REG_TMP, addr_r, off_r);
1209 } else {
1210 tcg_out_ldst_r(s, LDST_32, LDST_ST, data_r, addr_r, off_r);
1211 }
1212 break;
1213 case 3:
1214 if (TCG_LDST_BSWAP) {
1215 tcg_out_rev(s, TCG_TYPE_I64, TCG_REG_TMP, data_r);
1216 tcg_out_ldst_r(s, LDST_64, LDST_ST, TCG_REG_TMP, addr_r, off_r);
1217 } else {
1218 tcg_out_ldst_r(s, LDST_64, LDST_ST, data_r, addr_r, off_r);
1219 }
1220 break;
1221 default:
1222 tcg_abort();
1223 }
1224 }
1225
1226 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
1227 {
1228 TCGReg addr_reg, data_reg;
1229 #ifdef CONFIG_SOFTMMU
1230 int mem_index, s_bits;
1231 uint8_t *label_ptr;
1232 #endif
1233 data_reg = args[0];
1234 addr_reg = args[1];
1235
1236 #ifdef CONFIG_SOFTMMU
1237 mem_index = args[2];
1238 s_bits = opc & 3;
1239 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
1240 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
1241 add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg,
1242 mem_index, s->code_ptr, label_ptr);
1243 #else /* !CONFIG_SOFTMMU */
1244 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg,
1245 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1246 #endif /* CONFIG_SOFTMMU */
1247 }
1248
1249 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
1250 {
1251 TCGReg addr_reg, data_reg;
1252 #ifdef CONFIG_SOFTMMU
1253 int mem_index, s_bits;
1254 uint8_t *label_ptr;
1255 #endif
1256 data_reg = args[0];
1257 addr_reg = args[1];
1258
1259 #ifdef CONFIG_SOFTMMU
1260 mem_index = args[2];
1261 s_bits = opc & 3;
1262
1263 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
1264 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
1265 add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg,
1266 mem_index, s->code_ptr, label_ptr);
1267 #else /* !CONFIG_SOFTMMU */
1268 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg,
1269 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1270 #endif /* CONFIG_SOFTMMU */
1271 }
1272
1273 static uint8_t *tb_ret_addr;
1274
1275 /* callee stack use example:
1276 stp x29, x30, [sp,#-32]!
1277 mov x29, sp
1278 stp x1, x2, [sp,#16]
1279 ...
1280 ldp x1, x2, [sp,#16]
1281 ldp x29, x30, [sp],#32
1282 ret
1283 */
1284
1285 /* push r1 and r2, and allocate stack space for a total of
1286    alloc_n elements (1 element = 16 bytes; alloc_n must be between 1 and 31). */
1287 static inline void tcg_out_push_pair(TCGContext *s, TCGReg addr,
1288 TCGReg r1, TCGReg r2, int alloc_n)
1289 {
1290 /* using indexed scaled simm7 STP 0x28800000 | (ext) | 0x01000000 (pre-idx)
1291 | alloc_n * (-1) << 16 | r2 << 10 | addr << 5 | r1 */
1292 assert(alloc_n > 0 && alloc_n < 0x20);
1293 alloc_n = (-alloc_n) & 0x3f;
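    /* e.g. alloc_n = 6 encodes imm7 = -12 (in units of 8 bytes),
       i.e. STP r1, r2, [addr, #-96]!. */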
1294 tcg_out32(s, 0xa9800000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
1295 }
1296
1297 /* dealloc stack space for a total of alloc_n elements and pop r1, r2. */
1298 static inline void tcg_out_pop_pair(TCGContext *s, TCGReg addr,
1299 TCGReg r1, TCGReg r2, int alloc_n)
1300 {
1301 /* using indexed scaled simm7 LDP 0x28c00000 | (ext) | nothing (post-idx)
1302 | alloc_n << 16 | r2 << 10 | addr << 5 | r1 */
1303 assert(alloc_n > 0 && alloc_n < 0x20);
1304 tcg_out32(s, 0xa8c00000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
1305 }
1306
1307 static inline void tcg_out_store_pair(TCGContext *s, TCGReg addr,
1308 TCGReg r1, TCGReg r2, int idx)
1309 {
1310 /* using register pair offset simm7 STP 0x29000000 | (ext)
1311 | idx << 16 | r2 << 10 | addr << 5 | r1 */
1312 assert(idx > 0 && idx < 0x20);
1313 tcg_out32(s, 0xa9000000 | idx << 16 | r2 << 10 | addr << 5 | r1);
1314 }
1315
1316 static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
1317 TCGReg r1, TCGReg r2, int idx)
1318 {
1319 /* using register pair offset simm7 LDP 0x29400000 | (ext)
1320 | idx << 16 | r2 << 10 | addr << 5 | r1 */
1321 assert(idx > 0 && idx < 0x20);
1322 tcg_out32(s, 0xa9400000 | idx << 16 | r2 << 10 | addr << 5 | r1);
1323 }
1324
1325 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1326 const TCGArg args[TCG_MAX_OP_ARGS],
1327 const int const_args[TCG_MAX_OP_ARGS])
1328 {
1329 /* 99% of the time, we can signal the use of extension registers
1330 by looking to see if the opcode handles 64-bit data. */
1331 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1332
1333 /* Hoist the loads of the most common arguments. */
1334 TCGArg a0 = args[0];
1335 TCGArg a1 = args[1];
1336 TCGArg a2 = args[2];
1337 int c2 = const_args[2];
1338
1339 /* Some operands are defined with "rZ" constraint, a register or
1340 the zero register. These need not actually test args[I] == 0. */
1341 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1342
1343 switch (opc) {
1344 case INDEX_op_exit_tb:
1345 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1346 tcg_out_goto(s, (intptr_t)tb_ret_addr);
1347 break;
1348
1349 case INDEX_op_goto_tb:
1350 #ifndef USE_DIRECT_JUMP
1351 #error "USE_DIRECT_JUMP required for aarch64"
1352 #endif
1353 assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
1354 s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
1355 /* actual branch destination will be patched by
1356 aarch64_tb_set_jmp_target later, beware retranslation. */
1357 tcg_out_goto_noaddr(s);
1358 s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
1359 break;
1360
1361 case INDEX_op_call:
1362 if (const_args[0]) {
1363 tcg_out_call(s, a0);
1364 } else {
1365 tcg_out_callr(s, a0);
1366 }
1367 break;
1368
1369 case INDEX_op_br:
1370 tcg_out_goto_label(s, a0);
1371 break;
1372
1373 case INDEX_op_ld_i32:
1374 case INDEX_op_ld_i64:
1375 case INDEX_op_st_i32:
1376 case INDEX_op_st_i64:
1377 case INDEX_op_ld8u_i32:
1378 case INDEX_op_ld8s_i32:
1379 case INDEX_op_ld16u_i32:
1380 case INDEX_op_ld16s_i32:
1381 case INDEX_op_ld8u_i64:
1382 case INDEX_op_ld8s_i64:
1383 case INDEX_op_ld16u_i64:
1384 case INDEX_op_ld16s_i64:
1385 case INDEX_op_ld32u_i64:
1386 case INDEX_op_ld32s_i64:
1387 case INDEX_op_st8_i32:
1388 case INDEX_op_st8_i64:
1389 case INDEX_op_st16_i32:
1390 case INDEX_op_st16_i64:
1391 case INDEX_op_st32_i64:
1392 tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
1393 a0, a1, a2);
1394 break;
1395
1396 case INDEX_op_add_i32:
1397 a2 = (int32_t)a2;
1398 /* FALLTHRU */
1399 case INDEX_op_add_i64:
1400 if (c2) {
1401 tcg_out_addsubi(s, ext, a0, a1, a2);
1402 } else {
1403 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1404 }
1405 break;
1406
1407 case INDEX_op_sub_i32:
1408 a2 = (int32_t)a2;
1409 /* FALLTHRU */
1410 case INDEX_op_sub_i64:
1411 if (c2) {
1412 tcg_out_addsubi(s, ext, a0, a1, -a2);
1413 } else {
1414 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1415 }
1416 break;
1417
1418 case INDEX_op_neg_i64:
1419 case INDEX_op_neg_i32:
1420 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1421 break;
1422
1423 case INDEX_op_and_i32:
1424 a2 = (int32_t)a2;
1425 /* FALLTHRU */
1426 case INDEX_op_and_i64:
1427 if (c2) {
1428 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1429 } else {
1430 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1431 }
1432 break;
1433
1434 case INDEX_op_andc_i32:
1435 a2 = (int32_t)a2;
1436 /* FALLTHRU */
1437 case INDEX_op_andc_i64:
1438 if (c2) {
1439 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1440 } else {
1441 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1442 }
1443 break;
1444
1445 case INDEX_op_or_i32:
1446 a2 = (int32_t)a2;
1447 /* FALLTHRU */
1448 case INDEX_op_or_i64:
1449 if (c2) {
1450 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1451 } else {
1452 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1453 }
1454 break;
1455
1456 case INDEX_op_orc_i32:
1457 a2 = (int32_t)a2;
1458 /* FALLTHRU */
1459 case INDEX_op_orc_i64:
1460 if (c2) {
1461 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1462 } else {
1463 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1464 }
1465 break;
1466
1467 case INDEX_op_xor_i32:
1468 a2 = (int32_t)a2;
1469 /* FALLTHRU */
1470 case INDEX_op_xor_i64:
1471 if (c2) {
1472 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1473 } else {
1474 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1475 }
1476 break;
1477
1478 case INDEX_op_eqv_i32:
1479 a2 = (int32_t)a2;
1480 /* FALLTHRU */
1481 case INDEX_op_eqv_i64:
1482 if (c2) {
1483 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1484 } else {
1485 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1486 }
1487 break;
1488
1489 case INDEX_op_not_i64:
1490 case INDEX_op_not_i32:
1491 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1492 break;
1493
1494 case INDEX_op_mul_i64:
1495 case INDEX_op_mul_i32:
1496 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1497 break;
1498
1499 case INDEX_op_div_i64:
1500 case INDEX_op_div_i32:
1501 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1502 break;
1503 case INDEX_op_divu_i64:
1504 case INDEX_op_divu_i32:
1505 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1506 break;
1507
1508 case INDEX_op_rem_i64:
1509 case INDEX_op_rem_i32:
1510 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1511 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1512 break;
1513 case INDEX_op_remu_i64:
1514 case INDEX_op_remu_i32:
1515 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1516 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1517 break;
1518
1519 case INDEX_op_shl_i64:
1520 case INDEX_op_shl_i32:
1521 if (c2) {
1522 tcg_out_shl(s, ext, a0, a1, a2);
1523 } else {
1524 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1525 }
1526 break;
1527
1528 case INDEX_op_shr_i64:
1529 case INDEX_op_shr_i32:
1530 if (c2) {
1531 tcg_out_shr(s, ext, a0, a1, a2);
1532 } else {
1533 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1534 }
1535 break;
1536
1537 case INDEX_op_sar_i64:
1538 case INDEX_op_sar_i32:
1539 if (c2) {
1540 tcg_out_sar(s, ext, a0, a1, a2);
1541 } else {
1542 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1543 }
1544 break;
1545
1546 case INDEX_op_rotr_i64:
1547 case INDEX_op_rotr_i32:
1548 if (c2) {
1549 tcg_out_rotr(s, ext, a0, a1, a2);
1550 } else {
1551 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1552 }
1553 break;
1554
1555 case INDEX_op_rotl_i64:
1556 case INDEX_op_rotl_i32:
1557 if (c2) {
1558 tcg_out_rotl(s, ext, a0, a1, a2);
1559 } else {
1560 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1561 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1562 }
1563 break;
1564
1565 case INDEX_op_brcond_i32:
1566 a1 = (int32_t)a1;
1567 /* FALLTHRU */
1568 case INDEX_op_brcond_i64:
1569 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], args[3]);
1570 break;
1571
1572 case INDEX_op_setcond_i32:
1573 a2 = (int32_t)a2;
1574 /* FALLTHRU */
1575 case INDEX_op_setcond_i64:
1576 tcg_out_cmp(s, ext, a1, a2, c2);
1577 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1578 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1579 TCG_REG_XZR, tcg_invert_cond(args[3]));
1580 break;
1581
1582 case INDEX_op_movcond_i32:
1583 a2 = (int32_t)a2;
1584 /* FALLTHRU */
1585 case INDEX_op_movcond_i64:
1586 tcg_out_cmp(s, ext, a1, a2, c2);
1587 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1588 break;
1589
1590 case INDEX_op_qemu_ld8u:
1591 tcg_out_qemu_ld(s, args, 0 | 0);
1592 break;
1593 case INDEX_op_qemu_ld8s:
1594 tcg_out_qemu_ld(s, args, 4 | 0);
1595 break;
1596 case INDEX_op_qemu_ld16u:
1597 tcg_out_qemu_ld(s, args, 0 | 1);
1598 break;
1599 case INDEX_op_qemu_ld16s:
1600 tcg_out_qemu_ld(s, args, 4 | 1);
1601 break;
1602 case INDEX_op_qemu_ld32u:
1603 tcg_out_qemu_ld(s, args, 0 | 2);
1604 break;
1605 case INDEX_op_qemu_ld32s:
1606 tcg_out_qemu_ld(s, args, 4 | 2);
1607 break;
1608 case INDEX_op_qemu_ld32:
1609 tcg_out_qemu_ld(s, args, 0 | 2);
1610 break;
1611 case INDEX_op_qemu_ld64:
1612 tcg_out_qemu_ld(s, args, 0 | 3);
1613 break;
1614 case INDEX_op_qemu_st8:
1615 tcg_out_qemu_st(s, args, 0);
1616 break;
1617 case INDEX_op_qemu_st16:
1618 tcg_out_qemu_st(s, args, 1);
1619 break;
1620 case INDEX_op_qemu_st32:
1621 tcg_out_qemu_st(s, args, 2);
1622 break;
1623 case INDEX_op_qemu_st64:
1624 tcg_out_qemu_st(s, args, 3);
1625 break;
1626
1627 case INDEX_op_bswap32_i64:
1628 /* Despite the _i64, this is a 32-bit bswap. */
1629 ext = 0;
1630 /* FALLTHRU */
1631 case INDEX_op_bswap64_i64:
1632 case INDEX_op_bswap32_i32:
1633 tcg_out_rev(s, ext, a0, a1);
1634 break;
1635 case INDEX_op_bswap16_i64:
1636 case INDEX_op_bswap16_i32:
1637 tcg_out_rev16(s, TCG_TYPE_I32, a0, a1);
1638 break;
1639
1640 case INDEX_op_ext8s_i64:
1641 case INDEX_op_ext8s_i32:
1642 tcg_out_sxt(s, ext, MO_8, a0, a1);
1643 break;
1644 case INDEX_op_ext16s_i64:
1645 case INDEX_op_ext16s_i32:
1646 tcg_out_sxt(s, ext, MO_16, a0, a1);
1647 break;
1648 case INDEX_op_ext32s_i64:
1649 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1650 break;
1651 case INDEX_op_ext8u_i64:
1652 case INDEX_op_ext8u_i32:
1653 tcg_out_uxt(s, MO_8, a0, a1);
1654 break;
1655 case INDEX_op_ext16u_i64:
1656 case INDEX_op_ext16u_i32:
1657 tcg_out_uxt(s, MO_16, a0, a1);
1658 break;
1659 case INDEX_op_ext32u_i64:
1660 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1661 break;
1662
1663 case INDEX_op_deposit_i64:
1664 case INDEX_op_deposit_i32:
1665 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1666 break;
1667
1668 case INDEX_op_add2_i32:
1669 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1670 (int32_t)args[4], args[5], const_args[4],
1671 const_args[5], false);
1672 break;
1673 case INDEX_op_add2_i64:
1674 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1675 args[5], const_args[4], const_args[5], false);
1676 break;
1677 case INDEX_op_sub2_i32:
1678 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1679 (int32_t)args[4], args[5], const_args[4],
1680 const_args[5], true);
1681 break;
1682 case INDEX_op_sub2_i64:
1683 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1684 args[5], const_args[4], const_args[5], true);
1685 break;
1686
1687 case INDEX_op_muluh_i64:
1688 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1689 break;
1690 case INDEX_op_mulsh_i64:
1691 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1692 break;
1693
1694 case INDEX_op_mov_i64:
1695 case INDEX_op_mov_i32:
1696 case INDEX_op_movi_i64:
1697 case INDEX_op_movi_i32:
1698 /* Always implemented with tcg_out_mov/i, never with tcg_out_op. */
1699 default:
1700 /* Opcode not implemented. */
1701 tcg_abort();
1702 }
1703
1704 #undef REG0
1705 }
1706
1707 static const TCGTargetOpDef aarch64_op_defs[] = {
1708 { INDEX_op_exit_tb, { } },
1709 { INDEX_op_goto_tb, { } },
1710 { INDEX_op_call, { "ri" } },
1711 { INDEX_op_br, { } },
1712
1713 { INDEX_op_mov_i32, { "r", "r" } },
1714 { INDEX_op_mov_i64, { "r", "r" } },
1715
1716 { INDEX_op_movi_i32, { "r" } },
1717 { INDEX_op_movi_i64, { "r" } },
1718
1719 { INDEX_op_ld8u_i32, { "r", "r" } },
1720 { INDEX_op_ld8s_i32, { "r", "r" } },
1721 { INDEX_op_ld16u_i32, { "r", "r" } },
1722 { INDEX_op_ld16s_i32, { "r", "r" } },
1723 { INDEX_op_ld_i32, { "r", "r" } },
1724 { INDEX_op_ld8u_i64, { "r", "r" } },
1725 { INDEX_op_ld8s_i64, { "r", "r" } },
1726 { INDEX_op_ld16u_i64, { "r", "r" } },
1727 { INDEX_op_ld16s_i64, { "r", "r" } },
1728 { INDEX_op_ld32u_i64, { "r", "r" } },
1729 { INDEX_op_ld32s_i64, { "r", "r" } },
1730 { INDEX_op_ld_i64, { "r", "r" } },
1731
1732 { INDEX_op_st8_i32, { "r", "r" } },
1733 { INDEX_op_st16_i32, { "r", "r" } },
1734 { INDEX_op_st_i32, { "r", "r" } },
1735 { INDEX_op_st8_i64, { "r", "r" } },
1736 { INDEX_op_st16_i64, { "r", "r" } },
1737 { INDEX_op_st32_i64, { "r", "r" } },
1738 { INDEX_op_st_i64, { "r", "r" } },
1739
1740 { INDEX_op_add_i32, { "r", "r", "rwA" } },
1741 { INDEX_op_add_i64, { "r", "r", "rA" } },
1742 { INDEX_op_sub_i32, { "r", "r", "rwA" } },
1743 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1744 { INDEX_op_mul_i32, { "r", "r", "r" } },
1745 { INDEX_op_mul_i64, { "r", "r", "r" } },
1746 { INDEX_op_div_i32, { "r", "r", "r" } },
1747 { INDEX_op_div_i64, { "r", "r", "r" } },
1748 { INDEX_op_divu_i32, { "r", "r", "r" } },
1749 { INDEX_op_divu_i64, { "r", "r", "r" } },
1750 { INDEX_op_rem_i32, { "r", "r", "r" } },
1751 { INDEX_op_rem_i64, { "r", "r", "r" } },
1752 { INDEX_op_remu_i32, { "r", "r", "r" } },
1753 { INDEX_op_remu_i64, { "r", "r", "r" } },
1754 { INDEX_op_and_i32, { "r", "r", "rwL" } },
1755 { INDEX_op_and_i64, { "r", "r", "rL" } },
1756 { INDEX_op_or_i32, { "r", "r", "rwL" } },
1757 { INDEX_op_or_i64, { "r", "r", "rL" } },
1758 { INDEX_op_xor_i32, { "r", "r", "rwL" } },
1759 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1760 { INDEX_op_andc_i32, { "r", "r", "rwL" } },
1761 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1762 { INDEX_op_orc_i32, { "r", "r", "rwL" } },
1763 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1764 { INDEX_op_eqv_i32, { "r", "r", "rwL" } },
1765 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1766
1767 { INDEX_op_neg_i32, { "r", "r" } },
1768 { INDEX_op_neg_i64, { "r", "r" } },
1769 { INDEX_op_not_i32, { "r", "r" } },
1770 { INDEX_op_not_i64, { "r", "r" } },
1771
1772 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1773 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1774 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1775 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1776 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1777 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1778 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1779 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1780 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1781 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1782
1783 { INDEX_op_brcond_i32, { "r", "rwA" } },
1784 { INDEX_op_brcond_i64, { "r", "rA" } },
1785 { INDEX_op_setcond_i32, { "r", "r", "rwA" } },
1786 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1787 { INDEX_op_movcond_i32, { "r", "r", "rwA", "rZ", "rZ" } },
1788 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1789
1790 { INDEX_op_qemu_ld8u, { "r", "l" } },
1791 { INDEX_op_qemu_ld8s, { "r", "l" } },
1792 { INDEX_op_qemu_ld16u, { "r", "l" } },
1793 { INDEX_op_qemu_ld16s, { "r", "l" } },
1794 { INDEX_op_qemu_ld32u, { "r", "l" } },
1795 { INDEX_op_qemu_ld32s, { "r", "l" } },
1796
1797 { INDEX_op_qemu_ld32, { "r", "l" } },
1798 { INDEX_op_qemu_ld64, { "r", "l" } },
1799
1800 { INDEX_op_qemu_st8, { "l", "l" } },
1801 { INDEX_op_qemu_st16, { "l", "l" } },
1802 { INDEX_op_qemu_st32, { "l", "l" } },
1803 { INDEX_op_qemu_st64, { "l", "l" } },
1804
1805 { INDEX_op_bswap16_i32, { "r", "r" } },
1806 { INDEX_op_bswap32_i32, { "r", "r" } },
1807 { INDEX_op_bswap16_i64, { "r", "r" } },
1808 { INDEX_op_bswap32_i64, { "r", "r" } },
1809 { INDEX_op_bswap64_i64, { "r", "r" } },
1810
1811 { INDEX_op_ext8s_i32, { "r", "r" } },
1812 { INDEX_op_ext16s_i32, { "r", "r" } },
1813 { INDEX_op_ext8u_i32, { "r", "r" } },
1814 { INDEX_op_ext16u_i32, { "r", "r" } },
1815
1816 { INDEX_op_ext8s_i64, { "r", "r" } },
1817 { INDEX_op_ext16s_i64, { "r", "r" } },
1818 { INDEX_op_ext32s_i64, { "r", "r" } },
1819 { INDEX_op_ext8u_i64, { "r", "r" } },
1820 { INDEX_op_ext16u_i64, { "r", "r" } },
1821 { INDEX_op_ext32u_i64, { "r", "r" } },
1822
1823 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1824 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1825
1826 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
1827 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1828 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
1829 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1830
1831 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1832 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1833
1834 { -1 },
1835 };
1836
1837 static void tcg_target_init(TCGContext *s)
1838 {
1839 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1840 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1841
1842 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1843 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1844 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1845 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1846 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1847 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1848 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1849 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1850 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1851 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1852 (1 << TCG_REG_X18));
1853
1854 tcg_regset_clear(s->reserved_regs);
1855 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1856 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1857 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1858 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1859
1860 tcg_add_target_add_op_defs(aarch64_op_defs);
1861 }
1862
1863 static void tcg_target_qemu_prologue(TCGContext *s)
1864 {
1865 /* NB: frame sizes are in 16 byte stack units! */
1866 int frame_size_callee_saved, frame_size_tcg_locals;
1867 TCGReg r;
1868
1869 /* save pairs (FP, LR) and (X19, X20) .. (X27, X28) */
1870 frame_size_callee_saved = (1) + (TCG_REG_X28 - TCG_REG_X19) / 2 + 1;
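    /* i.e. (FP, LR) plus the five pairs (X19, X20) .. (X27, X28):
       6 units, 96 bytes. */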
1871
1872 /* frame size requirement for TCG local variables */
1873 frame_size_tcg_locals = TCG_STATIC_CALL_ARGS_SIZE
1874 + CPU_TEMP_BUF_NLONGS * sizeof(long)
1875 + (TCG_TARGET_STACK_ALIGN - 1);
1876 frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1);
1877 frame_size_tcg_locals /= TCG_TARGET_STACK_ALIGN;
1878
1879 /* push (FP, LR) and update sp */
1880 tcg_out_push_pair(s, TCG_REG_SP,
1881 TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
1882
1883 /* FP -> callee_saved */
1884 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1885
1886 /* store callee-preserved regs x19..x28 using FP -> callee_saved */
1887 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1888 int idx = (r - TCG_REG_X19) / 2 + 1;
1889 tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx);
1890 }
1891
1892 /* Make stack space for TCG locals. */
1893 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1894 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
1895
1896 /* inform TCG about how to find TCG locals with register, offset, size */
1897 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1898 CPU_TEMP_BUF_NLONGS * sizeof(long));
1899
1900 #if defined(CONFIG_USE_GUEST_BASE)
1901 if (GUEST_BASE) {
1902 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
1903 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1904 }
1905 #endif
1906
1907 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1908 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
1909
1910 tb_ret_addr = s->code_ptr;
1911
1912 /* Remove TCG locals stack space. */
1913 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1914 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
1915
1916 /* restore registers x19..x28.
1917       FP must be preserved, so it still points to the callee_saved area. */
1918 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1919 int idx = (r - TCG_REG_X19) / 2 + 1;
1920 tcg_out_load_pair(s, TCG_REG_FP, r, r + 1, idx);
1921 }
1922
1923 /* pop (FP, LR), restore SP to previous frame, return */
1924 tcg_out_pop_pair(s, TCG_REG_SP,
1925 TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
1926 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
1927 }