tcg/aarch64/tcg-target.c (mirror_qemu.git, commit 1d7612c6e6b9269703294fdd7abd1540a29244d0)
1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-be-ldst.h"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType in setting the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifndef NDEBUG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28",
27 "%fp", /* frame pointer */
28 "%lr", /* link register */
29 "%sp", /* stack pointer */
30 };
31 #endif /* NDEBUG */
32
33 #ifdef TARGET_WORDS_BIGENDIAN
34 #define TCG_LDST_BSWAP 1
35 #else
36 #define TCG_LDST_BSWAP 0
37 #endif
38
39 static const int tcg_target_reg_alloc_order[] = {
40 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
41 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
42 TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
43
44 TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, TCG_REG_X12,
45 TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
46 TCG_REG_X16, TCG_REG_X17,
47
48 TCG_REG_X18, TCG_REG_X19, /* will not use these, see tcg_target_init */
49
50 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
51 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
52
53 TCG_REG_X8, /* will not use, see tcg_target_init */
54 };
55
56 static const int tcg_target_call_iarg_regs[8] = {
57 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
58 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
59 };
60 static const int tcg_target_call_oarg_regs[1] = {
61 TCG_REG_X0
62 };
63
64 #define TCG_REG_TMP TCG_REG_X8
65
66 #ifndef CONFIG_SOFTMMU
67 # if defined(CONFIG_USE_GUEST_BASE)
68 # define TCG_REG_GUEST_BASE TCG_REG_X28
69 # else
70 # define TCG_REG_GUEST_BASE TCG_REG_XZR
71 # endif
72 #endif
73
74 static inline void reloc_pc26(void *code_ptr, intptr_t target)
75 {
76 intptr_t offset = (target - (intptr_t)code_ptr) / 4;
77 /* read instruction, mask away previous PC_REL26 parameter contents,
78 set the proper offset, then write back the instruction. */
79 uint32_t insn = *(uint32_t *)code_ptr;
80 insn = deposit32(insn, 0, 26, offset);
81 *(uint32_t *)code_ptr = insn;
82 }
83
84 static inline void reloc_pc19(void *code_ptr, intptr_t target)
85 {
86 intptr_t offset = (target - (intptr_t)code_ptr) / 4;
87 /* read instruction, mask away previous PC_REL19 parameter contents,
88 set the proper offset, then write back the instruction. */
89 uint32_t insn = *(uint32_t *)code_ptr;
90 insn = deposit32(insn, 5, 19, offset);
91 *(uint32_t *)code_ptr = insn;
92 }
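/* For example, patching a B at code_ptr 0x1000 so that it targets 0x1040
   deposits the word offset (0x1040 - 0x1000) / 4 = 0x10 into imm26
   (bits [25:0]); reloc_pc19 deposits the same kind of word offset into
   the imm19 field of a B.cond (bits [23:5]). */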
93
94 static inline void patch_reloc(uint8_t *code_ptr, int type,
95 intptr_t value, intptr_t addend)
96 {
97 value += addend;
98
99 switch (type) {
100 case R_AARCH64_JUMP26:
101 case R_AARCH64_CALL26:
102 reloc_pc26(code_ptr, value);
103 break;
104 case R_AARCH64_CONDBR19:
105 reloc_pc19(code_ptr, value);
106 break;
107
108 default:
109 tcg_abort();
110 }
111 }
112
113 #define TCG_CT_CONST_IS32 0x100
114 #define TCG_CT_CONST_AIMM 0x200
115 #define TCG_CT_CONST_LIMM 0x400
116 #define TCG_CT_CONST_ZERO 0x800
117 #define TCG_CT_CONST_MONE 0x1000
118
119 /* parse target specific constraints */
120 static int target_parse_constraint(TCGArgConstraint *ct,
121 const char **pct_str)
122 {
123 const char *ct_str = *pct_str;
124
125 switch (ct_str[0]) {
126 case 'r':
127 ct->ct |= TCG_CT_REG;
128 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
129 break;
130 case 'l': /* qemu_ld / qemu_st address, data_reg */
131 ct->ct |= TCG_CT_REG;
132 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
133 #ifdef CONFIG_SOFTMMU
134 /* x0 and x1 will be overwritten when reading the tlb entry,
135 and x2 and x3 are needed for helper args, so avoid using them. */
136 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
137 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
138 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
139 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
140 #endif
141 break;
142 case 'w': /* The operand should be considered 32-bit. */
143 ct->ct |= TCG_CT_CONST_IS32;
144 break;
145 case 'A': /* Valid for arithmetic immediate (positive or negative). */
146 ct->ct |= TCG_CT_CONST_AIMM;
147 break;
148 case 'L': /* Valid for logical immediate. */
149 ct->ct |= TCG_CT_CONST_LIMM;
150 break;
151 case 'M': /* minus one */
152 ct->ct |= TCG_CT_CONST_MONE;
153 break;
154 case 'Z': /* zero */
155 ct->ct |= TCG_CT_CONST_ZERO;
156 break;
157 default:
158 return -1;
159 }
160
161 ct_str++;
162 *pct_str = ct_str;
163 return 0;
164 }
165
166 static inline bool is_aimm(uint64_t val)
167 {
168 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
169 }
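/* E.g. 0x123 and 0x123000 are valid arithmetic immediates (12 bits,
   optionally shifted left by 12), while 0x123456 is not, since it would
   need set bits in both halves. */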
170
171 static inline bool is_limm(uint64_t val)
172 {
173 /* Taking a simplified view of the logical immediates for now, ignoring
174 the replication that can happen across the field. Match bit patterns
175 of the forms
176 0....01....1
177 0..01..10..0
178 and their inverses. */
179
180 /* Make things easier below, by testing the form with msb clear. */
181 if ((int64_t)val < 0) {
182 val = ~val;
183 }
184 if (val == 0) {
185 return false;
186 }
187 val += val & -val;
188 return (val & (val - 1)) == 0;
189 }
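/* Worked example of the test above: for val = 0x0ff0 the lowest set bit
   is 0x010, so val += val & -val gives 0x1000, a power of two, and the
   value is accepted (8 ones in bits [11:4]).  For val = 0x0f0f the same
   step gives 0x0f10, which still has multiple bits set, so the value is
   rejected. */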
190
191 static int tcg_target_const_match(tcg_target_long val,
192 const TCGArgConstraint *arg_ct)
193 {
194 int ct = arg_ct->ct;
195
196 if (ct & TCG_CT_CONST) {
197 return 1;
198 }
199 if (ct & TCG_CT_CONST_IS32) {
200 val = (int32_t)val;
201 }
202 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
203 return 1;
204 }
205 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
206 return 1;
207 }
208 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
209 return 1;
210 }
211 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
212 return 1;
213 }
214
215 return 0;
216 }
217
218 enum aarch64_cond_code {
219 COND_EQ = 0x0,
220 COND_NE = 0x1,
221 COND_CS = 0x2, /* Unsigned greater or equal */
222 COND_HS = COND_CS, /* ALIAS greater or equal */
223 COND_CC = 0x3, /* Unsigned less than */
224 COND_LO = COND_CC, /* ALIAS Lower */
225 COND_MI = 0x4, /* Negative */
226 COND_PL = 0x5, /* Zero or greater */
227 COND_VS = 0x6, /* Overflow */
228 COND_VC = 0x7, /* No overflow */
229 COND_HI = 0x8, /* Unsigned greater than */
230 COND_LS = 0x9, /* Unsigned less or equal */
231 COND_GE = 0xa,
232 COND_LT = 0xb,
233 COND_GT = 0xc,
234 COND_LE = 0xd,
235 COND_AL = 0xe,
236 COND_NV = 0xf, /* behaves like COND_AL here */
237 };
238
239 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
240 [TCG_COND_EQ] = COND_EQ,
241 [TCG_COND_NE] = COND_NE,
242 [TCG_COND_LT] = COND_LT,
243 [TCG_COND_GE] = COND_GE,
244 [TCG_COND_LE] = COND_LE,
245 [TCG_COND_GT] = COND_GT,
246 /* unsigned */
247 [TCG_COND_LTU] = COND_LO,
248 [TCG_COND_GTU] = COND_HI,
249 [TCG_COND_GEU] = COND_HS,
250 [TCG_COND_LEU] = COND_LS,
251 };
252
253 /* opcodes for LDR / STR instructions with base + simm9 addressing */
254 enum aarch64_ldst_op_data { /* size of the data moved */
255 LDST_8 = 0x38,
256 LDST_16 = 0x78,
257 LDST_32 = 0xb8,
258 LDST_64 = 0xf8,
259 };
260 enum aarch64_ldst_op_type { /* type of operation */
261 LDST_ST = 0x0, /* store */
262 LDST_LD = 0x4, /* load */
263 LDST_LD_S_X = 0x8, /* load and sign-extend into Xt */
264 LDST_LD_S_W = 0xc, /* load and sign-extend into Wt */
265 };
266
267 /* We encode the format of the insn into the beginning of the name, so that
268 we can have the preprocessor help "typecheck" the insn vs the output
269 function. Arm didn't provide us with nice names for the formats, so we
270 use the section number of the architecture reference manual in which the
271 instruction group is described. */
272 typedef enum {
273 /* Add/subtract immediate instructions. */
274 I3401_ADDI = 0x11000000,
275 I3401_ADDSI = 0x31000000,
276 I3401_SUBI = 0x51000000,
277 I3401_SUBSI = 0x71000000,
278
279 /* Bitfield instructions. */
280 I3402_BFM = 0x33000000,
281 I3402_SBFM = 0x13000000,
282 I3402_UBFM = 0x53000000,
283
284 /* Extract instruction. */
285 I3403_EXTR = 0x13800000,
286
287 /* Logical immediate instructions. */
288 I3404_ANDI = 0x12000000,
289 I3404_ORRI = 0x32000000,
290 I3404_EORI = 0x52000000,
291
292 /* Move wide immediate instructions. */
293 I3405_MOVN = 0x12800000,
294 I3405_MOVZ = 0x52800000,
295 I3405_MOVK = 0x72800000,
296
297 /* Add/subtract shifted register instructions (without a shift). */
298 I3502_ADD = 0x0b000000,
299 I3502_ADDS = 0x2b000000,
300 I3502_SUB = 0x4b000000,
301 I3502_SUBS = 0x6b000000,
302
303 /* Add/subtract shifted register instructions (with a shift). */
304 I3502S_ADD_LSL = I3502_ADD,
305
306 /* Add/subtract with carry instructions. */
307 I3503_ADC = 0x1a000000,
308 I3503_SBC = 0x5a000000,
309
310 /* Conditional select instructions. */
311 I3506_CSEL = 0x1a800000,
312 I3506_CSINC = 0x1a800400,
313
314 /* Data-processing (2 source) instructions. */
315 I3508_LSLV = 0x1ac02000,
316 I3508_LSRV = 0x1ac02400,
317 I3508_ASRV = 0x1ac02800,
318 I3508_RORV = 0x1ac02c00,
319 I3508_SMULH = 0x9b407c00,
320 I3508_UMULH = 0x9bc07c00,
321 I3508_UDIV = 0x1ac00800,
322 I3508_SDIV = 0x1ac00c00,
323
324 /* Data-processing (3 source) instructions. */
325 I3509_MADD = 0x1b000000,
326 I3509_MSUB = 0x1b008000,
327
328 /* Logical shifted register instructions (without a shift). */
329 I3510_AND = 0x0a000000,
330 I3510_BIC = 0x0a200000,
331 I3510_ORR = 0x2a000000,
332 I3510_ORN = 0x2a200000,
333 I3510_EOR = 0x4a000000,
334 I3510_EON = 0x4a200000,
335 I3510_ANDS = 0x6a000000,
336 } AArch64Insn;
337
338 static inline enum aarch64_ldst_op_data
339 aarch64_ldst_get_data(TCGOpcode tcg_op)
340 {
341 switch (tcg_op) {
342 case INDEX_op_ld8u_i32:
343 case INDEX_op_ld8s_i32:
344 case INDEX_op_ld8u_i64:
345 case INDEX_op_ld8s_i64:
346 case INDEX_op_st8_i32:
347 case INDEX_op_st8_i64:
348 return LDST_8;
349
350 case INDEX_op_ld16u_i32:
351 case INDEX_op_ld16s_i32:
352 case INDEX_op_ld16u_i64:
353 case INDEX_op_ld16s_i64:
354 case INDEX_op_st16_i32:
355 case INDEX_op_st16_i64:
356 return LDST_16;
357
358 case INDEX_op_ld_i32:
359 case INDEX_op_st_i32:
360 case INDEX_op_ld32u_i64:
361 case INDEX_op_ld32s_i64:
362 case INDEX_op_st32_i64:
363 return LDST_32;
364
365 case INDEX_op_ld_i64:
366 case INDEX_op_st_i64:
367 return LDST_64;
368
369 default:
370 tcg_abort();
371 }
372 }
373
374 static inline enum aarch64_ldst_op_type
375 aarch64_ldst_get_type(TCGOpcode tcg_op)
376 {
377 switch (tcg_op) {
378 case INDEX_op_st8_i32:
379 case INDEX_op_st16_i32:
380 case INDEX_op_st8_i64:
381 case INDEX_op_st16_i64:
382 case INDEX_op_st_i32:
383 case INDEX_op_st32_i64:
384 case INDEX_op_st_i64:
385 return LDST_ST;
386
387 case INDEX_op_ld8u_i32:
388 case INDEX_op_ld16u_i32:
389 case INDEX_op_ld8u_i64:
390 case INDEX_op_ld16u_i64:
391 case INDEX_op_ld_i32:
392 case INDEX_op_ld32u_i64:
393 case INDEX_op_ld_i64:
394 return LDST_LD;
395
396 case INDEX_op_ld8s_i32:
397 case INDEX_op_ld16s_i32:
398 return LDST_LD_S_W;
399
400 case INDEX_op_ld8s_i64:
401 case INDEX_op_ld16s_i64:
402 case INDEX_op_ld32s_i64:
403 return LDST_LD_S_X;
404
405 default:
406 tcg_abort();
407 }
408 }
409
410 static inline uint32_t tcg_in32(TCGContext *s)
411 {
412 uint32_t v = *(uint32_t *)s->code_ptr;
413 return v;
414 }
415
416 /* Emit an opcode with "type-checking" of the format. */
417 #define tcg_out_insn(S, FMT, OP, ...) \
418 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
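/* For instance, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm); pairing a format
   with an opcode from a different group fails to compile because the
   glued I<FMT>_<OP> enumerator does not exist. */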
419
420 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
421 TCGReg rd, TCGReg rn, uint64_t aimm)
422 {
423 if (aimm > 0xfff) {
424 assert((aimm & 0xfff) == 0);
425 aimm >>= 12;
426 assert(aimm <= 0xfff);
427 aimm |= 1 << 12; /* apply LSL 12 */
428 }
429 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
430 }
431
432 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
433 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
434 that feed the DecodeBitMasks pseudo function. */
435 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
436 TCGReg rd, TCGReg rn, int n, int immr, int imms)
437 {
438 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
439 | rn << 5 | rd);
440 }
441
442 #define tcg_out_insn_3404 tcg_out_insn_3402
443
444 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
445 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
446 {
447 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
448 | rn << 5 | rd);
449 }
450
451 /* This function is used for the Move (wide immediate) instruction group.
452 Note that SHIFT is a full shift count, not the 2 bit HW field. */
453 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
454 TCGReg rd, uint16_t half, unsigned shift)
455 {
456 assert((shift & ~0x30) == 0);
457 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
458 }
459
460 /* This function is for 3.5.2 (Add/subtract shifted register), for
461 the rare occasion when we actually want to supply a shift amount. */
462 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
463 TCGType ext, TCGReg rd, TCGReg rn,
464 TCGReg rm, int imm6)
465 {
466 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
467 }
468
469 /* This function is for 3.5.2 (Add/subtract shifted register),
470 and 3.5.10 (Logical shifted register), for the vast majorty of cases
471 when we don't want to apply a shift. Thus it can also be used for
472 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
473 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
474 TCGReg rd, TCGReg rn, TCGReg rm)
475 {
476 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
477 }
478
479 #define tcg_out_insn_3503 tcg_out_insn_3502
480 #define tcg_out_insn_3508 tcg_out_insn_3502
481 #define tcg_out_insn_3510 tcg_out_insn_3502
482
483 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
484 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
485 {
486 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
487 | tcg_cond_to_aarch64[c] << 12);
488 }
489
490 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
491 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
492 {
493 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
494 }
495
496
497 static inline void tcg_out_ldst_9(TCGContext *s,
498 enum aarch64_ldst_op_data op_data,
499 enum aarch64_ldst_op_type op_type,
500 TCGReg rd, TCGReg rn, intptr_t offset)
501 {
502 /* use LDUR/STUR-type encoding: base register plus 9-bit signed unscaled offset */
503 tcg_out32(s, op_data << 24 | op_type << 20
504 | (offset & 0x1ff) << 12 | rn << 5 | rd);
505 }
506
507 /* tcg_out_ldst_12 expects a scaled unsigned immediate offset */
508 static inline void tcg_out_ldst_12(TCGContext *s,
509 enum aarch64_ldst_op_data op_data,
510 enum aarch64_ldst_op_type op_type,
511 TCGReg rd, TCGReg rn,
512 tcg_target_ulong scaled_uimm)
513 {
514 tcg_out32(s, (op_data | 1) << 24
515 | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
516 }
517
518 /* Register to register move using ORR (shifted register with no shift). */
519 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
520 {
521 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
522 }
523
524 /* Register to register move using ADDI (move to/from SP). */
525 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
526 {
527 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
528 }
529
530 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
531 tcg_target_long value)
532 {
533 AArch64Insn insn;
534 int i, wantinv, shift;
535 tcg_target_long svalue = value;
536 tcg_target_long ivalue = ~value;
537 tcg_target_long imask;
538
539 /* For 32-bit values, discard potential garbage in value. For 64-bit
540 values within [2**31, 2**32-1], we can create smaller sequences by
541 interpreting this as a negative 32-bit number, while ensuring that
542 the high 32 bits are cleared by setting SF=0. */
543 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
544 svalue = (int32_t)value;
545 value = (uint32_t)value;
546 ivalue = (uint32_t)ivalue;
547 type = TCG_TYPE_I32;
548 }
549
550 /* Would it take fewer insns to begin with MOVN? For the value and its
551 inverse, count the number of 16-bit lanes that are 0. */
552 for (i = wantinv = imask = 0; i < 64; i += 16) {
553 tcg_target_long mask = 0xffffull << i;
554 if ((value & mask) == 0) {
555 wantinv -= 1;
556 }
557 if ((ivalue & mask) == 0) {
558 wantinv += 1;
559 imask |= mask;
560 }
561 }
562
563 /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */
564 insn = I3405_MOVZ;
565 if (wantinv > 0) {
566 value = ivalue;
567 insn = I3405_MOVN;
568 }
569
570 /* Find the lowest lane that is not 0x0000. */
571 shift = ctz64(value) & (63 & -16);
572 tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift);
573
574 if (wantinv > 0) {
575 /* Re-invert the value, so MOVK sees non-inverted bits. */
576 value = ~value;
577 /* Clear out all the 0xffff lanes. */
578 value ^= imask;
579 }
580 /* Clear out the lane that we just set. */
581 value &= ~(0xffffUL << shift);
582
583 /* Iterate until all lanes have been set, and thus cleared from VALUE. */
584 while (value) {
585 shift = ctz64(value) & (63 & -16);
586 tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
587 value &= ~(0xffffUL << shift);
588 }
589 }
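/* Two illustrative cases of the selection above: 0x12345678 has no 0xffff
   lanes, so it is built as MOVZ w_rd, #0x5678 followed by
   MOVK w_rd, #0x1234, lsl #16 (SF=0 clears the high 32 bits); -2 has
   three 0x0000 lanes in its inverse, so a single MOVN x_rd, #1 suffices. */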
590
591 static inline void tcg_out_ldst_r(TCGContext *s,
592 enum aarch64_ldst_op_data op_data,
593 enum aarch64_ldst_op_type op_type,
594 TCGReg rd, TCGReg base, TCGReg regoff)
595 {
596 /* load/store between a register and memory using base + 64-bit register offset */
597 /* e.g. STR Wt, [Xn, Xm]: 0xb8600800|(regoff << 16)|(base << 5)|rd */
598 /* the 0x6000 is for the "no extend field" */
599 tcg_out32(s, 0x00206800
600 | op_data << 24 | op_type << 20 | regoff << 16 | base << 5 | rd);
601 }
602
603 /* Handle a load/store at an arbitrary offset, picking the best addressing mode. */
604 static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
605 enum aarch64_ldst_op_type type,
606 TCGReg rd, TCGReg rn, intptr_t offset)
607 {
608 if (offset >= -256 && offset < 256) {
609 tcg_out_ldst_9(s, data, type, rd, rn, offset);
610 return;
611 }
612
613 if (offset >= 256) {
614 /* if the offset is naturally aligned and in range,
615 then we can use the scaled uimm12 encoding */
616 unsigned int s_bits = data >> 6;
617 if (!(offset & ((1 << s_bits) - 1))) {
618 tcg_target_ulong scaled_uimm = offset >> s_bits;
619 if (scaled_uimm <= 0xfff) {
620 tcg_out_ldst_12(s, data, type, rd, rn, scaled_uimm);
621 return;
622 }
623 }
624 }
625
626 /* worst-case scenario, move offset to temp register, use reg offset */
627 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
628 tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP);
629 }
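/* For example, with LDST_64 an offset of 0x1008 is 8-byte aligned and
   0x1008 >> 3 = 0x201 fits in 12 bits, so the scaled uimm12 form is used;
   an offset of 0x1001 is misaligned and falls back to the movi plus
   register-offset path.  Offsets in [-256, 255] always take the simm9
   form. */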
630
631 static inline void tcg_out_mov(TCGContext *s,
632 TCGType type, TCGReg ret, TCGReg arg)
633 {
634 if (ret != arg) {
635 tcg_out_movr(s, type, ret, arg);
636 }
637 }
638
639 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
640 TCGReg arg1, intptr_t arg2)
641 {
642 tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_LD,
643 arg, arg1, arg2);
644 }
645
646 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
647 TCGReg arg1, intptr_t arg2)
648 {
649 tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_ST,
650 arg, arg1, arg2);
651 }
652
653 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
654 TCGReg rn, unsigned int a, unsigned int b)
655 {
656 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
657 }
658
659 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
660 TCGReg rn, unsigned int a, unsigned int b)
661 {
662 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
663 }
664
665 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
666 TCGReg rn, unsigned int a, unsigned int b)
667 {
668 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
669 }
670
671 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
672 TCGReg rn, TCGReg rm, unsigned int a)
673 {
674 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
675 }
676
677 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
678 TCGReg rd, TCGReg rn, unsigned int m)
679 {
680 int bits = ext ? 64 : 32;
681 int max = bits - 1;
682 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
683 }
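/* E.g. a 64-bit left shift by 3 becomes UBFM rd, rn, #61, #60, which is
   the encoding behind the LSL #3 alias; the right-shift helpers below use
   UBFM/SBFM rd, rn, #m, #63|31 for the LSR/ASR aliases. */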
684
685 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
686 TCGReg rd, TCGReg rn, unsigned int m)
687 {
688 int max = ext ? 63 : 31;
689 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
690 }
691
692 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
693 TCGReg rd, TCGReg rn, unsigned int m)
694 {
695 int max = ext ? 63 : 31;
696 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
697 }
698
699 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
700 TCGReg rd, TCGReg rn, unsigned int m)
701 {
702 int max = ext ? 63 : 31;
703 tcg_out_extr(s, ext, rd, rn, rn, m & max);
704 }
705
706 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
707 TCGReg rd, TCGReg rn, unsigned int m)
708 {
709 int bits = ext ? 64 : 32;
710 int max = bits - 1;
711 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
712 }
713
714 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
715 TCGReg rn, unsigned lsb, unsigned width)
716 {
717 unsigned size = ext ? 64 : 32;
718 unsigned a = (size - lsb) & (size - 1);
719 unsigned b = width - 1;
720 tcg_out_bfm(s, ext, rd, rn, a, b);
721 }
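/* E.g. depositing an 8-bit field at lsb 16 of a 64-bit register becomes
   BFM rd, rn, #48, #7, i.e. the BFI rd, rn, #16, #8 alias. */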
722
723 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
724 tcg_target_long b, bool const_b)
725 {
726 if (const_b) {
727 /* Using CMP or CMN aliases. */
728 if (b >= 0) {
729 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
730 } else {
731 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
732 }
733 } else {
734 /* Using CMP alias SUBS wzr, Wn, Wm */
735 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
736 }
737 }
738
739 static inline void tcg_out_goto(TCGContext *s, intptr_t target)
740 {
741 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
742
743 if (offset < -0x02000000 || offset >= 0x02000000) {
744 /* out of 26bit range */
745 tcg_abort();
746 }
747
748 tcg_out32(s, 0x14000000 | (offset & 0x03ffffff));
749 }
750
751 static inline void tcg_out_goto_noaddr(TCGContext *s)
752 {
753 /* We pay attention here to not modify the branch target by
754 reading from the buffer. This ensures that caches and memory are
755 kept coherent during retranslation.
756 Mask away possible garbage in the high bits for the first translation,
757 while keeping the offset bits for retranslation. */
758 uint32_t insn;
759 insn = (tcg_in32(s) & 0x03ffffff) | 0x14000000;
760 tcg_out32(s, insn);
761 }
762
763 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
764 {
765 /* see comments in tcg_out_goto_noaddr */
766 uint32_t insn;
767 insn = tcg_in32(s) & (0x07ffff << 5);
768 insn |= 0x54000000 | tcg_cond_to_aarch64[c];
769 tcg_out32(s, insn);
770 }
771
772 static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c, intptr_t target)
773 {
774 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
775
776 if (offset < -0x40000 || offset >= 0x40000) {
777 /* out of 19bit range */
778 tcg_abort();
779 }
780
781 offset &= 0x7ffff;
782 tcg_out32(s, 0x54000000 | tcg_cond_to_aarch64[c] | offset << 5);
783 }
784
785 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
786 {
787 tcg_out32(s, 0xd63f0000 | reg << 5);
788 }
789
790 static inline void tcg_out_gotor(TCGContext *s, TCGReg reg)
791 {
792 tcg_out32(s, 0xd61f0000 | reg << 5);
793 }
794
795 static inline void tcg_out_call(TCGContext *s, intptr_t target)
796 {
797 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
798
799 if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit range */
800 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
801 tcg_out_callr(s, TCG_REG_TMP);
802 } else {
803 tcg_out32(s, 0x94000000 | (offset & 0x03ffffff));
804 }
805 }
806
807 static inline void tcg_out_ret(TCGContext *s)
808 {
809 /* emit RET { LR } */
810 tcg_out32(s, 0xd65f03c0);
811 }
812
813 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
814 {
815 intptr_t target = addr;
816 intptr_t offset = (target - (intptr_t)jmp_addr) / 4;
817
818 if (offset < -0x02000000 || offset >= 0x02000000) {
819 /* out of 26bit range */
820 tcg_abort();
821 }
822
823 patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0);
824 flush_icache_range(jmp_addr, jmp_addr + 4);
825 }
826
827 static inline void tcg_out_goto_label(TCGContext *s, int label_index)
828 {
829 TCGLabel *l = &s->labels[label_index];
830
831 if (!l->has_value) {
832 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0);
833 tcg_out_goto_noaddr(s);
834 } else {
835 tcg_out_goto(s, l->u.value);
836 }
837 }
838
839 static inline void tcg_out_goto_label_cond(TCGContext *s,
840 TCGCond c, int label_index)
841 {
842 TCGLabel *l = &s->labels[label_index];
843
844 if (!l->has_value) {
845 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label_index, 0);
846 tcg_out_goto_cond_noaddr(s, c);
847 } else {
848 tcg_out_goto_cond(s, c, l->u.value);
849 }
850 }
851
852 static inline void tcg_out_rev(TCGContext *s, TCGType ext,
853 TCGReg rd, TCGReg rm)
854 {
855 /* using REV 0x5ac00800 */
856 unsigned int base = ext ? 0xdac00c00 : 0x5ac00800;
857 tcg_out32(s, base | rm << 5 | rd);
858 }
859
860 static inline void tcg_out_rev16(TCGContext *s, TCGType ext,
861 TCGReg rd, TCGReg rm)
862 {
863 /* using REV16 0x5ac00400 */
864 unsigned int base = ext ? 0xdac00400 : 0x5ac00400;
865 tcg_out32(s, base | rm << 5 | rd);
866 }
867
868 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
869 TCGReg rd, TCGReg rn)
870 {
871 /* Using the SXTB, SXTH, SXTW aliases of SBFM Xd, Xn, #0, #7|15|31 */
872 int bits = (8 << s_bits) - 1;
873 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
874 }
875
876 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
877 TCGReg rd, TCGReg rn)
878 {
879 /* Using the UXTB, UXTH aliases of UBFM Wd, Wn, #0, #7|15 */
880 int bits = (8 << s_bits) - 1;
881 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
882 }
883
884 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
885 TCGReg rn, int64_t aimm)
886 {
887 if (aimm >= 0) {
888 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
889 } else {
890 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
891 }
892 }
893
894 /* This function is used for the Logical (immediate) instruction group.
895 The value of LIMM must satisfy IS_LIMM. See the comment above about
896 only supporting simplified logical immediates. */
897 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
898 TCGReg rd, TCGReg rn, uint64_t limm)
899 {
900 unsigned h, l, r, c;
901
902 assert(is_limm(limm));
903
904 h = clz64(limm);
905 l = ctz64(limm);
906 if (l == 0) {
907 r = 0; /* form 0....01....1 */
908 c = ctz64(~limm) - 1;
909 if (h == 0) {
910 r = clz64(~limm); /* form 1..10..01..1 */
911 c += r;
912 }
913 } else {
914 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
915 c = r - h - 1;
916 }
917 if (ext == TCG_TYPE_I32) {
918 r &= 31;
919 c &= 31;
920 }
921
922 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
923 }
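/* Worked example: limm = 0x0ff0 gives h = 52, l = 4, so r = 60 and c = 7,
   i.e. a run of c + 1 = 8 ones rotated right by 60 within the 64-bit
   element -- exactly 0x0ff0 again when decoded. */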
924
925 static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
926 TCGReg rh, TCGReg al, TCGReg ah,
927 tcg_target_long bl, tcg_target_long bh,
928 bool const_bl, bool const_bh, bool sub)
929 {
930 TCGReg orig_rl = rl;
931 AArch64Insn insn;
932
933 if (rl == ah || (!const_bh && rl == bh)) {
934 rl = TCG_REG_TMP;
935 }
936
937 if (const_bl) {
938 insn = I3401_ADDSI;
939 if ((bl < 0) ^ sub) {
940 insn = I3401_SUBSI;
941 bl = -bl;
942 }
943 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
944 } else {
945 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
946 }
947
948 insn = I3503_ADC;
949 if (const_bh) {
950 /* Note that the only two constants we support are 0 and -1, and
951 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
952 if ((bh != 0) ^ sub) {
953 insn = I3503_SBC;
954 }
955 bh = TCG_REG_XZR;
956 } else if (sub) {
957 insn = I3503_SBC;
958 }
959 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
960
961 if (rl != orig_rl) {
962 tcg_out_movr(s, ext, orig_rl, rl);
963 }
964 }
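/* With two register operands this emits ADDS rl, al, bl; ADC rh, ah, bh
   (or SUBS/SBC when subtracting), i.e. the carry/borrow from the low half
   is propagated into the high half. */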
965
966 #ifdef CONFIG_SOFTMMU
967 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
968 * int mmu_idx, uintptr_t ra)
969 */
970 static const void * const qemu_ld_helpers[4] = {
971 helper_ret_ldub_mmu,
972 helper_ret_lduw_mmu,
973 helper_ret_ldul_mmu,
974 helper_ret_ldq_mmu,
975 };
976
977 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
978 * uintxx_t val, int mmu_idx, uintptr_t ra)
979 */
980 static const void * const qemu_st_helpers[4] = {
981 helper_ret_stb_mmu,
982 helper_ret_stw_mmu,
983 helper_ret_stl_mmu,
984 helper_ret_stq_mmu,
985 };
986
987 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
988 {
989 TCGMemOp opc = lb->opc;
990 TCGMemOp size = opc & MO_SIZE;
991
992 reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
993
994 tcg_out_movr(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
995 tcg_out_movr(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
996 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
997 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X3, (intptr_t)lb->raddr);
998 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)qemu_ld_helpers[size]);
999 tcg_out_callr(s, TCG_REG_TMP);
1000 if (opc & MO_SIGN) {
1001 tcg_out_sxt(s, TCG_TYPE_I64, size, lb->datalo_reg, TCG_REG_X0);
1002 } else {
1003 tcg_out_movr(s, TCG_TYPE_I64, lb->datalo_reg, TCG_REG_X0);
1004 }
1005
1006 tcg_out_goto(s, (intptr_t)lb->raddr);
1007 }
1008
1009 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1010 {
1011 TCGMemOp size = lb->opc;
1012
1013 reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
1014
1015 tcg_out_movr(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
1016 tcg_out_movr(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1017 tcg_out_movr(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1018 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
1019 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X4, (intptr_t)lb->raddr);
1020 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)qemu_st_helpers[size]);
1021 tcg_out_callr(s, TCG_REG_TMP);
1022 tcg_out_goto(s, (intptr_t)lb->raddr);
1023 }
1024
1025 static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
1026 TCGReg data_reg, TCGReg addr_reg,
1027 int mem_index,
1028 uint8_t *raddr, uint8_t *label_ptr)
1029 {
1030 TCGLabelQemuLdst *label = new_ldst_label(s);
1031
1032 label->is_ld = is_ld;
1033 label->opc = opc;
1034 label->datalo_reg = data_reg;
1035 label->addrlo_reg = addr_reg;
1036 label->mem_index = mem_index;
1037 label->raddr = raddr;
1038 label->label_ptr[0] = label_ptr;
1039 }
1040
1041 /* Load and compare a TLB entry, emitting the conditional jump to the
1042 slow path for the failure case, which will be patched later when finalizing
1043 the slow path. Generated code returns the host addend in X1,
1044 clobbers X0,X2,X3,TMP. */
1045 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
1046 int s_bits, uint8_t **label_ptr, int mem_index, int is_read)
1047 {
1048 TCGReg base = TCG_AREG0;
1049 int tlb_offset = is_read ?
1050 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1051 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1052 /* Extract the TLB index from the address into X0.
1053 X0<CPU_TLB_BITS:0> =
1054 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1055 tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg,
1056 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1057 /* Store the page mask part of the address and the low s_bits into X3.
1058 Later this allows checking for equality and alignment at the same time.
1059 X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
1060 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
1061 addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
1062 /* Add any "high bits" from the tlb offset to the env address into X2,
1063 to take advantage of the LSL12 form of the ADDI instruction.
1064 X2 = env + (tlb_offset & 0xfff000) */
1065 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1066 tlb_offset & 0xfff000);
1067 /* Merge the tlb index contribution into X2.
1068 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1069 tcg_out_insn(s, 3502S, ADD_LSL, 1, TCG_REG_X2, TCG_REG_X2,
1070 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1071 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1072 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1073 tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
1074 LDST_LD, TCG_REG_X0, TCG_REG_X2,
1075 (tlb_offset & 0xfff));
1076 /* Load the tlb addend. Do that early to avoid stalling.
1077 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1078 tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2,
1079 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1080 (is_read ? offsetof(CPUTLBEntry, addr_read)
1081 : offsetof(CPUTLBEntry, addr_write)));
1082 /* Perform the address comparison. */
1083 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1084 *label_ptr = s->code_ptr;
1085 /* If not equal, we jump to the slow path. */
1086 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1087 }
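/* The emitted sequence therefore looks roughly like (pseudo-assembly):
     ubfm x0, addr, #TARGET_PAGE_BITS, #(TARGET_PAGE_BITS + CPU_TLB_BITS)
     and  x3, addr, #(TARGET_PAGE_MASK | align_mask)
     add  x2, env, #(tlb_offset & 0xfff000)
     add  x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
     ldr  x0, [x2, #(tlb_offset & 0xfff)]        ; tlb comparator
     ldr  x1, [x2, #(... + offsetof(addend))]    ; host addend
     cmp  x0, x3
     b.ne slow_path                              ; patched later
*/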
1088
1089 #endif /* CONFIG_SOFTMMU */
1090
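/* For these helpers, bits [1:0] of opc encode log2 of the access size
   (0 = 8-bit .. 3 = 64-bit) and bit 2 (the "| 4" in the callers below)
   requests sign extension of the loaded value into the 64-bit register. */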
1091 static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r,
1092 TCGReg addr_r, TCGReg off_r)
1093 {
1094 switch (opc) {
1095 case 0:
1096 tcg_out_ldst_r(s, LDST_8, LDST_LD, data_r, addr_r, off_r);
1097 break;
1098 case 0 | 4:
1099 tcg_out_ldst_r(s, LDST_8, LDST_LD_S_X, data_r, addr_r, off_r);
1100 break;
1101 case 1:
1102 tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
1103 if (TCG_LDST_BSWAP) {
1104 tcg_out_rev16(s, TCG_TYPE_I32, data_r, data_r);
1105 }
1106 break;
1107 case 1 | 4:
1108 if (TCG_LDST_BSWAP) {
1109 tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
1110 tcg_out_rev16(s, TCG_TYPE_I32, data_r, data_r);
1111 tcg_out_sxt(s, TCG_TYPE_I64, MO_16, data_r, data_r);
1112 } else {
1113 tcg_out_ldst_r(s, LDST_16, LDST_LD_S_X, data_r, addr_r, off_r);
1114 }
1115 break;
1116 case 2:
1117 tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
1118 if (TCG_LDST_BSWAP) {
1119 tcg_out_rev(s, TCG_TYPE_I32, data_r, data_r);
1120 }
1121 break;
1122 case 2 | 4:
1123 if (TCG_LDST_BSWAP) {
1124 tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
1125 tcg_out_rev(s, TCG_TYPE_I32, data_r, data_r);
1126 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1127 } else {
1128 tcg_out_ldst_r(s, LDST_32, LDST_LD_S_X, data_r, addr_r, off_r);
1129 }
1130 break;
1131 case 3:
1132 tcg_out_ldst_r(s, LDST_64, LDST_LD, data_r, addr_r, off_r);
1133 if (TCG_LDST_BSWAP) {
1134 tcg_out_rev(s, TCG_TYPE_I64, data_r, data_r);
1135 }
1136 break;
1137 default:
1138 tcg_abort();
1139 }
1140 }
1141
1142 static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data_r,
1143 TCGReg addr_r, TCGReg off_r)
1144 {
1145 switch (opc) {
1146 case 0:
1147 tcg_out_ldst_r(s, LDST_8, LDST_ST, data_r, addr_r, off_r);
1148 break;
1149 case 1:
1150 if (TCG_LDST_BSWAP) {
1151 tcg_out_rev16(s, TCG_TYPE_I32, TCG_REG_TMP, data_r);
1152 tcg_out_ldst_r(s, LDST_16, LDST_ST, TCG_REG_TMP, addr_r, off_r);
1153 } else {
1154 tcg_out_ldst_r(s, LDST_16, LDST_ST, data_r, addr_r, off_r);
1155 }
1156 break;
1157 case 2:
1158 if (TCG_LDST_BSWAP) {
1159 tcg_out_rev(s, TCG_TYPE_I32, TCG_REG_TMP, data_r);
1160 tcg_out_ldst_r(s, LDST_32, LDST_ST, TCG_REG_TMP, addr_r, off_r);
1161 } else {
1162 tcg_out_ldst_r(s, LDST_32, LDST_ST, data_r, addr_r, off_r);
1163 }
1164 break;
1165 case 3:
1166 if (TCG_LDST_BSWAP) {
1167 tcg_out_rev(s, TCG_TYPE_I64, TCG_REG_TMP, data_r);
1168 tcg_out_ldst_r(s, LDST_64, LDST_ST, TCG_REG_TMP, addr_r, off_r);
1169 } else {
1170 tcg_out_ldst_r(s, LDST_64, LDST_ST, data_r, addr_r, off_r);
1171 }
1172 break;
1173 default:
1174 tcg_abort();
1175 }
1176 }
1177
1178 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
1179 {
1180 TCGReg addr_reg, data_reg;
1181 #ifdef CONFIG_SOFTMMU
1182 int mem_index, s_bits;
1183 uint8_t *label_ptr;
1184 #endif
1185 data_reg = args[0];
1186 addr_reg = args[1];
1187
1188 #ifdef CONFIG_SOFTMMU
1189 mem_index = args[2];
1190 s_bits = opc & 3;
1191 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
1192 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
1193 add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg,
1194 mem_index, s->code_ptr, label_ptr);
1195 #else /* !CONFIG_SOFTMMU */
1196 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg,
1197 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1198 #endif /* CONFIG_SOFTMMU */
1199 }
1200
1201 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
1202 {
1203 TCGReg addr_reg, data_reg;
1204 #ifdef CONFIG_SOFTMMU
1205 int mem_index, s_bits;
1206 uint8_t *label_ptr;
1207 #endif
1208 data_reg = args[0];
1209 addr_reg = args[1];
1210
1211 #ifdef CONFIG_SOFTMMU
1212 mem_index = args[2];
1213 s_bits = opc & 3;
1214
1215 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
1216 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
1217 add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg,
1218 mem_index, s->code_ptr, label_ptr);
1219 #else /* !CONFIG_SOFTMMU */
1220 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg,
1221 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1222 #endif /* CONFIG_SOFTMMU */
1223 }
1224
1225 static uint8_t *tb_ret_addr;
1226
1227 /* callee stack use example:
1228 stp x29, x30, [sp,#-32]!
1229 mov x29, sp
1230 stp x1, x2, [sp,#16]
1231 ...
1232 ldp x1, x2, [sp,#16]
1233 ldp x29, x30, [sp],#32
1234 ret
1235 */
1236
1237 /* push r1 and r2, and alloc stack space for a total of
1238 alloc_n elements (1 element = 16 bytes); alloc_n must be between 1 and 31. */
1239 static inline void tcg_out_push_pair(TCGContext *s, TCGReg addr,
1240 TCGReg r1, TCGReg r2, int alloc_n)
1241 {
1242 /* using indexed scaled simm7 STP 0x28800000 | (ext) | 0x01000000 (pre-idx)
1243 | alloc_n * (-1) << 16 | r2 << 10 | addr << 5 | r1 */
1244 assert(alloc_n > 0 && alloc_n < 0x20);
1245 alloc_n = (-alloc_n) & 0x3f;
1246 tcg_out32(s, 0xa9800000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
1247 }
1248
1249 /* dealloc stack space for a total of alloc_n elements and pop r1, r2. */
1250 static inline void tcg_out_pop_pair(TCGContext *s, TCGReg addr,
1251 TCGReg r1, TCGReg r2, int alloc_n)
1252 {
1253 /* using indexed scaled simm7 LDP 0x28c00000 | (ext) | nothing (post-idx)
1254 | alloc_n << 16 | r2 << 10 | addr << 5 | r1 */
1255 assert(alloc_n > 0 && alloc_n < 0x20);
1256 tcg_out32(s, 0xa8c00000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
1257 }
1258
1259 static inline void tcg_out_store_pair(TCGContext *s, TCGReg addr,
1260 TCGReg r1, TCGReg r2, int idx)
1261 {
1262 /* using register pair offset simm7 STP 0x29000000 | (ext)
1263 | idx << 16 | r2 << 10 | addr << 5 | r1 */
1264 assert(idx > 0 && idx < 0x20);
1265 tcg_out32(s, 0xa9000000 | idx << 16 | r2 << 10 | addr << 5 | r1);
1266 }
1267
1268 static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
1269 TCGReg r1, TCGReg r2, int idx)
1270 {
1271 /* using register pair offset simm7 LDP 0x29400000 | (ext)
1272 | idx << 16 | r2 << 10 | addr << 5 | r1 */
1273 assert(idx > 0 && idx < 0x20);
1274 tcg_out32(s, 0xa9400000 | idx << 16 | r2 << 10 | addr << 5 | r1);
1275 }
1276
1277 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1278 const TCGArg args[TCG_MAX_OP_ARGS],
1279 const int const_args[TCG_MAX_OP_ARGS])
1280 {
1281 /* 99% of the time, we can signal the use of extension registers
1282 by looking to see if the opcode handles 64-bit data. */
1283 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1284
1285 /* Hoist the loads of the most common arguments. */
1286 TCGArg a0 = args[0];
1287 TCGArg a1 = args[1];
1288 TCGArg a2 = args[2];
1289 int c2 = const_args[2];
1290
1291 /* Some operands are defined with "rZ" constraint, a register or
1292 the zero register. These need not actually test args[I] == 0. */
1293 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1294
1295 switch (opc) {
1296 case INDEX_op_exit_tb:
1297 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1298 tcg_out_goto(s, (intptr_t)tb_ret_addr);
1299 break;
1300
1301 case INDEX_op_goto_tb:
1302 #ifndef USE_DIRECT_JUMP
1303 #error "USE_DIRECT_JUMP required for aarch64"
1304 #endif
1305 assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
1306 s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
1307 /* actual branch destination will be patched by
1308 aarch64_tb_set_jmp_target later, beware retranslation. */
1309 tcg_out_goto_noaddr(s);
1310 s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
1311 break;
1312
1313 case INDEX_op_call:
1314 if (const_args[0]) {
1315 tcg_out_call(s, a0);
1316 } else {
1317 tcg_out_callr(s, a0);
1318 }
1319 break;
1320
1321 case INDEX_op_br:
1322 tcg_out_goto_label(s, a0);
1323 break;
1324
1325 case INDEX_op_ld_i32:
1326 case INDEX_op_ld_i64:
1327 case INDEX_op_st_i32:
1328 case INDEX_op_st_i64:
1329 case INDEX_op_ld8u_i32:
1330 case INDEX_op_ld8s_i32:
1331 case INDEX_op_ld16u_i32:
1332 case INDEX_op_ld16s_i32:
1333 case INDEX_op_ld8u_i64:
1334 case INDEX_op_ld8s_i64:
1335 case INDEX_op_ld16u_i64:
1336 case INDEX_op_ld16s_i64:
1337 case INDEX_op_ld32u_i64:
1338 case INDEX_op_ld32s_i64:
1339 case INDEX_op_st8_i32:
1340 case INDEX_op_st8_i64:
1341 case INDEX_op_st16_i32:
1342 case INDEX_op_st16_i64:
1343 case INDEX_op_st32_i64:
1344 tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
1345 a0, a1, a2);
1346 break;
1347
1348 case INDEX_op_add_i32:
1349 a2 = (int32_t)a2;
1350 /* FALLTHRU */
1351 case INDEX_op_add_i64:
1352 if (c2) {
1353 tcg_out_addsubi(s, ext, a0, a1, a2);
1354 } else {
1355 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1356 }
1357 break;
1358
1359 case INDEX_op_sub_i32:
1360 a2 = (int32_t)a2;
1361 /* FALLTHRU */
1362 case INDEX_op_sub_i64:
1363 if (c2) {
1364 tcg_out_addsubi(s, ext, a0, a1, -a2);
1365 } else {
1366 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1367 }
1368 break;
1369
1370 case INDEX_op_neg_i64:
1371 case INDEX_op_neg_i32:
1372 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1373 break;
1374
1375 case INDEX_op_and_i32:
1376 a2 = (int32_t)a2;
1377 /* FALLTHRU */
1378 case INDEX_op_and_i64:
1379 if (c2) {
1380 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1381 } else {
1382 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1383 }
1384 break;
1385
1386 case INDEX_op_andc_i32:
1387 a2 = (int32_t)a2;
1388 /* FALLTHRU */
1389 case INDEX_op_andc_i64:
1390 if (c2) {
1391 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1392 } else {
1393 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1394 }
1395 break;
1396
1397 case INDEX_op_or_i32:
1398 a2 = (int32_t)a2;
1399 /* FALLTHRU */
1400 case INDEX_op_or_i64:
1401 if (c2) {
1402 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1403 } else {
1404 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1405 }
1406 break;
1407
1408 case INDEX_op_orc_i32:
1409 a2 = (int32_t)a2;
1410 /* FALLTHRU */
1411 case INDEX_op_orc_i64:
1412 if (c2) {
1413 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1414 } else {
1415 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1416 }
1417 break;
1418
1419 case INDEX_op_xor_i32:
1420 a2 = (int32_t)a2;
1421 /* FALLTHRU */
1422 case INDEX_op_xor_i64:
1423 if (c2) {
1424 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1425 } else {
1426 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1427 }
1428 break;
1429
1430 case INDEX_op_eqv_i32:
1431 a2 = (int32_t)a2;
1432 /* FALLTHRU */
1433 case INDEX_op_eqv_i64:
1434 if (c2) {
1435 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1436 } else {
1437 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1438 }
1439 break;
1440
1441 case INDEX_op_not_i64:
1442 case INDEX_op_not_i32:
1443 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1444 break;
1445
1446 case INDEX_op_mul_i64:
1447 case INDEX_op_mul_i32:
1448 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1449 break;
1450
1451 case INDEX_op_div_i64:
1452 case INDEX_op_div_i32:
1453 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1454 break;
1455 case INDEX_op_divu_i64:
1456 case INDEX_op_divu_i32:
1457 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1458 break;
1459
1460 case INDEX_op_rem_i64:
1461 case INDEX_op_rem_i32:
1462 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1463 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1464 break;
1465 case INDEX_op_remu_i64:
1466 case INDEX_op_remu_i32:
1467 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1468 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1469 break;
1470
1471 case INDEX_op_shl_i64:
1472 case INDEX_op_shl_i32:
1473 if (c2) {
1474 tcg_out_shl(s, ext, a0, a1, a2);
1475 } else {
1476 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1477 }
1478 break;
1479
1480 case INDEX_op_shr_i64:
1481 case INDEX_op_shr_i32:
1482 if (c2) {
1483 tcg_out_shr(s, ext, a0, a1, a2);
1484 } else {
1485 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1486 }
1487 break;
1488
1489 case INDEX_op_sar_i64:
1490 case INDEX_op_sar_i32:
1491 if (c2) {
1492 tcg_out_sar(s, ext, a0, a1, a2);
1493 } else {
1494 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1495 }
1496 break;
1497
1498 case INDEX_op_rotr_i64:
1499 case INDEX_op_rotr_i32:
1500 if (c2) {
1501 tcg_out_rotr(s, ext, a0, a1, a2);
1502 } else {
1503 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1504 }
1505 break;
1506
1507 case INDEX_op_rotl_i64:
1508 case INDEX_op_rotl_i32:
1509 if (c2) {
1510 tcg_out_rotl(s, ext, a0, a1, a2);
1511 } else {
1512 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1513 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1514 }
1515 break;
1516
1517 case INDEX_op_brcond_i32:
1518 a1 = (int32_t)a1;
1519 /* FALLTHRU */
1520 case INDEX_op_brcond_i64:
1521 tcg_out_cmp(s, ext, a0, a1, const_args[1]);
1522 tcg_out_goto_label_cond(s, a2, args[3]);
1523 break;
1524
1525 case INDEX_op_setcond_i32:
1526 a2 = (int32_t)a2;
1527 /* FALLTHRU */
1528 case INDEX_op_setcond_i64:
1529 tcg_out_cmp(s, ext, a1, a2, c2);
1530 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1531 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1532 TCG_REG_XZR, tcg_invert_cond(args[3]));
1533 break;
1534
1535 case INDEX_op_movcond_i32:
1536 a2 = (int32_t)a2;
1537 /* FALLTHRU */
1538 case INDEX_op_movcond_i64:
1539 tcg_out_cmp(s, ext, a1, a2, c2);
1540 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1541 break;
1542
1543 case INDEX_op_qemu_ld8u:
1544 tcg_out_qemu_ld(s, args, 0 | 0);
1545 break;
1546 case INDEX_op_qemu_ld8s:
1547 tcg_out_qemu_ld(s, args, 4 | 0);
1548 break;
1549 case INDEX_op_qemu_ld16u:
1550 tcg_out_qemu_ld(s, args, 0 | 1);
1551 break;
1552 case INDEX_op_qemu_ld16s:
1553 tcg_out_qemu_ld(s, args, 4 | 1);
1554 break;
1555 case INDEX_op_qemu_ld32u:
1556 tcg_out_qemu_ld(s, args, 0 | 2);
1557 break;
1558 case INDEX_op_qemu_ld32s:
1559 tcg_out_qemu_ld(s, args, 4 | 2);
1560 break;
1561 case INDEX_op_qemu_ld32:
1562 tcg_out_qemu_ld(s, args, 0 | 2);
1563 break;
1564 case INDEX_op_qemu_ld64:
1565 tcg_out_qemu_ld(s, args, 0 | 3);
1566 break;
1567 case INDEX_op_qemu_st8:
1568 tcg_out_qemu_st(s, args, 0);
1569 break;
1570 case INDEX_op_qemu_st16:
1571 tcg_out_qemu_st(s, args, 1);
1572 break;
1573 case INDEX_op_qemu_st32:
1574 tcg_out_qemu_st(s, args, 2);
1575 break;
1576 case INDEX_op_qemu_st64:
1577 tcg_out_qemu_st(s, args, 3);
1578 break;
1579
1580 case INDEX_op_bswap32_i64:
1581 /* Despite the _i64, this is a 32-bit bswap. */
1582 ext = 0;
1583 /* FALLTHRU */
1584 case INDEX_op_bswap64_i64:
1585 case INDEX_op_bswap32_i32:
1586 tcg_out_rev(s, ext, a0, a1);
1587 break;
1588 case INDEX_op_bswap16_i64:
1589 case INDEX_op_bswap16_i32:
1590 tcg_out_rev16(s, TCG_TYPE_I32, a0, a1);
1591 break;
1592
1593 case INDEX_op_ext8s_i64:
1594 case INDEX_op_ext8s_i32:
1595 tcg_out_sxt(s, ext, MO_8, a0, a1);
1596 break;
1597 case INDEX_op_ext16s_i64:
1598 case INDEX_op_ext16s_i32:
1599 tcg_out_sxt(s, ext, MO_16, a0, a1);
1600 break;
1601 case INDEX_op_ext32s_i64:
1602 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1603 break;
1604 case INDEX_op_ext8u_i64:
1605 case INDEX_op_ext8u_i32:
1606 tcg_out_uxt(s, MO_8, a0, a1);
1607 break;
1608 case INDEX_op_ext16u_i64:
1609 case INDEX_op_ext16u_i32:
1610 tcg_out_uxt(s, MO_16, a0, a1);
1611 break;
1612 case INDEX_op_ext32u_i64:
1613 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1614 break;
1615
1616 case INDEX_op_deposit_i64:
1617 case INDEX_op_deposit_i32:
1618 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1619 break;
1620
1621 case INDEX_op_add2_i32:
1622 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1623 (int32_t)args[4], args[5], const_args[4],
1624 const_args[5], false);
1625 break;
1626 case INDEX_op_add2_i64:
1627 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1628 args[5], const_args[4], const_args[5], false);
1629 break;
1630 case INDEX_op_sub2_i32:
1631 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1632 (int32_t)args[4], args[5], const_args[4],
1633 const_args[5], true);
1634 break;
1635 case INDEX_op_sub2_i64:
1636 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1637 args[5], const_args[4], const_args[5], true);
1638 break;
1639
1640 case INDEX_op_muluh_i64:
1641 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1642 break;
1643 case INDEX_op_mulsh_i64:
1644 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1645 break;
1646
1647 case INDEX_op_mov_i64:
1648 case INDEX_op_mov_i32:
1649 case INDEX_op_movi_i64:
1650 case INDEX_op_movi_i32:
1651 /* Always implemented with tcg_out_mov/i, never with tcg_out_op. */
1652 default:
1653 /* Opcode not implemented. */
1654 tcg_abort();
1655 }
1656
1657 #undef REG0
1658 }
1659
1660 static const TCGTargetOpDef aarch64_op_defs[] = {
1661 { INDEX_op_exit_tb, { } },
1662 { INDEX_op_goto_tb, { } },
1663 { INDEX_op_call, { "ri" } },
1664 { INDEX_op_br, { } },
1665
1666 { INDEX_op_mov_i32, { "r", "r" } },
1667 { INDEX_op_mov_i64, { "r", "r" } },
1668
1669 { INDEX_op_movi_i32, { "r" } },
1670 { INDEX_op_movi_i64, { "r" } },
1671
1672 { INDEX_op_ld8u_i32, { "r", "r" } },
1673 { INDEX_op_ld8s_i32, { "r", "r" } },
1674 { INDEX_op_ld16u_i32, { "r", "r" } },
1675 { INDEX_op_ld16s_i32, { "r", "r" } },
1676 { INDEX_op_ld_i32, { "r", "r" } },
1677 { INDEX_op_ld8u_i64, { "r", "r" } },
1678 { INDEX_op_ld8s_i64, { "r", "r" } },
1679 { INDEX_op_ld16u_i64, { "r", "r" } },
1680 { INDEX_op_ld16s_i64, { "r", "r" } },
1681 { INDEX_op_ld32u_i64, { "r", "r" } },
1682 { INDEX_op_ld32s_i64, { "r", "r" } },
1683 { INDEX_op_ld_i64, { "r", "r" } },
1684
1685 { INDEX_op_st8_i32, { "r", "r" } },
1686 { INDEX_op_st16_i32, { "r", "r" } },
1687 { INDEX_op_st_i32, { "r", "r" } },
1688 { INDEX_op_st8_i64, { "r", "r" } },
1689 { INDEX_op_st16_i64, { "r", "r" } },
1690 { INDEX_op_st32_i64, { "r", "r" } },
1691 { INDEX_op_st_i64, { "r", "r" } },
1692
1693 { INDEX_op_add_i32, { "r", "r", "rwA" } },
1694 { INDEX_op_add_i64, { "r", "r", "rA" } },
1695 { INDEX_op_sub_i32, { "r", "r", "rwA" } },
1696 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1697 { INDEX_op_mul_i32, { "r", "r", "r" } },
1698 { INDEX_op_mul_i64, { "r", "r", "r" } },
1699 { INDEX_op_div_i32, { "r", "r", "r" } },
1700 { INDEX_op_div_i64, { "r", "r", "r" } },
1701 { INDEX_op_divu_i32, { "r", "r", "r" } },
1702 { INDEX_op_divu_i64, { "r", "r", "r" } },
1703 { INDEX_op_rem_i32, { "r", "r", "r" } },
1704 { INDEX_op_rem_i64, { "r", "r", "r" } },
1705 { INDEX_op_remu_i32, { "r", "r", "r" } },
1706 { INDEX_op_remu_i64, { "r", "r", "r" } },
1707 { INDEX_op_and_i32, { "r", "r", "rwL" } },
1708 { INDEX_op_and_i64, { "r", "r", "rL" } },
1709 { INDEX_op_or_i32, { "r", "r", "rwL" } },
1710 { INDEX_op_or_i64, { "r", "r", "rL" } },
1711 { INDEX_op_xor_i32, { "r", "r", "rwL" } },
1712 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1713 { INDEX_op_andc_i32, { "r", "r", "rwL" } },
1714 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1715 { INDEX_op_orc_i32, { "r", "r", "rwL" } },
1716 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1717 { INDEX_op_eqv_i32, { "r", "r", "rwL" } },
1718 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1719
1720 { INDEX_op_neg_i32, { "r", "r" } },
1721 { INDEX_op_neg_i64, { "r", "r" } },
1722 { INDEX_op_not_i32, { "r", "r" } },
1723 { INDEX_op_not_i64, { "r", "r" } },
1724
1725 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1726 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1727 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1728 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1729 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1730 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1731 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1732 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1733 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1734 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1735
1736 { INDEX_op_brcond_i32, { "r", "rwA" } },
1737 { INDEX_op_brcond_i64, { "r", "rA" } },
1738 { INDEX_op_setcond_i32, { "r", "r", "rwA" } },
1739 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1740 { INDEX_op_movcond_i32, { "r", "r", "rwA", "rZ", "rZ" } },
1741 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1742
1743 { INDEX_op_qemu_ld8u, { "r", "l" } },
1744 { INDEX_op_qemu_ld8s, { "r", "l" } },
1745 { INDEX_op_qemu_ld16u, { "r", "l" } },
1746 { INDEX_op_qemu_ld16s, { "r", "l" } },
1747 { INDEX_op_qemu_ld32u, { "r", "l" } },
1748 { INDEX_op_qemu_ld32s, { "r", "l" } },
1749
1750 { INDEX_op_qemu_ld32, { "r", "l" } },
1751 { INDEX_op_qemu_ld64, { "r", "l" } },
1752
1753 { INDEX_op_qemu_st8, { "l", "l" } },
1754 { INDEX_op_qemu_st16, { "l", "l" } },
1755 { INDEX_op_qemu_st32, { "l", "l" } },
1756 { INDEX_op_qemu_st64, { "l", "l" } },
1757
1758 { INDEX_op_bswap16_i32, { "r", "r" } },
1759 { INDEX_op_bswap32_i32, { "r", "r" } },
1760 { INDEX_op_bswap16_i64, { "r", "r" } },
1761 { INDEX_op_bswap32_i64, { "r", "r" } },
1762 { INDEX_op_bswap64_i64, { "r", "r" } },
1763
1764 { INDEX_op_ext8s_i32, { "r", "r" } },
1765 { INDEX_op_ext16s_i32, { "r", "r" } },
1766 { INDEX_op_ext8u_i32, { "r", "r" } },
1767 { INDEX_op_ext16u_i32, { "r", "r" } },
1768
1769 { INDEX_op_ext8s_i64, { "r", "r" } },
1770 { INDEX_op_ext16s_i64, { "r", "r" } },
1771 { INDEX_op_ext32s_i64, { "r", "r" } },
1772 { INDEX_op_ext8u_i64, { "r", "r" } },
1773 { INDEX_op_ext16u_i64, { "r", "r" } },
1774 { INDEX_op_ext32u_i64, { "r", "r" } },
1775
1776 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1777 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1778
1779 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
1780 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1781 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
1782 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1783
1784 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1785 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1786
1787 { -1 },
1788 };
1789
1790 static void tcg_target_init(TCGContext *s)
1791 {
1792 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1793 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1794
1795 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1796 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1797 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1798 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1799 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1800 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1801 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1802 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1803 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1804 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1805 (1 << TCG_REG_X18));
1806
1807 tcg_regset_clear(s->reserved_regs);
1808 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1809 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1810 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1811 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1812
1813 tcg_add_target_add_op_defs(aarch64_op_defs);
1814 }
1815
1816 static void tcg_target_qemu_prologue(TCGContext *s)
1817 {
1818 /* NB: frame sizes are in 16 byte stack units! */
1819 int frame_size_callee_saved, frame_size_tcg_locals;
1820 TCGReg r;
1821
1822 /* save pairs (FP, LR) and (X19, X20) .. (X27, X28) */
1823 frame_size_callee_saved = (1) + (TCG_REG_X28 - TCG_REG_X19) / 2 + 1;
1824
1825 /* frame size requirement for TCG local variables */
1826 frame_size_tcg_locals = TCG_STATIC_CALL_ARGS_SIZE
1827 + CPU_TEMP_BUF_NLONGS * sizeof(long)
1828 + (TCG_TARGET_STACK_ALIGN - 1);
1829 frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1);
1830 frame_size_tcg_locals /= TCG_TARGET_STACK_ALIGN;
1831
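/* With the register pairs chosen above, the frame built below looks like:
       fp -> [ FP, LR ] [ X19, X20 ] ... [ X27, X28 ]   (6 * 16 bytes)
       sp -> [ static call args | CPU_TEMP_BUF ]         (frame_size_tcg_locals * 16 bytes)
   i.e. frame_size_callee_saved evaluates to 6 units of 16 bytes. */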
1832 /* push (FP, LR) and update sp */
1833 tcg_out_push_pair(s, TCG_REG_SP,
1834 TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
1835
1836 /* FP -> callee_saved */
1837 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1838
1839 /* store callee-preserved regs x19..x28 using FP -> callee_saved */
1840 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1841 int idx = (r - TCG_REG_X19) / 2 + 1;
1842 tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx);
1843 }
1844
1845 /* Make stack space for TCG locals. */
1846 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1847 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
1848
1849 /* inform TCG about how to find TCG locals with register, offset, size */
1850 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1851 CPU_TEMP_BUF_NLONGS * sizeof(long));
1852
1853 #if defined(CONFIG_USE_GUEST_BASE)
1854 if (GUEST_BASE) {
1855 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
1856 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1857 }
1858 #endif
1859
1860 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1861 tcg_out_gotor(s, tcg_target_call_iarg_regs[1]);
1862
1863 tb_ret_addr = s->code_ptr;
1864
1865 /* Remove TCG locals stack space. */
1866 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1867 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
1868
1869 /* restore registers x19..x28.
1870 FP must be preserved, so it still points to callee_saved area */
1871 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1872 int idx = (r - TCG_REG_X19) / 2 + 1;
1873 tcg_out_load_pair(s, TCG_REG_FP, r, r + 1, idx);
1874 }
1875
1876 /* pop (FP, LR), restore SP to previous frame, return */
1877 tcg_out_pop_pair(s, TCG_REG_SP,
1878 TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
1879 tcg_out_ret(s);
1880 }