/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "tcg-be-ldst.h"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
    "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
    "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
    "%x24", "%x25", "%x26", "%x27", "%x28",
    "%fp", /* frame pointer */
    "%lr", /* link register */
    "%sp", /* stack pointer */
};
#endif /* NDEBUG */

#ifdef TARGET_WORDS_BIGENDIAN
 #define TCG_LDST_BSWAP 1
#else
 #define TCG_LDST_BSWAP 0
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */

    TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, TCG_REG_X12,
    TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X18, TCG_REG_X19, /* will not use these, see tcg_target_init */

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    TCG_REG_X8, /* will not use, see tcg_target_init */
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X8

#ifndef CONFIG_SOFTMMU
# if defined(CONFIG_USE_GUEST_BASE)
# define TCG_REG_GUEST_BASE TCG_REG_X28
# else
# define TCG_REG_GUEST_BASE TCG_REG_XZR
# endif
#endif

static inline void reloc_pc26(void *code_ptr, intptr_t target)
{
    intptr_t offset = (target - (intptr_t)code_ptr) / 4;
    /* read instruction, mask away previous PC_REL26 parameter contents,
       set the proper offset, then write back the instruction. */
    uint32_t insn = *(uint32_t *)code_ptr;
    insn = deposit32(insn, 0, 26, offset);
    *(uint32_t *)code_ptr = insn;
}

static inline void reloc_pc19(void *code_ptr, intptr_t target)
{
    intptr_t offset = (target - (intptr_t)code_ptr) / 4;
    /* read instruction, mask away previous PC_REL19 parameter contents,
       set the proper offset, then write back the instruction. */
    uint32_t insn = *(uint32_t *)code_ptr;
    insn = deposit32(insn, 5, 19, offset);
    *(uint32_t *)code_ptr = insn;
}

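/* Worked example (illustrative): patching a B at code_ptr to branch to
   code_ptr + 8 gives offset = 8 / 4 = 2; deposit32(insn, 0, 26, 2) turns
   0x14000000 into 0x14000002 ("b .+8"), touching only the imm26 field. */
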
static inline void patch_reloc(uint8_t *code_ptr, int type,
                               intptr_t value, intptr_t addend)
{
    value += addend;

    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        reloc_pc26(code_ptr, value);
        break;
    case R_AARCH64_CONDBR19:
        reloc_pc19(code_ptr, value);
        break;

    default:
        tcg_abort();
    }
}

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct,
                                   const char **pct_str)
{
    const char *ct_str = *pct_str;

    switch (ct_str[0]) {
    case 'r':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2 and x3 for helper args; better to avoid using them. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
        break;
    default:
        return -1;
    }

    ct_str++;
    *pct_str = ct_str;
    return 0;
}

static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

/* opcodes for LDR / STR instructions with base + simm9 addressing */
enum aarch64_ldst_op_data { /* size of the data moved */
    LDST_8 = 0x38,
    LDST_16 = 0x78,
    LDST_32 = 0xb8,
    LDST_64 = 0xf8,
};
enum aarch64_ldst_op_type { /* type of operation */
    LDST_ST = 0x0,     /* store */
    LDST_LD = 0x4,     /* load */
    LDST_LD_S_X = 0x8, /* load and sign-extend into Xt */
    LDST_LD_S_W = 0xc, /* load and sign-extend into Wt */
};

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD  = 0x0b000000,
    I3502_ADDS = 0x2b000000,
    I3502_SUB  = 0x4b000000,
    I3502_SUBS = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL = I3502_ADD,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV = 0x1ac02000,
    I3508_LSRV = 0x1ac02400,
    I3508_ASRV = 0x1ac02800,
    I3508_RORV = 0x1ac02c00,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND  = 0x0a000000,
    I3510_ORR  = 0x2a000000,
    I3510_EOR  = 0x4a000000,
    I3510_ANDS = 0x6a000000,
} AArch64Insn;

static inline enum aarch64_ldst_op_data
aarch64_ldst_get_data(TCGOpcode tcg_op)
{
    switch (tcg_op) {
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        return LDST_8;

    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        return LDST_16;

    case INDEX_op_ld_i32:
    case INDEX_op_st_i32:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_st32_i64:
        return LDST_32;

    case INDEX_op_ld_i64:
    case INDEX_op_st_i64:
        return LDST_64;

    default:
        tcg_abort();
    }
}

static inline enum aarch64_ldst_op_type
aarch64_ldst_get_type(TCGOpcode tcg_op)
{
    switch (tcg_op) {
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return LDST_ST;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld_i64:
        return LDST_LD;

    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16s_i32:
        return LDST_LD_S_W;

    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32s_i64:
        return LDST_LD_S_X;

    default:
        tcg_abort();
    }
}

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)

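/* For example (illustrative), tcg_out_insn(s, 3502, ADD, ext, rd, rn, rm)
   expands to tcg_out_insn_3502(s, I3502_ADD, ext, rd, rn, rm); a mismatch
   such as tcg_out_insn(s, 3502, AND, ...) fails to compile because no
   I3502_AND exists. */
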
/* This function is for 3.5.2 (Add/subtract shifted register), for the
   rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static inline void tcg_out_ldst_9(TCGContext *s,
                                  enum aarch64_ldst_op_data op_data,
                                  enum aarch64_ldst_op_type op_type,
                                  TCGReg rd, TCGReg rn, tcg_target_long offset)
{
    /* use LDUR with BASE register with 9bit signed unscaled offset */
    tcg_out32(s, op_data << 24 | op_type << 20
              | (offset & 0x1ff) << 12 | rn << 5 | rd);
}

/* tcg_out_ldst_12 expects a scaled unsigned immediate offset */
static inline void tcg_out_ldst_12(TCGContext *s,
                                   enum aarch64_ldst_op_data op_data,
                                   enum aarch64_ldst_op_type op_type,
                                   TCGReg rd, TCGReg rn,
                                   tcg_target_ulong scaled_uimm)
{
    tcg_out32(s, (op_data | 1) << 24
              | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
}

static inline void tcg_out_movr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg src)
{
    /* register to register move using MOV (shifted register with no shift) */
    /* using MOV 0x2a0003e0 | (shift).. */
    unsigned int base = ext ? 0xaa0003e0 : 0x2a0003e0;
    tcg_out32(s, base | src << 16 | rd);
}

static inline void tcg_out_movi_aux(TCGContext *s,
                                    TCGReg rd, uint64_t value)
{
    uint32_t half, base, shift, movk = 0;
    /* construct halfwords of the immediate with MOVZ/MOVK with LSL */
    /* using MOVZ 0x52800000 | extended reg.. */
    base = (value > 0xffffffff) ? 0xd2800000 : 0x52800000;
    /* count trailing zeros in 16 bit steps, mapping 64 to 0.  Emit the
       first MOVZ with the half-word immediate skipping the zeros, with a shift
       (LSL) equal to this number.  Then morph all next instructions into MOVKs.
       Zero the processed half-word in the value, continue until empty.
       We build the final result 16bits at a time with up to 4 instructions,
       but do not emit instructions for 16bit zero holes.  */
    do {
        shift = ctz64(value) & (63 & -16);
        half = (value >> shift) & 0xffff;
        tcg_out32(s, base | movk | shift << 17 | half << 5 | rd);
        movk = 0x20000000; /* morph next MOVZs into MOVKs */
        value &= ~(0xffffUL << shift);
    } while (value);
}

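/* Example (illustrative): value = 0x0000123400005678 is emitted as
       movz rd, #0x5678
       movk rd, #0x1234, lsl #32
   with no instruction for the all-zero halfword at bits 16..31. */
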
static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                TCGReg rd, tcg_target_long value)
{
    if (type == TCG_TYPE_I64) {
        tcg_out_movi_aux(s, rd, value);
    } else {
        tcg_out_movi_aux(s, rd, value & 0xffffffff);
    }
}

static inline void tcg_out_ldst_r(TCGContext *s,
                                  enum aarch64_ldst_op_data op_data,
                                  enum aarch64_ldst_op_type op_type,
                                  TCGReg rd, TCGReg base, TCGReg regoff)
{
    /* load from memory to register using base + 64bit register offset */
    /* using f.e. STR Wt, [Xn, Xm] 0xb8600800|(regoff << 16)|(base << 5)|rd */
    /* the 0x6000 is for the "no extend field" */
    tcg_out32(s, 0x00206800
              | op_data << 24 | op_type << 20 | regoff << 16 | base << 5 | rd);
}

/* solve the whole ldst problem */
static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
                                enum aarch64_ldst_op_type type,
                                TCGReg rd, TCGReg rn, tcg_target_long offset)
{
    if (offset >= -256 && offset < 256) {
        tcg_out_ldst_9(s, data, type, rd, rn, offset);
        return;
    }

    if (offset >= 256) {
        /* if the offset is naturally aligned and in range,
           then we can use the scaled uimm12 encoding */
        unsigned int s_bits = data >> 6;
        if (!(offset & ((1 << s_bits) - 1))) {
            tcg_target_ulong scaled_uimm = offset >> s_bits;
            if (scaled_uimm <= 0xfff) {
                tcg_out_ldst_12(s, data, type, rd, rn, scaled_uimm);
                return;
            }
        }
    }

    /* worst-case scenario, move offset to temp register, use reg offset */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP);
}

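/* Offset selection examples for a 64-bit access, where LDST_64 >> 6 = 3
   (illustrative):
       offset -8    -> 9-bit signed unscaled form (LDUR/STUR);
       offset 32760 -> 32760 >> 3 = 4095 <= 0xfff, scaled uimm12 form;
       offset 32768 -> 32768 >> 3 = 4096 out of range, so the offset is
                       materialized in TCG_REG_TMP and the register-offset
                       form is used. */
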
/* mov alias implemented with add immediate, useful to move to/from SP */
static inline void tcg_out_movr_sp(TCGContext *s, TCGType ext,
                                   TCGReg rd, TCGReg rn)
{
    /* using ADD 0x11000000 | (ext) | rn << 5 | rd */
    unsigned int base = ext ? 0x91000000 : 0x11000000;
    tcg_out32(s, base | rn << 5 | rd);
}

static inline void tcg_out_mov(TCGContext *s,
                               TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret != arg) {
        tcg_out_movr(s, type == TCG_TYPE_I64, ret, arg);
    }
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_LD,
                 arg, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_ST,
                 arg, arg1, arg2);
}

static inline void tcg_out_mul(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, TCGReg rm)
{
    /* Using MADD 0x1b000000 with Ra = wzr alias MUL 0x1b007c00 */
    unsigned int base = ext ? 0x9b007c00 : 0x1b007c00;
    tcg_out32(s, base | rm << 16 | rn << 5 | rd);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    /* Using UBFM 0x53000000 Wd, Wn, a, b */
    unsigned int base = ext ? 0xd3400000 : 0x53000000;
    tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    /* Using SBFM 0x13000000 Wd, Wn, a, b */
    unsigned int base = ext ? 0x93400000 : 0x13000000;
    tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    /* Using EXTR 0x13800000 Wd, Wn, Wm, a */
    unsigned int base = ext ? 0x93c00000 : 0x13800000;
    tcg_out32(s, base | rm << 16 | a << 10 | rn << 5 | rd);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits, max;
    bits = ext ? 64 : 32;
    max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits, max;
    bits = ext ? 64 : 32;
    max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

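/* Examples of the mappings above (illustrative):
       tcg_out_shl(s, 1, rd, rn, 3)  -> UBFM Xd, Xn, #61, #60 (alias LSL #3)
       tcg_out_rotr(s, 1, rd, rn, 8) -> EXTR Xd, Xn, Xn, #8   (alias ROR #8)
   and tcg_out_rotl rotates left by m by rotating right by (bits - m). */
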
static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn, TCGReg rm)
{
    /* Using CMP alias SUBS wzr, Wn, Wm */
    tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, rn, rm);
}

static inline void tcg_out_cset(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGCond c)
{
    /* Using CSET alias of CSINC 0x1a800400 Xd, XZR, XZR, invert(cond) */
    unsigned int base = ext ? 0x9a9f07e0 : 0x1a9f07e0;
    tcg_out32(s, base | tcg_cond_to_aarch64[tcg_invert_cond(c)] << 12 | rd);
}

static inline void tcg_out_goto(TCGContext *s, intptr_t target)
{
    intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;

    if (offset < -0x02000000 || offset >= 0x02000000) {
        /* out of 26bit range */
        tcg_abort();
    }

    tcg_out32(s, 0x14000000 | (offset & 0x03ffffff));
}

static inline void tcg_out_goto_noaddr(TCGContext *s)
{
    /* We pay attention here to not modify the branch target by
       reading from the buffer.  This ensures that caches and memory are
       kept coherent during retranslation.
       Mask away possible garbage in the high bits for the first translation,
       while keeping the offset bits for retranslation. */
    uint32_t insn;
    insn = (tcg_in32(s) & 0x03ffffff) | 0x14000000;
    tcg_out32(s, insn);
}

static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
{
    /* see comments in tcg_out_goto_noaddr */
    uint32_t insn;
    insn = tcg_in32(s) & (0x07ffff << 5);
    insn |= 0x54000000 | tcg_cond_to_aarch64[c];
    tcg_out32(s, insn);
}

static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c, intptr_t target)
{
    intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;

    if (offset < -0x40000 || offset >= 0x40000) {
        /* out of 19bit range */
        tcg_abort();
    }

    offset &= 0x7ffff;
    tcg_out32(s, 0x54000000 | tcg_cond_to_aarch64[c] | offset << 5);
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out32(s, 0xd63f0000 | reg << 5);
}

static inline void tcg_out_gotor(TCGContext *s, TCGReg reg)
{
    tcg_out32(s, 0xd61f0000 | reg << 5);
}

static inline void tcg_out_call(TCGContext *s, intptr_t target)
{
    intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;

    if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit rng */
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
        tcg_out_callr(s, TCG_REG_TMP);
    } else {
        tcg_out32(s, 0x94000000 | (offset & 0x03ffffff));
    }
}

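/* Sketch of the two call forms: a target within the +/-128MB imm26 range
   is reached with a single BL, while a distant target is materialized in
   TCG_REG_TMP by tcg_out_movi and reached with BLR. */
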
/* encode a logical immediate, mapping user parameter
   M=set bits pattern length to S=M-1 */
static inline unsigned int
aarch64_limm(unsigned int m, unsigned int r)
{
    assert(m > 0);
    return r << 16 | (m - 1) << 10;
}

/* test a register against an immediate bit pattern made of
   M set bits rotated right by R.
   Examples:
   to test a 32/64 reg against 0x00000007, pass M = 3,  R = 0.
   to test a 32/64 reg against 0x000000ff, pass M = 8,  R = 0.
   to test a 32bit reg against 0xff000000, pass M = 8,  R = 8.
   to test a 32bit reg against 0xff0000ff, pass M = 16, R = 8.
 */
static inline void tcg_out_tst(TCGContext *s, TCGType ext, TCGReg rn,
                               unsigned int m, unsigned int r)
{
    /* using TST alias of ANDS XZR, Xn,#bimm64 0x7200001f */
    unsigned int base = ext ? 0xf240001f : 0x7200001f;
    tcg_out32(s, base | aarch64_limm(m, r) | rn << 5);
}

/* and a register with a bit pattern, similarly to TST, no flags change */
static inline void tcg_out_andi(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int m, unsigned int r)
{
    /* using AND 0x12000000 */
    unsigned int base = ext ? 0x92400000 : 0x12000000;
    tcg_out32(s, base | aarch64_limm(m, r) | rn << 5 | rd);
}

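/* Example (illustrative): tcg_out_tst(s, 1, rn, 8, 0) emits
   ANDS XZR, Xn, #0xff; the base 0xf240001f already carries the N bit of
   the 64-bit logical-immediate form, and aarch64_limm(8, 0) fills in
   immr = 0, imms = 7. */
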
static inline void tcg_out_ret(TCGContext *s)
{
    /* emit RET { LR } */
    tcg_out32(s, 0xd65f03c0);
}

void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
{
    intptr_t target = addr;
    intptr_t offset = (target - (intptr_t)jmp_addr) / 4;

    if (offset < -0x02000000 || offset >= 0x02000000) {
        /* out of 26bit range */
        tcg_abort();
    }

    patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0);
    flush_icache_range(jmp_addr, jmp_addr + 4);
}

static inline void tcg_out_goto_label(TCGContext *s, int label_index)
{
    TCGLabel *l = &s->labels[label_index];

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0);
        tcg_out_goto_noaddr(s);
    } else {
        tcg_out_goto(s, l->u.value);
    }
}

static inline void tcg_out_goto_label_cond(TCGContext *s,
                                           TCGCond c, int label_index)
{
    TCGLabel *l = &s->labels[label_index];

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label_index, 0);
        tcg_out_goto_cond_noaddr(s, c);
    } else {
        tcg_out_goto_cond(s, c, l->u.value);
    }
}

static inline void tcg_out_rev(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rm)
{
    /* using REV 0x5ac00800 */
    unsigned int base = ext ? 0xdac00c00 : 0x5ac00800;
    tcg_out32(s, base | rm << 5 | rd);
}

static inline void tcg_out_rev16(TCGContext *s, TCGType ext,
                                 TCGReg rd, TCGReg rm)
{
    /* using REV16 0x5ac00400 */
    unsigned int base = ext ? 0xdac00400 : 0x5ac00400;
    tcg_out32(s, base | rm << 5 | rd);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, int s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* using ALIASes SXTB 0x13001c00, SXTH 0x13003c00, SXTW 0x93407c00
       of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = 8 * (1 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static inline void tcg_out_uxt(TCGContext *s, int s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* using ALIASes UXTB 0x53001c00, UXTH 0x53003c00
       of UBFM Wd, Wn, #0, #7|15 */
    int bits = 8 * (1 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

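/* Note that tcg_out_uxt always passes ext = 0: writing a W register
   zero-extends into the full X register, so the 32-bit UBFM form suffices
   for 8- and 16-bit zero-extension. */
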
static inline void tcg_out_addi(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int aimm)
{
    /* add immediate aimm unsigned 12bit value (with LSL 0 or 12) */
    /* using ADD 0x11000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
    unsigned int base = ext ? 0x91000000 : 0x11000000;

    if (aimm <= 0xfff) {
        aimm <<= 10;
    } else {
        /* we can only shift left by 12; if the asserts fire, the value
           cannot be represented */
        assert(!(aimm & 0xfff));
        assert(aimm <= 0xfff000);
        base |= 1 << 22; /* apply LSL 12 */
        aimm >>= 2;
    }

    tcg_out32(s, base | aimm | (rn << 5) | rd);
}

static inline void tcg_out_subi(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int aimm)
{
    /* sub immediate aimm unsigned 12bit value (with LSL 0 or 12) */
    /* using SUB 0x51000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
    unsigned int base = ext ? 0xd1000000 : 0x51000000;

    if (aimm <= 0xfff) {
        aimm <<= 10;
    } else {
        /* we can only shift left by 12; if the asserts fire, the value
           cannot be represented */
        assert(!(aimm & 0xfff));
        assert(aimm <= 0xfff000);
        base |= 1 << 22; /* apply LSL 12 */
        aimm >>= 2;
    }

    tcg_out32(s, base | aimm | (rn << 5) | rd);
}

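/* Example (illustrative) of the LSL 12 path: aimm = 0x12000 passes both
   asserts and is encoded with bit 22 set; "aimm >>= 2" places 0x12 at
   bit 10, the imm12 field. */
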
#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_ld_helpers[4] = {
    helper_ret_ldub_mmu,
    helper_ret_lduw_mmu,
    helper_ret_ldul_mmu,
    helper_ret_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_st_helpers[4] = {
    helper_ret_stb_mmu,
    helper_ret_stw_mmu,
    helper_ret_stl_mmu,
    helper_ret_stq_mmu,
};

static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);

    tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
    tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X3, (tcg_target_long)lb->raddr);
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
                 (tcg_target_long)qemu_ld_helpers[lb->opc & 3]);
    tcg_out_callr(s, TCG_REG_TMP);
    if (lb->opc & 0x04) {
        tcg_out_sxt(s, 1, lb->opc & 3, lb->datalo_reg, TCG_REG_X0);
    } else {
        tcg_out_movr(s, 1, lb->datalo_reg, TCG_REG_X0);
    }

    tcg_out_goto(s, (intptr_t)lb->raddr);
}

static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);

    tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
    tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movr(s, 1, TCG_REG_X2, lb->datalo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X4, (intptr_t)lb->raddr);
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
                 (intptr_t)qemu_st_helpers[lb->opc & 3]);
    tcg_out_callr(s, TCG_REG_TMP);
    tcg_out_goto(s, (tcg_target_long)lb->raddr);
}

static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
                                TCGReg data_reg, TCGReg addr_reg,
                                int mem_index,
                                uint8_t *raddr, uint8_t *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->opc = opc;
    label->datalo_reg = data_reg;
    label->addrlo_reg = addr_reg;
    label->mem_index = mem_index;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path.  Generated code returns the host addend in X1,
   clobbers X0,X2,X3,TMP. */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
                             int s_bits, uint8_t **label_ptr, int mem_index,
                             int is_read)
{
    TCGReg base = TCG_AREG0;
    int tlb_offset = is_read ?
        offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
        : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
    /* Extract the TLB index from the address into X0.
       X0<CPU_TLB_BITS:0> =
       addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
    tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
    /* Store the page mask part of the address and the low s_bits into X3.
       Later this allows checking for equality and alignment at the same time.
       X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
    tcg_out_andi(s, (TARGET_LONG_BITS == 64), TCG_REG_X3, addr_reg,
                 (TARGET_LONG_BITS - TARGET_PAGE_BITS) + s_bits,
                 (TARGET_LONG_BITS - TARGET_PAGE_BITS));
    /* Add any "high bits" from the tlb offset to the env address into X2,
       to take advantage of the LSL12 form of the addi instruction.
       X2 = env + (tlb_offset & 0xfff000) */
    tcg_out_addi(s, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
    /* Merge the tlb index contribution into X2.
       X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
    tcg_out_insn(s, 3502S, ADD_LSL, 1, TCG_REG_X2, TCG_REG_X2,
                 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
    /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
       X0 = load [X2 + (tlb_offset & 0x000fff)] */
    tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
                 LDST_LD, TCG_REG_X0, TCG_REG_X2,
                 (tlb_offset & 0xfff));
    /* Load the tlb addend.  Do that early to avoid stalling.
       X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
    tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2,
                 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
                 (is_read ? offsetof(CPUTLBEntry, addr_read)
                  : offsetof(CPUTLBEntry, addr_write)));
    /* Perform the address comparison. */
    tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3);
    *label_ptr = s->code_ptr;
    /* If not equal, we jump to the slow path. */
    tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
}
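
/* Schematically, the sequence emitted above is (illustrative):
       ubfm x0, addr, #TARGET_PAGE_BITS, #TARGET_PAGE_BITS+CPU_TLB_BITS
       and  x3, addr, #(PAGE_MASK | s_mask)
       add  x2, env, #hi12(tlb_offset), lsl #12
       add  x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
       ldr  x0, [x2, #lo12(tlb_offset)]
       ldr  x1, [x2, #lo12(tlb_offset) + addend offset]
       cmp  x0, x3
       b.ne slow_path   (patched by reloc_pc19 when the slow path is built) */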

#endif /* CONFIG_SOFTMMU */

static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r,
                                   TCGReg addr_r, TCGReg off_r)
{
    switch (opc) {
    case 0:
        tcg_out_ldst_r(s, LDST_8, LDST_LD, data_r, addr_r, off_r);
        break;
    case 0 | 4:
        tcg_out_ldst_r(s, LDST_8, LDST_LD_S_X, data_r, addr_r, off_r);
        break;
    case 1:
        tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
        if (TCG_LDST_BSWAP) {
            tcg_out_rev16(s, 0, data_r, data_r);
        }
        break;
    case 1 | 4:
        if (TCG_LDST_BSWAP) {
            tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
            tcg_out_rev16(s, 0, data_r, data_r);
            tcg_out_sxt(s, 1, 1, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, LDST_16, LDST_LD_S_X, data_r, addr_r, off_r);
        }
        break;
    case 2:
        tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
        if (TCG_LDST_BSWAP) {
            tcg_out_rev(s, 0, data_r, data_r);
        }
        break;
    case 2 | 4:
        if (TCG_LDST_BSWAP) {
            tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
            tcg_out_rev(s, 0, data_r, data_r);
            tcg_out_sxt(s, 1, 2, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, LDST_32, LDST_LD_S_X, data_r, addr_r, off_r);
        }
        break;
    case 3:
        tcg_out_ldst_r(s, LDST_64, LDST_LD, data_r, addr_r, off_r);
        if (TCG_LDST_BSWAP) {
            tcg_out_rev(s, 1, data_r, data_r);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data_r,
                                   TCGReg addr_r, TCGReg off_r)
{
    switch (opc) {
    case 0:
        tcg_out_ldst_r(s, LDST_8, LDST_ST, data_r, addr_r, off_r);
        break;
    case 1:
        if (TCG_LDST_BSWAP) {
            tcg_out_rev16(s, 0, TCG_REG_TMP, data_r);
            tcg_out_ldst_r(s, LDST_16, LDST_ST, TCG_REG_TMP, addr_r, off_r);
        } else {
            tcg_out_ldst_r(s, LDST_16, LDST_ST, data_r, addr_r, off_r);
        }
        break;
    case 2:
        if (TCG_LDST_BSWAP) {
            tcg_out_rev(s, 0, TCG_REG_TMP, data_r);
            tcg_out_ldst_r(s, LDST_32, LDST_ST, TCG_REG_TMP, addr_r, off_r);
        } else {
            tcg_out_ldst_r(s, LDST_32, LDST_ST, data_r, addr_r, off_r);
        }
        break;
    case 3:
        if (TCG_LDST_BSWAP) {
            tcg_out_rev(s, 1, TCG_REG_TMP, data_r);
            tcg_out_ldst_r(s, LDST_64, LDST_ST, TCG_REG_TMP, addr_r, off_r);
        } else {
            tcg_out_ldst_r(s, LDST_64, LDST_ST, data_r, addr_r, off_r);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
{
    TCGReg addr_reg, data_reg;
#ifdef CONFIG_SOFTMMU
    int mem_index, s_bits;
    uint8_t *label_ptr;
#endif
    data_reg = args[0];
    addr_reg = args[1];

#ifdef CONFIG_SOFTMMU
    mem_index = args[2];
    s_bits = opc & 3;
    tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
    add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg,
                        mem_index, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg,
                           GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
#endif /* CONFIG_SOFTMMU */
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
{
    TCGReg addr_reg, data_reg;
#ifdef CONFIG_SOFTMMU
    int mem_index, s_bits;
    uint8_t *label_ptr;
#endif
    data_reg = args[0];
    addr_reg = args[1];

#ifdef CONFIG_SOFTMMU
    mem_index = args[2];
    s_bits = opc & 3;

    tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
    add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg,
                        mem_index, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg,
                           GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
#endif /* CONFIG_SOFTMMU */
}

static uint8_t *tb_ret_addr;

/* callee stack use example:
   stp     x29, x30, [sp,#-32]!
   mov     x29, sp
   stp     x1, x2, [sp,#16]
   ...
   ldp     x1, x2, [sp,#16]
   ldp     x29, x30, [sp],#32
   ret
*/

/* push r1 and r2, and alloc stack space for a total of
   alloc_n elements (1 element = 16 bytes, must be between 1 and 31). */
static inline void tcg_out_push_pair(TCGContext *s, TCGReg addr,
                                     TCGReg r1, TCGReg r2, int alloc_n)
{
    /* using indexed scaled simm7 STP 0x28800000 | (ext) | 0x01000000 (pre-idx)
       | alloc_n * (-1) << 16 | r2 << 10 | addr << 5 | r1 */
    assert(alloc_n > 0 && alloc_n < 0x20);
    alloc_n = (-alloc_n) & 0x3f;
    tcg_out32(s, 0xa9800000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
}

/* dealloc stack space for a total of alloc_n elements and pop r1, r2.  */
static inline void tcg_out_pop_pair(TCGContext *s, TCGReg addr,
                                    TCGReg r1, TCGReg r2, int alloc_n)
{
    /* using indexed scaled simm7 LDP 0x28c00000 | (ext) | nothing (post-idx)
       | alloc_n << 16 | r2 << 10 | addr << 5 | r1 */
    assert(alloc_n > 0 && alloc_n < 0x20);
    tcg_out32(s, 0xa8c00000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
}

static inline void tcg_out_store_pair(TCGContext *s, TCGReg addr,
                                      TCGReg r1, TCGReg r2, int idx)
{
    /* using register pair offset simm7 STP 0x29000000 | (ext)
       | idx << 16 | r2 << 10 | addr << 5 | r1 */
    assert(idx > 0 && idx < 0x20);
    tcg_out32(s, 0xa9000000 | idx << 16 | r2 << 10 | addr << 5 | r1);
}

static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
                                     TCGReg r1, TCGReg r2, int idx)
{
    /* using register pair offset simm7 LDP 0x29400000 | (ext)
       | idx << 16 | r2 << 10 | addr << 5 | r1 */
    assert(idx > 0 && idx < 0x20);
    tcg_out32(s, 0xa9400000 | idx << 16 | r2 << 10 | addr << 5 | r1);
}

static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data. */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments. */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    switch (opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
        tcg_out_goto(s, (intptr_t)tb_ret_addr);
        break;

    case INDEX_op_goto_tb:
#ifndef USE_DIRECT_JUMP
#error "USE_DIRECT_JUMP required for aarch64"
#endif
        assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
        s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
        /* actual branch destination will be patched by
           aarch64_tb_set_jmp_target later, beware retranslation. */
        tcg_out_goto_noaddr(s);
        s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
        break;

    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_call(s, a0);
        } else {
            tcg_out_callr(s, a0);
        }
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, a0);
        break;

    case INDEX_op_ld_i32:
    case INDEX_op_ld_i64:
    case INDEX_op_st_i32:
    case INDEX_op_st_i64:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
                     a0, a1, a2);
        break;

    case INDEX_op_add_i64:
    case INDEX_op_add_i32:
        tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        break;

    case INDEX_op_sub_i64:
    case INDEX_op_sub_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        break;

    case INDEX_op_and_i64:
    case INDEX_op_and_i32:
        tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        break;

    case INDEX_op_or_i64:
    case INDEX_op_or_i32:
        tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        break;

    case INDEX_op_xor_i64:
    case INDEX_op_xor_i32:
        tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_mul(s, ext, a0, a1, a2);
        break;

    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;

    case INDEX_op_brcond_i64:
    case INDEX_op_brcond_i32:
        tcg_out_cmp(s, ext, a0, a1);
        tcg_out_goto_label_cond(s, a2, args[3]);
        break;

    case INDEX_op_setcond_i64:
    case INDEX_op_setcond_i32:
        tcg_out_cmp(s, ext, a1, a2);
        tcg_out_cset(s, 0, a0, args[3]);
        break;

    case INDEX_op_qemu_ld8u:
        tcg_out_qemu_ld(s, args, 0 | 0);
        break;
    case INDEX_op_qemu_ld8s:
        tcg_out_qemu_ld(s, args, 4 | 0);
        break;
    case INDEX_op_qemu_ld16u:
        tcg_out_qemu_ld(s, args, 0 | 1);
        break;
    case INDEX_op_qemu_ld16s:
        tcg_out_qemu_ld(s, args, 4 | 1);
        break;
    case INDEX_op_qemu_ld32u:
        tcg_out_qemu_ld(s, args, 0 | 2);
        break;
    case INDEX_op_qemu_ld32s:
        tcg_out_qemu_ld(s, args, 4 | 2);
        break;
    case INDEX_op_qemu_ld32:
        tcg_out_qemu_ld(s, args, 0 | 2);
        break;
    case INDEX_op_qemu_ld64:
        tcg_out_qemu_ld(s, args, 0 | 3);
        break;
    case INDEX_op_qemu_st8:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st16:
        tcg_out_qemu_st(s, args, 1);
        break;
    case INDEX_op_qemu_st32:
        tcg_out_qemu_st(s, args, 2);
        break;
    case INDEX_op_qemu_st64:
        tcg_out_qemu_st(s, args, 3);
        break;

    case INDEX_op_bswap32_i64:
        /* Despite the _i64, this is a 32-bit bswap.  */
        ext = 0;
        /* FALLTHRU */
    case INDEX_op_bswap64_i64:
    case INDEX_op_bswap32_i32:
        tcg_out_rev(s, ext, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev16(s, 0, a0, a1);
        break;

    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8s_i32:
        tcg_out_sxt(s, ext, 0, a0, a1);
        break;
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
        tcg_out_sxt(s, ext, 1, a0, a1);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_sxt(s, 1, 2, a0, a1);
        break;
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext8u_i32:
        tcg_out_uxt(s, 0, a0, a1);
        break;
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
        tcg_out_uxt(s, 1, a0, a1);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_movr(s, 0, a0, a1);
        break;

    case INDEX_op_mov_i64:
    case INDEX_op_mov_i32:
    case INDEX_op_movi_i64:
    case INDEX_op_movi_i32:
        /* Always implemented with tcg_out_mov/i, never with tcg_out_op.  */
    default:
        /* Opcode not implemented.  */
        tcg_abort();
    }
}

static const TCGTargetOpDef aarch64_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_br, { } },

    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_mov_i64, { "r", "r" } },

    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_movi_i64, { "r" } },

    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },

    { INDEX_op_st8_i32, { "r", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },
    { INDEX_op_st8_i64, { "r", "r" } },
    { INDEX_op_st16_i64, { "r", "r" } },
    { INDEX_op_st32_i64, { "r", "r" } },
    { INDEX_op_st_i64, { "r", "r" } },

    { INDEX_op_add_i32, { "r", "r", "r" } },
    { INDEX_op_add_i64, { "r", "r", "r" } },
    { INDEX_op_sub_i32, { "r", "r", "r" } },
    { INDEX_op_sub_i64, { "r", "r", "r" } },
    { INDEX_op_mul_i32, { "r", "r", "r" } },
    { INDEX_op_mul_i64, { "r", "r", "r" } },
    { INDEX_op_and_i32, { "r", "r", "r" } },
    { INDEX_op_and_i64, { "r", "r", "r" } },
    { INDEX_op_or_i32, { "r", "r", "r" } },
    { INDEX_op_or_i64, { "r", "r", "r" } },
    { INDEX_op_xor_i32, { "r", "r", "r" } },
    { INDEX_op_xor_i64, { "r", "r", "r" } },

    { INDEX_op_shl_i32, { "r", "r", "ri" } },
    { INDEX_op_shr_i32, { "r", "r", "ri" } },
    { INDEX_op_sar_i32, { "r", "r", "ri" } },
    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
    { INDEX_op_rotr_i32, { "r", "r", "ri" } },
    { INDEX_op_shl_i64, { "r", "r", "ri" } },
    { INDEX_op_shr_i64, { "r", "r", "ri" } },
    { INDEX_op_sar_i64, { "r", "r", "ri" } },
    { INDEX_op_rotl_i64, { "r", "r", "ri" } },
    { INDEX_op_rotr_i64, { "r", "r", "ri" } },

    { INDEX_op_brcond_i32, { "r", "r" } },
    { INDEX_op_setcond_i32, { "r", "r", "r" } },
    { INDEX_op_brcond_i64, { "r", "r" } },
    { INDEX_op_setcond_i64, { "r", "r", "r" } },

    { INDEX_op_qemu_ld8u, { "r", "l" } },
    { INDEX_op_qemu_ld8s, { "r", "l" } },
    { INDEX_op_qemu_ld16u, { "r", "l" } },
    { INDEX_op_qemu_ld16s, { "r", "l" } },
    { INDEX_op_qemu_ld32u, { "r", "l" } },
    { INDEX_op_qemu_ld32s, { "r", "l" } },

    { INDEX_op_qemu_ld32, { "r", "l" } },
    { INDEX_op_qemu_ld64, { "r", "l" } },

    { INDEX_op_qemu_st8, { "l", "l" } },
    { INDEX_op_qemu_st16, { "l", "l" } },
    { INDEX_op_qemu_st32, { "l", "l" } },
    { INDEX_op_qemu_st64, { "l", "l" } },

    { INDEX_op_bswap16_i32, { "r", "r" } },
    { INDEX_op_bswap32_i32, { "r", "r" } },
    { INDEX_op_bswap16_i64, { "r", "r" } },
    { INDEX_op_bswap32_i64, { "r", "r" } },
    { INDEX_op_bswap64_i64, { "r", "r" } },

    { INDEX_op_ext8s_i32, { "r", "r" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "r" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { -1 },
};

static void tcg_target_init(TCGContext *s)
{
    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);

    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
                     (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
                     (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
                     (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
                     (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
                     (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
                     (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
                     (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
                     (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
                     (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
                     (1 << TCG_REG_X18));

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */

    tcg_add_target_add_op_defs(aarch64_op_defs);
}

static void tcg_target_qemu_prologue(TCGContext *s)
{
    /* NB: frame sizes are in 16 byte stack units! */
    int frame_size_callee_saved, frame_size_tcg_locals;
    TCGReg r;

    /* save pairs (FP, LR) and (X19, X20) .. (X27, X28) */
    frame_size_callee_saved = (1) + (TCG_REG_X28 - TCG_REG_X19) / 2 + 1;

    /* frame size requirement for TCG local variables */
    frame_size_tcg_locals = TCG_STATIC_CALL_ARGS_SIZE
        + CPU_TEMP_BUF_NLONGS * sizeof(long)
        + (TCG_TARGET_STACK_ALIGN - 1);
    frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1);
    frame_size_tcg_locals /= TCG_TARGET_STACK_ALIGN;

    /* push (FP, LR) and update sp */
    tcg_out_push_pair(s, TCG_REG_SP,
                      TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);

    /* FP -> callee_saved */
    tcg_out_movr_sp(s, 1, TCG_REG_FP, TCG_REG_SP);

    /* store callee-preserved regs x19..x28 using FP -> callee_saved */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int idx = (r - TCG_REG_X19) / 2 + 1;
        tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx);
    }

    /* make stack space for TCG locals */
    tcg_out_subi(s, 1, TCG_REG_SP, TCG_REG_SP,
                 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
    /* inform TCG about how to find TCG locals with register, offset, size */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if defined(CONFIG_USE_GUEST_BASE)
    if (GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_gotor(s, tcg_target_call_iarg_regs[1]);

    tb_ret_addr = s->code_ptr;

    /* remove TCG locals stack space */
    tcg_out_addi(s, 1, TCG_REG_SP, TCG_REG_SP,
                 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);

    /* restore registers x19..x28.
       FP must be preserved, so it still points to callee_saved area */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int idx = (r - TCG_REG_X19) / 2 + 1;
        tcg_out_load_pair(s, TCG_REG_FP, r, r + 1, idx);
    }

    /* pop (FP, LR), restore SP to previous frame, return */
    tcg_out_pop_pair(s, TCG_REG_SP,
                     TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
    tcg_out_ret(s);
}