1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-pool.inc.c"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType when setting the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
27
28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
32 };
33 #endif /* CONFIG_DEBUG_TCG */
34
35 static const int tcg_target_reg_alloc_order[] = {
36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38 TCG_REG_X28, /* we will reserve this for guest_base if configured */
39
40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42 TCG_REG_X16, TCG_REG_X17,
43
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47 /* X18 reserved by system */
48 /* X19 reserved for AREG0 */
49 /* X29 reserved as fp */
50 /* X30 reserved as temporary */
51
52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54 /* V8 - V15 are call-saved, and skipped. */
55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
59 };
60
61 static const int tcg_target_call_iarg_regs[8] = {
62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
64 };
65 static const int tcg_target_call_oarg_regs[1] = {
66 TCG_REG_X0
67 };
68
69 #define TCG_REG_TMP TCG_REG_X30
70 #define TCG_VEC_TMP TCG_REG_V31
71
72 #ifndef CONFIG_SOFTMMU
73 /* Note that XZR cannot be encoded in the address base register slot,
74 as that actually encodes SP. So if we need to zero-extend the guest
75 address, via the address index register slot, we need to load even
76 a zero guest base into a register. */
77 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
78 #define TCG_REG_GUEST_BASE TCG_REG_X28
79 #endif
80
81 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
82 {
83 ptrdiff_t offset = target - code_ptr;
84 tcg_debug_assert(offset == sextract64(offset, 0, 26));
85 /* read instruction, mask away previous PC_REL26 parameter contents,
86 set the proper offset, then write back the instruction. */
87 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
88 }
89
90 static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr,
91 tcg_insn_unit *target)
92 {
93 ptrdiff_t offset = target - code_ptr;
94 tcg_insn_unit insn;
95 tcg_debug_assert(offset == sextract64(offset, 0, 26));
96 /* read instruction, mask away previous PC_REL26 parameter contents,
97 set the proper offset, then write back the instruction. */
98 insn = atomic_read(code_ptr);
99 atomic_set(code_ptr, deposit32(insn, 0, 26, offset));
100 }
101
102 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
103 {
104 ptrdiff_t offset = target - code_ptr;
105 tcg_debug_assert(offset == sextract64(offset, 0, 19));
106 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
107 }
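/* Note that both offsets above are differences of tcg_insn_unit pointers and
   are therefore counted in 32-bit instruction words, not bytes.  This matches
   the hardware scaling of the fields: imm26 (B/BL) covers +/-128MB and imm19
   (B.cond, CBZ/CBNZ, LDR literal) covers +/-1MB. */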
108
109 static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
110 intptr_t value, intptr_t addend)
111 {
112 tcg_debug_assert(addend == 0);
113 switch (type) {
114 case R_AARCH64_JUMP26:
115 case R_AARCH64_CALL26:
116 reloc_pc26(code_ptr, (tcg_insn_unit *)value);
117 break;
118 case R_AARCH64_CONDBR19:
119 reloc_pc19(code_ptr, (tcg_insn_unit *)value);
120 break;
121 default:
122 tcg_abort();
123 }
124 }
125
126 #define TCG_CT_CONST_AIMM 0x100
127 #define TCG_CT_CONST_LIMM 0x200
128 #define TCG_CT_CONST_ZERO 0x400
129 #define TCG_CT_CONST_MONE 0x800
130
131 /* parse target specific constraints */
132 static const char *target_parse_constraint(TCGArgConstraint *ct,
133 const char *ct_str, TCGType type)
134 {
135 switch (*ct_str++) {
136 case 'r': /* general registers */
137 ct->ct |= TCG_CT_REG;
138 ct->u.regs |= 0xffffffffu;
139 break;
140 case 'w': /* advsimd registers */
141 ct->ct |= TCG_CT_REG;
142 ct->u.regs |= 0xffffffff00000000ull;
143 break;
144 case 'l': /* qemu_ld / qemu_st address, data_reg */
145 ct->ct |= TCG_CT_REG;
146 ct->u.regs = 0xffffffffu;
147 #ifdef CONFIG_SOFTMMU
148 /* x0 and x1 will be overwritten when reading the tlb entry,
149 and x2 and x3 are needed for helper args, so better to avoid using them. */
150 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
151 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
152 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
153 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
154 #endif
155 break;
156 case 'A': /* Valid for arithmetic immediate (positive or negative). */
157 ct->ct |= TCG_CT_CONST_AIMM;
158 break;
159 case 'L': /* Valid for logical immediate. */
160 ct->ct |= TCG_CT_CONST_LIMM;
161 break;
162 case 'M': /* minus one */
163 ct->ct |= TCG_CT_CONST_MONE;
164 break;
165 case 'Z': /* zero */
166 ct->ct |= TCG_CT_CONST_ZERO;
167 break;
168 default:
169 return NULL;
170 }
171 return ct_str;
172 }
173
174 /* Match a constant valid for addition (12-bit, optionally shifted). */
175 static inline bool is_aimm(uint64_t val)
176 {
177 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
178 }
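/* For example, 0x123 matches the unshifted form and 0x45000 matches the
   LSL #12 form (only bits 12..23 set), while 0x1001 has bits set in both
   halves and cannot be encoded as a single ADD/SUB immediate. */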
179
180 /* Match a constant valid for logical operations. */
181 static inline bool is_limm(uint64_t val)
182 {
183 /* Taking a simplified view of the logical immediates for now, ignoring
184 the replication that can happen across the field. Match bit patterns
185 of the forms
186 0....01....1
187 0..01..10..0
188 and their inverses. */
189
190 /* Make things easier below, by testing the form with msb clear. */
191 if ((int64_t)val < 0) {
192 val = ~val;
193 }
194 if (val == 0) {
195 return false;
196 }
197 val += val & -val;
198 return (val & (val - 1)) == 0;
199 }
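/* Worked example: 0x0ff0 has the form 0..01..10..0; its lowest set bit is
   0x0010, so the addition carries the whole run of ones away and leaves
   0x1000, a power of two, and the value is accepted.  Something like 0x0f0f,
   with two separate runs of ones, still has extra bits set after the addition
   and is rejected. */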
200
201 /* Match a constant that is valid for vectors. */
202 static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8)
203 {
204 int i;
205
206 *op = 0;
207 /* Match replication across 8 bits. */
208 if (v64 == dup_const(MO_8, v64)) {
209 *cmode = 0xe;
210 *imm8 = v64 & 0xff;
211 return true;
212 }
213 /* Match replication across 16 bits. */
214 if (v64 == dup_const(MO_16, v64)) {
215 uint16_t v16 = v64;
216
217 if (v16 == (v16 & 0xff)) {
218 *cmode = 0x8;
219 *imm8 = v16 & 0xff;
220 return true;
221 } else if (v16 == (v16 & 0xff00)) {
222 *cmode = 0xa;
223 *imm8 = v16 >> 8;
224 return true;
225 }
226 }
227 /* Match replication across 32 bits. */
228 if (v64 == dup_const(MO_32, v64)) {
229 uint32_t v32 = v64;
230
231 if (v32 == (v32 & 0xff)) {
232 *cmode = 0x0;
233 *imm8 = v32 & 0xff;
234 return true;
235 } else if (v32 == (v32 & 0xff00)) {
236 *cmode = 0x2;
237 *imm8 = (v32 >> 8) & 0xff;
238 return true;
239 } else if (v32 == (v32 & 0xff0000)) {
240 *cmode = 0x4;
241 *imm8 = (v32 >> 16) & 0xff;
242 return true;
243 } else if (v32 == (v32 & 0xff000000)) {
244 *cmode = 0x6;
245 *imm8 = v32 >> 24;
246 return true;
247 } else if ((v32 & 0xffff00ff) == 0xff) {
248 *cmode = 0xc;
249 *imm8 = (v32 >> 8) & 0xff;
250 return true;
251 } else if ((v32 & 0xff00ffff) == 0xffff) {
252 *cmode = 0xd;
253 *imm8 = (v32 >> 16) & 0xff;
254 return true;
255 }
256 /* Match forms of a float32. */
257 if (extract32(v32, 0, 19) == 0
258 && (extract32(v32, 25, 6) == 0x20
259 || extract32(v32, 25, 6) == 0x1f)) {
260 *cmode = 0xf;
261 *imm8 = (extract32(v32, 31, 1) << 7)
262 | (extract32(v32, 25, 1) << 6)
263 | extract32(v32, 19, 6);
264 return true;
265 }
266 }
267 /* Match forms of a float64. */
268 if (extract64(v64, 0, 48) == 0
269 && (extract64(v64, 54, 9) == 0x100
270 || extract64(v64, 54, 9) == 0x0ff)) {
271 *cmode = 0xf;
272 *op = 1;
273 *imm8 = (extract64(v64, 63, 1) << 7)
274 | (extract64(v64, 54, 1) << 6)
275 | extract64(v64, 48, 6);
276 return true;
277 }
278 /* Match bytes of 0x00 and 0xff. */
279 for (i = 0; i < 64; i += 8) {
280 uint64_t byte = extract64(v64, i, 8);
281 if (byte != 0 && byte != 0xff) {
282 break;
283 }
284 }
285 if (i == 64) {
286 *cmode = 0xe;
287 *op = 1;
288 *imm8 = (extract64(v64, 0, 1) << 0)
289 | (extract64(v64, 8, 1) << 1)
290 | (extract64(v64, 16, 1) << 2)
291 | (extract64(v64, 24, 1) << 3)
292 | (extract64(v64, 32, 1) << 4)
293 | (extract64(v64, 40, 1) << 5)
294 | (extract64(v64, 48, 1) << 6)
295 | (extract64(v64, 56, 1) << 7);
296 return true;
297 }
298 return false;
299 }
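/* As an example of the float64 case: v64 = 0x3ff0000000000000 (1.0) has its
   low 48 bits clear and bits 54..62 equal to 0x0ff, so it is accepted with
   op = 1, cmode = 0xf, imm8 = 0x70, the same immediate that FMOV Dd, #1.0
   uses. */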
300
301 static int tcg_target_const_match(tcg_target_long val, TCGType type,
302 const TCGArgConstraint *arg_ct)
303 {
304 int ct = arg_ct->ct;
305
306 if (ct & TCG_CT_CONST) {
307 return 1;
308 }
309 if (type == TCG_TYPE_I32) {
310 val = (int32_t)val;
311 }
312 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
313 return 1;
314 }
315 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
316 return 1;
317 }
318 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
319 return 1;
320 }
321 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
322 return 1;
323 }
324
325 return 0;
326 }
327
328 enum aarch64_cond_code {
329 COND_EQ = 0x0,
330 COND_NE = 0x1,
331 COND_CS = 0x2, /* Unsigned greater or equal */
332 COND_HS = COND_CS, /* ALIAS greater or equal */
333 COND_CC = 0x3, /* Unsigned less than */
334 COND_LO = COND_CC, /* ALIAS Lower */
335 COND_MI = 0x4, /* Negative */
336 COND_PL = 0x5, /* Zero or greater */
337 COND_VS = 0x6, /* Overflow */
338 COND_VC = 0x7, /* No overflow */
339 COND_HI = 0x8, /* Unsigned greater than */
340 COND_LS = 0x9, /* Unsigned less or equal */
341 COND_GE = 0xa,
342 COND_LT = 0xb,
343 COND_GT = 0xc,
344 COND_LE = 0xd,
345 COND_AL = 0xe,
346 COND_NV = 0xf, /* behaves like COND_AL here */
347 };
348
349 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
350 [TCG_COND_EQ] = COND_EQ,
351 [TCG_COND_NE] = COND_NE,
352 [TCG_COND_LT] = COND_LT,
353 [TCG_COND_GE] = COND_GE,
354 [TCG_COND_LE] = COND_LE,
355 [TCG_COND_GT] = COND_GT,
356 /* unsigned */
357 [TCG_COND_LTU] = COND_LO,
358 [TCG_COND_GTU] = COND_HI,
359 [TCG_COND_GEU] = COND_HS,
360 [TCG_COND_LEU] = COND_LS,
361 };
362
363 typedef enum {
364 LDST_ST = 0, /* store */
365 LDST_LD = 1, /* load */
366 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
367 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
368 } AArch64LdstType;
369
370 /* We encode the format of the insn into the beginning of the name, so that
371 we can have the preprocessor help "typecheck" the insn vs the output
372 function. Arm didn't provide us with nice names for the formats, so we
373 use the section number of the architecture reference manual in which the
374 instruction group is described. */
375 typedef enum {
376 /* Compare and branch (immediate). */
377 I3201_CBZ = 0x34000000,
378 I3201_CBNZ = 0x35000000,
379
380 /* Conditional branch (immediate). */
381 I3202_B_C = 0x54000000,
382
383 /* Unconditional branch (immediate). */
384 I3206_B = 0x14000000,
385 I3206_BL = 0x94000000,
386
387 /* Unconditional branch (register). */
388 I3207_BR = 0xd61f0000,
389 I3207_BLR = 0xd63f0000,
390 I3207_RET = 0xd65f0000,
391
392 /* Load literal, for loading a value at a pc-relative offset */
393 I3305_LDR = 0x58000000,
394 I3305_LDR_v64 = 0x5c000000,
395 I3305_LDR_v128 = 0x9c000000,
396
397 /* Load/store register. Described here as 3.3.12, but the helper
398 that emits them can transform to 3.3.10 or 3.3.13. */
399 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
400 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
401 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
402 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
403
404 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
405 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
406 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
407 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
408
409 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
410 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
411
412 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
413 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
414 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
415
416 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
417 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
418
419 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
420 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
421
422 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
423 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
424
425 I3312_TO_I3310 = 0x00200800,
426 I3312_TO_I3313 = 0x01000000,
427
428 /* Load/store register pair instructions. */
429 I3314_LDP = 0x28400000,
430 I3314_STP = 0x28000000,
431
432 /* Add/subtract immediate instructions. */
433 I3401_ADDI = 0x11000000,
434 I3401_ADDSI = 0x31000000,
435 I3401_SUBI = 0x51000000,
436 I3401_SUBSI = 0x71000000,
437
438 /* Bitfield instructions. */
439 I3402_BFM = 0x33000000,
440 I3402_SBFM = 0x13000000,
441 I3402_UBFM = 0x53000000,
442
443 /* Extract instruction. */
444 I3403_EXTR = 0x13800000,
445
446 /* Logical immediate instructions. */
447 I3404_ANDI = 0x12000000,
448 I3404_ORRI = 0x32000000,
449 I3404_EORI = 0x52000000,
450
451 /* Move wide immediate instructions. */
452 I3405_MOVN = 0x12800000,
453 I3405_MOVZ = 0x52800000,
454 I3405_MOVK = 0x72800000,
455
456 /* PC relative addressing instructions. */
457 I3406_ADR = 0x10000000,
458 I3406_ADRP = 0x90000000,
459
460 /* Add/subtract shifted register instructions (without a shift). */
461 I3502_ADD = 0x0b000000,
462 I3502_ADDS = 0x2b000000,
463 I3502_SUB = 0x4b000000,
464 I3502_SUBS = 0x6b000000,
465
466 /* Add/subtract shifted register instructions (with a shift). */
467 I3502S_ADD_LSL = I3502_ADD,
468
469 /* Add/subtract with carry instructions. */
470 I3503_ADC = 0x1a000000,
471 I3503_SBC = 0x5a000000,
472
473 /* Conditional select instructions. */
474 I3506_CSEL = 0x1a800000,
475 I3506_CSINC = 0x1a800400,
476 I3506_CSINV = 0x5a800000,
477 I3506_CSNEG = 0x5a800400,
478
479 /* Data-processing (1 source) instructions. */
480 I3507_CLZ = 0x5ac01000,
481 I3507_RBIT = 0x5ac00000,
482 I3507_REV16 = 0x5ac00400,
483 I3507_REV32 = 0x5ac00800,
484 I3507_REV64 = 0x5ac00c00,
485
486 /* Data-processing (2 source) instructions. */
487 I3508_LSLV = 0x1ac02000,
488 I3508_LSRV = 0x1ac02400,
489 I3508_ASRV = 0x1ac02800,
490 I3508_RORV = 0x1ac02c00,
491 I3508_SMULH = 0x9b407c00,
492 I3508_UMULH = 0x9bc07c00,
493 I3508_UDIV = 0x1ac00800,
494 I3508_SDIV = 0x1ac00c00,
495
496 /* Data-processing (3 source) instructions. */
497 I3509_MADD = 0x1b000000,
498 I3509_MSUB = 0x1b008000,
499
500 /* Logical shifted register instructions (without a shift). */
501 I3510_AND = 0x0a000000,
502 I3510_BIC = 0x0a200000,
503 I3510_ORR = 0x2a000000,
504 I3510_ORN = 0x2a200000,
505 I3510_EOR = 0x4a000000,
506 I3510_EON = 0x4a200000,
507 I3510_ANDS = 0x6a000000,
508
509 /* AdvSIMD copy */
510 I3605_DUP = 0x0e000400,
511 I3605_INS = 0x4e001c00,
512 I3605_UMOV = 0x0e003c00,
513
514 /* AdvSIMD modified immediate */
515 I3606_MOVI = 0x0f000400,
516
517 /* AdvSIMD shift by immediate */
518 I3614_SSHR = 0x0f000400,
519 I3614_SSRA = 0x0f001400,
520 I3614_SHL = 0x0f005400,
521 I3614_USHR = 0x2f000400,
522 I3614_USRA = 0x2f001400,
523
524 /* AdvSIMD three same. */
525 I3616_ADD = 0x0e208400,
526 I3616_AND = 0x0e201c00,
527 I3616_BIC = 0x0e601c00,
528 I3616_EOR = 0x2e201c00,
529 I3616_MUL = 0x0e209c00,
530 I3616_ORR = 0x0ea01c00,
531 I3616_ORN = 0x0ee01c00,
532 I3616_SUB = 0x2e208400,
533 I3616_CMGT = 0x0e203400,
534 I3616_CMGE = 0x0e203c00,
535 I3616_CMTST = 0x0e208c00,
536 I3616_CMHI = 0x2e203400,
537 I3616_CMHS = 0x2e203c00,
538 I3616_CMEQ = 0x2e208c00,
539
540 /* AdvSIMD two-reg misc. */
541 I3617_CMGT0 = 0x0e208800,
542 I3617_CMEQ0 = 0x0e209800,
543 I3617_CMLT0 = 0x0e20a800,
544 I3617_CMGE0 = 0x2e208800,
545 I3617_CMLE0 = 0x2e20a800,
546 I3617_NOT = 0x2e205800,
547 I3617_NEG = 0x2e20b800,
548
549 /* System instructions. */
550 NOP = 0xd503201f,
551 DMB_ISH = 0xd50338bf,
552 DMB_LD = 0x00000100,
553 DMB_ST = 0x00000200,
554 } AArch64Insn;
555
556 static inline uint32_t tcg_in32(TCGContext *s)
557 {
558 uint32_t v = *(uint32_t *)s->code_ptr;
559 return v;
560 }
561
562 /* Emit an opcode with "type-checking" of the format. */
563 #define tcg_out_insn(S, FMT, OP, ...) \
564 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
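/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm).  Pairing a format
   with an opcode from another group fails to compile, because the glued
   enumerator (say I3401_ORR) does not exist. */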
565
566 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
567 {
568 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
569 }
570
571 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
572 TCGReg rt, int imm19)
573 {
574 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
575 }
576
577 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
578 TCGCond c, int imm19)
579 {
580 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
581 }
582
583 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
584 {
585 tcg_out32(s, insn | (imm26 & 0x03ffffff));
586 }
587
588 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
589 {
590 tcg_out32(s, insn | rn << 5);
591 }
592
593 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
594 TCGReg r1, TCGReg r2, TCGReg rn,
595 tcg_target_long ofs, bool pre, bool w)
596 {
597 insn |= 1u << 31; /* ext */
598 insn |= pre << 24;
599 insn |= w << 23;
600
601 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
602 insn |= (ofs & (0x7f << 3)) << (15 - 3);
603
604 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
605 }
606
607 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
608 TCGReg rd, TCGReg rn, uint64_t aimm)
609 {
610 if (aimm > 0xfff) {
611 tcg_debug_assert((aimm & 0xfff) == 0);
612 aimm >>= 12;
613 tcg_debug_assert(aimm <= 0xfff);
614 aimm |= 1 << 12; /* apply LSL 12 */
615 }
616 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
617 }
618
619 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
620 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
621 that feed the DecodeBitMasks pseudo function. */
622 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
623 TCGReg rd, TCGReg rn, int n, int immr, int imms)
624 {
625 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
626 | rn << 5 | rd);
627 }
628
629 #define tcg_out_insn_3404 tcg_out_insn_3402
630
631 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
632 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
633 {
634 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
635 | rn << 5 | rd);
636 }
637
638 /* This function is used for the Move (wide immediate) instruction group.
639 Note that SHIFT is a full shift count, not the 2 bit HW field. */
640 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
641 TCGReg rd, uint16_t half, unsigned shift)
642 {
643 tcg_debug_assert((shift & ~0x30) == 0);
644 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
645 }
646
647 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
648 TCGReg rd, int64_t disp)
649 {
650 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
651 }
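/* DISP is a 21-bit signed value split into immlo (bits 29..30) and immhi
   (bits 5..23).  It counts bytes for ADR, giving a +/-1MB range, and 4KB
   pages for ADRP, giving +/-4GB. */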
652
653 /* This function is for 3.5.2 (Add/subtract shifted register), for
654 the rare occasion when we actually want to supply a shift amount. */
655 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
656 TCGType ext, TCGReg rd, TCGReg rn,
657 TCGReg rm, int imm6)
658 {
659 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
660 }
661
662 /* This function is for 3.5.2 (Add/subtract shifted register),
663 and 3.5.10 (Logical shifted register), for the vast majority of cases
664 when we don't want to apply a shift. Thus it can also be used for
665 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
666 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
667 TCGReg rd, TCGReg rn, TCGReg rm)
668 {
669 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
670 }
671
672 #define tcg_out_insn_3503 tcg_out_insn_3502
673 #define tcg_out_insn_3508 tcg_out_insn_3502
674 #define tcg_out_insn_3510 tcg_out_insn_3502
675
676 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
677 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
678 {
679 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
680 | tcg_cond_to_aarch64[c] << 12);
681 }
682
683 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
684 TCGReg rd, TCGReg rn)
685 {
686 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
687 }
688
689 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
690 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
691 {
692 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
693 }
694
695 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
696 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
697 {
698 /* Note that bit 11 set means general register input. Therefore
699 we can handle both register sets with one function. */
700 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
701 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
702 }
703
704 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
705 TCGReg rd, bool op, int cmode, uint8_t imm8)
706 {
707 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
708 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
709 }
710
711 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
712 TCGReg rd, TCGReg rn, unsigned immhb)
713 {
714 tcg_out32(s, insn | q << 30 | immhb << 16
715 | (rn & 0x1f) << 5 | (rd & 0x1f));
716 }
717
718 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
719 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
720 {
721 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
722 | (rn & 0x1f) << 5 | (rd & 0x1f));
723 }
724
725 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
726 unsigned size, TCGReg rd, TCGReg rn)
727 {
728 tcg_out32(s, insn | q << 30 | (size << 22)
729 | (rn & 0x1f) << 5 | (rd & 0x1f));
730 }
731
732 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
733 TCGReg rd, TCGReg base, TCGType ext,
734 TCGReg regoff)
735 {
736 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
737 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
738 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
739 }
740
741 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
742 TCGReg rd, TCGReg rn, intptr_t offset)
743 {
744 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
745 }
746
747 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
748 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
749 {
750 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
751 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
752 | rn << 5 | (rd & 0x1f));
753 }
754
755 /* Register to register move using ORR (shifted register with no shift). */
756 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
757 {
758 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
759 }
760
761 /* Register to register move using ADDI (move to/from SP). */
762 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
763 {
764 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
765 }
766
767 /* This function is used for the Logical (immediate) instruction group.
768 The value of LIMM must satisfy IS_LIMM. See the comment above about
769 only supporting simplified logical immediates. */
770 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
771 TCGReg rd, TCGReg rn, uint64_t limm)
772 {
773 unsigned h, l, r, c;
774
775 tcg_debug_assert(is_limm(limm));
776
777 h = clz64(limm);
778 l = ctz64(limm);
779 if (l == 0) {
780 r = 0; /* form 0....01....1 */
781 c = ctz64(~limm) - 1;
782 if (h == 0) {
783 r = clz64(~limm); /* form 1..10..01..1 */
784 c += r;
785 }
786 } else {
787 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
788 c = r - h - 1;
789 }
790 if (ext == TCG_TYPE_I32) {
791 r &= 31;
792 c &= 31;
793 }
794
795 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
796 }
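/* Worked example: limm = 0x0000000000ff0000 gives h = clz64 = 40 and
   l = ctz64 = 16.  Since l != 0 we take the else branch: r = 64 - 16 = 48,
   c = 48 - 40 - 1 = 7.  For a 64-bit operation we thus emit N = 1,
   immr = 48, imms = 7, i.e. a run of eight ones rotated right by 48, which
   DecodeBitMasks expands back into 0xff0000. */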
797
798 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
799 TCGReg rd, uint64_t v64)
800 {
801 int op, cmode, imm8;
802
803 if (is_fimm(v64, &op, &cmode, &imm8)) {
804 tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8);
805 } else if (type == TCG_TYPE_V128) {
806 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
807 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
808 } else {
809 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
810 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
811 }
812 }
813
814 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
815 tcg_target_long value)
816 {
817 tcg_target_long svalue = value;
818 tcg_target_long ivalue = ~value;
819 tcg_target_long t0, t1, t2;
820 int s0, s1;
821 AArch64Insn opc;
822
823 switch (type) {
824 case TCG_TYPE_I32:
825 case TCG_TYPE_I64:
826 tcg_debug_assert(rd < 32);
827 break;
828
829 case TCG_TYPE_V64:
830 case TCG_TYPE_V128:
831 tcg_debug_assert(rd >= 32);
832 tcg_out_dupi_vec(s, type, rd, value);
833 return;
834
835 default:
836 g_assert_not_reached();
837 }
838
839 /* For 32-bit values, discard potential garbage in value. For 64-bit
840 values within [2**31, 2**32-1], we can create smaller sequences by
841 interpreting this as a negative 32-bit number, while ensuring that
842 the high 32 bits are cleared by setting SF=0. */
843 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
844 svalue = (int32_t)value;
845 value = (uint32_t)value;
846 ivalue = (uint32_t)ivalue;
847 type = TCG_TYPE_I32;
848 }
849
850 /* Speed things up by handling the common case of small positive
851 and negative values specially. */
852 if ((value & ~0xffffull) == 0) {
853 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
854 return;
855 } else if ((ivalue & ~0xffffull) == 0) {
856 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
857 return;
858 }
859
860 /* Check for logical immediates. For the benefit of 32-bit quantities,
861 use the sign-extended value. That lets us match rotated values such
862 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
863 if (is_limm(svalue)) {
864 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
865 return;
866 }
867
868 /* Look for host pointer values within 4G of the PC. This happens
869 often when loading pointers to QEMU's own data structures. */
870 if (type == TCG_TYPE_I64) {
871 tcg_target_long disp = value - (intptr_t)s->code_ptr;
872 if (disp == sextract64(disp, 0, 21)) {
873 tcg_out_insn(s, 3406, ADR, rd, disp);
874 return;
875 }
876 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
877 if (disp == sextract64(disp, 0, 21)) {
878 tcg_out_insn(s, 3406, ADRP, rd, disp);
879 if (value & 0xfff) {
880 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
881 }
882 return;
883 }
884 }
885
886 /* Would it take fewer insns to begin with MOVN? */
887 if (ctpop64(value) >= 32) {
888 t0 = ivalue;
889 opc = I3405_MOVN;
890 } else {
891 t0 = value;
892 opc = I3405_MOVZ;
893 }
894 s0 = ctz64(t0) & (63 & -16);
895 t1 = t0 & ~(0xffffUL << s0);
896 s1 = ctz64(t1) & (63 & -16);
897 t2 = t1 & ~(0xffffUL << s1);
898 if (t2 == 0) {
899 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
900 if (t1 != 0) {
901 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
902 }
903 return;
904 }
905
906 /* For more than 2 insns, dump it into the constant pool. */
907 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
908 tcg_out_insn(s, 3305, LDR, 0, rd);
909 }
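/* Worked example: value = 0xffffffff00120000 has 34 bits set, so we start
   with MOVN.  ivalue = 0x00000000ffedffff has its first nonzero 16-bit chunk
   at shift 0 and its next at shift 16, so we emit MOVN rd, #0xffff (yielding
   0xffffffffffff0000) and then patch the one chunk of VALUE that still
   differs with MOVK rd, #0x0012, lsl #16: two insns total. */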
910
911 /* Define something more legible for general use. */
912 #define tcg_out_ldst_r tcg_out_insn_3310
913
914 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
915 TCGReg rn, intptr_t offset, int lgsize)
916 {
917 /* If the offset is naturally aligned and in range, then we can
918 use the scaled uimm12 encoding */
919 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
920 uintptr_t scaled_uimm = offset >> lgsize;
921 if (scaled_uimm <= 0xfff) {
922 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
923 return;
924 }
925 }
926
927 /* Small signed offsets can use the unscaled encoding. */
928 if (offset >= -256 && offset < 256) {
929 tcg_out_insn_3312(s, insn, rd, rn, offset);
930 return;
931 }
932
933 /* Worst-case scenario, move offset to temp register, use reg offset. */
934 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
935 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
936 }
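/* For an 8-byte access (lgsize = 3) this means: offset 0x7ff8 uses the
   scaled uimm12 form (0x7ff8 >> 3 == 0xfff), offset -16 uses the unscaled
   signed 9-bit form, and an offset such as 0x10000 falls back to loading
   the offset into TCG_REG_TMP and using the register-offset form. */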
937
938 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
939 {
940 if (ret == arg) {
941 return;
942 }
943 switch (type) {
944 case TCG_TYPE_I32:
945 case TCG_TYPE_I64:
946 if (ret < 32 && arg < 32) {
947 tcg_out_movr(s, type, ret, arg);
948 break;
949 } else if (ret < 32) {
950 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
951 break;
952 } else if (arg < 32) {
953 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
954 break;
955 }
956 /* FALLTHRU */
957
958 case TCG_TYPE_V64:
959 tcg_debug_assert(ret >= 32 && arg >= 32);
960 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
961 break;
962 case TCG_TYPE_V128:
963 tcg_debug_assert(ret >= 32 && arg >= 32);
964 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
965 break;
966
967 default:
968 g_assert_not_reached();
969 }
970 }
971
972 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
973 TCGReg base, intptr_t ofs)
974 {
975 AArch64Insn insn;
976 int lgsz;
977
978 switch (type) {
979 case TCG_TYPE_I32:
980 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
981 lgsz = 2;
982 break;
983 case TCG_TYPE_I64:
984 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
985 lgsz = 3;
986 break;
987 case TCG_TYPE_V64:
988 insn = I3312_LDRVD;
989 lgsz = 3;
990 break;
991 case TCG_TYPE_V128:
992 insn = I3312_LDRVQ;
993 lgsz = 4;
994 break;
995 default:
996 g_assert_not_reached();
997 }
998 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
999 }
1000
1001 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1002 TCGReg base, intptr_t ofs)
1003 {
1004 AArch64Insn insn;
1005 int lgsz;
1006
1007 switch (type) {
1008 case TCG_TYPE_I32:
1009 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1010 lgsz = 2;
1011 break;
1012 case TCG_TYPE_I64:
1013 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1014 lgsz = 3;
1015 break;
1016 case TCG_TYPE_V64:
1017 insn = I3312_STRVD;
1018 lgsz = 3;
1019 break;
1020 case TCG_TYPE_V128:
1021 insn = I3312_STRVQ;
1022 lgsz = 4;
1023 break;
1024 default:
1025 g_assert_not_reached();
1026 }
1027 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1028 }
1029
1030 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1031 TCGReg base, intptr_t ofs)
1032 {
1033 if (type <= TCG_TYPE_I64 && val == 0) {
1034 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1035 return true;
1036 }
1037 return false;
1038 }
1039
1040 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1041 TCGReg rn, unsigned int a, unsigned int b)
1042 {
1043 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1044 }
1045
1046 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1047 TCGReg rn, unsigned int a, unsigned int b)
1048 {
1049 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1050 }
1051
1052 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1053 TCGReg rn, unsigned int a, unsigned int b)
1054 {
1055 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1056 }
1057
1058 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1059 TCGReg rn, TCGReg rm, unsigned int a)
1060 {
1061 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1062 }
1063
1064 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1065 TCGReg rd, TCGReg rn, unsigned int m)
1066 {
1067 int bits = ext ? 64 : 32;
1068 int max = bits - 1;
1069 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1070 }
1071
1072 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1073 TCGReg rd, TCGReg rn, unsigned int m)
1074 {
1075 int max = ext ? 63 : 31;
1076 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1077 }
1078
1079 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1080 TCGReg rd, TCGReg rn, unsigned int m)
1081 {
1082 int max = ext ? 63 : 31;
1083 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1084 }
1085
1086 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1087 TCGReg rd, TCGReg rn, unsigned int m)
1088 {
1089 int max = ext ? 63 : 31;
1090 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1091 }
1092
1093 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1094 TCGReg rd, TCGReg rn, unsigned int m)
1095 {
1096 int bits = ext ? 64 : 32;
1097 int max = bits - 1;
1098 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1099 }
1100
1101 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1102 TCGReg rn, unsigned lsb, unsigned width)
1103 {
1104 unsigned size = ext ? 64 : 32;
1105 unsigned a = (size - lsb) & (size - 1);
1106 unsigned b = width - 1;
1107 tcg_out_bfm(s, ext, rd, rn, a, b);
1108 }
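/* E.g. a 32-bit deposit at lsb = 8, width = 8 computes a = 24, b = 7 and
   emits BFM Wd, Wn, #24, #7, which is the alias BFI Wd, Wn, #8, #8. */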
1109
1110 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1111 tcg_target_long b, bool const_b)
1112 {
1113 if (const_b) {
1114 /* Using CMP or CMN aliases. */
1115 if (b >= 0) {
1116 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1117 } else {
1118 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1119 }
1120 } else {
1121 /* Using CMP alias SUBS wzr, Wn, Wm */
1122 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1123 }
1124 }
1125
1126 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
1127 {
1128 ptrdiff_t offset = target - s->code_ptr;
1129 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1130 tcg_out_insn(s, 3206, B, offset);
1131 }
1132
1133 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
1134 {
1135 ptrdiff_t offset = target - s->code_ptr;
1136 if (offset == sextract64(offset, 0, 26)) {
1137 tcg_out_insn(s, 3206, BL, offset);
1138 } else {
1139 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1140 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1141 }
1142 }
1143
1144 static inline void tcg_out_goto_noaddr(TCGContext *s)
1145 {
1146 /* We pay attention here to not modify the branch target by reading from
1147 the buffer. This ensures that caches and memory are kept coherent during
1148 retranslation. Mask away possible garbage in the high bits for the
1149 first translation, while keeping the offset bits for retranslation. */
1150 uint32_t old = tcg_in32(s);
1151 tcg_out_insn(s, 3206, B, old);
1152 }
1153
1154 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
1155 {
1156 /* See comments in tcg_out_goto_noaddr. */
1157 uint32_t old = tcg_in32(s) >> 5;
1158 tcg_out_insn(s, 3202, B_C, c, old);
1159 }
1160
1161 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1162 {
1163 tcg_out_insn(s, 3207, BLR, reg);
1164 }
1165
1166 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1167 {
1168 ptrdiff_t offset = target - s->code_ptr;
1169 if (offset == sextract64(offset, 0, 26)) {
1170 tcg_out_insn(s, 3206, BL, offset);
1171 } else {
1172 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1173 tcg_out_callr(s, TCG_REG_TMP);
1174 }
1175 }
1176
1177 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1178 uintptr_t addr)
1179 {
1180 tcg_insn_unit i1, i2;
1181 TCGType rt = TCG_TYPE_I64;
1182 TCGReg rd = TCG_REG_TMP;
1183 uint64_t pair;
1184
1185 ptrdiff_t offset = addr - jmp_addr;
1186
1187 if (offset == sextract64(offset, 0, 26)) {
1188 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1189 i2 = NOP;
1190 } else {
1191 offset = (addr >> 12) - (jmp_addr >> 12);
1192
1193 /* patch ADRP */
1194 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1195 /* patch ADDI */
1196 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1197 }
1198 pair = (uint64_t)i2 << 32 | i1;
1199 atomic_set((uint64_t *)jmp_addr, pair);
1200 flush_icache_range(jmp_addr, jmp_addr + 8);
1201 }
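/* The patched pair is thus either "B target; NOP", when the byte offset to
   the new destination fits in 26 signed bits, or "ADRP TMP, page;
   ADD TMP, TMP, #lo12", with the BR TMP that follows the pair (emitted by
   INDEX_op_goto_tb) performing the jump.  Writing both words with one
   64-bit atomic_set is why the pair must be 8-byte aligned. */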
1202
1203 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1204 {
1205 if (!l->has_value) {
1206 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1207 tcg_out_goto_noaddr(s);
1208 } else {
1209 tcg_out_goto(s, l->u.value_ptr);
1210 }
1211 }
1212
1213 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1214 TCGArg b, bool b_const, TCGLabel *l)
1215 {
1216 intptr_t offset;
1217 bool need_cmp;
1218
1219 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1220 need_cmp = false;
1221 } else {
1222 need_cmp = true;
1223 tcg_out_cmp(s, ext, a, b, b_const);
1224 }
1225
1226 if (!l->has_value) {
1227 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1228 offset = tcg_in32(s) >> 5;
1229 } else {
1230 offset = l->u.value_ptr - s->code_ptr;
1231 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1232 }
1233
1234 if (need_cmp) {
1235 tcg_out_insn(s, 3202, B_C, c, offset);
1236 } else if (c == TCG_COND_EQ) {
1237 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1238 } else {
1239 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1240 }
1241 }
1242
1243 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1244 {
1245 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1246 }
1247
1248 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1249 {
1250 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1251 }
1252
1253 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1254 {
1255 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1256 }
1257
1258 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
1259 TCGReg rd, TCGReg rn)
1260 {
1261 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1262 int bits = (8 << s_bits) - 1;
1263 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1264 }
1265
1266 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
1267 TCGReg rd, TCGReg rn)
1268 {
1269 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1270 int bits = (8 << s_bits) - 1;
1271 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1272 }
1273
1274 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1275 TCGReg rn, int64_t aimm)
1276 {
1277 if (aimm >= 0) {
1278 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1279 } else {
1280 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1281 }
1282 }
1283
1284 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1285 TCGReg rh, TCGReg al, TCGReg ah,
1286 tcg_target_long bl, tcg_target_long bh,
1287 bool const_bl, bool const_bh, bool sub)
1288 {
1289 TCGReg orig_rl = rl;
1290 AArch64Insn insn;
1291
1292 if (rl == ah || (!const_bh && rl == bh)) {
1293 rl = TCG_REG_TMP;
1294 }
1295
1296 if (const_bl) {
1297 insn = I3401_ADDSI;
1298 if ((bl < 0) ^ sub) {
1299 insn = I3401_SUBSI;
1300 bl = -bl;
1301 }
1302 if (unlikely(al == TCG_REG_XZR)) {
1303 /* ??? We want to allow al to be zero for the benefit of
1304 negation via subtraction. However, that leaves open the
1305 possibility of adding 0+const in the low part, and the
1306 immediate add instructions encode XSP not XZR. Don't try
1307 anything more elaborate here than loading another zero. */
1308 al = TCG_REG_TMP;
1309 tcg_out_movi(s, ext, al, 0);
1310 }
1311 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1312 } else {
1313 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1314 }
1315
1316 insn = I3503_ADC;
1317 if (const_bh) {
1318 /* Note that the only two constants we support are 0 and -1, and
1319 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1320 if ((bh != 0) ^ sub) {
1321 insn = I3503_SBC;
1322 }
1323 bh = TCG_REG_XZR;
1324 } else if (sub) {
1325 insn = I3503_SBC;
1326 }
1327 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1328
1329 tcg_out_mov(s, ext, orig_rl, rl);
1330 }
1331
1332 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1333 {
1334 static const uint32_t sync[] = {
1335 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1336 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1337 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1338 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1339 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1340 };
1341 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1342 }
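/* DMB_ISH supplies the inner-shareable domain; OR-ing in only DMB_LD
   selects DMB ISHLD, only DMB_ST selects DMB ISHST, and both together give
   the full DMB ISH.  So the load-only orderings above emit "dmb ishld",
   store-store emits "dmb ishst", and everything else a full "dmb ish". */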
1343
1344 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1345 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1346 {
1347 TCGReg a1 = a0;
1348 if (is_ctz) {
1349 a1 = TCG_REG_TMP;
1350 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1351 }
1352 if (const_b && b == (ext ? 64 : 32)) {
1353 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1354 } else {
1355 AArch64Insn sel = I3506_CSEL;
1356
1357 tcg_out_cmp(s, ext, a0, 0, 1);
1358 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1359
1360 if (const_b) {
1361 if (b == -1) {
1362 b = TCG_REG_XZR;
1363 sel = I3506_CSINV;
1364 } else if (b == 0) {
1365 b = TCG_REG_XZR;
1366 } else {
1367 tcg_out_movi(s, ext, d, b);
1368 b = d;
1369 }
1370 }
1371 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1372 }
1373 }
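/* For example, ctz with the constant b equal to the operation width becomes
   just "rbit TMP, a0; clz d, TMP".  Otherwise we compare a0 with zero, CLZ
   into TMP, and use CSEL (CSINV for b == -1, XZR for b == 0) to substitute
   the caller's "input was zero" value. */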
1374
1375 #ifdef CONFIG_SOFTMMU
1376 #include "tcg-ldst.inc.c"
1377
1378 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1379 * TCGMemOpIdx oi, uintptr_t ra)
1380 */
1381 static void * const qemu_ld_helpers[16] = {
1382 [MO_UB] = helper_ret_ldub_mmu,
1383 [MO_LEUW] = helper_le_lduw_mmu,
1384 [MO_LEUL] = helper_le_ldul_mmu,
1385 [MO_LEQ] = helper_le_ldq_mmu,
1386 [MO_BEUW] = helper_be_lduw_mmu,
1387 [MO_BEUL] = helper_be_ldul_mmu,
1388 [MO_BEQ] = helper_be_ldq_mmu,
1389 };
1390
1391 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1392 * uintxx_t val, TCGMemOpIdx oi,
1393 * uintptr_t ra)
1394 */
1395 static void * const qemu_st_helpers[16] = {
1396 [MO_UB] = helper_ret_stb_mmu,
1397 [MO_LEUW] = helper_le_stw_mmu,
1398 [MO_LEUL] = helper_le_stl_mmu,
1399 [MO_LEQ] = helper_le_stq_mmu,
1400 [MO_BEUW] = helper_be_stw_mmu,
1401 [MO_BEUL] = helper_be_stl_mmu,
1402 [MO_BEQ] = helper_be_stq_mmu,
1403 };
1404
1405 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1406 {
1407 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1408 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1409 tcg_out_insn(s, 3406, ADR, rd, offset);
1410 }
1411
1412 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1413 {
1414 TCGMemOpIdx oi = lb->oi;
1415 TCGMemOp opc = get_memop(oi);
1416 TCGMemOp size = opc & MO_SIZE;
1417
1418 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1419
1420 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1421 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1422 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1423 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1424 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1425 if (opc & MO_SIGN) {
1426 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1427 } else {
1428 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1429 }
1430
1431 tcg_out_goto(s, lb->raddr);
1432 }
1433
1434 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1435 {
1436 TCGMemOpIdx oi = lb->oi;
1437 TCGMemOp opc = get_memop(oi);
1438 TCGMemOp size = opc & MO_SIZE;
1439
1440 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1441
1442 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1443 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1444 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1445 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1446 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1447 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1448 tcg_out_goto(s, lb->raddr);
1449 }
1450
1451 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1452 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1453 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1454 {
1455 TCGLabelQemuLdst *label = new_ldst_label(s);
1456
1457 label->is_ld = is_ld;
1458 label->oi = oi;
1459 label->type = ext;
1460 label->datalo_reg = data_reg;
1461 label->addrlo_reg = addr_reg;
1462 label->raddr = raddr;
1463 label->label_ptr[0] = label_ptr;
1464 }
1465
1466 /* Load and compare a TLB entry, emitting the conditional jump to the
1467 slow path for the failure case, which will be patched later when finalizing
1468 the slow path. Generated code returns the host addend in X1,
1469 clobbers X0,X2,X3,TMP. */
1470 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1471 tcg_insn_unit **label_ptr, int mem_index,
1472 bool is_read)
1473 {
1474 int tlb_offset = is_read ?
1475 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1476 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1477 unsigned a_bits = get_alignment_bits(opc);
1478 unsigned s_bits = opc & MO_SIZE;
1479 unsigned a_mask = (1u << a_bits) - 1;
1480 unsigned s_mask = (1u << s_bits) - 1;
1481 TCGReg base = TCG_AREG0, x3;
1482 uint64_t tlb_mask;
1483
1484 /* For aligned accesses, we check the first byte and include the alignment
1485 bits within the address. For unaligned access, we check that we don't
1486 cross pages using the address of the last byte of the access. */
1487 if (a_bits >= s_bits) {
1488 x3 = addr_reg;
1489 } else {
1490 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1491 TCG_REG_X3, addr_reg, s_mask - a_mask);
1492 x3 = TCG_REG_X3;
1493 }
1494 tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1495
1496 /* Extract the TLB index from the address into X0.
1497 X0<CPU_TLB_BITS:0> =
1498 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1499 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1500 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1501
1502 /* Store the page mask part of the address into X3. */
1503 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1504 TCG_REG_X3, x3, tlb_mask);
1505
1506 /* Add any "high bits" from the tlb offset to the env address into X2,
1507 to take advantage of the LSL12 form of the ADDI instruction.
1508 X2 = env + (tlb_offset & 0xfff000) */
1509 if (tlb_offset & 0xfff000) {
1510 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1511 tlb_offset & 0xfff000);
1512 base = TCG_REG_X2;
1513 }
1514
1515 /* Merge the tlb index contribution into X2.
1516 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1517 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1518 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1519
1520 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1521 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1522 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1523 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff,
1524 TARGET_LONG_BITS == 32 ? 2 : 3);
1525
1526 /* Load the tlb addend. Do that early to avoid stalling.
1527 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1528 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1529 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1530 (is_read ? offsetof(CPUTLBEntry, addr_read)
1531 : offsetof(CPUTLBEntry, addr_write)), 3);
1532
1533 /* Perform the address comparison. */
1534 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1535
1536 /* If not equal, we jump to the slow path. */
1537 *label_ptr = s->code_ptr;
1538 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1539 }
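/* Schematically, for a 64-bit guest and an access no wider than its
   guaranteed alignment, the code emitted above is:
       ubfx x0, addr, #TARGET_PAGE_BITS, #(CPU_TLB_BITS + 1)
       and  x3, addr, #(TARGET_PAGE_MASK | a_mask)
       add  x2, env, #(tlb_offset & 0xfff000)         (only if nonzero)
       add  x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
       ldr  x0, [x2, #(tlb_offset & 0xfff)]
       ldr  x1, [x2, #(... CPUTLBEntry.addend)]
       cmp  x0, x3
       b.ne slow_path
   For a wider access, an extra ADDI producing addr + (s_mask - a_mask)
   feeds the AND instead of addr itself. */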
1540
1541 #endif /* CONFIG_SOFTMMU */
1542
1543 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1544 TCGReg data_r, TCGReg addr_r,
1545 TCGType otype, TCGReg off_r)
1546 {
1547 const TCGMemOp bswap = memop & MO_BSWAP;
1548
1549 switch (memop & MO_SSIZE) {
1550 case MO_UB:
1551 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1552 break;
1553 case MO_SB:
1554 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1555 data_r, addr_r, otype, off_r);
1556 break;
1557 case MO_UW:
1558 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1559 if (bswap) {
1560 tcg_out_rev16(s, data_r, data_r);
1561 }
1562 break;
1563 case MO_SW:
1564 if (bswap) {
1565 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1566 tcg_out_rev16(s, data_r, data_r);
1567 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1568 } else {
1569 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1570 data_r, addr_r, otype, off_r);
1571 }
1572 break;
1573 case MO_UL:
1574 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1575 if (bswap) {
1576 tcg_out_rev32(s, data_r, data_r);
1577 }
1578 break;
1579 case MO_SL:
1580 if (bswap) {
1581 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1582 tcg_out_rev32(s, data_r, data_r);
1583 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1584 } else {
1585 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1586 }
1587 break;
1588 case MO_Q:
1589 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1590 if (bswap) {
1591 tcg_out_rev64(s, data_r, data_r);
1592 }
1593 break;
1594 default:
1595 tcg_abort();
1596 }
1597 }
1598
1599 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1600 TCGReg data_r, TCGReg addr_r,
1601 TCGType otype, TCGReg off_r)
1602 {
1603 const TCGMemOp bswap = memop & MO_BSWAP;
1604
1605 switch (memop & MO_SIZE) {
1606 case MO_8:
1607 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1608 break;
1609 case MO_16:
1610 if (bswap && data_r != TCG_REG_XZR) {
1611 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1612 data_r = TCG_REG_TMP;
1613 }
1614 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1615 break;
1616 case MO_32:
1617 if (bswap && data_r != TCG_REG_XZR) {
1618 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1619 data_r = TCG_REG_TMP;
1620 }
1621 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1622 break;
1623 case MO_64:
1624 if (bswap && data_r != TCG_REG_XZR) {
1625 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1626 data_r = TCG_REG_TMP;
1627 }
1628 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1629 break;
1630 default:
1631 tcg_abort();
1632 }
1633 }
1634
1635 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1636 TCGMemOpIdx oi, TCGType ext)
1637 {
1638 TCGMemOp memop = get_memop(oi);
1639 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1640 #ifdef CONFIG_SOFTMMU
1641 unsigned mem_index = get_mmuidx(oi);
1642 tcg_insn_unit *label_ptr;
1643
1644 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1645 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1646 TCG_REG_X1, otype, addr_reg);
1647 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1648 s->code_ptr, label_ptr);
1649 #else /* !CONFIG_SOFTMMU */
1650 if (USE_GUEST_BASE) {
1651 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1652 TCG_REG_GUEST_BASE, otype, addr_reg);
1653 } else {
1654 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1655 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1656 }
1657 #endif /* CONFIG_SOFTMMU */
1658 }
1659
1660 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1661 TCGMemOpIdx oi)
1662 {
1663 TCGMemOp memop = get_memop(oi);
1664 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1665 #ifdef CONFIG_SOFTMMU
1666 unsigned mem_index = get_mmuidx(oi);
1667 tcg_insn_unit *label_ptr;
1668
1669 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1670 tcg_out_qemu_st_direct(s, memop, data_reg,
1671 TCG_REG_X1, otype, addr_reg);
1672 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1673 data_reg, addr_reg, s->code_ptr, label_ptr);
1674 #else /* !CONFIG_SOFTMMU */
1675 if (USE_GUEST_BASE) {
1676 tcg_out_qemu_st_direct(s, memop, data_reg,
1677 TCG_REG_GUEST_BASE, otype, addr_reg);
1678 } else {
1679 tcg_out_qemu_st_direct(s, memop, data_reg,
1680 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1681 }
1682 #endif /* CONFIG_SOFTMMU */
1683 }
1684
1685 static tcg_insn_unit *tb_ret_addr;
1686
1687 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1688 const TCGArg args[TCG_MAX_OP_ARGS],
1689 const int const_args[TCG_MAX_OP_ARGS])
1690 {
1691 /* 99% of the time, we can signal the use of extension registers
1692 by looking to see if the opcode handles 64-bit data. */
1693 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1694
1695 /* Hoist the loads of the most common arguments. */
1696 TCGArg a0 = args[0];
1697 TCGArg a1 = args[1];
1698 TCGArg a2 = args[2];
1699 int c2 = const_args[2];
1700
1701 /* Some operands are defined with "rZ" constraint, a register or
1702 the zero register. These need not actually test args[I] == 0. */
1703 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1704
1705 switch (opc) {
1706 case INDEX_op_exit_tb:
1707 /* Reuse the zeroing that exists for goto_ptr. */
1708 if (a0 == 0) {
1709 tcg_out_goto_long(s, s->code_gen_epilogue);
1710 } else {
1711 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1712 tcg_out_goto_long(s, tb_ret_addr);
1713 }
1714 break;
1715
1716 case INDEX_op_goto_tb:
1717 if (s->tb_jmp_insn_offset != NULL) {
1718 /* TCG_TARGET_HAS_direct_jump */
1719 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1720 write can be used to patch the target address. */
1721 if ((uintptr_t)s->code_ptr & 7) {
1722 tcg_out32(s, NOP);
1723 }
1724 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1725 /* actual branch destination will be patched by
1726 tb_target_set_jmp_target later. */
1727 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1728 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1729 } else {
1730 /* !TCG_TARGET_HAS_direct_jump */
1731 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1732 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1733 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1734 }
1735 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1736 set_jmp_reset_offset(s, a0);
1737 break;
1738
1739 case INDEX_op_goto_ptr:
1740 tcg_out_insn(s, 3207, BR, a0);
1741 break;
1742
1743 case INDEX_op_br:
1744 tcg_out_goto_label(s, arg_label(a0));
1745 break;
1746
1747 case INDEX_op_ld8u_i32:
1748 case INDEX_op_ld8u_i64:
1749 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1750 break;
1751 case INDEX_op_ld8s_i32:
1752 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1753 break;
1754 case INDEX_op_ld8s_i64:
1755 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1756 break;
1757 case INDEX_op_ld16u_i32:
1758 case INDEX_op_ld16u_i64:
1759 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1760 break;
1761 case INDEX_op_ld16s_i32:
1762 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1763 break;
1764 case INDEX_op_ld16s_i64:
1765 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1766 break;
1767 case INDEX_op_ld_i32:
1768 case INDEX_op_ld32u_i64:
1769 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1770 break;
1771 case INDEX_op_ld32s_i64:
1772 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1773 break;
1774 case INDEX_op_ld_i64:
1775 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1776 break;
1777
1778 case INDEX_op_st8_i32:
1779 case INDEX_op_st8_i64:
1780 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1781 break;
1782 case INDEX_op_st16_i32:
1783 case INDEX_op_st16_i64:
1784 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1785 break;
1786 case INDEX_op_st_i32:
1787 case INDEX_op_st32_i64:
1788 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1789 break;
1790 case INDEX_op_st_i64:
1791 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1792 break;
1793
1794 case INDEX_op_add_i32:
1795 a2 = (int32_t)a2;
1796 /* FALLTHRU */
1797 case INDEX_op_add_i64:
1798 if (c2) {
1799 tcg_out_addsubi(s, ext, a0, a1, a2);
1800 } else {
1801 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1802 }
1803 break;
1804
1805 case INDEX_op_sub_i32:
1806 a2 = (int32_t)a2;
1807 /* FALLTHRU */
1808 case INDEX_op_sub_i64:
1809 if (c2) {
1810 tcg_out_addsubi(s, ext, a0, a1, -a2);
1811 } else {
1812 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1813 }
1814 break;
1815
1816 case INDEX_op_neg_i64:
1817 case INDEX_op_neg_i32:
1818 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1819 break;
1820
1821 case INDEX_op_and_i32:
1822 a2 = (int32_t)a2;
1823 /* FALLTHRU */
1824 case INDEX_op_and_i64:
1825 if (c2) {
1826 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1827 } else {
1828 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1829 }
1830 break;
1831
1832 case INDEX_op_andc_i32:
1833 a2 = (int32_t)a2;
1834 /* FALLTHRU */
1835 case INDEX_op_andc_i64:
1836 if (c2) {
1837 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1838 } else {
1839 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1840 }
1841 break;
1842
1843 case INDEX_op_or_i32:
1844 a2 = (int32_t)a2;
1845 /* FALLTHRU */
1846 case INDEX_op_or_i64:
1847 if (c2) {
1848 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1849 } else {
1850 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1851 }
1852 break;
1853
1854 case INDEX_op_orc_i32:
1855 a2 = (int32_t)a2;
1856 /* FALLTHRU */
1857 case INDEX_op_orc_i64:
1858 if (c2) {
1859 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1860 } else {
1861 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1862 }
1863 break;
1864
1865 case INDEX_op_xor_i32:
1866 a2 = (int32_t)a2;
1867 /* FALLTHRU */
1868 case INDEX_op_xor_i64:
1869 if (c2) {
1870 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1871 } else {
1872 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1873 }
1874 break;
1875
1876 case INDEX_op_eqv_i32:
1877 a2 = (int32_t)a2;
1878 /* FALLTHRU */
1879 case INDEX_op_eqv_i64:
1880 if (c2) {
1881 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1882 } else {
1883 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1884 }
1885 break;
1886
1887 case INDEX_op_not_i64:
1888 case INDEX_op_not_i32:
1889 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1890 break;
1891
1892 case INDEX_op_mul_i64:
1893 case INDEX_op_mul_i32:
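        /* MUL is the MADD alias with XZR as the accumulator.  */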
1894 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1895 break;
1896
1897 case INDEX_op_div_i64:
1898 case INDEX_op_div_i32:
1899 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1900 break;
1901 case INDEX_op_divu_i64:
1902 case INDEX_op_divu_i32:
1903 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1904 break;
1905
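    /* AArch64 has no integer remainder instruction: compute the quotient,
       then a0 = a1 - (quotient * a2) via MSUB.  */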
1906 case INDEX_op_rem_i64:
1907 case INDEX_op_rem_i32:
1908 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1909 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1910 break;
1911 case INDEX_op_remu_i64:
1912 case INDEX_op_remu_i32:
1913 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1914 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1915 break;
1916
1917 case INDEX_op_shl_i64:
1918 case INDEX_op_shl_i32:
1919 if (c2) {
1920 tcg_out_shl(s, ext, a0, a1, a2);
1921 } else {
1922 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1923 }
1924 break;
1925
1926 case INDEX_op_shr_i64:
1927 case INDEX_op_shr_i32:
1928 if (c2) {
1929 tcg_out_shr(s, ext, a0, a1, a2);
1930 } else {
1931 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1932 }
1933 break;
1934
1935 case INDEX_op_sar_i64:
1936 case INDEX_op_sar_i32:
1937 if (c2) {
1938 tcg_out_sar(s, ext, a0, a1, a2);
1939 } else {
1940 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1941 }
1942 break;
1943
1944 case INDEX_op_rotr_i64:
1945 case INDEX_op_rotr_i32:
1946 if (c2) {
1947 tcg_out_rotr(s, ext, a0, a1, a2);
1948 } else {
1949 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1950 }
1951 break;
1952
1953 case INDEX_op_rotl_i64:
1954 case INDEX_op_rotl_i32:
1955 if (c2) {
1956 tcg_out_rotl(s, ext, a0, a1, a2);
1957 } else {
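            /* There is no rotate-left instruction: rotate right by the
               negated count instead (-N is congruent to width - N modulo
               the operand width).  */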
1958 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1959 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1960 }
1961 break;
1962
1963 case INDEX_op_clz_i64:
1964 case INDEX_op_clz_i32:
1965 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1966 break;
1967 case INDEX_op_ctz_i64:
1968 case INDEX_op_ctz_i32:
1969 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1970 break;
1971
1972 case INDEX_op_brcond_i32:
1973 a1 = (int32_t)a1;
1974 /* FALLTHRU */
1975 case INDEX_op_brcond_i64:
1976 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1977 break;
1978
1979 case INDEX_op_setcond_i32:
1980 a2 = (int32_t)a2;
1981 /* FALLTHRU */
1982 case INDEX_op_setcond_i64:
1983 tcg_out_cmp(s, ext, a1, a2, c2);
1984 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1985 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1986 TCG_REG_XZR, tcg_invert_cond(args[3]));
1987 break;
1988
1989 case INDEX_op_movcond_i32:
1990 a2 = (int32_t)a2;
1991 /* FALLTHRU */
1992 case INDEX_op_movcond_i64:
1993 tcg_out_cmp(s, ext, a1, a2, c2);
1994 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1995 break;
1996
1997 case INDEX_op_qemu_ld_i32:
1998 case INDEX_op_qemu_ld_i64:
1999 tcg_out_qemu_ld(s, a0, a1, a2, ext);
2000 break;
2001 case INDEX_op_qemu_st_i32:
2002 case INDEX_op_qemu_st_i64:
2003 tcg_out_qemu_st(s, REG0(0), a1, a2);
2004 break;
2005
2006 case INDEX_op_bswap64_i64:
2007 tcg_out_rev64(s, a0, a1);
2008 break;
2009 case INDEX_op_bswap32_i64:
2010 case INDEX_op_bswap32_i32:
2011 tcg_out_rev32(s, a0, a1);
2012 break;
2013 case INDEX_op_bswap16_i64:
2014 case INDEX_op_bswap16_i32:
2015 tcg_out_rev16(s, a0, a1);
2016 break;
2017
2018 case INDEX_op_ext8s_i64:
2019 case INDEX_op_ext8s_i32:
2020 tcg_out_sxt(s, ext, MO_8, a0, a1);
2021 break;
2022 case INDEX_op_ext16s_i64:
2023 case INDEX_op_ext16s_i32:
2024 tcg_out_sxt(s, ext, MO_16, a0, a1);
2025 break;
2026 case INDEX_op_ext_i32_i64:
2027 case INDEX_op_ext32s_i64:
2028 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2029 break;
2030 case INDEX_op_ext8u_i64:
2031 case INDEX_op_ext8u_i32:
2032 tcg_out_uxt(s, MO_8, a0, a1);
2033 break;
2034 case INDEX_op_ext16u_i64:
2035 case INDEX_op_ext16u_i32:
2036 tcg_out_uxt(s, MO_16, a0, a1);
2037 break;
2038 case INDEX_op_extu_i32_i64:
2039 case INDEX_op_ext32u_i64:
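        /* Writing a W register zero-extends into the full X register,
           so a 32-bit register move suffices.  */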
2040 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2041 break;
2042
2043 case INDEX_op_deposit_i64:
2044 case INDEX_op_deposit_i32:
2045 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2046 break;
2047
2048 case INDEX_op_extract_i64:
2049 case INDEX_op_extract_i32:
2050 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2051 break;
2052
2053 case INDEX_op_sextract_i64:
2054 case INDEX_op_sextract_i32:
2055 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2056 break;
2057
2058 case INDEX_op_add2_i32:
2059 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2060 (int32_t)args[4], args[5], const_args[4],
2061 const_args[5], false);
2062 break;
2063 case INDEX_op_add2_i64:
2064 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2065 args[5], const_args[4], const_args[5], false);
2066 break;
2067 case INDEX_op_sub2_i32:
2068 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2069 (int32_t)args[4], args[5], const_args[4],
2070 const_args[5], true);
2071 break;
2072 case INDEX_op_sub2_i64:
2073 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2074 args[5], const_args[4], const_args[5], true);
2075 break;
2076
2077 case INDEX_op_muluh_i64:
2078 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2079 break;
2080 case INDEX_op_mulsh_i64:
2081 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2082 break;
2083
2084 case INDEX_op_mb:
2085 tcg_out_mb(s, a0);
2086 break;
2087
2088 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2089 case INDEX_op_mov_i64:
2090 case INDEX_op_mov_vec:
2091 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2092 case INDEX_op_movi_i64:
2093 case INDEX_op_dupi_vec:
2094 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2095 default:
2096 g_assert_not_reached();
2097 }
2098
2099 #undef REG0
2100 }
2101
2102 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2103 unsigned vecl, unsigned vece,
2104 const TCGArg *args, const int *const_args)
2105 {
2106 static const AArch64Insn cmp_insn[16] = {
2107 [TCG_COND_EQ] = I3616_CMEQ,
2108 [TCG_COND_GT] = I3616_CMGT,
2109 [TCG_COND_GE] = I3616_CMGE,
2110 [TCG_COND_GTU] = I3616_CMHI,
2111 [TCG_COND_GEU] = I3616_CMHS,
2112 };
2113 static const AArch64Insn cmp0_insn[16] = {
2114 [TCG_COND_EQ] = I3617_CMEQ0,
2115 [TCG_COND_GT] = I3617_CMGT0,
2116 [TCG_COND_GE] = I3617_CMGE0,
2117 [TCG_COND_LT] = I3617_CMLT0,
2118 [TCG_COND_LE] = I3617_CMLE0,
2119 };
2120
2121 TCGType type = vecl + TCG_TYPE_V64;
2122 unsigned is_q = vecl;
2123 TCGArg a0, a1, a2;
2124
2125 a0 = args[0];
2126 a1 = args[1];
2127 a2 = args[2];
2128
2129 switch (opc) {
2130 case INDEX_op_ld_vec:
2131 tcg_out_ld(s, type, a0, a1, a2);
2132 break;
2133 case INDEX_op_st_vec:
2134 tcg_out_st(s, type, a0, a1, a2);
2135 break;
2136 case INDEX_op_add_vec:
2137 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2138 break;
2139 case INDEX_op_sub_vec:
2140 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2141 break;
2142 case INDEX_op_mul_vec:
2143 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2144 break;
2145 case INDEX_op_neg_vec:
2146 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2147 break;
2148 case INDEX_op_and_vec:
2149 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2150 break;
2151 case INDEX_op_or_vec:
2152 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2153 break;
2154 case INDEX_op_xor_vec:
2155 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2156 break;
2157 case INDEX_op_andc_vec:
2158 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2159 break;
2160 case INDEX_op_orc_vec:
2161 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2162 break;
2163 case INDEX_op_not_vec:
2164 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2165 break;
2166 case INDEX_op_dup_vec:
2167 tcg_out_insn(s, 3605, DUP, is_q, a0, a1, 1 << vece, 0);
2168 break;
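    /* The AdvSIMD shift-by-immediate encoding folds the element size and
       shift count together: left shifts encode (element_bits + shift),
       right shifts encode (2 * element_bits - shift), where the element
       size in bits is 8 << vece.  */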
2169 case INDEX_op_shli_vec:
2170 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2171 break;
2172 case INDEX_op_shri_vec:
2173 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2174 break;
2175 case INDEX_op_sari_vec:
2176 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2177 break;
2178 case INDEX_op_cmp_vec:
2179 {
2180 TCGCond cond = args[3];
2181 AArch64Insn insn;
2182
2183 if (cond == TCG_COND_NE) {
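                /* There is no CMNE.  Against the zero constant permitted by
                   the "wZ" constraint, CMTST Vd, Vn, Vn sets a lane to
                   all-ones iff that lane of Vn is non-zero; otherwise
                   invert the result of CMEQ.  */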
2184 if (const_args[2]) {
2185 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2186 } else {
2187 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2188 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2189 }
2190 } else {
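                /* Prefer the one-operand CMxx0 forms for comparisons against
                   a zero constant; otherwise materialize zero in a scratch
                   vector.  Conditions without a direct two-operand encoding
                   (LT, LE, LTU, LEU) are handled by swapping operands and
                   condition.  */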
2191 if (const_args[2]) {
2192 insn = cmp0_insn[cond];
2193 if (insn) {
2194 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2195 break;
2196 }
2197 tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2198 a2 = TCG_VEC_TMP;
2199 }
2200 insn = cmp_insn[cond];
2201 if (insn == 0) {
2202 TCGArg t;
2203 t = a1, a1 = a2, a2 = t;
2204 cond = tcg_swap_cond(cond);
2205 insn = cmp_insn[cond];
2206 tcg_debug_assert(insn != 0);
2207 }
2208 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2209 }
2210 }
2211 break;
2212 default:
2213 g_assert_not_reached();
2214 }
2215 }
2216
2217 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2218 {
2219 switch (opc) {
2220 case INDEX_op_add_vec:
2221 case INDEX_op_sub_vec:
2222 case INDEX_op_mul_vec:
2223 case INDEX_op_and_vec:
2224 case INDEX_op_or_vec:
2225 case INDEX_op_xor_vec:
2226 case INDEX_op_andc_vec:
2227 case INDEX_op_orc_vec:
2228 case INDEX_op_neg_vec:
2229 case INDEX_op_not_vec:
2230 case INDEX_op_cmp_vec:
2231 case INDEX_op_shli_vec:
2232 case INDEX_op_shri_vec:
2233 case INDEX_op_sari_vec:
2234 return 1;
2235
2236 default:
2237 return 0;
2238 }
2239 }
2240
2241 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2242 TCGArg a0, ...)
2243 {
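    /* Every vector opcode advertised by tcg_can_emit_vec_op above maps
       directly onto an AArch64 instruction, so nothing needs expansion.  */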
2244 }
2245
2246 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2247 {
2248 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2249 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2250 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2251 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2252 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2253 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2254 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2255 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2256 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2257 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2258 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2259 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2260 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2261 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2262 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2263 static const TCGTargetOpDef r_r_rAL
2264 = { .args_ct_str = { "r", "r", "rAL" } };
2265 static const TCGTargetOpDef dep
2266 = { .args_ct_str = { "r", "0", "rZ" } };
2267 static const TCGTargetOpDef movc
2268 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2269 static const TCGTargetOpDef add2
2270 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2271
2272 switch (op) {
2273 case INDEX_op_goto_ptr:
2274 return &r;
2275
2276 case INDEX_op_ld8u_i32:
2277 case INDEX_op_ld8s_i32:
2278 case INDEX_op_ld16u_i32:
2279 case INDEX_op_ld16s_i32:
2280 case INDEX_op_ld_i32:
2281 case INDEX_op_ld8u_i64:
2282 case INDEX_op_ld8s_i64:
2283 case INDEX_op_ld16u_i64:
2284 case INDEX_op_ld16s_i64:
2285 case INDEX_op_ld32u_i64:
2286 case INDEX_op_ld32s_i64:
2287 case INDEX_op_ld_i64:
2288 case INDEX_op_neg_i32:
2289 case INDEX_op_neg_i64:
2290 case INDEX_op_not_i32:
2291 case INDEX_op_not_i64:
2292 case INDEX_op_bswap16_i32:
2293 case INDEX_op_bswap32_i32:
2294 case INDEX_op_bswap16_i64:
2295 case INDEX_op_bswap32_i64:
2296 case INDEX_op_bswap64_i64:
2297 case INDEX_op_ext8s_i32:
2298 case INDEX_op_ext16s_i32:
2299 case INDEX_op_ext8u_i32:
2300 case INDEX_op_ext16u_i32:
2301 case INDEX_op_ext8s_i64:
2302 case INDEX_op_ext16s_i64:
2303 case INDEX_op_ext32s_i64:
2304 case INDEX_op_ext8u_i64:
2305 case INDEX_op_ext16u_i64:
2306 case INDEX_op_ext32u_i64:
2307 case INDEX_op_ext_i32_i64:
2308 case INDEX_op_extu_i32_i64:
2309 case INDEX_op_extract_i32:
2310 case INDEX_op_extract_i64:
2311 case INDEX_op_sextract_i32:
2312 case INDEX_op_sextract_i64:
2313 return &r_r;
2314
2315 case INDEX_op_st8_i32:
2316 case INDEX_op_st16_i32:
2317 case INDEX_op_st_i32:
2318 case INDEX_op_st8_i64:
2319 case INDEX_op_st16_i64:
2320 case INDEX_op_st32_i64:
2321 case INDEX_op_st_i64:
2322 return &rZ_r;
2323
2324 case INDEX_op_add_i32:
2325 case INDEX_op_add_i64:
2326 case INDEX_op_sub_i32:
2327 case INDEX_op_sub_i64:
2328 case INDEX_op_setcond_i32:
2329 case INDEX_op_setcond_i64:
2330 return &r_r_rA;
2331
2332 case INDEX_op_mul_i32:
2333 case INDEX_op_mul_i64:
2334 case INDEX_op_div_i32:
2335 case INDEX_op_div_i64:
2336 case INDEX_op_divu_i32:
2337 case INDEX_op_divu_i64:
2338 case INDEX_op_rem_i32:
2339 case INDEX_op_rem_i64:
2340 case INDEX_op_remu_i32:
2341 case INDEX_op_remu_i64:
2342 case INDEX_op_muluh_i64:
2343 case INDEX_op_mulsh_i64:
2344 return &r_r_r;
2345
2346 case INDEX_op_and_i32:
2347 case INDEX_op_and_i64:
2348 case INDEX_op_or_i32:
2349 case INDEX_op_or_i64:
2350 case INDEX_op_xor_i32:
2351 case INDEX_op_xor_i64:
2352 case INDEX_op_andc_i32:
2353 case INDEX_op_andc_i64:
2354 case INDEX_op_orc_i32:
2355 case INDEX_op_orc_i64:
2356 case INDEX_op_eqv_i32:
2357 case INDEX_op_eqv_i64:
2358 return &r_r_rL;
2359
2360 case INDEX_op_shl_i32:
2361 case INDEX_op_shr_i32:
2362 case INDEX_op_sar_i32:
2363 case INDEX_op_rotl_i32:
2364 case INDEX_op_rotr_i32:
2365 case INDEX_op_shl_i64:
2366 case INDEX_op_shr_i64:
2367 case INDEX_op_sar_i64:
2368 case INDEX_op_rotl_i64:
2369 case INDEX_op_rotr_i64:
2370 return &r_r_ri;
2371
2372 case INDEX_op_clz_i32:
2373 case INDEX_op_ctz_i32:
2374 case INDEX_op_clz_i64:
2375 case INDEX_op_ctz_i64:
2376 return &r_r_rAL;
2377
2378 case INDEX_op_brcond_i32:
2379 case INDEX_op_brcond_i64:
2380 return &r_rA;
2381
2382 case INDEX_op_movcond_i32:
2383 case INDEX_op_movcond_i64:
2384 return &movc;
2385
2386 case INDEX_op_qemu_ld_i32:
2387 case INDEX_op_qemu_ld_i64:
2388 return &r_l;
2389 case INDEX_op_qemu_st_i32:
2390 case INDEX_op_qemu_st_i64:
2391 return &lZ_l;
2392
2393 case INDEX_op_deposit_i32:
2394 case INDEX_op_deposit_i64:
2395 return &dep;
2396
2397 case INDEX_op_add2_i32:
2398 case INDEX_op_add2_i64:
2399 case INDEX_op_sub2_i32:
2400 case INDEX_op_sub2_i64:
2401 return &add2;
2402
2403 case INDEX_op_add_vec:
2404 case INDEX_op_sub_vec:
2405 case INDEX_op_mul_vec:
2406 case INDEX_op_and_vec:
2407 case INDEX_op_or_vec:
2408 case INDEX_op_xor_vec:
2409 case INDEX_op_andc_vec:
2410 case INDEX_op_orc_vec:
2411 return &w_w_w;
2412 case INDEX_op_not_vec:
2413 case INDEX_op_neg_vec:
2414 case INDEX_op_shli_vec:
2415 case INDEX_op_shri_vec:
2416 case INDEX_op_sari_vec:
2417 return &w_w;
2418 case INDEX_op_ld_vec:
2419 case INDEX_op_st_vec:
2420 return &w_r;
2421 case INDEX_op_dup_vec:
2422 return &w_wr;
2423 case INDEX_op_cmp_vec:
2424 return &w_w_wZ;
2425
2426 default:
2427 return NULL;
2428 }
2429 }
2430
2431 static void tcg_target_init(TCGContext *s)
2432 {
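    /* The low 32 bits of a register set cover the general registers,
       the high 32 bits the vector registers.  */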
2433 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2434 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2435 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2436 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2437
2438 tcg_target_call_clobber_regs = -1ull;
2439 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2440 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2441 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2442 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2443 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2444 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2445 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2446 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2447 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2448 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2449 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2450 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2451 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2452 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2453 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2454 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2455 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2456 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2457 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2458
2459 s->reserved_regs = 0;
2460 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2461 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2462 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2463 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2464 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2465 }
2466
2467 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2468 #define PUSH_SIZE ((30 - 19 + 1) * 8)
2469
2470 #define FRAME_SIZE \
2471 ((PUSH_SIZE \
2472 + TCG_STATIC_CALL_ARGS_SIZE \
2473 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2474 + TCG_TARGET_STACK_ALIGN - 1) \
2475 & ~(TCG_TARGET_STACK_ALIGN - 1))
2476
2477 /* We're expecting a 2 byte uleb128 encoded value. */
2478 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2479
2480 /* We're expecting to use a single ADDI insn. */
2481 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2482
2483 static void tcg_target_qemu_prologue(TCGContext *s)
2484 {
2485 TCGReg r;
2486
2487 /* Push (FP, LR) and allocate space for all saved registers. */
2488 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2489 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2490
2491 /* Set up frame pointer for canonical unwinding. */
2492 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2493
2494 /* Store callee-preserved regs x19..x28. */
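    /* The (FP, LR) pair stored above occupies the first slot of the frame,
       so the saved pairs begin at offset 16.  */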
2495 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2496 int ofs = (r - TCG_REG_X19 + 2) * 8;
2497 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2498 }
2499
2500 /* Make stack space for TCG locals. */
2501 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2502 FRAME_SIZE - PUSH_SIZE);
2503
2504 /* Inform TCG about how to find TCG locals with register, offset, size. */
2505 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2506 CPU_TEMP_BUF_NLONGS * sizeof(long));
2507
2508 #if !defined(CONFIG_SOFTMMU)
2509 if (USE_GUEST_BASE) {
2510 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2511 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2512 }
2513 #endif
2514
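    /* The prologue is entered as tcg_qemu_tb_exec(env, tb_ptr): install env
       into TCG_AREG0 and tail-call into the translated block.  */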
2515 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2516 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2517
2518 /*
2519 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2520 * and fall through to the rest of the epilogue.
2521 */
2522 s->code_gen_epilogue = s->code_ptr;
2523 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2524
2525 /* TB epilogue */
2526 tb_ret_addr = s->code_ptr;
2527
2528 /* Remove TCG locals stack space. */
2529 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2530 FRAME_SIZE - PUSH_SIZE);
2531
2532 /* Restore registers x19..x28. */
2533 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2534 int ofs = (r - TCG_REG_X19 + 2) * 8;
2535 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2536 }
2537
2538 /* Pop (FP, LR), restore SP to previous frame. */
2539 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2540 TCG_REG_SP, PUSH_SIZE, 0, 1);
2541 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2542 }
2543
2544 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2545 {
2546 int i;
2547 for (i = 0; i < count; ++i) {
2548 p[i] = NOP;
2549 }
2550 }
2551
2552 typedef struct {
2553 DebugFrameHeader h;
2554 uint8_t fde_def_cfa[4];
2555 uint8_t fde_reg_ofs[24];
2556 } DebugFrame;
2557
2558 #define ELF_HOST_MACHINE EM_AARCH64
2559
2560 static const DebugFrame debug_frame = {
2561 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2562 .h.cie.id = -1,
2563 .h.cie.version = 1,
2564 .h.cie.code_align = 1,
2565 .h.cie.data_align = 0x78, /* sleb128 -8 */
2566 .h.cie.return_column = TCG_REG_LR,
2567
2568 /* Total FDE size does not include the "len" member. */
2569 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2570
2571 .fde_def_cfa = {
2572 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2573 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2574 (FRAME_SIZE >> 7)
2575 },
2576 .fde_reg_ofs = {
2577 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2578 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2579 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2580 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2581 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2582 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2583 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2584 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2585 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2586 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2587 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2588 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2589 }
2590 };
2591
2592 void tcg_register_jit(void *buf, size_t buf_size)
2593 {
2594 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2595 }