]> git.proxmox.com Git - mirror_qemu.git/blob - tcg/aarch64/tcg-target.c.inc
tcg/aarch64: Split out target constraints to tcg-target-con-str.h
[mirror_qemu.git] / tcg / aarch64 / tcg-target.c.inc
1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "../tcg-pool.c.inc"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType in setting of the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable register names for debug output: the first four rows name
 * the general registers (x29 is shown as "fp"), the last four rows name
 * the vector registers v0-v31.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    /* v29 is an ordinary vector register; only x29 is the frame pointer. */
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */
34
35 static const int tcg_target_reg_alloc_order[] = {
36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38 TCG_REG_X28, /* we will reserve this for guest_base if configured */
39
40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42 TCG_REG_X16, TCG_REG_X17,
43
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47 /* X18 reserved by system */
48 /* X19 reserved for AREG0 */
49 /* X29 reserved as fp */
50 /* X30 reserved as temporary */
51
52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54 /* V8 - V15 are call-saved, and skipped. */
55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
59 };
60
61 static const int tcg_target_call_iarg_regs[8] = {
62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
64 };
65 static const int tcg_target_call_oarg_regs[1] = {
66 TCG_REG_X0
67 };
68
/* Scratch registers used by this backend: X30 for general values,
   V31 for vector values. */
#define TCG_REG_TMP     TCG_REG_X30
#define TCG_VEC_TMP     TCG_REG_V31
71
#ifndef CONFIG_SOFTMMU
/*
 * XZR cannot be encoded in the address base register slot, because that
 * encoding actually means SP.  So when the guest address must be
 * zero-extended via the address index register slot, even a zero guest
 * base has to be loaded into a register.
 */
#define USE_GUEST_BASE      (TARGET_LONG_BITS == 32 || guest_base != 0)
#define TCG_REG_GUEST_BASE  TCG_REG_X28
#endif
80
81 static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
82 {
83 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
84 ptrdiff_t offset = target - src_rx;
85
86 if (offset == sextract64(offset, 0, 26)) {
87 /* read instruction, mask away previous PC_REL26 parameter contents,
88 set the proper offset, then write back the instruction. */
89 *src_rw = deposit32(*src_rw, 0, 26, offset);
90 return true;
91 }
92 return false;
93 }
94
95 static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
96 {
97 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
98 ptrdiff_t offset = target - src_rx;
99
100 if (offset == sextract64(offset, 0, 19)) {
101 *src_rw = deposit32(*src_rw, 5, 19, offset);
102 return true;
103 }
104 return false;
105 }
106
107 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
108 intptr_t value, intptr_t addend)
109 {
110 tcg_debug_assert(addend == 0);
111 switch (type) {
112 case R_AARCH64_JUMP26:
113 case R_AARCH64_CALL26:
114 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
115 case R_AARCH64_CONDBR19:
116 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
117 default:
118 g_assert_not_reached();
119 }
120 }
121
/* Backend-specific constant-constraint flags, tested by
   tcg_target_const_match(). */
#define TCG_CT_CONST_AIMM   (1 << 8)    /* add/sub immediate */
#define TCG_CT_CONST_LIMM   (1 << 9)    /* logical immediate */
#define TCG_CT_CONST_ZERO   (1 << 10)   /* the value 0 */
#define TCG_CT_CONST_MONE   (1 << 11)   /* the value -1 */
#define TCG_CT_CONST_ORRI   (1 << 12)   /* vector shifted immediate */
#define TCG_CT_CONST_ANDI   (1 << 13)   /* as ORRI, on the inverted value */
128
/* Register-set masks: the low 32 bits are the general registers,
   the high 32 bits are the vector registers. */
#define ALL_GENERAL_REGS    0xffffffffu
#define ALL_VECTOR_REGS     (0xffffffffull << 32)

/* With CONFIG_SOFTMMU, X0-X3 are excluded from the registers usable
   for qemu load/store operands. */
#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS      ALL_GENERAL_REGS
#endif
139
/*
 * Match a constant valid for addition: either a plain 12-bit value, or
 * a 12-bit value shifted left by 12 (bits [23:12] only).
 */
static inline bool is_aimm(uint64_t val)
{
    const uint64_t low12 = 0xfff;
    bool fits_unshifted = val <= low12;
    bool fits_shifted = (val >> 24) == 0 && (val & low12) == 0;

    return fits_unshifted || fits_shifted;
}
145
/*
 * Match a constant valid for logical operations.
 *
 * This takes a simplified view of the logical immediates, ignoring the
 * replication that can happen across the field.  Accepted bit patterns:
 *     0....01....1
 *     0..01..10..0
 * and their inverses.  Zero and all-ones are rejected.
 */
static inline bool is_limm(uint64_t val)
{
    uint64_t lowest_set;

    /* Fold the msb-set forms onto their inverses. */
    if (val >> 63) {
        val = ~val;
    }
    if (!val) {
        return false;
    }

    /* Adding the lowest set bit carries through a single run of ones
       and leaves exactly one bit set; anything else leaves more. */
    lowest_set = val & -val;
    val += lowest_set;
    return (val & (val - 1)) == 0;
}
166
/*
 * Return true if v16 is a valid 16-bit shifted immediate, i.e. a single
 * byte in either half.  On success store the matching cmode and byte in
 * *cmode and *imm8; on failure leave both untouched.
 */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 <= 0xff) {
        /* Byte in the low half. */
        *cmode = 0x8;
        *imm8 = v16;
        return true;
    }
    if ((v16 & 0xff) == 0) {
        /* Byte in the high half. */
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
181
/*
 * Return true if v32 is a valid 32-bit shifted immediate, i.e. a single
 * byte at bit 0, 8, 16 or 24.  On success store the byte in *imm8 and
 * the cmode (0x0, 0x2, 0x4 or 0x6 respectively) in *cmode; on failure
 * leave both untouched.
 */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    int shift;

    /* Try the byte lanes from least to most significant. */
    for (shift = 0; shift < 32; shift += 8) {
        uint32_t lane = 0xffu << shift;

        if ((v32 & ~lane) == 0) {
            *cmode = shift / 4;
            *imm8 = (v32 >> shift) & 0xff;
            return true;
        }
    }
    return false;
}
204
/*
 * Return true if v32 is a valid 32-bit shifting ones immediate: a byte
 * followed by 8 one bits (cmode 0xc) or by 16 one bits (cmode 0xd),
 * with everything above the byte zero.  On success store the byte in
 * *imm8 and the cmode in *cmode; on failure leave both untouched.
 */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 >> 16) == 0 && (v32 & 0xff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    }
    if ((v32 >> 24) == 0 && (v32 & 0xffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
219
/*
 * Return true if v32 is a valid float32 immediate: the low 19 bits are
 * zero and bits [30:25] are either 0x20 or 0x1f.  On success *cmode is
 * 0xf and *imm8 is assembled from bit 31, bit 25 and bits [24:19]; on
 * failure both are left untouched.
 */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    uint32_t exp_hi;

    if (extract32(v32, 0, 19) != 0) {
        return false;
    }
    exp_hi = extract32(v32, 25, 6);
    if (exp_hi != 0x20 && exp_hi != 0x1f) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract32(v32, 31, 1) << 7)
          | (extract32(v32, 25, 1) << 6)
          | extract32(v32, 19, 6);
    return true;
}
234
/*
 * Return true if v64 is a valid float64 immediate: the low 48 bits are
 * zero and bits [62:54] are either 0x100 or 0x0ff.  On success *cmode is
 * 0xf and *imm8 is assembled from bit 63, bit 54 and bits [53:48]; on
 * failure both are left untouched.
 */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    uint64_t exp_hi;

    if (extract64(v64, 0, 48) != 0) {
        return false;
    }
    exp_hi = extract64(v64, 54, 9);
    if (exp_hi != 0x100 && exp_hi != 0x0ff) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract64(v64, 63, 1) << 7)
          | (extract64(v64, 54, 1) << 6)
          | extract64(v64, 48, 6);
    return true;
}
249
/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 *
 * One byte of v32 (at bit 24, 16 or 8, tried in that order) is set aside
 * for the ORR; the rest must be loadable with one MOVI.  On success the
 * MOVI parameters are placed in (cmode, imm8) and the return value is
 * the cmode for the ORR (6, 4 or 2).  The ORR imm8 is the byte of v32
 * starting at bit (return value * 4).  Returns 0 when no split works.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int orr_cmode;

    for (orr_cmode = 6; orr_cmode > 0; orr_cmode -= 2) {
        /* Mask out the one byte we can add with ORR. */
        uint32_t rest = v32 & ~(0xffu << (orr_cmode * 4));

        if (is_shimm32(rest, cmode, imm8)) {
            return orr_cmode;
        }
        if (is_soimm32(rest, cmode, imm8)) {
            return orr_cmode;
        }
    }
    return 0;
}
269
/*
 * Return true if v32 is a valid 16-bit or 32-bit shifted immediate.
 * When both 16-bit halves of v32 are equal it is judged as a 16-bit
 * element, otherwise as a 32-bit element.
 */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    bool halves_equal = deposit32(v32, 16, 16, v32) == v32;

    return halves_equal ? is_shimm16(v32, cmode, imm8)
                        : is_shimm32(v32, cmode, imm8);
}
279
280 static int tcg_target_const_match(tcg_target_long val, TCGType type,
281 const TCGArgConstraint *arg_ct)
282 {
283 int ct = arg_ct->ct;
284
285 if (ct & TCG_CT_CONST) {
286 return 1;
287 }
288 if (type == TCG_TYPE_I32) {
289 val = (int32_t)val;
290 }
291 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
292 return 1;
293 }
294 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
295 return 1;
296 }
297 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
298 return 1;
299 }
300 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
301 return 1;
302 }
303
304 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
305 case 0:
306 break;
307 case TCG_CT_CONST_ANDI:
308 val = ~val;
309 /* fallthru */
310 case TCG_CT_CONST_ORRI:
311 if (val == deposit64(val, 32, 32, val)) {
312 int cmode, imm8;
313 return is_shimm1632(val, &cmode, &imm8);
314 }
315 break;
316 default:
317 /* Both bits should not be set for the same insn. */
318 g_assert_not_reached();
319 }
320
321 return 0;
322 }
323
/* Condition codes, numbered as they are placed in the insn cond field. */
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,

    COND_CS = 0x2,          /* Unsigned greater or equal */
    COND_HS = COND_CS,      /* ALIAS greater or equal */
    COND_CC = 0x3,          /* Unsigned less than */
    COND_LO = COND_CC,      /* ALIAS Lower */

    COND_MI = 0x4,          /* Negative */
    COND_PL = 0x5,          /* Zero or greater */
    COND_VS = 0x6,          /* Overflow */
    COND_VC = 0x7,          /* No overflow */

    COND_HI = 0x8,          /* Unsigned greater than */
    COND_LS = 0x9,          /* Unsigned less or equal */

    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,

    COND_AL = 0xe,
    COND_NV = 0xf,          /* behaves like COND_AL here */
};
344
345 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
346 [TCG_COND_EQ] = COND_EQ,
347 [TCG_COND_NE] = COND_NE,
348 [TCG_COND_LT] = COND_LT,
349 [TCG_COND_GE] = COND_GE,
350 [TCG_COND_LE] = COND_LE,
351 [TCG_COND_GT] = COND_GT,
352 /* unsigned */
353 [TCG_COND_LTU] = COND_LO,
354 [TCG_COND_GTU] = COND_HI,
355 [TCG_COND_GEU] = COND_HS,
356 [TCG_COND_LEU] = COND_LS,
357 };
358
/* Load/store kind; the values are shifted to bit 22 when building the
   I3312_* opcodes. */
typedef enum {
    LDST_ST,            /* 0: store */
    LDST_LD,            /* 1: load */
    LDST_LD_S_X,        /* 2: load and sign-extend into Xt */
    LDST_LD_S_W,        /* 3: load and sign-extend into Wt */
} AArch64LdstType;
365
366 /* We encode the format of the insn into the beginning of the name, so that
367 we can have the preprocessor help "typecheck" the insn vs the output
368 function. Arm didn't provide us with nice names for the formats, so we
369 use the section number of the architecture reference manual in which the
370 instruction group is described. */
371 typedef enum {
372 /* Compare and branch (immediate). */
373 I3201_CBZ = 0x34000000,
374 I3201_CBNZ = 0x35000000,
375
376 /* Conditional branch (immediate). */
377 I3202_B_C = 0x54000000,
378
379 /* Unconditional branch (immediate). */
380 I3206_B = 0x14000000,
381 I3206_BL = 0x94000000,
382
383 /* Unconditional branch (register). */
384 I3207_BR = 0xd61f0000,
385 I3207_BLR = 0xd63f0000,
386 I3207_RET = 0xd65f0000,
387
388 /* AdvSIMD load/store single structure. */
389 I3303_LD1R = 0x0d40c000,
390
391 /* Load literal for loading the address at pc-relative offset */
392 I3305_LDR = 0x58000000,
393 I3305_LDR_v64 = 0x5c000000,
394 I3305_LDR_v128 = 0x9c000000,
395
396 /* Load/store register. Described here as 3.3.12, but the helper
397 that emits them can transform to 3.3.10 or 3.3.13. */
398 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
399 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
400 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
401 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
402
403 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
404 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
405 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
406 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
407
408 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
409 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
410
411 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
412 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
413 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
414
415 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
416 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
417
418 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
419 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
420
421 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
422 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
423
424 I3312_TO_I3310 = 0x00200800,
425 I3312_TO_I3313 = 0x01000000,
426
427 /* Load/store register pair instructions. */
428 I3314_LDP = 0x28400000,
429 I3314_STP = 0x28000000,
430
431 /* Add/subtract immediate instructions. */
432 I3401_ADDI = 0x11000000,
433 I3401_ADDSI = 0x31000000,
434 I3401_SUBI = 0x51000000,
435 I3401_SUBSI = 0x71000000,
436
437 /* Bitfield instructions. */
438 I3402_BFM = 0x33000000,
439 I3402_SBFM = 0x13000000,
440 I3402_UBFM = 0x53000000,
441
442 /* Extract instruction. */
443 I3403_EXTR = 0x13800000,
444
445 /* Logical immediate instructions. */
446 I3404_ANDI = 0x12000000,
447 I3404_ORRI = 0x32000000,
448 I3404_EORI = 0x52000000,
449
450 /* Move wide immediate instructions. */
451 I3405_MOVN = 0x12800000,
452 I3405_MOVZ = 0x52800000,
453 I3405_MOVK = 0x72800000,
454
455 /* PC relative addressing instructions. */
456 I3406_ADR = 0x10000000,
457 I3406_ADRP = 0x90000000,
458
459 /* Add/subtract shifted register instructions (without a shift). */
460 I3502_ADD = 0x0b000000,
461 I3502_ADDS = 0x2b000000,
462 I3502_SUB = 0x4b000000,
463 I3502_SUBS = 0x6b000000,
464
465 /* Add/subtract shifted register instructions (with a shift). */
466 I3502S_ADD_LSL = I3502_ADD,
467
468 /* Add/subtract with carry instructions. */
469 I3503_ADC = 0x1a000000,
470 I3503_SBC = 0x5a000000,
471
472 /* Conditional select instructions. */
473 I3506_CSEL = 0x1a800000,
474 I3506_CSINC = 0x1a800400,
475 I3506_CSINV = 0x5a800000,
476 I3506_CSNEG = 0x5a800400,
477
478 /* Data-processing (1 source) instructions. */
479 I3507_CLZ = 0x5ac01000,
480 I3507_RBIT = 0x5ac00000,
481 I3507_REV16 = 0x5ac00400,
482 I3507_REV32 = 0x5ac00800,
483 I3507_REV64 = 0x5ac00c00,
484
485 /* Data-processing (2 source) instructions. */
486 I3508_LSLV = 0x1ac02000,
487 I3508_LSRV = 0x1ac02400,
488 I3508_ASRV = 0x1ac02800,
489 I3508_RORV = 0x1ac02c00,
490 I3508_SMULH = 0x9b407c00,
491 I3508_UMULH = 0x9bc07c00,
492 I3508_UDIV = 0x1ac00800,
493 I3508_SDIV = 0x1ac00c00,
494
495 /* Data-processing (3 source) instructions. */
496 I3509_MADD = 0x1b000000,
497 I3509_MSUB = 0x1b008000,
498
499 /* Logical shifted register instructions (without a shift). */
500 I3510_AND = 0x0a000000,
501 I3510_BIC = 0x0a200000,
502 I3510_ORR = 0x2a000000,
503 I3510_ORN = 0x2a200000,
504 I3510_EOR = 0x4a000000,
505 I3510_EON = 0x4a200000,
506 I3510_ANDS = 0x6a000000,
507
508 /* Logical shifted register instructions (with a shift). */
509 I3502S_AND_LSR = I3510_AND | (1 << 22),
510
511 /* AdvSIMD copy */
512 I3605_DUP = 0x0e000400,
513 I3605_INS = 0x4e001c00,
514 I3605_UMOV = 0x0e003c00,
515
516 /* AdvSIMD modified immediate */
517 I3606_MOVI = 0x0f000400,
518 I3606_MVNI = 0x2f000400,
519 I3606_BIC = 0x2f001400,
520 I3606_ORR = 0x0f001400,
521
522 /* AdvSIMD shift by immediate */
523 I3614_SSHR = 0x0f000400,
524 I3614_SSRA = 0x0f001400,
525 I3614_SHL = 0x0f005400,
526 I3614_SLI = 0x2f005400,
527 I3614_USHR = 0x2f000400,
528 I3614_USRA = 0x2f001400,
529
530 /* AdvSIMD three same. */
531 I3616_ADD = 0x0e208400,
532 I3616_AND = 0x0e201c00,
533 I3616_BIC = 0x0e601c00,
534 I3616_BIF = 0x2ee01c00,
535 I3616_BIT = 0x2ea01c00,
536 I3616_BSL = 0x2e601c00,
537 I3616_EOR = 0x2e201c00,
538 I3616_MUL = 0x0e209c00,
539 I3616_ORR = 0x0ea01c00,
540 I3616_ORN = 0x0ee01c00,
541 I3616_SUB = 0x2e208400,
542 I3616_CMGT = 0x0e203400,
543 I3616_CMGE = 0x0e203c00,
544 I3616_CMTST = 0x0e208c00,
545 I3616_CMHI = 0x2e203400,
546 I3616_CMHS = 0x2e203c00,
547 I3616_CMEQ = 0x2e208c00,
548 I3616_SMAX = 0x0e206400,
549 I3616_SMIN = 0x0e206c00,
550 I3616_SSHL = 0x0e204400,
551 I3616_SQADD = 0x0e200c00,
552 I3616_SQSUB = 0x0e202c00,
553 I3616_UMAX = 0x2e206400,
554 I3616_UMIN = 0x2e206c00,
555 I3616_UQADD = 0x2e200c00,
556 I3616_UQSUB = 0x2e202c00,
557 I3616_USHL = 0x2e204400,
558
559 /* AdvSIMD two-reg misc. */
560 I3617_CMGT0 = 0x0e208800,
561 I3617_CMEQ0 = 0x0e209800,
562 I3617_CMLT0 = 0x0e20a800,
563 I3617_CMGE0 = 0x2e208800,
564 I3617_CMLE0 = 0x2e20a800,
565 I3617_NOT = 0x2e205800,
566 I3617_ABS = 0x0e20b800,
567 I3617_NEG = 0x2e20b800,
568
569 /* System instructions. */
570 NOP = 0xd503201f,
571 DMB_ISH = 0xd50338bf,
572 DMB_LD = 0x00000100,
573 DMB_ST = 0x00000200,
574 } AArch64Insn;
575
576 static inline uint32_t tcg_in32(TCGContext *s)
577 {
578 uint32_t v = *(uint32_t *)s->code_ptr;
579 return v;
580 }
581
/*
 * Emit an opcode with "type-checking" of the format: FMT selects both
 * the emitter tcg_out_insn_<FMT> and the opcode constant I<FMT>_<OP>,
 * so an opcode can only be paired with the emitter of its own format.
 */
#define tcg_out_insn(S, FMT, OP, ...)                                   \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP),            \
                            ## __VA_ARGS__)
585
586 static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
587 TCGReg rt, TCGReg rn, unsigned size)
588 {
589 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
590 }
591
592 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
593 int imm19, TCGReg rt)
594 {
595 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
596 }
597
598 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
599 TCGReg rt, int imm19)
600 {
601 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
602 }
603
604 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
605 TCGCond c, int imm19)
606 {
607 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
608 }
609
610 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
611 {
612 tcg_out32(s, insn | (imm26 & 0x03ffffff));
613 }
614
615 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
616 {
617 tcg_out32(s, insn | rn << 5);
618 }
619
620 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
621 TCGReg r1, TCGReg r2, TCGReg rn,
622 tcg_target_long ofs, bool pre, bool w)
623 {
624 insn |= 1u << 31; /* ext */
625 insn |= pre << 24;
626 insn |= w << 23;
627
628 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
629 insn |= (ofs & (0x7f << 3)) << (15 - 3);
630
631 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
632 }
633
634 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
635 TCGReg rd, TCGReg rn, uint64_t aimm)
636 {
637 if (aimm > 0xfff) {
638 tcg_debug_assert((aimm & 0xfff) == 0);
639 aimm >>= 12;
640 tcg_debug_assert(aimm <= 0xfff);
641 aimm |= 1 << 12; /* apply LSL 12 */
642 }
643 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
644 }
645
646 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
647 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
648 that feed the DecodeBitMasks pseudo function. */
649 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
650 TCGReg rd, TCGReg rn, int n, int immr, int imms)
651 {
652 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
653 | rn << 5 | rd);
654 }
655
656 #define tcg_out_insn_3404 tcg_out_insn_3402
657
658 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
659 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
660 {
661 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
662 | rn << 5 | rd);
663 }
664
665 /* This function is used for the Move (wide immediate) instruction group.
666 Note that SHIFT is a full shift count, not the 2 bit HW field. */
667 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
668 TCGReg rd, uint16_t half, unsigned shift)
669 {
670 tcg_debug_assert((shift & ~0x30) == 0);
671 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
672 }
673
674 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
675 TCGReg rd, int64_t disp)
676 {
677 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
678 }
679
680 /* This function is for both 3.5.2 (Add/Subtract shifted register), for
681 the rare occasion when we actually want to supply a shift amount. */
682 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
683 TCGType ext, TCGReg rd, TCGReg rn,
684 TCGReg rm, int imm6)
685 {
686 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
687 }
688
689 /* This function is for 3.5.2 (Add/subtract shifted register),
690 and 3.5.10 (Logical shifted register), for the vast majorty of cases
691 when we don't want to apply a shift. Thus it can also be used for
692 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
693 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
694 TCGReg rd, TCGReg rn, TCGReg rm)
695 {
696 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
697 }
698
699 #define tcg_out_insn_3503 tcg_out_insn_3502
700 #define tcg_out_insn_3508 tcg_out_insn_3502
701 #define tcg_out_insn_3510 tcg_out_insn_3502
702
703 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
704 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
705 {
706 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
707 | tcg_cond_to_aarch64[c] << 12);
708 }
709
710 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
711 TCGReg rd, TCGReg rn)
712 {
713 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
714 }
715
716 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
717 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
718 {
719 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
720 }
721
722 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
723 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
724 {
725 /* Note that bit 11 set means general register input. Therefore
726 we can handle both register sets with one function. */
727 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
728 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
729 }
730
731 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
732 TCGReg rd, bool op, int cmode, uint8_t imm8)
733 {
734 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
735 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
736 }
737
738 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
739 TCGReg rd, TCGReg rn, unsigned immhb)
740 {
741 tcg_out32(s, insn | q << 30 | immhb << 16
742 | (rn & 0x1f) << 5 | (rd & 0x1f));
743 }
744
745 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
746 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
747 {
748 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
749 | (rn & 0x1f) << 5 | (rd & 0x1f));
750 }
751
752 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
753 unsigned size, TCGReg rd, TCGReg rn)
754 {
755 tcg_out32(s, insn | q << 30 | (size << 22)
756 | (rn & 0x1f) << 5 | (rd & 0x1f));
757 }
758
759 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
760 TCGReg rd, TCGReg base, TCGType ext,
761 TCGReg regoff)
762 {
763 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
764 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
765 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
766 }
767
768 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
769 TCGReg rd, TCGReg rn, intptr_t offset)
770 {
771 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
772 }
773
774 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
775 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
776 {
777 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
778 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
779 | rn << 5 | (rd & 0x1f));
780 }
781
782 /* Register to register move using ORR (shifted register with no shift). */
783 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
784 {
785 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
786 }
787
788 /* Register to register move using ADDI (move to/from SP). */
789 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
790 {
791 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
792 }
793
794 /* This function is used for the Logical (immediate) instruction group.
795 The value of LIMM must satisfy IS_LIMM. See the comment above about
796 only supporting simplified logical immediates. */
797 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
798 TCGReg rd, TCGReg rn, uint64_t limm)
799 {
800 unsigned h, l, r, c;
801
802 tcg_debug_assert(is_limm(limm));
803
804 h = clz64(limm);
805 l = ctz64(limm);
806 if (l == 0) {
807 r = 0; /* form 0....01....1 */
808 c = ctz64(~limm) - 1;
809 if (h == 0) {
810 r = clz64(~limm); /* form 1..10..01..1 */
811 c += r;
812 }
813 } else {
814 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
815 c = r - h - 1;
816 }
817 if (ext == TCG_TYPE_I32) {
818 r &= 31;
819 c &= 31;
820 }
821
822 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
823 }
824
825 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
826 TCGReg rd, int64_t v64)
827 {
828 bool q = type == TCG_TYPE_V128;
829 int cmode, imm8, i;
830
831 /* Test all bytes equal first. */
832 if (vece == MO_8) {
833 imm8 = (uint8_t)v64;
834 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
835 return;
836 }
837
838 /*
839 * Test all bytes 0x00 or 0xff second. This can match cases that
840 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
841 */
842 for (i = imm8 = 0; i < 8; i++) {
843 uint8_t byte = v64 >> (i * 8);
844 if (byte == 0xff) {
845 imm8 |= 1 << i;
846 } else if (byte != 0) {
847 goto fail_bytes;
848 }
849 }
850 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
851 return;
852 fail_bytes:
853
854 /*
855 * Tests for various replications. For each element width, if we
856 * cannot find an expansion there's no point checking a larger
857 * width because we already know by replication it cannot match.
858 */
859 if (vece == MO_16) {
860 uint16_t v16 = v64;
861
862 if (is_shimm16(v16, &cmode, &imm8)) {
863 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
864 return;
865 }
866 if (is_shimm16(~v16, &cmode, &imm8)) {
867 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
868 return;
869 }
870
871 /*
872 * Otherwise, all remaining constants can be loaded in two insns:
873 * rd = v16 & 0xff, rd |= v16 & 0xff00.
874 */
875 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
876 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
877 return;
878 } else if (vece == MO_32) {
879 uint32_t v32 = v64;
880 uint32_t n32 = ~v32;
881
882 if (is_shimm32(v32, &cmode, &imm8) ||
883 is_soimm32(v32, &cmode, &imm8) ||
884 is_fimm32(v32, &cmode, &imm8)) {
885 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
886 return;
887 }
888 if (is_shimm32(n32, &cmode, &imm8) ||
889 is_soimm32(n32, &cmode, &imm8)) {
890 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
891 return;
892 }
893
894 /*
895 * Restrict the set of constants to those we can load with
896 * two instructions. Others we load from the pool.
897 */
898 i = is_shimm32_pair(v32, &cmode, &imm8);
899 if (i) {
900 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
901 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
902 return;
903 }
904 i = is_shimm32_pair(n32, &cmode, &imm8);
905 if (i) {
906 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
907 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
908 return;
909 }
910 } else if (is_fimm64(v64, &cmode, &imm8)) {
911 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
912 return;
913 }
914
915 /*
916 * As a last resort, load from the constant pool. Sadly there
917 * is no LD1R (literal), so store the full 16-byte vector.
918 */
919 if (type == TCG_TYPE_V128) {
920 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
921 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
922 } else {
923 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
924 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
925 }
926 }
927
928 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
929 TCGReg rd, TCGReg rs)
930 {
931 int is_q = type - TCG_TYPE_V64;
932 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
933 return true;
934 }
935
936 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
937 TCGReg r, TCGReg base, intptr_t offset)
938 {
939 TCGReg temp = TCG_REG_TMP;
940
941 if (offset < -0xffffff || offset > 0xffffff) {
942 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
943 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
944 base = temp;
945 } else {
946 AArch64Insn add_insn = I3401_ADDI;
947
948 if (offset < 0) {
949 add_insn = I3401_SUBI;
950 offset = -offset;
951 }
952 if (offset & 0xfff000) {
953 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
954 base = temp;
955 }
956 if (offset & 0xfff) {
957 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
958 base = temp;
959 }
960 }
961 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
962 return true;
963 }
964
965 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
966 tcg_target_long value)
967 {
968 tcg_target_long svalue = value;
969 tcg_target_long ivalue = ~value;
970 tcg_target_long t0, t1, t2;
971 int s0, s1;
972 AArch64Insn opc;
973
974 switch (type) {
975 case TCG_TYPE_I32:
976 case TCG_TYPE_I64:
977 tcg_debug_assert(rd < 32);
978 break;
979 default:
980 g_assert_not_reached();
981 }
982
983 /* For 32-bit values, discard potential garbage in value. For 64-bit
984 values within [2**31, 2**32-1], we can create smaller sequences by
985 interpreting this as a negative 32-bit number, while ensuring that
986 the high 32 bits are cleared by setting SF=0. */
987 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
988 svalue = (int32_t)value;
989 value = (uint32_t)value;
990 ivalue = (uint32_t)ivalue;
991 type = TCG_TYPE_I32;
992 }
993
994 /* Speed things up by handling the common case of small positive
995 and negative values specially. */
996 if ((value & ~0xffffull) == 0) {
997 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
998 return;
999 } else if ((ivalue & ~0xffffull) == 0) {
1000 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1001 return;
1002 }
1003
1004 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
1005 use the sign-extended value. That lets us match rotated values such
1006 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1007 if (is_limm(svalue)) {
1008 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1009 return;
1010 }
1011
1012 /* Look for host pointer values within 4G of the PC. This happens
1013 often when loading pointers to QEMU's own data structures. */
1014 if (type == TCG_TYPE_I64) {
1015 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1016 tcg_target_long disp = value - src_rx;
1017 if (disp == sextract64(disp, 0, 21)) {
1018 tcg_out_insn(s, 3406, ADR, rd, disp);
1019 return;
1020 }
1021 disp = (value >> 12) - (src_rx >> 12);
1022 if (disp == sextract64(disp, 0, 21)) {
1023 tcg_out_insn(s, 3406, ADRP, rd, disp);
1024 if (value & 0xfff) {
1025 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1026 }
1027 return;
1028 }
1029 }
1030
1031 /* Would it take fewer insns to begin with MOVN? */
1032 if (ctpop64(value) >= 32) {
1033 t0 = ivalue;
1034 opc = I3405_MOVN;
1035 } else {
1036 t0 = value;
1037 opc = I3405_MOVZ;
1038 }
1039 s0 = ctz64(t0) & (63 & -16);
1040 t1 = t0 & ~(0xffffUL << s0);
1041 s1 = ctz64(t1) & (63 & -16);
1042 t2 = t1 & ~(0xffffUL << s1);
1043 if (t2 == 0) {
1044 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1045 if (t1 != 0) {
1046 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1047 }
1048 return;
1049 }
1050
1051 /* For more than 2 insns, dump it into the constant pool. */
1052 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1053 tcg_out_insn(s, 3305, LDR, 0, rd);
1054 }
1055
/* Define something more legible for general use: tcg_out_ldst_r is the
   load/store emitter that takes its offset in a register.  */
#define tcg_out_ldst_r tcg_out_insn_3310
1058
1059 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1060 TCGReg rn, intptr_t offset, int lgsize)
1061 {
1062 /* If the offset is naturally aligned and in range, then we can
1063 use the scaled uimm12 encoding */
1064 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1065 uintptr_t scaled_uimm = offset >> lgsize;
1066 if (scaled_uimm <= 0xfff) {
1067 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1068 return;
1069 }
1070 }
1071
1072 /* Small signed offsets can use the unscaled encoding. */
1073 if (offset >= -256 && offset < 256) {
1074 tcg_out_insn_3312(s, insn, rd, rn, offset);
1075 return;
1076 }
1077
1078 /* Worst-case scenario, move offset to temp register, use reg offset. */
1079 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1080 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1081 }
1082
1083 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1084 {
1085 if (ret == arg) {
1086 return true;
1087 }
1088 switch (type) {
1089 case TCG_TYPE_I32:
1090 case TCG_TYPE_I64:
1091 if (ret < 32 && arg < 32) {
1092 tcg_out_movr(s, type, ret, arg);
1093 break;
1094 } else if (ret < 32) {
1095 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1096 break;
1097 } else if (arg < 32) {
1098 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1099 break;
1100 }
1101 /* FALLTHRU */
1102
1103 case TCG_TYPE_V64:
1104 tcg_debug_assert(ret >= 32 && arg >= 32);
1105 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1106 break;
1107 case TCG_TYPE_V128:
1108 tcg_debug_assert(ret >= 32 && arg >= 32);
1109 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1110 break;
1111
1112 default:
1113 g_assert_not_reached();
1114 }
1115 return true;
1116 }
1117
1118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1119 TCGReg base, intptr_t ofs)
1120 {
1121 AArch64Insn insn;
1122 int lgsz;
1123
1124 switch (type) {
1125 case TCG_TYPE_I32:
1126 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1127 lgsz = 2;
1128 break;
1129 case TCG_TYPE_I64:
1130 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1131 lgsz = 3;
1132 break;
1133 case TCG_TYPE_V64:
1134 insn = I3312_LDRVD;
1135 lgsz = 3;
1136 break;
1137 case TCG_TYPE_V128:
1138 insn = I3312_LDRVQ;
1139 lgsz = 4;
1140 break;
1141 default:
1142 g_assert_not_reached();
1143 }
1144 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1145 }
1146
1147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1148 TCGReg base, intptr_t ofs)
1149 {
1150 AArch64Insn insn;
1151 int lgsz;
1152
1153 switch (type) {
1154 case TCG_TYPE_I32:
1155 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1156 lgsz = 2;
1157 break;
1158 case TCG_TYPE_I64:
1159 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1160 lgsz = 3;
1161 break;
1162 case TCG_TYPE_V64:
1163 insn = I3312_STRVD;
1164 lgsz = 3;
1165 break;
1166 case TCG_TYPE_V128:
1167 insn = I3312_STRVQ;
1168 lgsz = 4;
1169 break;
1170 default:
1171 g_assert_not_reached();
1172 }
1173 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1174 }
1175
1176 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1177 TCGReg base, intptr_t ofs)
1178 {
1179 if (type <= TCG_TYPE_I64 && val == 0) {
1180 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1181 return true;
1182 }
1183 return false;
1184 }
1185
1186 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1187 TCGReg rn, unsigned int a, unsigned int b)
1188 {
1189 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1190 }
1191
1192 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1193 TCGReg rn, unsigned int a, unsigned int b)
1194 {
1195 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1196 }
1197
1198 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1199 TCGReg rn, unsigned int a, unsigned int b)
1200 {
1201 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1202 }
1203
1204 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1205 TCGReg rn, TCGReg rm, unsigned int a)
1206 {
1207 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1208 }
1209
1210 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1211 TCGReg rd, TCGReg rn, unsigned int m)
1212 {
1213 int bits = ext ? 64 : 32;
1214 int max = bits - 1;
1215 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1216 }
1217
1218 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1219 TCGReg rd, TCGReg rn, unsigned int m)
1220 {
1221 int max = ext ? 63 : 31;
1222 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1223 }
1224
1225 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1226 TCGReg rd, TCGReg rn, unsigned int m)
1227 {
1228 int max = ext ? 63 : 31;
1229 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1230 }
1231
1232 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1233 TCGReg rd, TCGReg rn, unsigned int m)
1234 {
1235 int max = ext ? 63 : 31;
1236 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1237 }
1238
1239 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1240 TCGReg rd, TCGReg rn, unsigned int m)
1241 {
1242 int bits = ext ? 64 : 32;
1243 int max = bits - 1;
1244 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1245 }
1246
1247 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1248 TCGReg rn, unsigned lsb, unsigned width)
1249 {
1250 unsigned size = ext ? 64 : 32;
1251 unsigned a = (size - lsb) & (size - 1);
1252 unsigned b = width - 1;
1253 tcg_out_bfm(s, ext, rd, rn, a, b);
1254 }
1255
1256 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1257 tcg_target_long b, bool const_b)
1258 {
1259 if (const_b) {
1260 /* Using CMP or CMN aliases. */
1261 if (b >= 0) {
1262 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1263 } else {
1264 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1265 }
1266 } else {
1267 /* Using CMP alias SUBS wzr, Wn, Wm */
1268 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1269 }
1270 }
1271
1272 static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1273 {
1274 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1275 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1276 tcg_out_insn(s, 3206, B, offset);
1277 }
1278
1279 static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1280 {
1281 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1282 if (offset == sextract64(offset, 0, 26)) {
1283 tcg_out_insn(s, 3206, B, offset);
1284 } else {
1285 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1286 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1287 }
1288 }
1289
1290 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1291 {
1292 tcg_out_insn(s, 3207, BLR, reg);
1293 }
1294
1295 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
1296 {
1297 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1298 if (offset == sextract64(offset, 0, 26)) {
1299 tcg_out_insn(s, 3206, BL, offset);
1300 } else {
1301 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1302 tcg_out_callr(s, TCG_REG_TMP);
1303 }
1304 }
1305
/*
 * Rewrite the two-instruction (8-byte) jump slot of a translation block so
 * that it leads to ADDR.
 *
 * @tc_ptr: not used by this implementation
 * @jmp_rx: address of the slot used for the pc-relative computation and
 *          for the cache flush
 * @jmp_rw: address through which the slot is written
 *          (presumably the writable alias of @jmp_rx -- confirm)
 * @addr:   new jump destination
 *
 * Either "B addr; NOP" or "ADRP tmp, page(addr); ADD tmp, tmp, #lo12(addr)"
 * is stored; both instructions are written with one 64-bit atomic store.
 */
void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
                              uintptr_t jmp_rw, uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_rx;

    /*
     * NOTE(review): the range test is applied to the byte offset while the
     * encoded field holds offset >> 2, so the test looks stricter than the
     * encoding requires -- confirm this is intentional.
     */
    if (offset == sextract64(offset, 0, 26)) {
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        /* Page (4KiB) displacement between the slot and the target. */
        offset = (addr >> 12) - (jmp_rx >> 12);

        /* patch ADRP: low 2 bits of the page offset go to bits [30:29],
           the remaining bits to the field starting at bit 5 */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI: add the low 12 bits of the target address */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    /* i1 occupies the low half of the 64-bit word, i2 the high half. */
    pair = (uint64_t)i2 << 32 | i1;
    qatomic_set((uint64_t *)jmp_rw, pair);
    flush_idcache_range(jmp_rx, jmp_rw, 8);
}
1331
1332 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1333 {
1334 if (!l->has_value) {
1335 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1336 tcg_out_insn(s, 3206, B, 0);
1337 } else {
1338 tcg_out_goto(s, l->u.value_ptr);
1339 }
1340 }
1341
1342 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1343 TCGArg b, bool b_const, TCGLabel *l)
1344 {
1345 intptr_t offset;
1346 bool need_cmp;
1347
1348 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1349 need_cmp = false;
1350 } else {
1351 need_cmp = true;
1352 tcg_out_cmp(s, ext, a, b, b_const);
1353 }
1354
1355 if (!l->has_value) {
1356 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1357 offset = tcg_in32(s) >> 5;
1358 } else {
1359 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1360 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1361 }
1362
1363 if (need_cmp) {
1364 tcg_out_insn(s, 3202, B_C, c, offset);
1365 } else if (c == TCG_COND_EQ) {
1366 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1367 } else {
1368 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1369 }
1370 }
1371
1372 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1373 {
1374 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1375 }
1376
1377 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1378 {
1379 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1380 }
1381
1382 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1383 {
1384 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1385 }
1386
1387 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1388 TCGReg rd, TCGReg rn)
1389 {
1390 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1391 int bits = (8 << s_bits) - 1;
1392 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1393 }
1394
1395 static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1396 TCGReg rd, TCGReg rn)
1397 {
1398 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1399 int bits = (8 << s_bits) - 1;
1400 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1401 }
1402
1403 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1404 TCGReg rn, int64_t aimm)
1405 {
1406 if (aimm >= 0) {
1407 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1408 } else {
1409 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1410 }
1411 }
1412
1413 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1414 TCGReg rh, TCGReg al, TCGReg ah,
1415 tcg_target_long bl, tcg_target_long bh,
1416 bool const_bl, bool const_bh, bool sub)
1417 {
1418 TCGReg orig_rl = rl;
1419 AArch64Insn insn;
1420
1421 if (rl == ah || (!const_bh && rl == bh)) {
1422 rl = TCG_REG_TMP;
1423 }
1424
1425 if (const_bl) {
1426 insn = I3401_ADDSI;
1427 if ((bl < 0) ^ sub) {
1428 insn = I3401_SUBSI;
1429 bl = -bl;
1430 }
1431 if (unlikely(al == TCG_REG_XZR)) {
1432 /* ??? We want to allow al to be zero for the benefit of
1433 negation via subtraction. However, that leaves open the
1434 possibility of adding 0+const in the low part, and the
1435 immediate add instructions encode XSP not XZR. Don't try
1436 anything more elaborate here than loading another zero. */
1437 al = TCG_REG_TMP;
1438 tcg_out_movi(s, ext, al, 0);
1439 }
1440 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1441 } else {
1442 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1443 }
1444
1445 insn = I3503_ADC;
1446 if (const_bh) {
1447 /* Note that the only two constants we support are 0 and -1, and
1448 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1449 if ((bh != 0) ^ sub) {
1450 insn = I3503_SBC;
1451 }
1452 bh = TCG_REG_XZR;
1453 } else if (sub) {
1454 insn = I3503_SBC;
1455 }
1456 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1457
1458 tcg_out_mov(s, ext, orig_rl, rl);
1459 }
1460
/*
 * Emit a memory barrier for the TCG barrier argument A0.  Only the
 * TCG_MO_ALL bits of A0 are used, as an index into a table of barrier
 * instruction words.
 */
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        /* Default for every combination not listed below: load+store. */
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        /* Store-store ordering only. */
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        /* Orderings whose earlier access is a load use the load barrier. */
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
1472
/*
 * Emit count-leading-zeros (or, when IS_CTZ, count-trailing-zeros) of A0
 * into D, where B supplies the result to use when A0 is zero.
 *
 * @b:       fallback value; a register number unless @const_b is set
 * @const_b: @b is an immediate
 * @is_ctz:  count trailing rather than leading zeros
 *
 * Uses TCG_REG_TMP as scratch.
 */
static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        /* Bit-reverse the input so that CLZ counts its trailing zeros. */
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        /* Fallback equals the operand width: a bare CLZ is emitted
           (presumably because CLZ of zero already yields the width). */
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        /* Test the original input, then count into the scratch register;
           D is not written until the final select. */
        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                /* -1 is obtained as the inverse of the zero register. */
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                /* Any other constant is first loaded into D itself. */
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        /* D = (A0 != 0) ? count : fallback. */
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}
1503
1504 #ifdef CONFIG_SOFTMMU
1505 #include "../tcg-ldst.c.inc"
1506
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     TCGMemOpIdx oi, uintptr_t ra)
 *
 * Slow-path load helpers, indexed by the memop's (MO_BSWAP | MO_SIZE)
 * bits.  Slots without an initializer are NULL.
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};
1519
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, TCGMemOpIdx oi,
 *                                     uintptr_t ra)
 *
 * Slow-path store helpers, indexed by the memop's (MO_BSWAP | MO_SIZE)
 * bits.  Slots without an initializer are NULL.
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};
1533
1534 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1535 {
1536 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1537 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1538 tcg_out_insn(s, 3406, ADR, rd, offset);
1539 }
1540
1541 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1542 {
1543 TCGMemOpIdx oi = lb->oi;
1544 MemOp opc = get_memop(oi);
1545 MemOp size = opc & MO_SIZE;
1546
1547 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1548 return false;
1549 }
1550
1551 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1552 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1553 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1554 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1555 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1556 if (opc & MO_SIGN) {
1557 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1558 } else {
1559 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1560 }
1561
1562 tcg_out_goto(s, lb->raddr);
1563 return true;
1564 }
1565
1566 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1567 {
1568 TCGMemOpIdx oi = lb->oi;
1569 MemOp opc = get_memop(oi);
1570 MemOp size = opc & MO_SIZE;
1571
1572 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1573 return false;
1574 }
1575
1576 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1577 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1578 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1579 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1580 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1581 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1582 tcg_out_goto(s, lb->raddr);
1583 return true;
1584 }
1585
1586 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1587 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1588 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1589 {
1590 TCGLabelQemuLdst *label = new_ldst_label(s);
1591
1592 label->is_ld = is_ld;
1593 label->oi = oi;
1594 label->type = ext;
1595 label->datalo_reg = data_reg;
1596 label->addrlo_reg = addr_reg;
1597 label->raddr = tcg_splitwx_to_rx(raddr);
1598 label->label_ptr[0] = label_ptr;
1599 }
1600
/* We expect to use a 7-bit scaled negative offset from ENV: together the
   two checks confine TLB_MASK_TABLE_OFS(0) to the range [-512, 0].  */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);

/* These offsets are built into the LDP below: a single pair load fetches
   'mask' (offset 0) and 'table' (offset 8) together.  */
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1608
/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path. Generated code returns the host addend in X1,
   clobbers X0,X2,X3,TMP.

   addr_reg:  register holding the guest address
   opc:       memory operation; supplies the access size and alignment
   label_ptr: receives the location of the emitted B.NE, for later patching
   mem_index: MMU index selecting the TLB to consult
   is_read:   compare against addr_read (true) or addr_write (false)  */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    TCGReg x3;
    TCGType mask_type;
    uint64_t compare_mask;

    /* Use a 64-bit AND for the index only if the shifted mask could
       exceed 32 bits.  */
    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);

    /* Extract the TLB index from the address into X0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_X0, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
               ? offsetof(CPUTLBEntry, addr_read)
               : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    /* Keep the page number plus the alignment bits that must be zero.  */
    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    *label_ptr = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}
1670
1671 #endif /* CONFIG_SOFTMMU */
1672
1673 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1674 TCGReg data_r, TCGReg addr_r,
1675 TCGType otype, TCGReg off_r)
1676 {
1677 const MemOp bswap = memop & MO_BSWAP;
1678
1679 switch (memop & MO_SSIZE) {
1680 case MO_UB:
1681 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1682 break;
1683 case MO_SB:
1684 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1685 data_r, addr_r, otype, off_r);
1686 break;
1687 case MO_UW:
1688 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1689 if (bswap) {
1690 tcg_out_rev16(s, data_r, data_r);
1691 }
1692 break;
1693 case MO_SW:
1694 if (bswap) {
1695 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1696 tcg_out_rev16(s, data_r, data_r);
1697 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1698 } else {
1699 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1700 data_r, addr_r, otype, off_r);
1701 }
1702 break;
1703 case MO_UL:
1704 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1705 if (bswap) {
1706 tcg_out_rev32(s, data_r, data_r);
1707 }
1708 break;
1709 case MO_SL:
1710 if (bswap) {
1711 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1712 tcg_out_rev32(s, data_r, data_r);
1713 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1714 } else {
1715 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1716 }
1717 break;
1718 case MO_Q:
1719 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1720 if (bswap) {
1721 tcg_out_rev64(s, data_r, data_r);
1722 }
1723 break;
1724 default:
1725 tcg_abort();
1726 }
1727 }
1728
1729 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1730 TCGReg data_r, TCGReg addr_r,
1731 TCGType otype, TCGReg off_r)
1732 {
1733 const MemOp bswap = memop & MO_BSWAP;
1734
1735 switch (memop & MO_SIZE) {
1736 case MO_8:
1737 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1738 break;
1739 case MO_16:
1740 if (bswap && data_r != TCG_REG_XZR) {
1741 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1742 data_r = TCG_REG_TMP;
1743 }
1744 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1745 break;
1746 case MO_32:
1747 if (bswap && data_r != TCG_REG_XZR) {
1748 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1749 data_r = TCG_REG_TMP;
1750 }
1751 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1752 break;
1753 case MO_64:
1754 if (bswap && data_r != TCG_REG_XZR) {
1755 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1756 data_r = TCG_REG_TMP;
1757 }
1758 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1759 break;
1760 default:
1761 tcg_abort();
1762 }
1763 }
1764
/*
 * Emit a guest memory load of DATA_REG from guest address ADDR_REG.
 * OI packs the memory operation and the MMU index; EXT is the type of the
 * destination value.
 */
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi, TCGType ext)
{
    MemOp memop = get_memop(oi);
    /* Width of the guest address used as the register offset.  */
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    /* TLB check; on a hit X1 holds the host addend used as the base.  */
    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    /* Queue the slow path; it resumes at the current output position.  */
    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (USE_GUEST_BASE) {
        /* Host address = guest base register + guest address.  */
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        /* Guest address is used directly, with a zero register offset.  */
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}
1789
/*
 * Emit a guest memory store of DATA_REG to guest address ADDR_REG.
 * OI packs the memory operation and the MMU index.
 */
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi)
{
    MemOp memop = get_memop(oi);
    /* Width of the guest address used as the register offset.  */
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    /* TLB check; on a hit X1 holds the host addend used as the base.  */
    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
    tcg_out_qemu_st_direct(s, memop, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    /* Queue the slow path; the recorded type is 64-bit only for 8-byte
       stores.  */
    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
                        data_reg, addr_reg, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (USE_GUEST_BASE) {
        /* Host address = guest base register + guest address.  */
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        /* Guest address is used directly, with a zero register offset.  */
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}
1814
/* Code address that exit_tb jumps to after loading a non-zero return
   value into X0; not assigned in this part of the file.  */
static const tcg_insn_unit *tb_ret_addr;
1816
1817 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1818 const TCGArg args[TCG_MAX_OP_ARGS],
1819 const int const_args[TCG_MAX_OP_ARGS])
1820 {
1821 /* 99% of the time, we can signal the use of extension registers
1822 by looking to see if the opcode handles 64-bit data. */
1823 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1824
1825 /* Hoist the loads of the most common arguments. */
1826 TCGArg a0 = args[0];
1827 TCGArg a1 = args[1];
1828 TCGArg a2 = args[2];
1829 int c2 = const_args[2];
1830
1831 /* Some operands are defined with "rZ" constraint, a register or
1832 the zero register. These need not actually test args[I] == 0. */
1833 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1834
1835 switch (opc) {
1836 case INDEX_op_exit_tb:
1837 /* Reuse the zeroing that exists for goto_ptr. */
1838 if (a0 == 0) {
1839 tcg_out_goto_long(s, tcg_code_gen_epilogue);
1840 } else {
1841 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1842 tcg_out_goto_long(s, tb_ret_addr);
1843 }
1844 break;
1845
1846 case INDEX_op_goto_tb:
1847 if (s->tb_jmp_insn_offset != NULL) {
1848 /* TCG_TARGET_HAS_direct_jump */
1849 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1850 write can be used to patch the target address. */
1851 if ((uintptr_t)s->code_ptr & 7) {
1852 tcg_out32(s, NOP);
1853 }
1854 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1855 /* actual branch destination will be patched by
1856 tb_target_set_jmp_target later. */
1857 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1858 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1859 } else {
1860 /* !TCG_TARGET_HAS_direct_jump */
1861 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1862 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1863 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1864 }
1865 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1866 set_jmp_reset_offset(s, a0);
1867 break;
1868
1869 case INDEX_op_goto_ptr:
1870 tcg_out_insn(s, 3207, BR, a0);
1871 break;
1872
1873 case INDEX_op_br:
1874 tcg_out_goto_label(s, arg_label(a0));
1875 break;
1876
1877 case INDEX_op_ld8u_i32:
1878 case INDEX_op_ld8u_i64:
1879 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1880 break;
1881 case INDEX_op_ld8s_i32:
1882 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1883 break;
1884 case INDEX_op_ld8s_i64:
1885 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1886 break;
1887 case INDEX_op_ld16u_i32:
1888 case INDEX_op_ld16u_i64:
1889 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1890 break;
1891 case INDEX_op_ld16s_i32:
1892 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1893 break;
1894 case INDEX_op_ld16s_i64:
1895 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1896 break;
1897 case INDEX_op_ld_i32:
1898 case INDEX_op_ld32u_i64:
1899 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1900 break;
1901 case INDEX_op_ld32s_i64:
1902 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1903 break;
1904 case INDEX_op_ld_i64:
1905 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1906 break;
1907
1908 case INDEX_op_st8_i32:
1909 case INDEX_op_st8_i64:
1910 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1911 break;
1912 case INDEX_op_st16_i32:
1913 case INDEX_op_st16_i64:
1914 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1915 break;
1916 case INDEX_op_st_i32:
1917 case INDEX_op_st32_i64:
1918 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1919 break;
1920 case INDEX_op_st_i64:
1921 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1922 break;
1923
1924 case INDEX_op_add_i32:
1925 a2 = (int32_t)a2;
1926 /* FALLTHRU */
1927 case INDEX_op_add_i64:
1928 if (c2) {
1929 tcg_out_addsubi(s, ext, a0, a1, a2);
1930 } else {
1931 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1932 }
1933 break;
1934
1935 case INDEX_op_sub_i32:
1936 a2 = (int32_t)a2;
1937 /* FALLTHRU */
1938 case INDEX_op_sub_i64:
1939 if (c2) {
1940 tcg_out_addsubi(s, ext, a0, a1, -a2);
1941 } else {
1942 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1943 }
1944 break;
1945
1946 case INDEX_op_neg_i64:
1947 case INDEX_op_neg_i32:
1948 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1949 break;
1950
1951 case INDEX_op_and_i32:
1952 a2 = (int32_t)a2;
1953 /* FALLTHRU */
1954 case INDEX_op_and_i64:
1955 if (c2) {
1956 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1957 } else {
1958 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1959 }
1960 break;
1961
1962 case INDEX_op_andc_i32:
1963 a2 = (int32_t)a2;
1964 /* FALLTHRU */
1965 case INDEX_op_andc_i64:
1966 if (c2) {
1967 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1968 } else {
1969 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1970 }
1971 break;
1972
1973 case INDEX_op_or_i32:
1974 a2 = (int32_t)a2;
1975 /* FALLTHRU */
1976 case INDEX_op_or_i64:
1977 if (c2) {
1978 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1979 } else {
1980 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1981 }
1982 break;
1983
1984 case INDEX_op_orc_i32:
1985 a2 = (int32_t)a2;
1986 /* FALLTHRU */
1987 case INDEX_op_orc_i64:
1988 if (c2) {
1989 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1990 } else {
1991 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1992 }
1993 break;
1994
1995 case INDEX_op_xor_i32:
1996 a2 = (int32_t)a2;
1997 /* FALLTHRU */
1998 case INDEX_op_xor_i64:
1999 if (c2) {
2000 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2001 } else {
2002 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2003 }
2004 break;
2005
2006 case INDEX_op_eqv_i32:
2007 a2 = (int32_t)a2;
2008 /* FALLTHRU */
2009 case INDEX_op_eqv_i64:
2010 if (c2) {
2011 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2012 } else {
2013 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2014 }
2015 break;
2016
2017 case INDEX_op_not_i64:
2018 case INDEX_op_not_i32:
2019 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2020 break;
2021
2022 case INDEX_op_mul_i64:
2023 case INDEX_op_mul_i32:
2024 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2025 break;
2026
2027 case INDEX_op_div_i64:
2028 case INDEX_op_div_i32:
2029 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2030 break;
2031 case INDEX_op_divu_i64:
2032 case INDEX_op_divu_i32:
2033 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2034 break;
2035
2036 case INDEX_op_rem_i64:
2037 case INDEX_op_rem_i32:
2038 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2039 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2040 break;
2041 case INDEX_op_remu_i64:
2042 case INDEX_op_remu_i32:
2043 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2044 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2045 break;
2046
2047 case INDEX_op_shl_i64:
2048 case INDEX_op_shl_i32:
2049 if (c2) {
2050 tcg_out_shl(s, ext, a0, a1, a2);
2051 } else {
2052 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2053 }
2054 break;
2055
2056 case INDEX_op_shr_i64:
2057 case INDEX_op_shr_i32:
2058 if (c2) {
2059 tcg_out_shr(s, ext, a0, a1, a2);
2060 } else {
2061 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2062 }
2063 break;
2064
2065 case INDEX_op_sar_i64:
2066 case INDEX_op_sar_i32:
2067 if (c2) {
2068 tcg_out_sar(s, ext, a0, a1, a2);
2069 } else {
2070 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2071 }
2072 break;
2073
2074 case INDEX_op_rotr_i64:
2075 case INDEX_op_rotr_i32:
2076 if (c2) {
2077 tcg_out_rotr(s, ext, a0, a1, a2);
2078 } else {
2079 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2080 }
2081 break;
2082
2083 case INDEX_op_rotl_i64:
2084 case INDEX_op_rotl_i32:
2085 if (c2) {
2086 tcg_out_rotl(s, ext, a0, a1, a2);
2087 } else {
2088 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2089 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2090 }
2091 break;
2092
2093 case INDEX_op_clz_i64:
2094 case INDEX_op_clz_i32:
2095 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2096 break;
2097 case INDEX_op_ctz_i64:
2098 case INDEX_op_ctz_i32:
2099 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2100 break;
2101
2102 case INDEX_op_brcond_i32:
2103 a1 = (int32_t)a1;
2104 /* FALLTHRU */
2105 case INDEX_op_brcond_i64:
2106 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2107 break;
2108
2109 case INDEX_op_setcond_i32:
2110 a2 = (int32_t)a2;
2111 /* FALLTHRU */
2112 case INDEX_op_setcond_i64:
2113 tcg_out_cmp(s, ext, a1, a2, c2);
2114 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
2115 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2116 TCG_REG_XZR, tcg_invert_cond(args[3]));
2117 break;
2118
2119 case INDEX_op_movcond_i32:
2120 a2 = (int32_t)a2;
2121 /* FALLTHRU */
2122 case INDEX_op_movcond_i64:
2123 tcg_out_cmp(s, ext, a1, a2, c2);
2124 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2125 break;
2126
2127 case INDEX_op_qemu_ld_i32:
2128 case INDEX_op_qemu_ld_i64:
2129 tcg_out_qemu_ld(s, a0, a1, a2, ext);
2130 break;
2131 case INDEX_op_qemu_st_i32:
2132 case INDEX_op_qemu_st_i64:
2133 tcg_out_qemu_st(s, REG0(0), a1, a2);
2134 break;
2135
2136 case INDEX_op_bswap64_i64:
2137 tcg_out_rev64(s, a0, a1);
2138 break;
2139 case INDEX_op_bswap32_i64:
2140 case INDEX_op_bswap32_i32:
2141 tcg_out_rev32(s, a0, a1);
2142 break;
2143 case INDEX_op_bswap16_i64:
2144 case INDEX_op_bswap16_i32:
2145 tcg_out_rev16(s, a0, a1);
2146 break;
2147
2148 case INDEX_op_ext8s_i64:
2149 case INDEX_op_ext8s_i32:
2150 tcg_out_sxt(s, ext, MO_8, a0, a1);
2151 break;
2152 case INDEX_op_ext16s_i64:
2153 case INDEX_op_ext16s_i32:
2154 tcg_out_sxt(s, ext, MO_16, a0, a1);
2155 break;
2156 case INDEX_op_ext_i32_i64:
2157 case INDEX_op_ext32s_i64:
2158 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2159 break;
2160 case INDEX_op_ext8u_i64:
2161 case INDEX_op_ext8u_i32:
2162 tcg_out_uxt(s, MO_8, a0, a1);
2163 break;
2164 case INDEX_op_ext16u_i64:
2165 case INDEX_op_ext16u_i32:
2166 tcg_out_uxt(s, MO_16, a0, a1);
2167 break;
2168 case INDEX_op_extu_i32_i64:
2169 case INDEX_op_ext32u_i64:
2170 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2171 break;
2172
2173 case INDEX_op_deposit_i64:
2174 case INDEX_op_deposit_i32:
2175 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2176 break;
2177
2178 case INDEX_op_extract_i64:
2179 case INDEX_op_extract_i32:
2180 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2181 break;
2182
2183 case INDEX_op_sextract_i64:
2184 case INDEX_op_sextract_i32:
2185 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2186 break;
2187
2188 case INDEX_op_extract2_i64:
2189 case INDEX_op_extract2_i32:
2190 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2191 break;
2192
2193 case INDEX_op_add2_i32:
2194 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2195 (int32_t)args[4], args[5], const_args[4],
2196 const_args[5], false);
2197 break;
2198 case INDEX_op_add2_i64:
2199 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2200 args[5], const_args[4], const_args[5], false);
2201 break;
2202 case INDEX_op_sub2_i32:
2203 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2204 (int32_t)args[4], args[5], const_args[4],
2205 const_args[5], true);
2206 break;
2207 case INDEX_op_sub2_i64:
2208 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2209 args[5], const_args[4], const_args[5], true);
2210 break;
2211
2212 case INDEX_op_muluh_i64:
2213 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2214 break;
2215 case INDEX_op_mulsh_i64:
2216 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2217 break;
2218
2219 case INDEX_op_mb:
2220 tcg_out_mb(s, a0);
2221 break;
2222
2223 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2224 case INDEX_op_mov_i64:
2225 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2226 default:
2227 g_assert_not_reached();
2228 }
2229
2230 #undef REG0
2231 }
2232
2233 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2234 unsigned vecl, unsigned vece,
2235 const TCGArg *args, const int *const_args)
2236 {
2237 static const AArch64Insn cmp_insn[16] = {
2238 [TCG_COND_EQ] = I3616_CMEQ,
2239 [TCG_COND_GT] = I3616_CMGT,
2240 [TCG_COND_GE] = I3616_CMGE,
2241 [TCG_COND_GTU] = I3616_CMHI,
2242 [TCG_COND_GEU] = I3616_CMHS,
2243 };
2244 static const AArch64Insn cmp0_insn[16] = {
2245 [TCG_COND_EQ] = I3617_CMEQ0,
2246 [TCG_COND_GT] = I3617_CMGT0,
2247 [TCG_COND_GE] = I3617_CMGE0,
2248 [TCG_COND_LT] = I3617_CMLT0,
2249 [TCG_COND_LE] = I3617_CMLE0,
2250 };
2251
2252 TCGType type = vecl + TCG_TYPE_V64;
2253 unsigned is_q = vecl;
2254 TCGArg a0, a1, a2, a3;
2255 int cmode, imm8;
2256
2257 a0 = args[0];
2258 a1 = args[1];
2259 a2 = args[2];
2260
2261 switch (opc) {
2262 case INDEX_op_ld_vec:
2263 tcg_out_ld(s, type, a0, a1, a2);
2264 break;
2265 case INDEX_op_st_vec:
2266 tcg_out_st(s, type, a0, a1, a2);
2267 break;
2268 case INDEX_op_dupm_vec:
2269 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2270 break;
2271 case INDEX_op_add_vec:
2272 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2273 break;
2274 case INDEX_op_sub_vec:
2275 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2276 break;
2277 case INDEX_op_mul_vec:
2278 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2279 break;
2280 case INDEX_op_neg_vec:
2281 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2282 break;
2283 case INDEX_op_abs_vec:
2284 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2285 break;
2286 case INDEX_op_and_vec:
2287 if (const_args[2]) {
2288 is_shimm1632(~a2, &cmode, &imm8);
2289 if (a0 == a1) {
2290 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2291 return;
2292 }
2293 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2294 a2 = a0;
2295 }
2296 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2297 break;
2298 case INDEX_op_or_vec:
2299 if (const_args[2]) {
2300 is_shimm1632(a2, &cmode, &imm8);
2301 if (a0 == a1) {
2302 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2303 return;
2304 }
2305 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2306 a2 = a0;
2307 }
2308 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2309 break;
2310 case INDEX_op_andc_vec:
2311 if (const_args[2]) {
2312 is_shimm1632(a2, &cmode, &imm8);
2313 if (a0 == a1) {
2314 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2315 return;
2316 }
2317 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2318 a2 = a0;
2319 }
2320 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2321 break;
2322 case INDEX_op_orc_vec:
2323 if (const_args[2]) {
2324 is_shimm1632(~a2, &cmode, &imm8);
2325 if (a0 == a1) {
2326 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2327 return;
2328 }
2329 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2330 a2 = a0;
2331 }
2332 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2333 break;
2334 case INDEX_op_xor_vec:
2335 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2336 break;
2337 case INDEX_op_ssadd_vec:
2338 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2339 break;
2340 case INDEX_op_sssub_vec:
2341 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2342 break;
2343 case INDEX_op_usadd_vec:
2344 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2345 break;
2346 case INDEX_op_ussub_vec:
2347 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2348 break;
2349 case INDEX_op_smax_vec:
2350 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2351 break;
2352 case INDEX_op_smin_vec:
2353 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2354 break;
2355 case INDEX_op_umax_vec:
2356 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2357 break;
2358 case INDEX_op_umin_vec:
2359 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2360 break;
2361 case INDEX_op_not_vec:
2362 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2363 break;
2364 case INDEX_op_shli_vec:
2365 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2366 break;
2367 case INDEX_op_shri_vec:
2368 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2369 break;
2370 case INDEX_op_sari_vec:
2371 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2372 break;
2373 case INDEX_op_aa64_sli_vec:
2374 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2375 break;
2376 case INDEX_op_shlv_vec:
2377 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2378 break;
2379 case INDEX_op_aa64_sshl_vec:
2380 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2381 break;
2382 case INDEX_op_cmp_vec:
2383 {
2384 TCGCond cond = args[3];
2385 AArch64Insn insn;
2386
2387 if (cond == TCG_COND_NE) {
2388 if (const_args[2]) {
2389 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2390 } else {
2391 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2392 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2393 }
2394 } else {
2395 if (const_args[2]) {
2396 insn = cmp0_insn[cond];
2397 if (insn) {
2398 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2399 break;
2400 }
2401 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2402 a2 = TCG_VEC_TMP;
2403 }
2404 insn = cmp_insn[cond];
2405 if (insn == 0) {
2406 TCGArg t;
2407 t = a1, a1 = a2, a2 = t;
2408 cond = tcg_swap_cond(cond);
2409 insn = cmp_insn[cond];
2410 tcg_debug_assert(insn != 0);
2411 }
2412 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2413 }
2414 }
2415 break;
2416
2417 case INDEX_op_bitsel_vec:
2418 a3 = args[3];
2419 if (a0 == a3) {
2420 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2421 } else if (a0 == a2) {
2422 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2423 } else {
2424 if (a0 != a1) {
2425 tcg_out_mov(s, type, a0, a1);
2426 }
2427 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2428 }
2429 break;
2430
2431 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
2432 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
2433 default:
2434 g_assert_not_reached();
2435 }
2436 }
2437
2438 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2439 {
2440 switch (opc) {
2441 case INDEX_op_add_vec:
2442 case INDEX_op_sub_vec:
2443 case INDEX_op_and_vec:
2444 case INDEX_op_or_vec:
2445 case INDEX_op_xor_vec:
2446 case INDEX_op_andc_vec:
2447 case INDEX_op_orc_vec:
2448 case INDEX_op_neg_vec:
2449 case INDEX_op_abs_vec:
2450 case INDEX_op_not_vec:
2451 case INDEX_op_cmp_vec:
2452 case INDEX_op_shli_vec:
2453 case INDEX_op_shri_vec:
2454 case INDEX_op_sari_vec:
2455 case INDEX_op_ssadd_vec:
2456 case INDEX_op_sssub_vec:
2457 case INDEX_op_usadd_vec:
2458 case INDEX_op_ussub_vec:
2459 case INDEX_op_shlv_vec:
2460 case INDEX_op_bitsel_vec:
2461 return 1;
2462 case INDEX_op_rotli_vec:
2463 case INDEX_op_shrv_vec:
2464 case INDEX_op_sarv_vec:
2465 case INDEX_op_rotlv_vec:
2466 case INDEX_op_rotrv_vec:
2467 return -1;
2468 case INDEX_op_mul_vec:
2469 case INDEX_op_smax_vec:
2470 case INDEX_op_smin_vec:
2471 case INDEX_op_umax_vec:
2472 case INDEX_op_umin_vec:
2473 return vece < MO_64;
2474
2475 default:
2476 return 0;
2477 }
2478 }
2479
2480 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2481 TCGArg a0, ...)
2482 {
2483 va_list va;
2484 TCGv_vec v0, v1, v2, t1, t2, c1;
2485 TCGArg a2;
2486
2487 va_start(va, a0);
2488 v0 = temp_tcgv_vec(arg_temp(a0));
2489 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2490 a2 = va_arg(va, TCGArg);
2491 v2 = temp_tcgv_vec(arg_temp(a2));
2492
2493 switch (opc) {
2494 case INDEX_op_rotli_vec:
2495 t1 = tcg_temp_new_vec(type);
2496 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2497 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2498 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2499 tcg_temp_free_vec(t1);
2500 break;
2501
2502 case INDEX_op_shrv_vec:
2503 case INDEX_op_sarv_vec:
2504 /* Right shifts are negative left shifts for AArch64. */
2505 t1 = tcg_temp_new_vec(type);
2506 tcg_gen_neg_vec(vece, t1, v2);
2507 opc = (opc == INDEX_op_shrv_vec
2508 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2509 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2510 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2511 tcg_temp_free_vec(t1);
2512 break;
2513
2514 case INDEX_op_rotlv_vec:
2515 t1 = tcg_temp_new_vec(type);
2516 c1 = tcg_constant_vec(type, vece, 8 << vece);
2517 tcg_gen_sub_vec(vece, t1, v2, c1);
2518 /* Right shifts are negative left shifts for AArch64. */
2519 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2520 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2521 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2522 tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2523 tcg_gen_or_vec(vece, v0, v0, t1);
2524 tcg_temp_free_vec(t1);
2525 break;
2526
2527 case INDEX_op_rotrv_vec:
2528 t1 = tcg_temp_new_vec(type);
2529 t2 = tcg_temp_new_vec(type);
2530 c1 = tcg_constant_vec(type, vece, 8 << vece);
2531 tcg_gen_neg_vec(vece, t1, v2);
2532 tcg_gen_sub_vec(vece, t2, c1, v2);
2533 /* Right shifts are negative left shifts for AArch64. */
2534 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2535 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2536 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2537 tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2538 tcg_gen_or_vec(vece, v0, t1, t2);
2539 tcg_temp_free_vec(t1);
2540 tcg_temp_free_vec(t2);
2541 break;
2542
2543 default:
2544 g_assert_not_reached();
2545 }
2546
2547 va_end(va);
2548 }
2549
2550 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2551 {
2552 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2553 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2554 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2555 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2556 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2557 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2558 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2559 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2560 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2561 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2562 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2563 static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } };
2564 static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
2565 static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
2566 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2567 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2568 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2569 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2570 static const TCGTargetOpDef r_r_rAL
2571 = { .args_ct_str = { "r", "r", "rAL" } };
2572 static const TCGTargetOpDef dep
2573 = { .args_ct_str = { "r", "0", "rZ" } };
2574 static const TCGTargetOpDef ext2
2575 = { .args_ct_str = { "r", "rZ", "rZ" } };
2576 static const TCGTargetOpDef movc
2577 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2578 static const TCGTargetOpDef add2
2579 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2580 static const TCGTargetOpDef w_w_w_w
2581 = { .args_ct_str = { "w", "w", "w", "w" } };
2582
2583 switch (op) {
2584 case INDEX_op_goto_ptr:
2585 return &r;
2586
2587 case INDEX_op_ld8u_i32:
2588 case INDEX_op_ld8s_i32:
2589 case INDEX_op_ld16u_i32:
2590 case INDEX_op_ld16s_i32:
2591 case INDEX_op_ld_i32:
2592 case INDEX_op_ld8u_i64:
2593 case INDEX_op_ld8s_i64:
2594 case INDEX_op_ld16u_i64:
2595 case INDEX_op_ld16s_i64:
2596 case INDEX_op_ld32u_i64:
2597 case INDEX_op_ld32s_i64:
2598 case INDEX_op_ld_i64:
2599 case INDEX_op_neg_i32:
2600 case INDEX_op_neg_i64:
2601 case INDEX_op_not_i32:
2602 case INDEX_op_not_i64:
2603 case INDEX_op_bswap16_i32:
2604 case INDEX_op_bswap32_i32:
2605 case INDEX_op_bswap16_i64:
2606 case INDEX_op_bswap32_i64:
2607 case INDEX_op_bswap64_i64:
2608 case INDEX_op_ext8s_i32:
2609 case INDEX_op_ext16s_i32:
2610 case INDEX_op_ext8u_i32:
2611 case INDEX_op_ext16u_i32:
2612 case INDEX_op_ext8s_i64:
2613 case INDEX_op_ext16s_i64:
2614 case INDEX_op_ext32s_i64:
2615 case INDEX_op_ext8u_i64:
2616 case INDEX_op_ext16u_i64:
2617 case INDEX_op_ext32u_i64:
2618 case INDEX_op_ext_i32_i64:
2619 case INDEX_op_extu_i32_i64:
2620 case INDEX_op_extract_i32:
2621 case INDEX_op_extract_i64:
2622 case INDEX_op_sextract_i32:
2623 case INDEX_op_sextract_i64:
2624 return &r_r;
2625
2626 case INDEX_op_st8_i32:
2627 case INDEX_op_st16_i32:
2628 case INDEX_op_st_i32:
2629 case INDEX_op_st8_i64:
2630 case INDEX_op_st16_i64:
2631 case INDEX_op_st32_i64:
2632 case INDEX_op_st_i64:
2633 return &rZ_r;
2634
2635 case INDEX_op_add_i32:
2636 case INDEX_op_add_i64:
2637 case INDEX_op_sub_i32:
2638 case INDEX_op_sub_i64:
2639 case INDEX_op_setcond_i32:
2640 case INDEX_op_setcond_i64:
2641 return &r_r_rA;
2642
2643 case INDEX_op_mul_i32:
2644 case INDEX_op_mul_i64:
2645 case INDEX_op_div_i32:
2646 case INDEX_op_div_i64:
2647 case INDEX_op_divu_i32:
2648 case INDEX_op_divu_i64:
2649 case INDEX_op_rem_i32:
2650 case INDEX_op_rem_i64:
2651 case INDEX_op_remu_i32:
2652 case INDEX_op_remu_i64:
2653 case INDEX_op_muluh_i64:
2654 case INDEX_op_mulsh_i64:
2655 return &r_r_r;
2656
2657 case INDEX_op_and_i32:
2658 case INDEX_op_and_i64:
2659 case INDEX_op_or_i32:
2660 case INDEX_op_or_i64:
2661 case INDEX_op_xor_i32:
2662 case INDEX_op_xor_i64:
2663 case INDEX_op_andc_i32:
2664 case INDEX_op_andc_i64:
2665 case INDEX_op_orc_i32:
2666 case INDEX_op_orc_i64:
2667 case INDEX_op_eqv_i32:
2668 case INDEX_op_eqv_i64:
2669 return &r_r_rL;
2670
2671 case INDEX_op_shl_i32:
2672 case INDEX_op_shr_i32:
2673 case INDEX_op_sar_i32:
2674 case INDEX_op_rotl_i32:
2675 case INDEX_op_rotr_i32:
2676 case INDEX_op_shl_i64:
2677 case INDEX_op_shr_i64:
2678 case INDEX_op_sar_i64:
2679 case INDEX_op_rotl_i64:
2680 case INDEX_op_rotr_i64:
2681 return &r_r_ri;
2682
2683 case INDEX_op_clz_i32:
2684 case INDEX_op_ctz_i32:
2685 case INDEX_op_clz_i64:
2686 case INDEX_op_ctz_i64:
2687 return &r_r_rAL;
2688
2689 case INDEX_op_brcond_i32:
2690 case INDEX_op_brcond_i64:
2691 return &r_rA;
2692
2693 case INDEX_op_movcond_i32:
2694 case INDEX_op_movcond_i64:
2695 return &movc;
2696
2697 case INDEX_op_qemu_ld_i32:
2698 case INDEX_op_qemu_ld_i64:
2699 return &r_l;
2700 case INDEX_op_qemu_st_i32:
2701 case INDEX_op_qemu_st_i64:
2702 return &lZ_l;
2703
2704 case INDEX_op_deposit_i32:
2705 case INDEX_op_deposit_i64:
2706 return &dep;
2707
2708 case INDEX_op_extract2_i32:
2709 case INDEX_op_extract2_i64:
2710 return &ext2;
2711
2712 case INDEX_op_add2_i32:
2713 case INDEX_op_add2_i64:
2714 case INDEX_op_sub2_i32:
2715 case INDEX_op_sub2_i64:
2716 return &add2;
2717
2718 case INDEX_op_add_vec:
2719 case INDEX_op_sub_vec:
2720 case INDEX_op_mul_vec:
2721 case INDEX_op_xor_vec:
2722 case INDEX_op_ssadd_vec:
2723 case INDEX_op_sssub_vec:
2724 case INDEX_op_usadd_vec:
2725 case INDEX_op_ussub_vec:
2726 case INDEX_op_smax_vec:
2727 case INDEX_op_smin_vec:
2728 case INDEX_op_umax_vec:
2729 case INDEX_op_umin_vec:
2730 case INDEX_op_shlv_vec:
2731 case INDEX_op_shrv_vec:
2732 case INDEX_op_sarv_vec:
2733 case INDEX_op_aa64_sshl_vec:
2734 return &w_w_w;
2735 case INDEX_op_not_vec:
2736 case INDEX_op_neg_vec:
2737 case INDEX_op_abs_vec:
2738 case INDEX_op_shli_vec:
2739 case INDEX_op_shri_vec:
2740 case INDEX_op_sari_vec:
2741 return &w_w;
2742 case INDEX_op_ld_vec:
2743 case INDEX_op_st_vec:
2744 case INDEX_op_dupm_vec:
2745 return &w_r;
2746 case INDEX_op_dup_vec:
2747 return &w_wr;
2748 case INDEX_op_or_vec:
2749 case INDEX_op_andc_vec:
2750 return &w_w_wO;
2751 case INDEX_op_and_vec:
2752 case INDEX_op_orc_vec:
2753 return &w_w_wN;
2754 case INDEX_op_cmp_vec:
2755 return &w_w_wZ;
2756 case INDEX_op_bitsel_vec:
2757 return &w_w_w_w;
2758 case INDEX_op_aa64_sli_vec:
2759 return &w_0_w;
2760
2761 default:
2762 return NULL;
2763 }
2764 }
2765
2766 static void tcg_target_init(TCGContext *s)
2767 {
2768 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2769 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2770 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2771 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2772
2773 tcg_target_call_clobber_regs = -1ull;
2774 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2775 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2776 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2777 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2778 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2779 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2780 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2781 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2782 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2783 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2784 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2785 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2786 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2787 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2788 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2789 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2790 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2791 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2792 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2793
2794 s->reserved_regs = 0;
2795 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2796 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2797 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2798 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2799 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2800 }
2801
2802 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2803 #define PUSH_SIZE ((30 - 19 + 1) * 8)
2804
2805 #define FRAME_SIZE \
2806 ((PUSH_SIZE \
2807 + TCG_STATIC_CALL_ARGS_SIZE \
2808 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2809 + TCG_TARGET_STACK_ALIGN - 1) \
2810 & ~(TCG_TARGET_STACK_ALIGN - 1))
2811
2812 /* We're expecting a 2 byte uleb128 encoded value. */
2813 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2814
2815 /* We're expecting to use a single ADDI insn. */
2816 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2817
2818 static void tcg_target_qemu_prologue(TCGContext *s)
2819 {
2820 TCGReg r;
2821
2822 /* Push (FP, LR) and allocate space for all saved registers. */
2823 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2824 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2825
2826 /* Set up frame pointer for canonical unwinding. */
2827 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2828
2829 /* Store callee-preserved regs x19..x28. */
2830 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2831 int ofs = (r - TCG_REG_X19 + 2) * 8;
2832 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2833 }
2834
2835 /* Make stack space for TCG locals. */
2836 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2837 FRAME_SIZE - PUSH_SIZE);
2838
2839 /* Inform TCG about how to find TCG locals with register, offset, size. */
2840 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2841 CPU_TEMP_BUF_NLONGS * sizeof(long));
2842
2843 #if !defined(CONFIG_SOFTMMU)
2844 if (USE_GUEST_BASE) {
2845 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2846 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2847 }
2848 #endif
2849
2850 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2851 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2852
2853 /*
2854 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2855 * and fall through to the rest of the epilogue.
2856 */
2857 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2858 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2859
2860 /* TB epilogue */
2861 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2862
2863 /* Remove TCG locals stack space. */
2864 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2865 FRAME_SIZE - PUSH_SIZE);
2866
2867 /* Restore registers x19..x28. */
2868 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2869 int ofs = (r - TCG_REG_X19 + 2) * 8;
2870 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2871 }
2872
2873 /* Pop (FP, LR), restore SP to previous frame. */
2874 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2875 TCG_REG_SP, PUSH_SIZE, 0, 1);
2876 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2877 }
2878
2879 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2880 {
2881 int i;
2882 for (i = 0; i < count; ++i) {
2883 p[i] = NOP;
2884 }
2885 }
2886
2887 typedef struct {
2888 DebugFrameHeader h;
2889 uint8_t fde_def_cfa[4];
2890 uint8_t fde_reg_ofs[24];
2891 } DebugFrame;
2892
2893 #define ELF_HOST_MACHINE EM_AARCH64
2894
2895 static const DebugFrame debug_frame = {
2896 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2897 .h.cie.id = -1,
2898 .h.cie.version = 1,
2899 .h.cie.code_align = 1,
2900 .h.cie.data_align = 0x78, /* sleb128 -8 */
2901 .h.cie.return_column = TCG_REG_LR,
2902
2903 /* Total FDE size does not include the "len" member. */
2904 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2905
2906 .fde_def_cfa = {
2907 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2908 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2909 (FRAME_SIZE >> 7)
2910 },
2911 .fde_reg_ofs = {
2912 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2913 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2914 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2915 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2916 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2917 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2918 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2919 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2920 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2921 0x80 + 19, 10, /* DW_CFA_offset, x1p, -80 */
2922 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2923 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2924 }
2925 };
2926
2927 void tcg_register_jit(const void *buf, size_t buf_size)
2928 {
2929 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2930 }