1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "../tcg-ldst.c.inc"
14 #include "../tcg-pool.c.inc"
15 #include "qemu/bitops.h"
16
17 /* We're going to reuse TCGType when setting the SF bit, which controls
18 the size of the operation performed. If we know the values match, it
19 makes things much cleaner. */
20 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
22 #ifdef CONFIG_DEBUG_TCG
23 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
25 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
26 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
27 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28
29 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
30 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
31 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33 };
34 #endif /* CONFIG_DEBUG_TCG */
35
36 static const int tcg_target_reg_alloc_order[] = {
37 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39 TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47 /* X16 reserved as temporary */
48 /* X17 reserved as temporary */
49 /* X18 reserved by system */
50 /* X19 reserved for AREG0 */
51 /* X29 reserved as fp */
52 /* X30 reserved as temporary */
53
54 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
55 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
56 /* V8 - V15 are call-saved, and skipped. */
57 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
58 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
59 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
60 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
61 };
62
63 static const int tcg_target_call_iarg_regs[8] = {
64 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
65 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
66 };
67
68 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
69 {
70 tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
71 tcg_debug_assert(slot >= 0 && slot <= 1);
72 return TCG_REG_X0 + slot;
73 }
74
75 #define TCG_REG_TMP0 TCG_REG_X16
76 #define TCG_REG_TMP1 TCG_REG_X17
77 #define TCG_REG_TMP2 TCG_REG_X30
78 #define TCG_VEC_TMP0 TCG_REG_V31
79
80 #define TCG_REG_GUEST_BASE TCG_REG_X28
81
82 static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
83 {
84 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
85 ptrdiff_t offset = target - src_rx;
86
87 if (offset == sextract64(offset, 0, 26)) {
88 /* read instruction, mask away previous PC_REL26 parameter contents,
89 set the proper offset, then write back the instruction. */
90 *src_rw = deposit32(*src_rw, 0, 26, offset);
91 return true;
92 }
93 return false;
94 }
95
96 static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
97 {
98 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
99 ptrdiff_t offset = target - src_rx;
100
101 if (offset == sextract64(offset, 0, 19)) {
102 *src_rw = deposit32(*src_rw, 5, 19, offset);
103 return true;
104 }
105 return false;
106 }
107
108 static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
109 {
110 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
111 ptrdiff_t offset = target - src_rx;
112
113 if (offset == sextract64(offset, 0, 14)) {
114 *src_rw = deposit32(*src_rw, 5, 14, offset);
115 return true;
116 }
117 return false;
118 }
119
120 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
121 intptr_t value, intptr_t addend)
122 {
123 tcg_debug_assert(addend == 0);
124 switch (type) {
125 case R_AARCH64_JUMP26:
126 case R_AARCH64_CALL26:
127 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
128 case R_AARCH64_CONDBR19:
129 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
130 case R_AARCH64_TSTBR14:
131 return reloc_pc14(code_ptr, (const tcg_insn_unit *)value);
132 default:
133 g_assert_not_reached();
134 }
135 }
136
137 #define TCG_CT_CONST_AIMM 0x100
138 #define TCG_CT_CONST_LIMM 0x200
139 #define TCG_CT_CONST_ZERO 0x400
140 #define TCG_CT_CONST_MONE 0x800
141 #define TCG_CT_CONST_ORRI 0x1000
142 #define TCG_CT_CONST_ANDI 0x2000
143 #define TCG_CT_CONST_CMP 0x4000
144
145 #define ALL_GENERAL_REGS 0xffffffffu
146 #define ALL_VECTOR_REGS 0xffffffff00000000ull
147
148 /* Match a constant valid for addition (12-bit, optionally shifted). */
149 static inline bool is_aimm(uint64_t val)
150 {
151 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
152 }
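/*
 * For illustration: 0xabc and 0xabc000 both satisfy is_aimm (the latter
 * via the "LSL 12" form), while 0xabc001 does not, since neither mask
 * leaves it zero.
 */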
153
154 /* Match a constant valid for logical operations. */
155 static inline bool is_limm(uint64_t val)
156 {
157 /* Taking a simplified view of the logical immediates for now, ignoring
158 the replication that can happen across the field. Match bit patterns
159 of the forms
160 0....01....1
161 0..01..10..0
162 and their inverses. */
163
164 /* Make things easier below, by testing the form with msb clear. */
165 if ((int64_t)val < 0) {
166 val = ~val;
167 }
168 if (val == 0) {
169 return false;
170 }
171 val += val & -val;
172 return (val & (val - 1)) == 0;
173 }
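/*
 * To illustrate the trick above: adding the lowest set bit (val & -val)
 * collapses the lowest run of contiguous ones into a single carry, so the
 * result has at most one bit set exactly when val was one contiguous run
 * of ones, optionally shifted up:
 *     val = 0x0ff0: val += 0x010 -> 0x1000, 0x1000 & 0x0fff == 0 -> match
 *     val = 0x0f0f: val += 0x001 -> 0x0f10, 0x0f10 & 0x0f0f != 0 -> reject
 * Values with the msb set, e.g. 0xffffffffffffff00, are complemented
 * first, which handles the inverted forms with the same test.
 */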
174
175 /* Return true if v16 is a valid 16-bit shifted immediate. */
176 static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
177 {
178 if (v16 == (v16 & 0xff)) {
179 *cmode = 0x8;
180 *imm8 = v16 & 0xff;
181 return true;
182 } else if (v16 == (v16 & 0xff00)) {
183 *cmode = 0xa;
184 *imm8 = v16 >> 8;
185 return true;
186 }
187 return false;
188 }
189
190 /* Return true if v32 is a valid 32-bit shifted immediate. */
191 static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
192 {
193 if (v32 == (v32 & 0xff)) {
194 *cmode = 0x0;
195 *imm8 = v32 & 0xff;
196 return true;
197 } else if (v32 == (v32 & 0xff00)) {
198 *cmode = 0x2;
199 *imm8 = (v32 >> 8) & 0xff;
200 return true;
201 } else if (v32 == (v32 & 0xff0000)) {
202 *cmode = 0x4;
203 *imm8 = (v32 >> 16) & 0xff;
204 return true;
205 } else if (v32 == (v32 & 0xff000000)) {
206 *cmode = 0x6;
207 *imm8 = v32 >> 24;
208 return true;
209 }
210 return false;
211 }
212
213 /* Return true if v32 is a valid 32-bit shifting ones immediate. */
214 static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
215 {
216 if ((v32 & 0xffff00ff) == 0xff) {
217 *cmode = 0xc;
218 *imm8 = (v32 >> 8) & 0xff;
219 return true;
220 } else if ((v32 & 0xff00ffff) == 0xffff) {
221 *cmode = 0xd;
222 *imm8 = (v32 >> 16) & 0xff;
223 return true;
224 }
225 return false;
226 }
227
228 /* Return true if v32 is a valid float32 immediate. */
229 static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
230 {
231 if (extract32(v32, 0, 19) == 0
232 && (extract32(v32, 25, 6) == 0x20
233 || extract32(v32, 25, 6) == 0x1f)) {
234 *cmode = 0xf;
235 *imm8 = (extract32(v32, 31, 1) << 7)
236 | (extract32(v32, 25, 1) << 6)
237 | extract32(v32, 19, 6);
238 return true;
239 }
240 return false;
241 }
242
243 /* Return true if v64 is a valid float64 immediate. */
244 static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
245 {
246 if (extract64(v64, 0, 48) == 0
247 && (extract64(v64, 54, 9) == 0x100
248 || extract64(v64, 54, 9) == 0x0ff)) {
249 *cmode = 0xf;
250 *imm8 = (extract64(v64, 63, 1) << 7)
251 | (extract64(v64, 54, 1) << 6)
252 | extract64(v64, 48, 6);
253 return true;
254 }
255 return false;
256 }
257
258 /*
259 * Return non-zero if v32 can be formed by MOVI+ORR.
260 * Place the parameters for MOVI in (cmode, imm8).
261 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
262 */
263 static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
264 {
265 int i;
266
267 for (i = 6; i > 0; i -= 2) {
268 /* Mask out one byte we can add with ORR. */
269 uint32_t tmp = v32 & ~(0xffu << (i * 4));
270 if (is_shimm32(tmp, cmode, imm8) ||
271 is_soimm32(tmp, cmode, imm8)) {
272 break;
273 }
274 }
275 return i;
276 }
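/*
 * For example, v32 = 0x00ff0012 is not itself a shifted immediate, but
 * masking out the byte at bits [23:16] (i == 4) leaves 0x00000012, which
 * MOVI can load; the caller then ORRs back 0xff << 16 using cmode i and
 * imm8 = extract32(v32, i * 4, 8).  If no single byte can be removed this
 * way, the loop falls through and returns 0.
 */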
277
278 /* Return true if V is a valid 16-bit or 32-bit shifted immediate. */
279 static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
280 {
281 if (v32 == deposit32(v32, 16, 16, v32)) {
282 return is_shimm16(v32, cmode, imm8);
283 } else {
284 return is_shimm32(v32, cmode, imm8);
285 }
286 }
287
288 static bool tcg_target_const_match(int64_t val, int ct,
289 TCGType type, TCGCond cond, int vece)
290 {
291 if (ct & TCG_CT_CONST) {
292 return 1;
293 }
294 if (type == TCG_TYPE_I32) {
295 val = (int32_t)val;
296 }
297
298 if (ct & TCG_CT_CONST_CMP) {
299 if (is_tst_cond(cond)) {
300 ct |= TCG_CT_CONST_LIMM;
301 } else {
302 ct |= TCG_CT_CONST_AIMM;
303 }
304 }
305
306 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
307 return 1;
308 }
309 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
310 return 1;
311 }
312 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
313 return 1;
314 }
315 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
316 return 1;
317 }
318
319 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
320 case 0:
321 break;
322 case TCG_CT_CONST_ANDI:
323 val = ~val;
324 /* fallthru */
325 case TCG_CT_CONST_ORRI:
326 if (val == deposit64(val, 32, 32, val)) {
327 int cmode, imm8;
328 return is_shimm1632(val, &cmode, &imm8);
329 }
330 break;
331 default:
332 /* Both bits should not be set for the same insn. */
333 g_assert_not_reached();
334 }
335
336 return 0;
337 }
338
339 enum aarch64_cond_code {
340 COND_EQ = 0x0,
341 COND_NE = 0x1,
342 COND_CS = 0x2, /* Unsigned greater or equal */
343 COND_HS = COND_CS, /* ALIAS greater or equal */
344 COND_CC = 0x3, /* Unsigned less than */
345 COND_LO = COND_CC, /* ALIAS Lower */
346 COND_MI = 0x4, /* Negative */
347 COND_PL = 0x5, /* Zero or greater */
348 COND_VS = 0x6, /* Overflow */
349 COND_VC = 0x7, /* No overflow */
350 COND_HI = 0x8, /* Unsigned greater than */
351 COND_LS = 0x9, /* Unsigned less or equal */
352 COND_GE = 0xa,
353 COND_LT = 0xb,
354 COND_GT = 0xc,
355 COND_LE = 0xd,
356 COND_AL = 0xe,
357 COND_NV = 0xf, /* behaves like COND_AL here */
358 };
359
360 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
361 [TCG_COND_EQ] = COND_EQ,
362 [TCG_COND_NE] = COND_NE,
363 [TCG_COND_LT] = COND_LT,
364 [TCG_COND_GE] = COND_GE,
365 [TCG_COND_LE] = COND_LE,
366 [TCG_COND_GT] = COND_GT,
367 /* unsigned */
368 [TCG_COND_LTU] = COND_LO,
369 [TCG_COND_GTU] = COND_HI,
370 [TCG_COND_GEU] = COND_HS,
371 [TCG_COND_LEU] = COND_LS,
372 /* bit test */
373 [TCG_COND_TSTEQ] = COND_EQ,
374 [TCG_COND_TSTNE] = COND_NE,
375 };
376
377 typedef enum {
378 LDST_ST = 0, /* store */
379 LDST_LD = 1, /* load */
380 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
381 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
382 } AArch64LdstType;
383
384 /* We encode the format of the insn into the beginning of the name, so that
385 we can have the preprocessor help "typecheck" the insn vs the output
386 function. Arm didn't provide us with nice names for the formats, so we
387 use the section number of the architecture reference manual in which the
388 instruction group is described. */
389 typedef enum {
390 /* Compare and branch (immediate). */
391 I3201_CBZ = 0x34000000,
392 I3201_CBNZ = 0x35000000,
393
394 /* Conditional branch (immediate). */
395 I3202_B_C = 0x54000000,
396
397 /* Test and branch (immediate). */
398 I3205_TBZ = 0x36000000,
399 I3205_TBNZ = 0x37000000,
400
401 /* Unconditional branch (immediate). */
402 I3206_B = 0x14000000,
403 I3206_BL = 0x94000000,
404
405 /* Unconditional branch (register). */
406 I3207_BR = 0xd61f0000,
407 I3207_BLR = 0xd63f0000,
408 I3207_RET = 0xd65f0000,
409
410 /* AdvSIMD load/store single structure. */
411 I3303_LD1R = 0x0d40c000,
412
413 /* Load literal for loading the address at pc-relative offset */
414 I3305_LDR = 0x58000000,
415 I3305_LDR_v64 = 0x5c000000,
416 I3305_LDR_v128 = 0x9c000000,
417
418 /* Load/store exclusive. */
419 I3306_LDXP = 0xc8600000,
420 I3306_STXP = 0xc8200000,
421
422 /* Load/store register. Described here as 3.3.12, but the helper
423 that emits them can transform to 3.3.10 or 3.3.13. */
424 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
425 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
426 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
427 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
428
429 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
430 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
431 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
432 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
433
434 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
435 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
436
437 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
438 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
439 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
440
441 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
442 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
443
444 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
445 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
446
447 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
448 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
449
450 I3312_TO_I3310 = 0x00200800,
451 I3312_TO_I3313 = 0x01000000,
452
453 /* Load/store register pair instructions. */
454 I3314_LDP = 0x28400000,
455 I3314_STP = 0x28000000,
456
457 /* Add/subtract immediate instructions. */
458 I3401_ADDI = 0x11000000,
459 I3401_ADDSI = 0x31000000,
460 I3401_SUBI = 0x51000000,
461 I3401_SUBSI = 0x71000000,
462
463 /* Bitfield instructions. */
464 I3402_BFM = 0x33000000,
465 I3402_SBFM = 0x13000000,
466 I3402_UBFM = 0x53000000,
467
468 /* Extract instruction. */
469 I3403_EXTR = 0x13800000,
470
471 /* Logical immediate instructions. */
472 I3404_ANDI = 0x12000000,
473 I3404_ORRI = 0x32000000,
474 I3404_EORI = 0x52000000,
475 I3404_ANDSI = 0x72000000,
476
477 /* Move wide immediate instructions. */
478 I3405_MOVN = 0x12800000,
479 I3405_MOVZ = 0x52800000,
480 I3405_MOVK = 0x72800000,
481
482 /* PC relative addressing instructions. */
483 I3406_ADR = 0x10000000,
484 I3406_ADRP = 0x90000000,
485
486 /* Add/subtract extended register instructions. */
487 I3501_ADD = 0x0b200000,
488
489 /* Add/subtract shifted register instructions (without a shift). */
490 I3502_ADD = 0x0b000000,
491 I3502_ADDS = 0x2b000000,
492 I3502_SUB = 0x4b000000,
493 I3502_SUBS = 0x6b000000,
494
495 /* Add/subtract shifted register instructions (with a shift). */
496 I3502S_ADD_LSL = I3502_ADD,
497
498 /* Add/subtract with carry instructions. */
499 I3503_ADC = 0x1a000000,
500 I3503_SBC = 0x5a000000,
501
502 /* Conditional select instructions. */
503 I3506_CSEL = 0x1a800000,
504 I3506_CSINC = 0x1a800400,
505 I3506_CSINV = 0x5a800000,
506 I3506_CSNEG = 0x5a800400,
507
508 /* Data-processing (1 source) instructions. */
509 I3507_CLZ = 0x5ac01000,
510 I3507_RBIT = 0x5ac00000,
511 I3507_REV = 0x5ac00000, /* + size << 10 */
512
513 /* Data-processing (2 source) instructions. */
514 I3508_LSLV = 0x1ac02000,
515 I3508_LSRV = 0x1ac02400,
516 I3508_ASRV = 0x1ac02800,
517 I3508_RORV = 0x1ac02c00,
518 I3508_SMULH = 0x9b407c00,
519 I3508_UMULH = 0x9bc07c00,
520 I3508_UDIV = 0x1ac00800,
521 I3508_SDIV = 0x1ac00c00,
522
523 /* Data-processing (3 source) instructions. */
524 I3509_MADD = 0x1b000000,
525 I3509_MSUB = 0x1b008000,
526
527 /* Logical shifted register instructions (without a shift). */
528 I3510_AND = 0x0a000000,
529 I3510_BIC = 0x0a200000,
530 I3510_ORR = 0x2a000000,
531 I3510_ORN = 0x2a200000,
532 I3510_EOR = 0x4a000000,
533 I3510_EON = 0x4a200000,
534 I3510_ANDS = 0x6a000000,
535
536 /* Logical shifted register instructions (with a shift). */
537 I3502S_AND_LSR = I3510_AND | (1 << 22),
538
539 /* AdvSIMD copy */
540 I3605_DUP = 0x0e000400,
541 I3605_INS = 0x4e001c00,
542 I3605_UMOV = 0x0e003c00,
543
544 /* AdvSIMD modified immediate */
545 I3606_MOVI = 0x0f000400,
546 I3606_MVNI = 0x2f000400,
547 I3606_BIC = 0x2f001400,
548 I3606_ORR = 0x0f001400,
549
550 /* AdvSIMD scalar shift by immediate */
551 I3609_SSHR = 0x5f000400,
552 I3609_SSRA = 0x5f001400,
553 I3609_SHL = 0x5f005400,
554 I3609_USHR = 0x7f000400,
555 I3609_USRA = 0x7f001400,
556 I3609_SLI = 0x7f005400,
557
558 /* AdvSIMD scalar three same */
559 I3611_SQADD = 0x5e200c00,
560 I3611_SQSUB = 0x5e202c00,
561 I3611_CMGT = 0x5e203400,
562 I3611_CMGE = 0x5e203c00,
563 I3611_SSHL = 0x5e204400,
564 I3611_ADD = 0x5e208400,
565 I3611_CMTST = 0x5e208c00,
566 I3611_UQADD = 0x7e200c00,
567 I3611_UQSUB = 0x7e202c00,
568 I3611_CMHI = 0x7e203400,
569 I3611_CMHS = 0x7e203c00,
570 I3611_USHL = 0x7e204400,
571 I3611_SUB = 0x7e208400,
572 I3611_CMEQ = 0x7e208c00,
573
574 /* AdvSIMD scalar two-reg misc */
575 I3612_CMGT0 = 0x5e208800,
576 I3612_CMEQ0 = 0x5e209800,
577 I3612_CMLT0 = 0x5e20a800,
578 I3612_ABS = 0x5e20b800,
579 I3612_CMGE0 = 0x7e208800,
580 I3612_CMLE0 = 0x7e209800,
581 I3612_NEG = 0x7e20b800,
582
583 /* AdvSIMD shift by immediate */
584 I3614_SSHR = 0x0f000400,
585 I3614_SSRA = 0x0f001400,
586 I3614_SHL = 0x0f005400,
587 I3614_SLI = 0x2f005400,
588 I3614_USHR = 0x2f000400,
589 I3614_USRA = 0x2f001400,
590
591 /* AdvSIMD three same. */
592 I3616_ADD = 0x0e208400,
593 I3616_AND = 0x0e201c00,
594 I3616_BIC = 0x0e601c00,
595 I3616_BIF = 0x2ee01c00,
596 I3616_BIT = 0x2ea01c00,
597 I3616_BSL = 0x2e601c00,
598 I3616_EOR = 0x2e201c00,
599 I3616_MUL = 0x0e209c00,
600 I3616_ORR = 0x0ea01c00,
601 I3616_ORN = 0x0ee01c00,
602 I3616_SUB = 0x2e208400,
603 I3616_CMGT = 0x0e203400,
604 I3616_CMGE = 0x0e203c00,
605 I3616_CMTST = 0x0e208c00,
606 I3616_CMHI = 0x2e203400,
607 I3616_CMHS = 0x2e203c00,
608 I3616_CMEQ = 0x2e208c00,
609 I3616_SMAX = 0x0e206400,
610 I3616_SMIN = 0x0e206c00,
611 I3616_SSHL = 0x0e204400,
612 I3616_SQADD = 0x0e200c00,
613 I3616_SQSUB = 0x0e202c00,
614 I3616_UMAX = 0x2e206400,
615 I3616_UMIN = 0x2e206c00,
616 I3616_UQADD = 0x2e200c00,
617 I3616_UQSUB = 0x2e202c00,
618 I3616_USHL = 0x2e204400,
619
620 /* AdvSIMD two-reg misc. */
621 I3617_CMGT0 = 0x0e208800,
622 I3617_CMEQ0 = 0x0e209800,
623 I3617_CMLT0 = 0x0e20a800,
624 I3617_CMGE0 = 0x2e208800,
625 I3617_CMLE0 = 0x2e209800,
626 I3617_NOT = 0x2e205800,
627 I3617_ABS = 0x0e20b800,
628 I3617_NEG = 0x2e20b800,
629
630 /* System instructions. */
631 NOP = 0xd503201f,
632 DMB_ISH = 0xd50338bf,
633 DMB_LD = 0x00000100,
634 DMB_ST = 0x00000200,
635
636 BTI_C = 0xd503245f,
637 BTI_J = 0xd503249f,
638 BTI_JC = 0xd50324df,
639 } AArch64Insn;
640
641 static inline uint32_t tcg_in32(TCGContext *s)
642 {
643 uint32_t v = *(uint32_t *)s->code_ptr;
644 return v;
645 }
646
647 /* Emit an opcode with "type-checking" of the format. */
648 #define tcg_out_insn(S, FMT, OP, ...) \
649 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
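/*
 * For example, once the glue()s are expanded,
 *     tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm)
 * becomes
 *     tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm),
 * so pairing an opcode with the wrong format emitter fails to compile.
 */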
650
651 static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
652 TCGReg rt, TCGReg rn, unsigned size)
653 {
654 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
655 }
656
657 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
658 int imm19, TCGReg rt)
659 {
660 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
661 }
662
663 static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
664 TCGReg rt, TCGReg rt2, TCGReg rn)
665 {
666 tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
667 }
668
669 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
670 TCGReg rt, int imm19)
671 {
672 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
673 }
674
675 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
676 TCGCond c, int imm19)
677 {
678 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
679 }
680
681 static void tcg_out_insn_3205(TCGContext *s, AArch64Insn insn,
682 TCGReg rt, int imm6, int imm14)
683 {
684 insn |= (imm6 & 0x20) << (31 - 5);
685 insn |= (imm6 & 0x1f) << 19;
686 tcg_out32(s, insn | (imm14 & 0x3fff) << 5 | rt);
687 }
688
689 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
690 {
691 tcg_out32(s, insn | (imm26 & 0x03ffffff));
692 }
693
694 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
695 {
696 tcg_out32(s, insn | rn << 5);
697 }
698
699 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
700 TCGReg r1, TCGReg r2, TCGReg rn,
701 tcg_target_long ofs, bool pre, bool w)
702 {
703 insn |= 1u << 31; /* ext */
704 insn |= pre << 24;
705 insn |= w << 23;
706
707 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
708 insn |= (ofs & (0x7f << 3)) << (15 - 3);
709
710 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
711 }
712
713 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
714 TCGReg rd, TCGReg rn, uint64_t aimm)
715 {
716 if (aimm > 0xfff) {
717 tcg_debug_assert((aimm & 0xfff) == 0);
718 aimm >>= 12;
719 tcg_debug_assert(aimm <= 0xfff);
720 aimm |= 1 << 12; /* apply LSL 12 */
721 }
722 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
723 }
724
725 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
726 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
727 that feed the DecodeBitMasks pseudo function. */
728 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
729 TCGReg rd, TCGReg rn, int n, int immr, int imms)
730 {
731 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
732 | rn << 5 | rd);
733 }
734
735 #define tcg_out_insn_3404 tcg_out_insn_3402
736
737 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
738 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
739 {
740 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
741 | rn << 5 | rd);
742 }
743
744 /* This function is used for the Move (wide immediate) instruction group.
745 Note that SHIFT is a full shift count, not the 2-bit HW field. */
746 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
747 TCGReg rd, uint16_t half, unsigned shift)
748 {
749 tcg_debug_assert((shift & ~0x30) == 0);
750 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
751 }
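/*
 * Since the assert restricts SHIFT to 0, 16, 32 or 48, "shift << (21 - 4)"
 * is equivalent to placing shift / 16 into the 2-bit hw field at bits
 * [22:21]; e.g. shift == 48 encodes as 3 << 21, i.e. LSL #48.
 */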
752
753 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
754 TCGReg rd, int64_t disp)
755 {
756 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
757 }
758
759 static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
760 TCGType sf, TCGReg rd, TCGReg rn,
761 TCGReg rm, int opt, int imm3)
762 {
763 tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
764 imm3 << 10 | rn << 5 | rd);
765 }
766
767 /* This function is for 3.5.2 (Add/subtract shifted register), for the
768 rare occasion when we actually want to supply a shift amount. */
769 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
770 TCGType ext, TCGReg rd, TCGReg rn,
771 TCGReg rm, int imm6)
772 {
773 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
774 }
775
776 /* This function is for 3.5.2 (Add/subtract shifted register),
777 and 3.5.10 (Logical shifted register), for the vast majority of cases
778 when we don't want to apply a shift. Thus it can also be used for
779 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
780 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
781 TCGReg rd, TCGReg rn, TCGReg rm)
782 {
783 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
784 }
785
786 #define tcg_out_insn_3503 tcg_out_insn_3502
787 #define tcg_out_insn_3508 tcg_out_insn_3502
788 #define tcg_out_insn_3510 tcg_out_insn_3502
789
790 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
791 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
792 {
793 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
794 | tcg_cond_to_aarch64[c] << 12);
795 }
796
797 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
798 TCGReg rd, TCGReg rn)
799 {
800 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
801 }
802
803 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
804 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
805 {
806 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
807 }
808
809 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
810 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
811 {
812 /* Note that bit 11 set means general register input. Therefore
813 we can handle both register sets with one function. */
814 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
815 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
816 }
817
818 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
819 TCGReg rd, bool op, int cmode, uint8_t imm8)
820 {
821 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
822 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
823 }
824
825 static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
826 TCGReg rd, TCGReg rn, unsigned immhb)
827 {
828 tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
829 }
830
831 static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
832 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
833 {
834 tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
835 | (rn & 0x1f) << 5 | (rd & 0x1f));
836 }
837
838 static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
839 unsigned size, TCGReg rd, TCGReg rn)
840 {
841 tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
842 }
843
844 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
845 TCGReg rd, TCGReg rn, unsigned immhb)
846 {
847 tcg_out32(s, insn | q << 30 | immhb << 16
848 | (rn & 0x1f) << 5 | (rd & 0x1f));
849 }
850
851 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
852 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
853 {
854 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
855 | (rn & 0x1f) << 5 | (rd & 0x1f));
856 }
857
858 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
859 unsigned size, TCGReg rd, TCGReg rn)
860 {
861 tcg_out32(s, insn | q << 30 | (size << 22)
862 | (rn & 0x1f) << 5 | (rd & 0x1f));
863 }
864
865 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
866 TCGReg rd, TCGReg base, TCGType ext,
867 TCGReg regoff)
868 {
869 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
870 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
871 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
872 }
873
874 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
875 TCGReg rd, TCGReg rn, intptr_t offset)
876 {
877 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
878 }
879
880 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
881 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
882 {
883 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
884 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
885 | rn << 5 | (rd & 0x1f));
886 }
887
888 static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
889 {
890 /*
891 * While BTI insns are nops on hosts without FEAT_BTI,
892 * there is no point in emitting them in that case either.
893 */
894 if (cpuinfo & CPUINFO_BTI) {
895 tcg_out32(s, insn);
896 }
897 }
898
899 /* Register to register move using ORR (shifted register with no shift). */
900 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
901 {
902 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
903 }
904
905 /* Register to register move using ADDI (move to/from SP). */
906 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
907 {
908 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
909 }
910
911 /* This function is used for the Logical (immediate) instruction group.
912 The value of LIMM must satisfy IS_LIMM. See the comment above about
913 only supporting simplified logical immediates. */
914 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
915 TCGReg rd, TCGReg rn, uint64_t limm)
916 {
917 unsigned h, l, r, c;
918
919 tcg_debug_assert(is_limm(limm));
920
921 h = clz64(limm);
922 l = ctz64(limm);
923 if (l == 0) {
924 r = 0; /* form 0....01....1 */
925 c = ctz64(~limm) - 1;
926 if (h == 0) {
927 r = clz64(~limm); /* form 1..10..01..1 */
928 c += r;
929 }
930 } else {
931 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
932 c = r - h - 1;
933 }
934 if (ext == TCG_TYPE_I32) {
935 r &= 31;
936 c &= 31;
937 }
938
939 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
940 }
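/*
 * Two worked examples of the rotate/length computation in tcg_out_logicali
 * above, for a 64-bit operation:
 *     limm = 0x00ff: l == 0, so r = 0 and c = ctz64(~limm) - 1 = 7,
 *         i.e. a run of 8 ones with no rotation;
 *     limm = 0x0ff0: l == 4 and h == 52, so r = 64 - 4 = 60 and c = 7,
 *         i.e. a run of 8 ones rotated right by 60 (left by 4).
 */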
941
942 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
943 TCGReg rd, int64_t v64)
944 {
945 bool q = type == TCG_TYPE_V128;
946 int cmode, imm8, i;
947
948 /* Test all bytes equal first. */
949 if (vece == MO_8) {
950 imm8 = (uint8_t)v64;
951 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
952 return;
953 }
954
955 /*
956 * Test all bytes 0x00 or 0xff second. This can match cases that
957 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
958 */
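/*
 * For instance, with vece == MO_32 and v64 == 0x00ff00ff00ff00ff every
 * byte is 0x00 or 0xff, so imm8 becomes 0x55 and a single MOVI
 * (op == 1, cmode == 0xe) suffices, where the MO_32 code below would
 * need MOVI plus ORR.
 */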
959 for (i = imm8 = 0; i < 8; i++) {
960 uint8_t byte = v64 >> (i * 8);
961 if (byte == 0xff) {
962 imm8 |= 1 << i;
963 } else if (byte != 0) {
964 goto fail_bytes;
965 }
966 }
967 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
968 return;
969 fail_bytes:
970
971 /*
972 * Tests for various replications. For each element width, if we
973 * cannot find an expansion there's no point checking a larger
974 * width because we already know by replication it cannot match.
975 */
976 if (vece == MO_16) {
977 uint16_t v16 = v64;
978
979 if (is_shimm16(v16, &cmode, &imm8)) {
980 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
981 return;
982 }
983 if (is_shimm16(~v16, &cmode, &imm8)) {
984 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
985 return;
986 }
987
988 /*
989 * Otherwise, all remaining constants can be loaded in two insns:
990 * rd = v16 & 0xff, rd |= v16 & 0xff00.
991 */
992 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
993 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
994 return;
995 } else if (vece == MO_32) {
996 uint32_t v32 = v64;
997 uint32_t n32 = ~v32;
998
999 if (is_shimm32(v32, &cmode, &imm8) ||
1000 is_soimm32(v32, &cmode, &imm8) ||
1001 is_fimm32(v32, &cmode, &imm8)) {
1002 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
1003 return;
1004 }
1005 if (is_shimm32(n32, &cmode, &imm8) ||
1006 is_soimm32(n32, &cmode, &imm8)) {
1007 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
1008 return;
1009 }
1010
1011 /*
1012 * Restrict the set of constants to those we can load with
1013 * two instructions. Others we load from the pool.
1014 */
1015 i = is_shimm32_pair(v32, &cmode, &imm8);
1016 if (i) {
1017 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
1018 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
1019 return;
1020 }
1021 i = is_shimm32_pair(n32, &cmode, &imm8);
1022 if (i) {
1023 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
1024 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
1025 return;
1026 }
1027 } else if (is_fimm64(v64, &cmode, &imm8)) {
1028 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
1029 return;
1030 }
1031
1032 /*
1033 * As a last resort, load from the constant pool. Sadly there
1034 * is no LD1R (literal), so store the full 16-byte vector.
1035 */
1036 if (type == TCG_TYPE_V128) {
1037 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
1038 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
1039 } else {
1040 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
1041 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
1042 }
1043 }
1044
1045 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
1046 TCGReg rd, TCGReg rs)
1047 {
1048 int is_q = type - TCG_TYPE_V64;
1049 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
1050 return true;
1051 }
1052
1053 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
1054 TCGReg r, TCGReg base, intptr_t offset)
1055 {
1056 TCGReg temp = TCG_REG_TMP0;
1057
1058 if (offset < -0xffffff || offset > 0xffffff) {
1059 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
1060 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
1061 base = temp;
1062 } else {
1063 AArch64Insn add_insn = I3401_ADDI;
1064
1065 if (offset < 0) {
1066 add_insn = I3401_SUBI;
1067 offset = -offset;
1068 }
1069 if (offset & 0xfff000) {
1070 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1071 base = temp;
1072 }
1073 if (offset & 0xfff) {
1074 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1075 base = temp;
1076 }
1077 }
1078 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1079 return true;
1080 }
1081
1082 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1083 tcg_target_long value)
1084 {
1085 tcg_target_long svalue = value;
1086 tcg_target_long ivalue = ~value;
1087 tcg_target_long t0, t1, t2;
1088 int s0, s1;
1089 AArch64Insn opc;
1090
1091 switch (type) {
1092 case TCG_TYPE_I32:
1093 case TCG_TYPE_I64:
1094 tcg_debug_assert(rd < 32);
1095 break;
1096 default:
1097 g_assert_not_reached();
1098 }
1099
1100 /* For 32-bit values, discard potential garbage in value. For 64-bit
1101 values within [2**31, 2**32-1], we can create smaller sequences by
1102 interpreting this as a negative 32-bit number, while ensuring that
1103 the high 32 bits are cleared by setting SF=0. */
1104 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1105 svalue = (int32_t)value;
1106 value = (uint32_t)value;
1107 ivalue = (uint32_t)ivalue;
1108 type = TCG_TYPE_I32;
1109 }
1110
1111 /* Speed things up by handling the common case of small positive
1112 and negative values specially. */
1113 if ((value & ~0xffffull) == 0) {
1114 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1115 return;
1116 } else if ((ivalue & ~0xffffull) == 0) {
1117 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1118 return;
1119 }
1120
1121 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
1122 use the sign-extended value. That lets us match rotated values such
1123 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1124 if (is_limm(svalue)) {
1125 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1126 return;
1127 }
1128
1129 /* Look for host pointer values within 4G of the PC. This happens
1130 often when loading pointers to QEMU's own data structures. */
1131 if (type == TCG_TYPE_I64) {
1132 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1133 tcg_target_long disp = value - src_rx;
1134 if (disp == sextract64(disp, 0, 21)) {
1135 tcg_out_insn(s, 3406, ADR, rd, disp);
1136 return;
1137 }
1138 disp = (value >> 12) - (src_rx >> 12);
1139 if (disp == sextract64(disp, 0, 21)) {
1140 tcg_out_insn(s, 3406, ADRP, rd, disp);
1141 if (value & 0xfff) {
1142 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1143 }
1144 return;
1145 }
1146 }
1147
1148 /* Would it take fewer insns to begin with MOVN? */
1149 if (ctpop64(value) >= 32) {
1150 t0 = ivalue;
1151 opc = I3405_MOVN;
1152 } else {
1153 t0 = value;
1154 opc = I3405_MOVZ;
1155 }
1156 s0 = ctz64(t0) & (63 & -16);
1157 t1 = t0 & ~(0xffffull << s0);
1158 s1 = ctz64(t1) & (63 & -16);
1159 t2 = t1 & ~(0xffffull << s1);
1160 if (t2 == 0) {
1161 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1162 if (t1 != 0) {
1163 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1164 }
1165 return;
1166 }
1167
1168 /* For more than 2 insns, dump it into the constant pool. */
1169 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1170 tcg_out_insn(s, 3305, LDR, 0, rd);
1171 }
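/*
 * Two examples of the paths above: 0xffffffffffff1234 has ~value fitting
 * in 16 bits and is emitted as a single MOVN rd, #0xedcb.  For
 * 0x0000123400005678 (assuming it is not reachable pc-relatively),
 * ctpop64 < 32 selects MOVZ, s0 == 0 and s1 == 32, giving
 * MOVZ rd, #0x5678 followed by MOVK rd, #0x1234, lsl #32.
 */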
1172
1173 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1174 {
1175 return false;
1176 }
1177
1178 static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1179 tcg_target_long imm)
1180 {
1181 /* This function is only used for passing structs by reference. */
1182 g_assert_not_reached();
1183 }
1184
1185 /* Define something more legible for general use. */
1186 #define tcg_out_ldst_r tcg_out_insn_3310
1187
1188 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1189 TCGReg rn, intptr_t offset, int lgsize)
1190 {
1191 /* If the offset is naturally aligned and in range, then we can
1192 use the scaled uimm12 encoding */
1193 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1194 uintptr_t scaled_uimm = offset >> lgsize;
1195 if (scaled_uimm <= 0xfff) {
1196 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1197 return;
1198 }
1199 }
1200
1201 /* Small signed offsets can use the unscaled encoding. */
1202 if (offset >= -256 && offset < 256) {
1203 tcg_out_insn_3312(s, insn, rd, rn, offset);
1204 return;
1205 }
1206
1207 /* Worst-case scenario, move offset to temp register, use reg offset. */
1208 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
1209 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
1210 }
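/*
 * For example, with insn == I3312_LDRX (lgsize == 3): offset 0x7ff8 is
 * aligned and 0x7ff8 >> 3 == 0xfff, so the scaled uimm12 form is used;
 * offset -16 uses the unscaled signed form; offset 0x12345 fits neither,
 * so it goes through TMP0 and the register-offset form.
 */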
1211
1212 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1213 {
1214 if (ret == arg) {
1215 return true;
1216 }
1217 switch (type) {
1218 case TCG_TYPE_I32:
1219 case TCG_TYPE_I64:
1220 if (ret < 32 && arg < 32) {
1221 tcg_out_movr(s, type, ret, arg);
1222 break;
1223 } else if (ret < 32) {
1224 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1225 break;
1226 } else if (arg < 32) {
1227 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1228 break;
1229 }
1230 /* FALLTHRU */
1231
1232 case TCG_TYPE_V64:
1233 tcg_debug_assert(ret >= 32 && arg >= 32);
1234 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1235 break;
1236 case TCG_TYPE_V128:
1237 tcg_debug_assert(ret >= 32 && arg >= 32);
1238 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1239 break;
1240
1241 default:
1242 g_assert_not_reached();
1243 }
1244 return true;
1245 }
1246
1247 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1248 TCGReg base, intptr_t ofs)
1249 {
1250 AArch64Insn insn;
1251 int lgsz;
1252
1253 switch (type) {
1254 case TCG_TYPE_I32:
1255 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1256 lgsz = 2;
1257 break;
1258 case TCG_TYPE_I64:
1259 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1260 lgsz = 3;
1261 break;
1262 case TCG_TYPE_V64:
1263 insn = I3312_LDRVD;
1264 lgsz = 3;
1265 break;
1266 case TCG_TYPE_V128:
1267 insn = I3312_LDRVQ;
1268 lgsz = 4;
1269 break;
1270 default:
1271 g_assert_not_reached();
1272 }
1273 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1274 }
1275
1276 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1277 TCGReg base, intptr_t ofs)
1278 {
1279 AArch64Insn insn;
1280 int lgsz;
1281
1282 switch (type) {
1283 case TCG_TYPE_I32:
1284 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1285 lgsz = 2;
1286 break;
1287 case TCG_TYPE_I64:
1288 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1289 lgsz = 3;
1290 break;
1291 case TCG_TYPE_V64:
1292 insn = I3312_STRVD;
1293 lgsz = 3;
1294 break;
1295 case TCG_TYPE_V128:
1296 insn = I3312_STRVQ;
1297 lgsz = 4;
1298 break;
1299 default:
1300 g_assert_not_reached();
1301 }
1302 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1303 }
1304
1305 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1306 TCGReg base, intptr_t ofs)
1307 {
1308 if (type <= TCG_TYPE_I64 && val == 0) {
1309 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1310 return true;
1311 }
1312 return false;
1313 }
1314
1315 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1316 TCGReg rn, unsigned int a, unsigned int b)
1317 {
1318 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1319 }
1320
1321 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1322 TCGReg rn, unsigned int a, unsigned int b)
1323 {
1324 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1325 }
1326
1327 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1328 TCGReg rn, unsigned int a, unsigned int b)
1329 {
1330 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1331 }
1332
1333 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1334 TCGReg rn, TCGReg rm, unsigned int a)
1335 {
1336 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1337 }
1338
1339 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1340 TCGReg rd, TCGReg rn, unsigned int m)
1341 {
1342 int bits = ext ? 64 : 32;
1343 int max = bits - 1;
1344 tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1345 }
1346
1347 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1348 TCGReg rd, TCGReg rn, unsigned int m)
1349 {
1350 int max = ext ? 63 : 31;
1351 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1352 }
1353
1354 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1355 TCGReg rd, TCGReg rn, unsigned int m)
1356 {
1357 int max = ext ? 63 : 31;
1358 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1359 }
1360
1361 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1362 TCGReg rd, TCGReg rn, unsigned int m)
1363 {
1364 int max = ext ? 63 : 31;
1365 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1366 }
1367
1368 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1369 TCGReg rd, TCGReg rn, unsigned int m)
1370 {
1371 int max = ext ? 63 : 31;
1372 tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1373 }
1374
1375 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1376 TCGReg rn, unsigned lsb, unsigned width)
1377 {
1378 unsigned size = ext ? 64 : 32;
1379 unsigned a = (size - lsb) & (size - 1);
1380 unsigned b = width - 1;
1381 tcg_out_bfm(s, ext, rd, rn, a, b);
1382 }
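/*
 * The shift and deposit helpers above rely on the UBFM/SBFM/BFM aliases;
 * for example tcg_out_shl(s, TCG_TYPE_I64, rd, rn, 8) emits
 * UBFM Xd, Xn, #56, #55 (the LSL #8 alias), and
 * tcg_out_dep(s, TCG_TYPE_I64, rd, rn, 8, 16) emits BFM Xd, Xn, #56, #15
 * (the BFI Xd, Xn, #8, #16 alias).
 */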
1383
1384 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGCond cond, TCGReg a,
1385 tcg_target_long b, bool const_b)
1386 {
1387 if (is_tst_cond(cond)) {
1388 if (!const_b) {
1389 tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b);
1390 } else {
1391 tcg_out_logicali(s, I3404_ANDSI, ext, TCG_REG_XZR, a, b);
1392 }
1393 } else {
1394 if (!const_b) {
1395 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1396 } else if (b >= 0) {
1397 tcg_debug_assert(is_aimm(b));
1398 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1399 } else {
1400 tcg_debug_assert(is_aimm(-b));
1401 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1402 }
1403 }
1404 }
1405
1406 static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1407 {
1408 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1409 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1410 tcg_out_insn(s, 3206, B, offset);
1411 }
1412
1413 static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1414 {
1415 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1416 if (offset == sextract64(offset, 0, 26)) {
1417 tcg_out_insn(s, 3206, BL, offset);
1418 } else {
1419 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1420 tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
1421 }
1422 }
1423
1424 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1425 const TCGHelperInfo *info)
1426 {
1427 tcg_out_call_int(s, target);
1428 }
1429
1430 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1431 {
1432 if (!l->has_value) {
1433 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1434 tcg_out_insn(s, 3206, B, 0);
1435 } else {
1436 tcg_out_goto(s, l->u.value_ptr);
1437 }
1438 }
1439
1440 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1441 TCGArg b, bool b_const, TCGLabel *l)
1442 {
1443 int tbit = -1;
1444 bool need_cmp = true;
1445
1446 switch (c) {
1447 case TCG_COND_EQ:
1448 case TCG_COND_NE:
1449 /* cmp xN,0; b.ne L -> cbnz xN,L */
1450 if (b_const && b == 0) {
1451 need_cmp = false;
1452 }
1453 break;
1454 case TCG_COND_LT:
1455 case TCG_COND_GE:
1456 /* cmp xN,0; b.mi L -> tbnz xN,63,L */
1457 if (b_const && b == 0) {
1458 c = (c == TCG_COND_LT ? TCG_COND_TSTNE : TCG_COND_TSTEQ);
1459 tbit = ext ? 63 : 31;
1460 need_cmp = false;
1461 }
1462 break;
1463 case TCG_COND_TSTEQ:
1464 case TCG_COND_TSTNE:
1465 /* tst xN,0xffffffff; b.ne L -> cbnz wN,L */
1466 if (b_const && b == UINT32_MAX) {
1467 c = tcg_tst_eqne_cond(c);
1468 ext = TCG_TYPE_I32;
1469 need_cmp = false;
1470 break;
1471 }
1472 /* tst xN,1<<B; b.ne L -> tbnz xN,B,L */
1473 if (b_const && is_power_of_2(b)) {
1474 tbit = ctz64(b);
1475 need_cmp = false;
1476 }
1477 break;
1478 default:
1479 break;
1480 }
1481
1482 if (need_cmp) {
1483 tcg_out_cmp(s, ext, c, a, b, b_const);
1484 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1485 tcg_out_insn(s, 3202, B_C, c, 0);
1486 return;
1487 }
1488
1489 if (tbit >= 0) {
1490 tcg_out_reloc(s, s->code_ptr, R_AARCH64_TSTBR14, l, 0);
1491 switch (c) {
1492 case TCG_COND_TSTEQ:
1493 tcg_out_insn(s, 3205, TBZ, a, tbit, 0);
1494 break;
1495 case TCG_COND_TSTNE:
1496 tcg_out_insn(s, 3205, TBNZ, a, tbit, 0);
1497 break;
1498 default:
1499 g_assert_not_reached();
1500 }
1501 } else {
1502 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1503 switch (c) {
1504 case TCG_COND_EQ:
1505 tcg_out_insn(s, 3201, CBZ, ext, a, 0);
1506 break;
1507 case TCG_COND_NE:
1508 tcg_out_insn(s, 3201, CBNZ, ext, a, 0);
1509 break;
1510 default:
1511 g_assert_not_reached();
1512 }
1513 }
1514 }
1515
1516 static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1517 TCGReg rd, TCGReg rn)
1518 {
1519 /* REV, REV16, REV32 */
1520 tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1521 }
1522
1523 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1524 TCGReg rd, TCGReg rn)
1525 {
1526 /* Using ALIASes SXTB, SXTH, SXTW of SBFM Xd, Xn, #0, #7|15|31 */
1527 int bits = (8 << s_bits) - 1;
1528 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1529 }
1530
1531 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1532 {
1533 tcg_out_sxt(s, type, MO_8, rd, rn);
1534 }
1535
1536 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1537 {
1538 tcg_out_sxt(s, type, MO_16, rd, rn);
1539 }
1540
1541 static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1542 {
1543 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
1544 }
1545
1546 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1547 {
1548 tcg_out_ext32s(s, rd, rn);
1549 }
1550
1551 static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1552 TCGReg rd, TCGReg rn)
1553 {
1554 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1555 int bits = (8 << s_bits) - 1;
1556 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1557 }
1558
1559 static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1560 {
1561 tcg_out_uxt(s, MO_8, rd, rn);
1562 }
1563
1564 static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1565 {
1566 tcg_out_uxt(s, MO_16, rd, rn);
1567 }
1568
1569 static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
1570 {
1571 tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
1572 }
1573
1574 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1575 {
1576 tcg_out_ext32u(s, rd, rn);
1577 }
1578
1579 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
1580 {
1581 tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
1582 }
1583
1584 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1585 TCGReg rn, int64_t aimm)
1586 {
1587 if (aimm >= 0) {
1588 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1589 } else {
1590 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1591 }
1592 }
1593
1594 static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1595 TCGReg rh, TCGReg al, TCGReg ah,
1596 tcg_target_long bl, tcg_target_long bh,
1597 bool const_bl, bool const_bh, bool sub)
1598 {
1599 TCGReg orig_rl = rl;
1600 AArch64Insn insn;
1601
1602 if (rl == ah || (!const_bh && rl == bh)) {
1603 rl = TCG_REG_TMP0;
1604 }
1605
1606 if (const_bl) {
1607 if (bl < 0) {
1608 bl = -bl;
1609 insn = sub ? I3401_ADDSI : I3401_SUBSI;
1610 } else {
1611 insn = sub ? I3401_SUBSI : I3401_ADDSI;
1612 }
1613
1614 if (unlikely(al == TCG_REG_XZR)) {
1615 /* ??? We want to allow al to be zero for the benefit of
1616 negation via subtraction. However, that leaves open the
1617 possibility of adding 0+const in the low part, and the
1618 immediate add instructions encode XSP not XZR. Don't try
1619 anything more elaborate here than loading another zero. */
1620 al = TCG_REG_TMP0;
1621 tcg_out_movi(s, ext, al, 0);
1622 }
1623 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1624 } else {
1625 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1626 }
1627
1628 insn = I3503_ADC;
1629 if (const_bh) {
1630 /* Note that the only two constants we support are 0 and -1, and
1631 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1632 if ((bh != 0) ^ sub) {
1633 insn = I3503_SBC;
1634 }
1635 bh = TCG_REG_XZR;
1636 } else if (sub) {
1637 insn = I3503_SBC;
1638 }
1639 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1640
1641 tcg_out_mov(s, ext, orig_rl, rl);
1642 }
1643
1644 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1645 {
1646 static const uint32_t sync[] = {
1647 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1648 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1649 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1650 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1651 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1652 };
1653 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1654 }
1655
1656 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1657 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1658 {
1659 TCGReg a1 = a0;
1660 if (is_ctz) {
1661 a1 = TCG_REG_TMP0;
1662 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1663 }
1664 if (const_b && b == (ext ? 64 : 32)) {
1665 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1666 } else {
1667 AArch64Insn sel = I3506_CSEL;
1668
1669 tcg_out_cmp(s, ext, TCG_COND_NE, a0, 0, 1);
1670 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
1671
1672 if (const_b) {
1673 if (b == -1) {
1674 b = TCG_REG_XZR;
1675 sel = I3506_CSINV;
1676 } else if (b == 0) {
1677 b = TCG_REG_XZR;
1678 } else {
1679 tcg_out_movi(s, ext, d, b);
1680 b = d;
1681 }
1682 }
1683 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
1684 }
1685 }
1686
1687 typedef struct {
1688 TCGReg base;
1689 TCGReg index;
1690 TCGType index_ext;
1691 TCGAtomAlign aa;
1692 } HostAddress;
1693
1694 bool tcg_target_has_memory_bswap(MemOp memop)
1695 {
1696 return false;
1697 }
1698
1699 static const TCGLdstHelperParam ldst_helper_param = {
1700 .ntmp = 1, .tmp = { TCG_REG_TMP0 }
1701 };
1702
1703 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1704 {
1705 MemOp opc = get_memop(lb->oi);
1706
1707 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1708 return false;
1709 }
1710
1711 tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1712 tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1713 tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1714 tcg_out_goto(s, lb->raddr);
1715 return true;
1716 }
1717
1718 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1719 {
1720 MemOp opc = get_memop(lb->oi);
1721
1722 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1723 return false;
1724 }
1725
1726 tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1727 tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1728 tcg_out_goto(s, lb->raddr);
1729 return true;
1730 }
1731
1732 /* We expect to use a 7-bit scaled negative offset from ENV. */
1733 #define MIN_TLB_MASK_TABLE_OFS -512
1734
1735 /*
1736 * For system-mode, perform the TLB load and compare.
1737 * For user-mode, perform any required alignment tests.
1738 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1739 * is required and fill in @h with the host address for the fast path.
1740 */
1741 static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1742 TCGReg addr_reg, MemOpIdx oi,
1743 bool is_ld)
1744 {
1745 TCGType addr_type = s->addr_type;
1746 TCGLabelQemuLdst *ldst = NULL;
1747 MemOp opc = get_memop(oi);
1748 MemOp s_bits = opc & MO_SIZE;
1749 unsigned a_mask;
1750
1751 h->aa = atom_and_align_for_opc(s, opc,
1752 have_lse2 ? MO_ATOM_WITHIN16
1753 : MO_ATOM_IFALIGN,
1754 s_bits == MO_128);
1755 a_mask = (1 << h->aa.align) - 1;
1756
1757 if (tcg_use_softmmu) {
1758 unsigned s_mask = (1u << s_bits) - 1;
1759 unsigned mem_index = get_mmuidx(oi);
1760 TCGReg addr_adj;
1761 TCGType mask_type;
1762 uint64_t compare_mask;
1763
1764 ldst = new_ldst_label(s);
1765 ldst->is_ld = is_ld;
1766 ldst->oi = oi;
1767 ldst->addrlo_reg = addr_reg;
1768
1769 mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
1770 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1771
1772 /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
1773 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1774 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1775 tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
1776 tlb_mask_table_ofs(s, mem_index), 1, 0);
1777
1778 /* Extract the TLB index from the address into X0. */
1779 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1780 TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
1781 s->page_bits - CPU_TLB_ENTRY_BITS);
1782
1783 /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
1784 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
1785
1786 /* Load the tlb comparator into TMP0, and the fast path addend. */
1787 QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1788 tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
1789 is_ld ? offsetof(CPUTLBEntry, addr_read)
1790 : offsetof(CPUTLBEntry, addr_write));
1791 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
1792 offsetof(CPUTLBEntry, addend));
1793
1794 /*
1795 * For aligned accesses, we check the first byte and include
1796 * the alignment bits within the address. For unaligned access,
1797 * we check that we don't cross pages using the address of the
1798 * last byte of the access.
1799 */
1800 if (a_mask >= s_mask) {
1801 addr_adj = addr_reg;
1802 } else {
1803 addr_adj = TCG_REG_TMP2;
1804 tcg_out_insn(s, 3401, ADDI, addr_type,
1805 addr_adj, addr_reg, s_mask - a_mask);
1806 }
1807 compare_mask = (uint64_t)s->page_mask | a_mask;
1808
1809 /* Store the page mask part of the address into TMP2. */
1810 tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
1811 addr_adj, compare_mask);
1812
1813 /* Perform the address comparison. */
1814 tcg_out_cmp(s, addr_type, TCG_COND_NE, TCG_REG_TMP0, TCG_REG_TMP2, 0);
1815
1816 /* If not equal, we jump to the slow path. */
1817 ldst->label_ptr[0] = s->code_ptr;
1818 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1819
1820 h->base = TCG_REG_TMP1;
1821 h->index = addr_reg;
1822 h->index_ext = addr_type;
1823 } else {
1824 if (a_mask) {
1825 ldst = new_ldst_label(s);
1826
1827 ldst->is_ld = is_ld;
1828 ldst->oi = oi;
1829 ldst->addrlo_reg = addr_reg;
1830
1831 /* tst addr, #mask */
1832 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1833
1834 /* b.ne slow_path */
1835 ldst->label_ptr[0] = s->code_ptr;
1836 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1837 }
1838
1839 if (guest_base || addr_type == TCG_TYPE_I32) {
1840 h->base = TCG_REG_GUEST_BASE;
1841 h->index = addr_reg;
1842 h->index_ext = addr_type;
1843 } else {
1844 h->base = addr_reg;
1845 h->index = TCG_REG_XZR;
1846 h->index_ext = TCG_TYPE_I64;
1847 }
1848 }
1849
1850 return ldst;
1851 }
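/*
 * A rough sketch of the fast path emitted by prepare_host_addr above in
 * the softmmu case (register names per the TMP defines at the top):
 *     ldp  tmp0, tmp1, [env + mask/table offset]
 *     and  tmp0, tmp0, addr, lsr #(page_bits - CPU_TLB_ENTRY_BITS)
 *     add  tmp1, tmp1, tmp0                        // &tlb[index]
 *     ldr  tmp0, [tmp1, #addr_read or addr_write]  // comparator
 *     ldr  tmp1, [tmp1, #addend]
 *     and  tmp2, addr (+ s_mask - a_mask), #(page_mask | a_mask)
 *     cmp  tmp0, tmp2
 *     b.ne slow_path
 * after which the guest access is performed with tmp1 as base and the
 * original address register as index.
 */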
1852
1853 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1854 TCGReg data_r, HostAddress h)
1855 {
1856 switch (memop & MO_SSIZE) {
1857 case MO_UB:
1858 tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1859 break;
1860 case MO_SB:
1861 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1862 data_r, h.base, h.index_ext, h.index);
1863 break;
1864 case MO_UW:
1865 tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1866 break;
1867 case MO_SW:
1868 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1869 data_r, h.base, h.index_ext, h.index);
1870 break;
1871 case MO_UL:
1872 tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1873 break;
1874 case MO_SL:
1875 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1876 break;
1877 case MO_UQ:
1878 tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1879 break;
1880 default:
1881 g_assert_not_reached();
1882 }
1883 }
1884
1885 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1886 TCGReg data_r, HostAddress h)
1887 {
1888 switch (memop & MO_SIZE) {
1889 case MO_8:
1890 tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1891 break;
1892 case MO_16:
1893 tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1894 break;
1895 case MO_32:
1896 tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1897 break;
1898 case MO_64:
1899 tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1900 break;
1901 default:
1902 g_assert_not_reached();
1903 }
1904 }
1905
1906 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1907 MemOpIdx oi, TCGType data_type)
1908 {
1909 TCGLabelQemuLdst *ldst;
1910 HostAddress h;
1911
1912 ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1913 tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1914
1915 if (ldst) {
1916 ldst->type = data_type;
1917 ldst->datalo_reg = data_reg;
1918 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1919 }
1920 }
1921
1922 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1923 MemOpIdx oi, TCGType data_type)
1924 {
1925 TCGLabelQemuLdst *ldst;
1926 HostAddress h;
1927
1928 ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1929 tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1930
1931 if (ldst) {
1932 ldst->type = data_type;
1933 ldst->datalo_reg = data_reg;
1934 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1935 }
1936 }
1937
1938 static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1939 TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1940 {
1941 TCGLabelQemuLdst *ldst;
1942 HostAddress h;
1943 TCGReg base;
1944 bool use_pair;
1945
1946 ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1947
1948 /* Compose the final address, as LDP/STP have no indexing. */
1949 if (h.index == TCG_REG_XZR) {
1950 base = h.base;
1951 } else {
1952 base = TCG_REG_TMP2;
1953 if (h.index_ext == TCG_TYPE_I32) {
1954 /* add base, base, index, uxtw */
1955 tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
1956 h.base, h.index, MO_32, 0);
1957 } else {
1958 /* add base, base, index */
1959 tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
1960 }
1961 }
1962
1963 use_pair = h.aa.atom < MO_128 || have_lse2;
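/*
 * A single LDP/STP is sufficient unless the operation requires 16-byte
 * single-copy atomicity and FEAT_LSE2 is not available.
 */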
1964
1965 if (!use_pair) {
1966 tcg_insn_unit *branch = NULL;
1967 TCGReg ll, lh, sl, sh;
1968
1969 /*
1970 * If we have already checked for 16-byte alignment, that's all
1971 * we need. Otherwise we have determined that misaligned atomicity
1972 * may be handled with two 8-byte loads.
1973 */
1974 if (h.aa.align < MO_128) {
1975 /*
1976 * TODO: align should be MO_64, so we only need test bit 3,
1977 * which means we could use TBNZ instead of ANDS+B_C.
1978 */
1979 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
1980 branch = s->code_ptr;
1981 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1982 use_pair = true;
1983 }
1984
1985 if (is_ld) {
1986 /*
1987 * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1988 * ldxp lo, hi, [base]
1989 * stxp t0, lo, hi, [base]
1990 * cbnz t0, .-8
1991 * Require no overlap between data{lo,hi} and base.
1992 */
1993 if (datalo == base || datahi == base) {
1994 tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
1995 base = TCG_REG_TMP2;
1996 }
1997 ll = sl = datalo;
1998 lh = sh = datahi;
1999 } else {
2000 /*
2001 * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
2002 * 1: ldxp t0, t1, [base]
2003 * stxp t0, lo, hi, [base]
2004 * cbnz t0, 1b
2005 */
2006 tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
2007 ll = TCG_REG_TMP0;
2008 lh = TCG_REG_TMP1;
2009 sl = datalo;
2010 sh = datahi;
2011 }
2012
2013 tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
2014 tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
2015 tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
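/* CBNZ with word offset -2 branches back 8 bytes to the LDXP,
   retrying until the store-exclusive succeeds. */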
2016
2017 if (use_pair) {
2018 /* "b .+8", branching across the one insn of use_pair. */
2019 tcg_out_insn(s, 3206, B, 2);
2020 reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
2021 }
2022 }
2023
2024 if (use_pair) {
2025 if (is_ld) {
2026 tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
2027 } else {
2028 tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
2029 }
2030 }
2031
2032 if (ldst) {
2033 ldst->type = TCG_TYPE_I128;
2034 ldst->datalo_reg = datalo;
2035 ldst->datahi_reg = datahi;
2036 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2037 }
2038 }
2039
2040 static const tcg_insn_unit *tb_ret_addr;
2041
2042 static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
2043 {
2044 const tcg_insn_unit *target;
2045 ptrdiff_t offset;
2046
2047 /* Reuse the zeroing that exists for goto_ptr. */
2048 if (a0 == 0) {
2049 target = tcg_code_gen_epilogue;
2050 } else {
2051 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
2052 target = tb_ret_addr;
2053 }
2054
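/* B has a 26-bit signed word offset (+-128MB); beyond that, load the
   target address into a register and branch indirectly. */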
2055 offset = tcg_pcrel_diff(s, target) >> 2;
2056 if (offset == sextract64(offset, 0, 26)) {
2057 tcg_out_insn(s, 3206, B, offset);
2058 } else {
2059 /*
2060 * Only x16/x17 generate BTI type Jump (2),
2061 * other registers generate BTI type Jump|Call (3).
2062 */
2063 QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
2064 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
2065 tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
2066 }
2067 }
2068
2069 static void tcg_out_goto_tb(TCGContext *s, int which)
2070 {
2071 /*
2072 * Direct branch, or indirect address load, will be patched
2073 * by tb_target_set_jmp_target. Assert indirect load offset
2074 * in range early, regardless of direct branch distance.
2075 */
2076 intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
2077 tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
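/* The 21-bit byte-offset check corresponds to the 19-bit word offset
   of LDR (literal), i.e. +-1MB. */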
2078
2079 set_jmp_insn_offset(s, which);
2080 tcg_out32(s, I3206_B);
2081 tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
2082 set_jmp_reset_offset(s, which);
2083 tcg_out_bti(s, BTI_J);
2084 }
2085
2086 void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2087 uintptr_t jmp_rx, uintptr_t jmp_rw)
2088 {
2089 uintptr_t d_addr = tb->jmp_target_addr[n];
2090 ptrdiff_t d_offset = d_addr - jmp_rx;
2091 tcg_insn_unit insn;
2092
2093 /* Either directly branch, or indirect branch load. */
2094 if (d_offset == sextract64(d_offset, 0, 28)) {
2095 insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
2096 } else {
2097 uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
2098 ptrdiff_t i_offset = i_addr - jmp_rx;
2099
2100 /* Note that we asserted this offset to be in range in tcg_out_goto_tb. */
2101 insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
2102 }
2103 qatomic_set((uint32_t *)jmp_rw, insn);
2104 flush_idcache_range(jmp_rx, jmp_rw, 4);
2105 }
2106
2107 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2108 const TCGArg args[TCG_MAX_OP_ARGS],
2109 const int const_args[TCG_MAX_OP_ARGS])
2110 {
2111 /* 99% of the time, we can signal the use of extension registers
2112 by looking to see if the opcode handles 64-bit data. */
2113 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
2114
2115 /* Hoist the loads of the most common arguments. */
2116 TCGArg a0 = args[0];
2117 TCGArg a1 = args[1];
2118 TCGArg a2 = args[2];
2119 int c2 = const_args[2];
2120
2121 /* Some operands are defined with "rZ" constraint, a register or the zero
2122 register; a constant operand is necessarily zero, so we need not test args[I] == 0. */
2123 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
2124
2125 switch (opc) {
2126 case INDEX_op_goto_ptr:
2127 tcg_out_insn(s, 3207, BR, a0);
2128 break;
2129
2130 case INDEX_op_br:
2131 tcg_out_goto_label(s, arg_label(a0));
2132 break;
2133
2134 case INDEX_op_ld8u_i32:
2135 case INDEX_op_ld8u_i64:
2136 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
2137 break;
2138 case INDEX_op_ld8s_i32:
2139 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
2140 break;
2141 case INDEX_op_ld8s_i64:
2142 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
2143 break;
2144 case INDEX_op_ld16u_i32:
2145 case INDEX_op_ld16u_i64:
2146 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
2147 break;
2148 case INDEX_op_ld16s_i32:
2149 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
2150 break;
2151 case INDEX_op_ld16s_i64:
2152 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
2153 break;
2154 case INDEX_op_ld_i32:
2155 case INDEX_op_ld32u_i64:
2156 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
2157 break;
2158 case INDEX_op_ld32s_i64:
2159 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
2160 break;
2161 case INDEX_op_ld_i64:
2162 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2163 break;
2164
2165 case INDEX_op_st8_i32:
2166 case INDEX_op_st8_i64:
2167 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
2168 break;
2169 case INDEX_op_st16_i32:
2170 case INDEX_op_st16_i64:
2171 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
2172 break;
2173 case INDEX_op_st_i32:
2174 case INDEX_op_st32_i64:
2175 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
2176 break;
2177 case INDEX_op_st_i64:
2178 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
2179 break;
2180
2181 case INDEX_op_add_i32:
2182 a2 = (int32_t)a2;
2183 /* FALLTHRU */
2184 case INDEX_op_add_i64:
2185 if (c2) {
2186 tcg_out_addsubi(s, ext, a0, a1, a2);
2187 } else {
2188 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2189 }
2190 break;
2191
2192 case INDEX_op_sub_i32:
2193 a2 = (int32_t)a2;
2194 /* FALLTHRU */
2195 case INDEX_op_sub_i64:
2196 if (c2) {
2197 tcg_out_addsubi(s, ext, a0, a1, -a2);
2198 } else {
2199 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2200 }
2201 break;
2202
2203 case INDEX_op_neg_i64:
2204 case INDEX_op_neg_i32:
2205 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2206 break;
2207
2208 case INDEX_op_and_i32:
2209 a2 = (int32_t)a2;
2210 /* FALLTHRU */
2211 case INDEX_op_and_i64:
2212 if (c2) {
2213 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2214 } else {
2215 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2216 }
2217 break;
2218
2219 case INDEX_op_andc_i32:
2220 a2 = (int32_t)a2;
2221 /* FALLTHRU */
2222 case INDEX_op_andc_i64:
2223 if (c2) {
2224 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2225 } else {
2226 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2227 }
2228 break;
2229
2230 case INDEX_op_or_i32:
2231 a2 = (int32_t)a2;
2232 /* FALLTHRU */
2233 case INDEX_op_or_i64:
2234 if (c2) {
2235 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2236 } else {
2237 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2238 }
2239 break;
2240
2241 case INDEX_op_orc_i32:
2242 a2 = (int32_t)a2;
2243 /* FALLTHRU */
2244 case INDEX_op_orc_i64:
2245 if (c2) {
2246 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2247 } else {
2248 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2249 }
2250 break;
2251
2252 case INDEX_op_xor_i32:
2253 a2 = (int32_t)a2;
2254 /* FALLTHRU */
2255 case INDEX_op_xor_i64:
2256 if (c2) {
2257 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2258 } else {
2259 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2260 }
2261 break;
2262
2263 case INDEX_op_eqv_i32:
2264 a2 = (int32_t)a2;
2265 /* FALLTHRU */
2266 case INDEX_op_eqv_i64:
2267 if (c2) {
2268 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2269 } else {
2270 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2271 }
2272 break;
2273
2274 case INDEX_op_not_i64:
2275 case INDEX_op_not_i32:
2276 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2277 break;
2278
2279 case INDEX_op_mul_i64:
2280 case INDEX_op_mul_i32:
2281 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2282 break;
2283
2284 case INDEX_op_div_i64:
2285 case INDEX_op_div_i32:
2286 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2287 break;
2288 case INDEX_op_divu_i64:
2289 case INDEX_op_divu_i32:
2290 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2291 break;
2292
2293 case INDEX_op_rem_i64:
2294 case INDEX_op_rem_i32:
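/* There is no integer remainder instruction: compute
   a0 = a1 - (a1 / a2) * a2 via SDIV/UDIV plus MSUB. */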
2295 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
2296 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2297 break;
2298 case INDEX_op_remu_i64:
2299 case INDEX_op_remu_i32:
2300 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
2301 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2302 break;
2303
2304 case INDEX_op_shl_i64:
2305 case INDEX_op_shl_i32:
2306 if (c2) {
2307 tcg_out_shl(s, ext, a0, a1, a2);
2308 } else {
2309 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2310 }
2311 break;
2312
2313 case INDEX_op_shr_i64:
2314 case INDEX_op_shr_i32:
2315 if (c2) {
2316 tcg_out_shr(s, ext, a0, a1, a2);
2317 } else {
2318 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2319 }
2320 break;
2321
2322 case INDEX_op_sar_i64:
2323 case INDEX_op_sar_i32:
2324 if (c2) {
2325 tcg_out_sar(s, ext, a0, a1, a2);
2326 } else {
2327 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2328 }
2329 break;
2330
2331 case INDEX_op_rotr_i64:
2332 case INDEX_op_rotr_i32:
2333 if (c2) {
2334 tcg_out_rotr(s, ext, a0, a1, a2);
2335 } else {
2336 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2337 }
2338 break;
2339
2340 case INDEX_op_rotl_i64:
2341 case INDEX_op_rotl_i32:
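/* There is no rotate-left instruction; for a register count,
   rotate right by the negated count instead. */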
2342 if (c2) {
2343 tcg_out_rotl(s, ext, a0, a1, a2);
2344 } else {
2345 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
2346 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
2347 }
2348 break;
2349
2350 case INDEX_op_clz_i64:
2351 case INDEX_op_clz_i32:
2352 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2353 break;
2354 case INDEX_op_ctz_i64:
2355 case INDEX_op_ctz_i32:
2356 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2357 break;
2358
2359 case INDEX_op_brcond_i32:
2360 a1 = (int32_t)a1;
2361 /* FALLTHRU */
2362 case INDEX_op_brcond_i64:
2363 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2364 break;
2365
2366 case INDEX_op_setcond_i32:
2367 a2 = (int32_t)a2;
2368 /* FALLTHRU */
2369 case INDEX_op_setcond_i64:
2370 tcg_out_cmp(s, ext, args[3], a1, a2, c2);
2371 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
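/* With the inverted condition, CSINC yields WZR + 1 = 1 when the
   original condition holds and WZR = 0 when it does not. */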
2372 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2373 TCG_REG_XZR, tcg_invert_cond(args[3]));
2374 break;
2375
2376 case INDEX_op_negsetcond_i32:
2377 a2 = (int32_t)a2;
2378 /* FALLTHRU */
2379 case INDEX_op_negsetcond_i64:
2380 tcg_out_cmp(s, ext, args[3], a1, a2, c2);
2381 /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). */
2382 tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR,
2383 TCG_REG_XZR, tcg_invert_cond(args[3]));
2384 break;
2385
2386 case INDEX_op_movcond_i32:
2387 a2 = (int32_t)a2;
2388 /* FALLTHRU */
2389 case INDEX_op_movcond_i64:
2390 tcg_out_cmp(s, ext, args[5], a1, a2, c2);
2391 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2392 break;
2393
2394 case INDEX_op_qemu_ld_a32_i32:
2395 case INDEX_op_qemu_ld_a64_i32:
2396 case INDEX_op_qemu_ld_a32_i64:
2397 case INDEX_op_qemu_ld_a64_i64:
2398 tcg_out_qemu_ld(s, a0, a1, a2, ext);
2399 break;
2400 case INDEX_op_qemu_st_a32_i32:
2401 case INDEX_op_qemu_st_a64_i32:
2402 case INDEX_op_qemu_st_a32_i64:
2403 case INDEX_op_qemu_st_a64_i64:
2404 tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2405 break;
2406 case INDEX_op_qemu_ld_a32_i128:
2407 case INDEX_op_qemu_ld_a64_i128:
2408 tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
2409 break;
2410 case INDEX_op_qemu_st_a32_i128:
2411 case INDEX_op_qemu_st_a64_i128:
2412 tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
2413 break;
2414
2415 case INDEX_op_bswap64_i64:
2416 tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2417 break;
2418 case INDEX_op_bswap32_i64:
2419 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2420 if (a2 & TCG_BSWAP_OS) {
2421 tcg_out_ext32s(s, a0, a0);
2422 }
2423 break;
2424 case INDEX_op_bswap32_i32:
2425 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2426 break;
2427 case INDEX_op_bswap16_i64:
2428 case INDEX_op_bswap16_i32:
2429 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2430 if (a2 & TCG_BSWAP_OS) {
2431 /* Output must be sign-extended. */
2432 tcg_out_ext16s(s, ext, a0, a0);
2433 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2434 /* Output must be zero-extended, but input isn't. */
2435 tcg_out_ext16u(s, a0, a0);
2436 }
2437 break;
2438
2439 case INDEX_op_deposit_i64:
2440 case INDEX_op_deposit_i32:
2441 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2442 break;
2443
2444 case INDEX_op_extract_i64:
2445 case INDEX_op_extract_i32:
2446 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2447 break;
2448
2449 case INDEX_op_sextract_i64:
2450 case INDEX_op_sextract_i32:
2451 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2452 break;
2453
2454 case INDEX_op_extract2_i64:
2455 case INDEX_op_extract2_i32:
2456 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2457 break;
2458
2459 case INDEX_op_add2_i32:
2460 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2461 (int32_t)args[4], args[5], const_args[4],
2462 const_args[5], false);
2463 break;
2464 case INDEX_op_add2_i64:
2465 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2466 args[5], const_args[4], const_args[5], false);
2467 break;
2468 case INDEX_op_sub2_i32:
2469 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2470 (int32_t)args[4], args[5], const_args[4],
2471 const_args[5], true);
2472 break;
2473 case INDEX_op_sub2_i64:
2474 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2475 args[5], const_args[4], const_args[5], true);
2476 break;
2477
2478 case INDEX_op_muluh_i64:
2479 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2480 break;
2481 case INDEX_op_mulsh_i64:
2482 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2483 break;
2484
2485 case INDEX_op_mb:
2486 tcg_out_mb(s, a0);
2487 break;
2488
2489 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2490 case INDEX_op_mov_i64:
2491 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2492 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */
2493 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */
2494 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */
2495 case INDEX_op_ext8s_i64:
2496 case INDEX_op_ext8u_i32:
2497 case INDEX_op_ext8u_i64:
2498 case INDEX_op_ext16s_i64:
2499 case INDEX_op_ext16s_i32:
2500 case INDEX_op_ext16u_i64:
2501 case INDEX_op_ext16u_i32:
2502 case INDEX_op_ext32s_i64:
2503 case INDEX_op_ext32u_i64:
2504 case INDEX_op_ext_i32_i64:
2505 case INDEX_op_extu_i32_i64:
2506 case INDEX_op_extrl_i64_i32:
2507 default:
2508 g_assert_not_reached();
2509 }
2510
2511 #undef REG0
2512 }
2513
2514 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2515 unsigned vecl, unsigned vece,
2516 const TCGArg args[TCG_MAX_OP_ARGS],
2517 const int const_args[TCG_MAX_OP_ARGS])
2518 {
2519 static const AArch64Insn cmp_vec_insn[16] = {
2520 [TCG_COND_EQ] = I3616_CMEQ,
2521 [TCG_COND_GT] = I3616_CMGT,
2522 [TCG_COND_GE] = I3616_CMGE,
2523 [TCG_COND_GTU] = I3616_CMHI,
2524 [TCG_COND_GEU] = I3616_CMHS,
2525 };
2526 static const AArch64Insn cmp_scalar_insn[16] = {
2527 [TCG_COND_EQ] = I3611_CMEQ,
2528 [TCG_COND_GT] = I3611_CMGT,
2529 [TCG_COND_GE] = I3611_CMGE,
2530 [TCG_COND_GTU] = I3611_CMHI,
2531 [TCG_COND_GEU] = I3611_CMHS,
2532 };
2533 static const AArch64Insn cmp0_vec_insn[16] = {
2534 [TCG_COND_EQ] = I3617_CMEQ0,
2535 [TCG_COND_GT] = I3617_CMGT0,
2536 [TCG_COND_GE] = I3617_CMGE0,
2537 [TCG_COND_LT] = I3617_CMLT0,
2538 [TCG_COND_LE] = I3617_CMLE0,
2539 };
2540 static const AArch64Insn cmp0_scalar_insn[16] = {
2541 [TCG_COND_EQ] = I3612_CMEQ0,
2542 [TCG_COND_GT] = I3612_CMGT0,
2543 [TCG_COND_GE] = I3612_CMGE0,
2544 [TCG_COND_LT] = I3612_CMLT0,
2545 [TCG_COND_LE] = I3612_CMLE0,
2546 };
2547
2548 TCGType type = vecl + TCG_TYPE_V64;
2549 unsigned is_q = vecl;
2550 bool is_scalar = !is_q && vece == MO_64;
2551 TCGArg a0, a1, a2, a3;
2552 int cmode, imm8;
2553
2554 a0 = args[0];
2555 a1 = args[1];
2556 a2 = args[2];
2557
2558 switch (opc) {
2559 case INDEX_op_ld_vec:
2560 tcg_out_ld(s, type, a0, a1, a2);
2561 break;
2562 case INDEX_op_st_vec:
2563 tcg_out_st(s, type, a0, a1, a2);
2564 break;
2565 case INDEX_op_dupm_vec:
2566 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2567 break;
2568 case INDEX_op_add_vec:
2569 if (is_scalar) {
2570 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2571 } else {
2572 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2573 }
2574 break;
2575 case INDEX_op_sub_vec:
2576 if (is_scalar) {
2577 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2578 } else {
2579 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2580 }
2581 break;
2582 case INDEX_op_mul_vec:
2583 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2584 break;
2585 case INDEX_op_neg_vec:
2586 if (is_scalar) {
2587 tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2588 } else {
2589 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2590 }
2591 break;
2592 case INDEX_op_abs_vec:
2593 if (is_scalar) {
2594 tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2595 } else {
2596 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2597 }
2598 break;
2599 case INDEX_op_and_vec:
2600 if (const_args[2]) {
2601 is_shimm1632(~a2, &cmode, &imm8);
2602 if (a0 == a1) {
2603 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2604 return;
2605 }
2606 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2607 a2 = a0;
2608 }
2609 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2610 break;
2611 case INDEX_op_or_vec:
2612 if (const_args[2]) {
2613 is_shimm1632(a2, &cmode, &imm8);
2614 if (a0 == a1) {
2615 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2616 return;
2617 }
2618 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2619 a2 = a0;
2620 }
2621 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2622 break;
2623 case INDEX_op_andc_vec:
2624 if (const_args[2]) {
2625 is_shimm1632(a2, &cmode, &imm8);
2626 if (a0 == a1) {
2627 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2628 return;
2629 }
2630 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2631 a2 = a0;
2632 }
2633 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2634 break;
2635 case INDEX_op_orc_vec:
2636 if (const_args[2]) {
2637 is_shimm1632(~a2, &cmode, &imm8);
2638 if (a0 == a1) {
2639 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2640 return;
2641 }
2642 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2643 a2 = a0;
2644 }
2645 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2646 break;
2647 case INDEX_op_xor_vec:
2648 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2649 break;
2650 case INDEX_op_ssadd_vec:
2651 if (is_scalar) {
2652 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2653 } else {
2654 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2655 }
2656 break;
2657 case INDEX_op_sssub_vec:
2658 if (is_scalar) {
2659 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2660 } else {
2661 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2662 }
2663 break;
2664 case INDEX_op_usadd_vec:
2665 if (is_scalar) {
2666 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2667 } else {
2668 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2669 }
2670 break;
2671 case INDEX_op_ussub_vec:
2672 if (is_scalar) {
2673 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2674 } else {
2675 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2676 }
2677 break;
2678 case INDEX_op_smax_vec:
2679 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2680 break;
2681 case INDEX_op_smin_vec:
2682 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2683 break;
2684 case INDEX_op_umax_vec:
2685 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2686 break;
2687 case INDEX_op_umin_vec:
2688 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2689 break;
2690 case INDEX_op_not_vec:
2691 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2692 break;
2693 case INDEX_op_shli_vec:
2694 if (is_scalar) {
2695 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2696 } else {
2697 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2698 }
2699 break;
2700 case INDEX_op_shri_vec:
2701 if (is_scalar) {
2702 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2703 } else {
2704 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2705 }
2706 break;
2707 case INDEX_op_sari_vec:
2708 if (is_scalar) {
2709 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2710 } else {
2711 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2712 }
2713 break;
2714 case INDEX_op_aa64_sli_vec:
2715 if (is_scalar) {
2716 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2717 } else {
2718 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2719 }
2720 break;
2721 case INDEX_op_shlv_vec:
2722 if (is_scalar) {
2723 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2724 } else {
2725 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2726 }
2727 break;
2728 case INDEX_op_aa64_sshl_vec:
2729 if (is_scalar) {
2730 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2731 } else {
2732 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2733 }
2734 break;
2735 case INDEX_op_cmp_vec:
2736 {
2737 TCGCond cond = args[3];
2738 AArch64Insn insn;
2739
2740 if (cond == TCG_COND_NE) {
2741 if (const_args[2]) {
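/* For x != 0 against a constant zero, CMTST x, x sets each lane
   to all-ones iff that lane is non-zero. */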
2742 if (is_scalar) {
2743 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2744 } else {
2745 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2746 }
2747 } else {
2748 if (is_scalar) {
2749 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2750 } else {
2751 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2752 }
2753 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2754 }
2755 } else {
2756 if (const_args[2]) {
2757 if (is_scalar) {
2758 insn = cmp0_scalar_insn[cond];
2759 if (insn) {
2760 tcg_out_insn_3612(s, insn, vece, a0, a1);
2761 break;
2762 }
2763 } else {
2764 insn = cmp0_vec_insn[cond];
2765 if (insn) {
2766 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2767 break;
2768 }
2769 }
2770 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
2771 a2 = TCG_VEC_TMP0;
2772 }
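/*
 * The tables above encode only EQ, GT, GE, GTU, GEU; the remaining
 * orderings are obtained by swapping the operands and the condition.
 */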
2773 if (is_scalar) {
2774 insn = cmp_scalar_insn[cond];
2775 if (insn == 0) {
2776 TCGArg t;
2777 t = a1, a1 = a2, a2 = t;
2778 cond = tcg_swap_cond(cond);
2779 insn = cmp_scalar_insn[cond];
2780 tcg_debug_assert(insn != 0);
2781 }
2782 tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2783 } else {
2784 insn = cmp_vec_insn[cond];
2785 if (insn == 0) {
2786 TCGArg t;
2787 t = a1, a1 = a2, a2 = t;
2788 cond = tcg_swap_cond(cond);
2789 insn = cmp_vec_insn[cond];
2790 tcg_debug_assert(insn != 0);
2791 }
2792 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2793 }
2794 }
2795 }
2796 break;
2797
2798 case INDEX_op_bitsel_vec:
2799 a3 = args[3];
2800 if (a0 == a3) {
2801 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2802 } else if (a0 == a2) {
2803 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2804 } else {
2805 if (a0 != a1) {
2806 tcg_out_mov(s, type, a0, a1);
2807 }
2808 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2809 }
2810 break;
2811
2812 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
2813 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
2814 default:
2815 g_assert_not_reached();
2816 }
2817 }
2818
2819 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2820 {
2821 switch (opc) {
2822 case INDEX_op_add_vec:
2823 case INDEX_op_sub_vec:
2824 case INDEX_op_and_vec:
2825 case INDEX_op_or_vec:
2826 case INDEX_op_xor_vec:
2827 case INDEX_op_andc_vec:
2828 case INDEX_op_orc_vec:
2829 case INDEX_op_neg_vec:
2830 case INDEX_op_abs_vec:
2831 case INDEX_op_not_vec:
2832 case INDEX_op_cmp_vec:
2833 case INDEX_op_shli_vec:
2834 case INDEX_op_shri_vec:
2835 case INDEX_op_sari_vec:
2836 case INDEX_op_ssadd_vec:
2837 case INDEX_op_sssub_vec:
2838 case INDEX_op_usadd_vec:
2839 case INDEX_op_ussub_vec:
2840 case INDEX_op_shlv_vec:
2841 case INDEX_op_bitsel_vec:
2842 return 1;
2843 case INDEX_op_rotli_vec:
2844 case INDEX_op_shrv_vec:
2845 case INDEX_op_sarv_vec:
2846 case INDEX_op_rotlv_vec:
2847 case INDEX_op_rotrv_vec:
2848 return -1;
2849 case INDEX_op_mul_vec:
2850 case INDEX_op_smax_vec:
2851 case INDEX_op_smin_vec:
2852 case INDEX_op_umax_vec:
2853 case INDEX_op_umin_vec:
2854 return vece < MO_64;
2855
2856 default:
2857 return 0;
2858 }
2859 }
2860
2861 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2862 TCGArg a0, ...)
2863 {
2864 va_list va;
2865 TCGv_vec v0, v1, v2, t1, t2, c1;
2866 TCGArg a2;
2867
2868 va_start(va, a0);
2869 v0 = temp_tcgv_vec(arg_temp(a0));
2870 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2871 a2 = va_arg(va, TCGArg);
2872 va_end(va);
2873
2874 switch (opc) {
2875 case INDEX_op_rotli_vec:
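/*
 * Rotate left by immediate c: shift the element right by width - c
 * into a temporary, then SLI (shift left and insert) the element
 * shifted left by c, i.e. v0 = (v1 >> (width - c)) | (v1 << c).
 */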
2876 t1 = tcg_temp_new_vec(type);
2877 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2878 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2879 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2880 tcg_temp_free_vec(t1);
2881 break;
2882
2883 case INDEX_op_shrv_vec:
2884 case INDEX_op_sarv_vec:
2885 /* Right shifts are negative left shifts for AArch64. */
2886 v2 = temp_tcgv_vec(arg_temp(a2));
2887 t1 = tcg_temp_new_vec(type);
2888 tcg_gen_neg_vec(vece, t1, v2);
2889 opc = (opc == INDEX_op_shrv_vec
2890 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2891 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2892 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2893 tcg_temp_free_vec(t1);
2894 break;
2895
2896 case INDEX_op_rotlv_vec:
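/*
 * rotl(v1, v2) = (v1 << v2) | (v1 >> (width - v2)); the right shift
 * is performed as a USHL with the negative count v2 - width.
 */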
2897 v2 = temp_tcgv_vec(arg_temp(a2));
2898 t1 = tcg_temp_new_vec(type);
2899 c1 = tcg_constant_vec(type, vece, 8 << vece);
2900 tcg_gen_sub_vec(vece, t1, v2, c1);
2901 /* Right shifts are negative left shifts for AArch64. */
2902 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2903 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2904 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2905 tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2906 tcg_gen_or_vec(vece, v0, v0, t1);
2907 tcg_temp_free_vec(t1);
2908 break;
2909
2910 case INDEX_op_rotrv_vec:
2911 v2 = temp_tcgv_vec(arg_temp(a2));
2912 t1 = tcg_temp_new_vec(type);
2913 t2 = tcg_temp_new_vec(type);
2914 c1 = tcg_constant_vec(type, vece, 8 << vece);
2915 tcg_gen_neg_vec(vece, t1, v2);
2916 tcg_gen_sub_vec(vece, t2, c1, v2);
2917 /* Right shifts are negative left shifts for AArch64. */
2918 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2919 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2920 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2921 tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2922 tcg_gen_or_vec(vece, v0, t1, t2);
2923 tcg_temp_free_vec(t1);
2924 tcg_temp_free_vec(t2);
2925 break;
2926
2927 default:
2928 g_assert_not_reached();
2929 }
2930 }
2931
2932 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2933 {
2934 switch (op) {
2935 case INDEX_op_goto_ptr:
2936 return C_O0_I1(r);
2937
2938 case INDEX_op_ld8u_i32:
2939 case INDEX_op_ld8s_i32:
2940 case INDEX_op_ld16u_i32:
2941 case INDEX_op_ld16s_i32:
2942 case INDEX_op_ld_i32:
2943 case INDEX_op_ld8u_i64:
2944 case INDEX_op_ld8s_i64:
2945 case INDEX_op_ld16u_i64:
2946 case INDEX_op_ld16s_i64:
2947 case INDEX_op_ld32u_i64:
2948 case INDEX_op_ld32s_i64:
2949 case INDEX_op_ld_i64:
2950 case INDEX_op_neg_i32:
2951 case INDEX_op_neg_i64:
2952 case INDEX_op_not_i32:
2953 case INDEX_op_not_i64:
2954 case INDEX_op_bswap16_i32:
2955 case INDEX_op_bswap32_i32:
2956 case INDEX_op_bswap16_i64:
2957 case INDEX_op_bswap32_i64:
2958 case INDEX_op_bswap64_i64:
2959 case INDEX_op_ext8s_i32:
2960 case INDEX_op_ext16s_i32:
2961 case INDEX_op_ext8u_i32:
2962 case INDEX_op_ext16u_i32:
2963 case INDEX_op_ext8s_i64:
2964 case INDEX_op_ext16s_i64:
2965 case INDEX_op_ext32s_i64:
2966 case INDEX_op_ext8u_i64:
2967 case INDEX_op_ext16u_i64:
2968 case INDEX_op_ext32u_i64:
2969 case INDEX_op_ext_i32_i64:
2970 case INDEX_op_extu_i32_i64:
2971 case INDEX_op_extract_i32:
2972 case INDEX_op_extract_i64:
2973 case INDEX_op_sextract_i32:
2974 case INDEX_op_sextract_i64:
2975 return C_O1_I1(r, r);
2976
2977 case INDEX_op_st8_i32:
2978 case INDEX_op_st16_i32:
2979 case INDEX_op_st_i32:
2980 case INDEX_op_st8_i64:
2981 case INDEX_op_st16_i64:
2982 case INDEX_op_st32_i64:
2983 case INDEX_op_st_i64:
2984 return C_O0_I2(rZ, r);
2985
2986 case INDEX_op_add_i32:
2987 case INDEX_op_add_i64:
2988 case INDEX_op_sub_i32:
2989 case INDEX_op_sub_i64:
2990 return C_O1_I2(r, r, rA);
2991
2992 case INDEX_op_setcond_i32:
2993 case INDEX_op_setcond_i64:
2994 case INDEX_op_negsetcond_i32:
2995 case INDEX_op_negsetcond_i64:
2996 return C_O1_I2(r, r, rC);
2997
2998 case INDEX_op_mul_i32:
2999 case INDEX_op_mul_i64:
3000 case INDEX_op_div_i32:
3001 case INDEX_op_div_i64:
3002 case INDEX_op_divu_i32:
3003 case INDEX_op_divu_i64:
3004 case INDEX_op_rem_i32:
3005 case INDEX_op_rem_i64:
3006 case INDEX_op_remu_i32:
3007 case INDEX_op_remu_i64:
3008 case INDEX_op_muluh_i64:
3009 case INDEX_op_mulsh_i64:
3010 return C_O1_I2(r, r, r);
3011
3012 case INDEX_op_and_i32:
3013 case INDEX_op_and_i64:
3014 case INDEX_op_or_i32:
3015 case INDEX_op_or_i64:
3016 case INDEX_op_xor_i32:
3017 case INDEX_op_xor_i64:
3018 case INDEX_op_andc_i32:
3019 case INDEX_op_andc_i64:
3020 case INDEX_op_orc_i32:
3021 case INDEX_op_orc_i64:
3022 case INDEX_op_eqv_i32:
3023 case INDEX_op_eqv_i64:
3024 return C_O1_I2(r, r, rL);
3025
3026 case INDEX_op_shl_i32:
3027 case INDEX_op_shr_i32:
3028 case INDEX_op_sar_i32:
3029 case INDEX_op_rotl_i32:
3030 case INDEX_op_rotr_i32:
3031 case INDEX_op_shl_i64:
3032 case INDEX_op_shr_i64:
3033 case INDEX_op_sar_i64:
3034 case INDEX_op_rotl_i64:
3035 case INDEX_op_rotr_i64:
3036 return C_O1_I2(r, r, ri);
3037
3038 case INDEX_op_clz_i32:
3039 case INDEX_op_ctz_i32:
3040 case INDEX_op_clz_i64:
3041 case INDEX_op_ctz_i64:
3042 return C_O1_I2(r, r, rAL);
3043
3044 case INDEX_op_brcond_i32:
3045 case INDEX_op_brcond_i64:
3046 return C_O0_I2(r, rC);
3047
3048 case INDEX_op_movcond_i32:
3049 case INDEX_op_movcond_i64:
3050 return C_O1_I4(r, r, rC, rZ, rZ);
3051
3052 case INDEX_op_qemu_ld_a32_i32:
3053 case INDEX_op_qemu_ld_a64_i32:
3054 case INDEX_op_qemu_ld_a32_i64:
3055 case INDEX_op_qemu_ld_a64_i64:
3056 return C_O1_I1(r, r);
3057 case INDEX_op_qemu_ld_a32_i128:
3058 case INDEX_op_qemu_ld_a64_i128:
3059 return C_O2_I1(r, r, r);
3060 case INDEX_op_qemu_st_a32_i32:
3061 case INDEX_op_qemu_st_a64_i32:
3062 case INDEX_op_qemu_st_a32_i64:
3063 case INDEX_op_qemu_st_a64_i64:
3064 return C_O0_I2(rZ, r);
3065 case INDEX_op_qemu_st_a32_i128:
3066 case INDEX_op_qemu_st_a64_i128:
3067 return C_O0_I3(rZ, rZ, r);
3068
3069 case INDEX_op_deposit_i32:
3070 case INDEX_op_deposit_i64:
3071 return C_O1_I2(r, 0, rZ);
3072
3073 case INDEX_op_extract2_i32:
3074 case INDEX_op_extract2_i64:
3075 return C_O1_I2(r, rZ, rZ);
3076
3077 case INDEX_op_add2_i32:
3078 case INDEX_op_add2_i64:
3079 case INDEX_op_sub2_i32:
3080 case INDEX_op_sub2_i64:
3081 return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
3082
3083 case INDEX_op_add_vec:
3084 case INDEX_op_sub_vec:
3085 case INDEX_op_mul_vec:
3086 case INDEX_op_xor_vec:
3087 case INDEX_op_ssadd_vec:
3088 case INDEX_op_sssub_vec:
3089 case INDEX_op_usadd_vec:
3090 case INDEX_op_ussub_vec:
3091 case INDEX_op_smax_vec:
3092 case INDEX_op_smin_vec:
3093 case INDEX_op_umax_vec:
3094 case INDEX_op_umin_vec:
3095 case INDEX_op_shlv_vec:
3096 case INDEX_op_shrv_vec:
3097 case INDEX_op_sarv_vec:
3098 case INDEX_op_aa64_sshl_vec:
3099 return C_O1_I2(w, w, w);
3100 case INDEX_op_not_vec:
3101 case INDEX_op_neg_vec:
3102 case INDEX_op_abs_vec:
3103 case INDEX_op_shli_vec:
3104 case INDEX_op_shri_vec:
3105 case INDEX_op_sari_vec:
3106 return C_O1_I1(w, w);
3107 case INDEX_op_ld_vec:
3108 case INDEX_op_dupm_vec:
3109 return C_O1_I1(w, r);
3110 case INDEX_op_st_vec:
3111 return C_O0_I2(w, r);
3112 case INDEX_op_dup_vec:
3113 return C_O1_I1(w, wr);
3114 case INDEX_op_or_vec:
3115 case INDEX_op_andc_vec:
3116 return C_O1_I2(w, w, wO);
3117 case INDEX_op_and_vec:
3118 case INDEX_op_orc_vec:
3119 return C_O1_I2(w, w, wN);
3120 case INDEX_op_cmp_vec:
3121 return C_O1_I2(w, w, wZ);
3122 case INDEX_op_bitsel_vec:
3123 return C_O1_I3(w, w, w, w);
3124 case INDEX_op_aa64_sli_vec:
3125 return C_O1_I2(w, 0, w);
3126
3127 default:
3128 g_assert_not_reached();
3129 }
3130 }
3131
3132 static void tcg_target_init(TCGContext *s)
3133 {
3134 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
3135 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
3136 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3137 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
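/* Core registers occupy regset bits 0..31; vector registers bits 32..63. */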
3138
3139 tcg_target_call_clobber_regs = -1ull;
3140 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
3141 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
3142 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
3143 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
3144 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
3145 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
3146 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
3147 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
3148 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
3149 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
3150 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
3151 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3152 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3153 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3154 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3155 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3156 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3157 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3158 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3159
3160 s->reserved_regs = 0;
3161 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3162 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
3163 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
3164 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
3165 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3166 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
3167 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
3168 }
3169
3170 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
3171 #define PUSH_SIZE ((30 - 19 + 1) * 8)
3172
3173 #define FRAME_SIZE \
3174 ((PUSH_SIZE \
3175 + TCG_STATIC_CALL_ARGS_SIZE \
3176 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
3177 + TCG_TARGET_STACK_ALIGN - 1) \
3178 & ~(TCG_TARGET_STACK_ALIGN - 1))
3179
3180 /* We're expecting a 2 byte uleb128 encoded value. */
3181 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
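/* (Each uleb128 byte encodes 7 bits, so two bytes cover values below 1 << 14.) */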
3182
3183 /* We're expecting to use a single ADDI insn. */
3184 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
3185
3186 static void tcg_target_qemu_prologue(TCGContext *s)
3187 {
3188 TCGReg r;
3189
3190 tcg_out_bti(s, BTI_C);
3191
3192 /* Push (FP, LR) and allocate space for all saved registers. */
3193 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
3194 TCG_REG_SP, -PUSH_SIZE, 1, 1);
3195
3196 /* Set up frame pointer for canonical unwinding. */
3197 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
3198
3199 /* Store callee-preserved regs x19..x28. */
3200 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3201 int ofs = (r - TCG_REG_X19 + 2) * 8;
3202 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3203 }
3204
3205 /* Make stack space for TCG locals. */
3206 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3207 FRAME_SIZE - PUSH_SIZE);
3208
3209 /* Inform TCG about how to find TCG locals with register, offset, size. */
3210 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3211 CPU_TEMP_BUF_NLONGS * sizeof(long));
3212
3213 if (!tcg_use_softmmu) {
3214 /*
3215 * Note that XZR cannot be encoded in the address base register slot,
3216 * as that actually encodes SP. Depending on the guest, we may need
3217 * to zero-extend the guest address via the address index register slot,
3218 * therefore we need to load even a zero guest base into a register.
3219 */
3220 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3221 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3222 }
3223
3224 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3225 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3226
3227 /*
3228 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3229 * and fall through to the rest of the epilogue.
3230 */
3231 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3232 tcg_out_bti(s, BTI_J);
3233 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3234
3235 /* TB epilogue */
3236 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3237 tcg_out_bti(s, BTI_J);
3238
3239 /* Remove TCG locals stack space. */
3240 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3241 FRAME_SIZE - PUSH_SIZE);
3242
3243 /* Restore registers x19..x28. */
3244 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3245 int ofs = (r - TCG_REG_X19 + 2) * 8;
3246 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3247 }
3248
3249 /* Pop (FP, LR), restore SP to previous frame. */
3250 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3251 TCG_REG_SP, PUSH_SIZE, 0, 1);
3252 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3253 }
3254
3255 static void tcg_out_tb_start(TCGContext *s)
3256 {
3257 tcg_out_bti(s, BTI_J);
3258 }
3259
3260 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3261 {
3262 int i;
3263 for (i = 0; i < count; ++i) {
3264 p[i] = NOP;
3265 }
3266 }
3267
3268 typedef struct {
3269 DebugFrameHeader h;
3270 uint8_t fde_def_cfa[4];
3271 uint8_t fde_reg_ofs[24];
3272 } DebugFrame;
3273
3274 #define ELF_HOST_MACHINE EM_AARCH64
3275
3276 static const DebugFrame debug_frame = {
3277 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3278 .h.cie.id = -1,
3279 .h.cie.version = 1,
3280 .h.cie.code_align = 1,
3281 .h.cie.data_align = 0x78, /* sleb128 -8 */
3282 .h.cie.return_column = TCG_REG_LR,
3283
3284 /* Total FDE size does not include the "len" member. */
3285 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3286
3287 .fde_def_cfa = {
3288 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
3289 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
3290 (FRAME_SIZE >> 7)
3291 },
3292 .fde_reg_ofs = {
3293 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
3294 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
3295 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
3296 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
3297 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
3298 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
3299 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
3300 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
3301 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
3302 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
3303 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
3304 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
3305 }
3306 };
3307
3308 void tcg_register_jit(const void *buf, size_t buf_size)
3309 {
3310 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3311 }