tcg/aarch64/tcg-target.c.inc
1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "../tcg-ldst.c.inc"
14 #include "../tcg-pool.c.inc"
15 #include "qemu/bitops.h"
16
17 /* We're going to re-use TCGType to set the SF bit, which controls
18 the size of the operation performed. If we know the values match, it
19 makes things much cleaner. */
20 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
22 #ifdef CONFIG_DEBUG_TCG
23 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
25 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
26 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
27 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28
29 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
30 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
31 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33 };
34 #endif /* CONFIG_DEBUG_TCG */
35
36 static const int tcg_target_reg_alloc_order[] = {
37 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39 TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43 TCG_REG_X16, TCG_REG_X17,
44
45 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
46 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47
48 /* X18 reserved by system */
49 /* X19 reserved for AREG0 */
50 /* X29 reserved as fp */
51 /* X30 reserved as temporary */
52
53 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
54 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
55 /* V8 - V15 are call-saved, and skipped. */
56 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
57 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
58 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
59 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
60 };
61
62 static const int tcg_target_call_iarg_regs[8] = {
63 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
64 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
65 };
66 static const int tcg_target_call_oarg_regs[1] = {
67 TCG_REG_X0
68 };
69
70 #define TCG_REG_TMP TCG_REG_X30
71 #define TCG_VEC_TMP TCG_REG_V31
72
73 #ifndef CONFIG_SOFTMMU
74 /* Note that XZR cannot be encoded in the address base register slot,
75 * as that actually encodes SP. So if we need to zero-extend the guest
76 address, via the address index register slot, we need to load even
77 a zero guest base into a register. */
78 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
79 #define TCG_REG_GUEST_BASE TCG_REG_X28
80 #endif
81
82 static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
83 {
84 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
85 ptrdiff_t offset = target - src_rx;
86
87 if (offset == sextract64(offset, 0, 26)) {
88 /* read instruction, mask away previous PC_REL26 parameter contents,
89 set the proper offset, then write back the instruction. */
90 *src_rw = deposit32(*src_rw, 0, 26, offset);
91 return true;
92 }
93 return false;
94 }
95
96 static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
97 {
98 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
99 ptrdiff_t offset = target - src_rx;
100
101 if (offset == sextract64(offset, 0, 19)) {
102 *src_rw = deposit32(*src_rw, 5, 19, offset);
103 return true;
104 }
105 return false;
106 }
107
108 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
109 intptr_t value, intptr_t addend)
110 {
111 tcg_debug_assert(addend == 0);
112 switch (type) {
113 case R_AARCH64_JUMP26:
114 case R_AARCH64_CALL26:
115 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
116 case R_AARCH64_CONDBR19:
117 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
118 default:
119 g_assert_not_reached();
120 }
121 }
122
123 #define TCG_CT_CONST_AIMM 0x100
124 #define TCG_CT_CONST_LIMM 0x200
125 #define TCG_CT_CONST_ZERO 0x400
126 #define TCG_CT_CONST_MONE 0x800
127 #define TCG_CT_CONST_ORRI 0x1000
128 #define TCG_CT_CONST_ANDI 0x2000
129
130 #define ALL_GENERAL_REGS 0xffffffffu
131 #define ALL_VECTOR_REGS 0xffffffff00000000ull
132
133 #ifdef CONFIG_SOFTMMU
134 #define ALL_QLDST_REGS \
135 (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
136 (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
137 #else
138 #define ALL_QLDST_REGS ALL_GENERAL_REGS
139 #endif
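/*
 * Editorial note: under softmmu, X0-X3 are excluded above because the
 * TLB lookup and slow-path call sequences below (see tcg_out_tlb_read and
 * the qemu_ld/st slow paths) clobber those registers while the guest
 * address and data operands must remain live for the actual access.
 */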
140
141 /* Match a constant valid for addition (12-bit, optionally shifted). */
142 static inline bool is_aimm(uint64_t val)
143 {
144 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
145 }
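/*
 * For example, 0x123 and 0x123000 are both encodable add/sub immediates
 * (the latter with LSL #12), while 0x123400 is not and must first be
 * materialized in a register.
 */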
146
147 /* Match a constant valid for logical operations. */
148 static inline bool is_limm(uint64_t val)
149 {
150 /* Taking a simplified view of the logical immediates for now, ignoring
151 the replication that can happen across the field. Match bit patterns
152 of the forms
153 0....01....1
154 0..01..10..0
155 and their inverses. */
156
157 /* Make things easier below, by testing the form with msb clear. */
158 if ((int64_t)val < 0) {
159 val = ~val;
160 }
161 if (val == 0) {
162 return false;
163 }
164 val += val & -val;
165 return (val & (val - 1)) == 0;
166 }
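/*
 * Worked example of the check above: val = 0x0ff0 has lowest set bit
 * 0x0010, so val + (val & -val) = 0x1000, a power of two -> accepted.
 * For val = 0x0f0f the sum is 0x0f10, which still shares bits with
 * (sum - 1), so the value is rejected.
 */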
167
168 /* Return true if v16 is a valid 16-bit shifted immediate. */
169 static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
170 {
171 if (v16 == (v16 & 0xff)) {
172 *cmode = 0x8;
173 *imm8 = v16 & 0xff;
174 return true;
175 } else if (v16 == (v16 & 0xff00)) {
176 *cmode = 0xa;
177 *imm8 = v16 >> 8;
178 return true;
179 }
180 return false;
181 }
182
183 /* Return true if v32 is a valid 32-bit shifted immediate. */
184 static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
185 {
186 if (v32 == (v32 & 0xff)) {
187 *cmode = 0x0;
188 *imm8 = v32 & 0xff;
189 return true;
190 } else if (v32 == (v32 & 0xff00)) {
191 *cmode = 0x2;
192 *imm8 = (v32 >> 8) & 0xff;
193 return true;
194 } else if (v32 == (v32 & 0xff0000)) {
195 *cmode = 0x4;
196 *imm8 = (v32 >> 16) & 0xff;
197 return true;
198 } else if (v32 == (v32 & 0xff000000)) {
199 *cmode = 0x6;
200 *imm8 = v32 >> 24;
201 return true;
202 }
203 return false;
204 }
205
206 /* Return true if v32 is a valid 32-bit shifting ones immediate. */
207 static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
208 {
209 if ((v32 & 0xffff00ff) == 0xff) {
210 *cmode = 0xc;
211 *imm8 = (v32 >> 8) & 0xff;
212 return true;
213 } else if ((v32 & 0xff00ffff) == 0xffff) {
214 *cmode = 0xd;
215 *imm8 = (v32 >> 16) & 0xff;
216 return true;
217 }
218 return false;
219 }
220
221 /* Return true if v32 is a valid float32 immediate. */
222 static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
223 {
224 if (extract32(v32, 0, 19) == 0
225 && (extract32(v32, 25, 6) == 0x20
226 || extract32(v32, 25, 6) == 0x1f)) {
227 *cmode = 0xf;
228 *imm8 = (extract32(v32, 31, 1) << 7)
229 | (extract32(v32, 25, 1) << 6)
230 | extract32(v32, 19, 6);
231 return true;
232 }
233 return false;
234 }
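/*
 * For example, 1.0f (0x3f800000) qualifies: the low 19 bits are zero and
 * bits [30:25] are 0x1f, giving imm8 = 0x70 for the MOVI/FMOV encoding.
 */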
235
236 /* Return true if v64 is a valid float64 immediate. */
237 static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
238 {
239 if (extract64(v64, 0, 48) == 0
240 && (extract64(v64, 54, 9) == 0x100
241 || extract64(v64, 54, 9) == 0x0ff)) {
242 *cmode = 0xf;
243 *imm8 = (extract64(v64, 63, 1) << 7)
244 | (extract64(v64, 54, 1) << 6)
245 | extract64(v64, 48, 6);
246 return true;
247 }
248 return false;
249 }
250
251 /*
252 * Return non-zero if v32 can be formed by MOVI+ORR.
253 * Place the parameters for MOVI in (cmode, imm8).
254 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
255 */
256 static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
257 {
258 int i;
259
260 for (i = 6; i > 0; i -= 2) {
261 /* Mask out one byte we can add with ORR. */
262 uint32_t tmp = v32 & ~(0xffu << (i * 4));
263 if (is_shimm32(tmp, cmode, imm8) ||
264 is_soimm32(tmp, cmode, imm8)) {
265 break;
266 }
267 }
268 return i;
269 }
270
271 /* Return true if V is a valid 16-bit or 32-bit shifted immediate. */
272 static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
273 {
274 if (v32 == deposit32(v32, 16, 16, v32)) {
275 return is_shimm16(v32, cmode, imm8);
276 } else {
277 return is_shimm32(v32, cmode, imm8);
278 }
279 }
280
281 static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
282 {
283 if (ct & TCG_CT_CONST) {
284 return 1;
285 }
286 if (type == TCG_TYPE_I32) {
287 val = (int32_t)val;
288 }
289 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
290 return 1;
291 }
292 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
293 return 1;
294 }
295 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
296 return 1;
297 }
298 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
299 return 1;
300 }
301
302 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
303 case 0:
304 break;
305 case TCG_CT_CONST_ANDI:
306 val = ~val;
307 /* fallthru */
308 case TCG_CT_CONST_ORRI:
309 if (val == deposit64(val, 32, 32, val)) {
310 int cmode, imm8;
311 return is_shimm1632(val, &cmode, &imm8);
312 }
313 break;
314 default:
315 /* Both bits should not be set for the same insn. */
316 g_assert_not_reached();
317 }
318
319 return 0;
320 }
321
322 enum aarch64_cond_code {
323 COND_EQ = 0x0,
324 COND_NE = 0x1,
325 COND_CS = 0x2, /* Unsigned greater or equal */
326 COND_HS = COND_CS, /* ALIAS greater or equal */
327 COND_CC = 0x3, /* Unsigned less than */
328 COND_LO = COND_CC, /* ALIAS Lower */
329 COND_MI = 0x4, /* Negative */
330 COND_PL = 0x5, /* Zero or greater */
331 COND_VS = 0x6, /* Overflow */
332 COND_VC = 0x7, /* No overflow */
333 COND_HI = 0x8, /* Unsigned greater than */
334 COND_LS = 0x9, /* Unsigned less or equal */
335 COND_GE = 0xa,
336 COND_LT = 0xb,
337 COND_GT = 0xc,
338 COND_LE = 0xd,
339 COND_AL = 0xe,
340 COND_NV = 0xf, /* behaves like COND_AL here */
341 };
342
343 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
344 [TCG_COND_EQ] = COND_EQ,
345 [TCG_COND_NE] = COND_NE,
346 [TCG_COND_LT] = COND_LT,
347 [TCG_COND_GE] = COND_GE,
348 [TCG_COND_LE] = COND_LE,
349 [TCG_COND_GT] = COND_GT,
350 /* unsigned */
351 [TCG_COND_LTU] = COND_LO,
352 [TCG_COND_GTU] = COND_HI,
353 [TCG_COND_GEU] = COND_HS,
354 [TCG_COND_LEU] = COND_LS,
355 };
356
357 typedef enum {
358 LDST_ST = 0, /* store */
359 LDST_LD = 1, /* load */
360 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
361 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
362 } AArch64LdstType;
363
364 /* We encode the format of the insn into the beginning of the name, so that
365 we can have the preprocessor help "typecheck" the insn vs the output
366 function. Arm didn't provide us with nice names for the formats, so we
367 use the section number of the architecture reference manual in which the
368 instruction group is described. */
369 typedef enum {
370 /* Compare and branch (immediate). */
371 I3201_CBZ = 0x34000000,
372 I3201_CBNZ = 0x35000000,
373
374 /* Conditional branch (immediate). */
375 I3202_B_C = 0x54000000,
376
377 /* Unconditional branch (immediate). */
378 I3206_B = 0x14000000,
379 I3206_BL = 0x94000000,
380
381 /* Unconditional branch (register). */
382 I3207_BR = 0xd61f0000,
383 I3207_BLR = 0xd63f0000,
384 I3207_RET = 0xd65f0000,
385
386 /* AdvSIMD load/store single structure. */
387 I3303_LD1R = 0x0d40c000,
388
389 /* Load (literal): load a value (e.g. an address) from a pc-relative offset */
390 I3305_LDR = 0x58000000,
391 I3305_LDR_v64 = 0x5c000000,
392 I3305_LDR_v128 = 0x9c000000,
393
394 /* Load/store register. Described here as 3.3.12, but the helper
395 that emits them can transform to 3.3.10 or 3.3.13. */
396 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
397 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
398 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
399 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
400
401 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
402 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
403 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
404 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
405
406 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
407 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
408
409 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
410 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
411 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
412
413 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
414 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
415
416 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
417 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
418
419 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
420 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
421
422 I3312_TO_I3310 = 0x00200800,
423 I3312_TO_I3313 = 0x01000000,
424
425 /* Load/store register pair instructions. */
426 I3314_LDP = 0x28400000,
427 I3314_STP = 0x28000000,
428
429 /* Add/subtract immediate instructions. */
430 I3401_ADDI = 0x11000000,
431 I3401_ADDSI = 0x31000000,
432 I3401_SUBI = 0x51000000,
433 I3401_SUBSI = 0x71000000,
434
435 /* Bitfield instructions. */
436 I3402_BFM = 0x33000000,
437 I3402_SBFM = 0x13000000,
438 I3402_UBFM = 0x53000000,
439
440 /* Extract instruction. */
441 I3403_EXTR = 0x13800000,
442
443 /* Logical immediate instructions. */
444 I3404_ANDI = 0x12000000,
445 I3404_ORRI = 0x32000000,
446 I3404_EORI = 0x52000000,
447 I3404_ANDSI = 0x72000000,
448
449 /* Move wide immediate instructions. */
450 I3405_MOVN = 0x12800000,
451 I3405_MOVZ = 0x52800000,
452 I3405_MOVK = 0x72800000,
453
454 /* PC relative addressing instructions. */
455 I3406_ADR = 0x10000000,
456 I3406_ADRP = 0x90000000,
457
458 /* Add/subtract shifted register instructions (without a shift). */
459 I3502_ADD = 0x0b000000,
460 I3502_ADDS = 0x2b000000,
461 I3502_SUB = 0x4b000000,
462 I3502_SUBS = 0x6b000000,
463
464 /* Add/subtract shifted register instructions (with a shift). */
465 I3502S_ADD_LSL = I3502_ADD,
466
467 /* Add/subtract with carry instructions. */
468 I3503_ADC = 0x1a000000,
469 I3503_SBC = 0x5a000000,
470
471 /* Conditional select instructions. */
472 I3506_CSEL = 0x1a800000,
473 I3506_CSINC = 0x1a800400,
474 I3506_CSINV = 0x5a800000,
475 I3506_CSNEG = 0x5a800400,
476
477 /* Data-processing (1 source) instructions. */
478 I3507_CLZ = 0x5ac01000,
479 I3507_RBIT = 0x5ac00000,
480 I3507_REV = 0x5ac00000, /* + size << 10 */
481
482 /* Data-processing (2 source) instructions. */
483 I3508_LSLV = 0x1ac02000,
484 I3508_LSRV = 0x1ac02400,
485 I3508_ASRV = 0x1ac02800,
486 I3508_RORV = 0x1ac02c00,
487 I3508_SMULH = 0x9b407c00,
488 I3508_UMULH = 0x9bc07c00,
489 I3508_UDIV = 0x1ac00800,
490 I3508_SDIV = 0x1ac00c00,
491
492 /* Data-processing (3 source) instructions. */
493 I3509_MADD = 0x1b000000,
494 I3509_MSUB = 0x1b008000,
495
496 /* Logical shifted register instructions (without a shift). */
497 I3510_AND = 0x0a000000,
498 I3510_BIC = 0x0a200000,
499 I3510_ORR = 0x2a000000,
500 I3510_ORN = 0x2a200000,
501 I3510_EOR = 0x4a000000,
502 I3510_EON = 0x4a200000,
503 I3510_ANDS = 0x6a000000,
504
505 /* Logical shifted register instructions (with a shift). */
506 I3502S_AND_LSR = I3510_AND | (1 << 22),
507
508 /* AdvSIMD copy */
509 I3605_DUP = 0x0e000400,
510 I3605_INS = 0x4e001c00,
511 I3605_UMOV = 0x0e003c00,
512
513 /* AdvSIMD modified immediate */
514 I3606_MOVI = 0x0f000400,
515 I3606_MVNI = 0x2f000400,
516 I3606_BIC = 0x2f001400,
517 I3606_ORR = 0x0f001400,
518
519 /* AdvSIMD scalar shift by immediate */
520 I3609_SSHR = 0x5f000400,
521 I3609_SSRA = 0x5f001400,
522 I3609_SHL = 0x5f005400,
523 I3609_USHR = 0x7f000400,
524 I3609_USRA = 0x7f001400,
525 I3609_SLI = 0x7f005400,
526
527 /* AdvSIMD scalar three same */
528 I3611_SQADD = 0x5e200c00,
529 I3611_SQSUB = 0x5e202c00,
530 I3611_CMGT = 0x5e203400,
531 I3611_CMGE = 0x5e203c00,
532 I3611_SSHL = 0x5e204400,
533 I3611_ADD = 0x5e208400,
534 I3611_CMTST = 0x5e208c00,
535 I3611_UQADD = 0x7e200c00,
536 I3611_UQSUB = 0x7e202c00,
537 I3611_CMHI = 0x7e203400,
538 I3611_CMHS = 0x7e203c00,
539 I3611_USHL = 0x7e204400,
540 I3611_SUB = 0x7e208400,
541 I3611_CMEQ = 0x7e208c00,
542
543 /* AdvSIMD scalar two-reg misc */
544 I3612_CMGT0 = 0x5e208800,
545 I3612_CMEQ0 = 0x5e209800,
546 I3612_CMLT0 = 0x5e20a800,
547 I3612_ABS = 0x5e20b800,
548 I3612_CMGE0 = 0x7e208800,
549 I3612_CMLE0 = 0x7e209800,
550 I3612_NEG = 0x7e20b800,
551
552 /* AdvSIMD shift by immediate */
553 I3614_SSHR = 0x0f000400,
554 I3614_SSRA = 0x0f001400,
555 I3614_SHL = 0x0f005400,
556 I3614_SLI = 0x2f005400,
557 I3614_USHR = 0x2f000400,
558 I3614_USRA = 0x2f001400,
559
560 /* AdvSIMD three same. */
561 I3616_ADD = 0x0e208400,
562 I3616_AND = 0x0e201c00,
563 I3616_BIC = 0x0e601c00,
564 I3616_BIF = 0x2ee01c00,
565 I3616_BIT = 0x2ea01c00,
566 I3616_BSL = 0x2e601c00,
567 I3616_EOR = 0x2e201c00,
568 I3616_MUL = 0x0e209c00,
569 I3616_ORR = 0x0ea01c00,
570 I3616_ORN = 0x0ee01c00,
571 I3616_SUB = 0x2e208400,
572 I3616_CMGT = 0x0e203400,
573 I3616_CMGE = 0x0e203c00,
574 I3616_CMTST = 0x0e208c00,
575 I3616_CMHI = 0x2e203400,
576 I3616_CMHS = 0x2e203c00,
577 I3616_CMEQ = 0x2e208c00,
578 I3616_SMAX = 0x0e206400,
579 I3616_SMIN = 0x0e206c00,
580 I3616_SSHL = 0x0e204400,
581 I3616_SQADD = 0x0e200c00,
582 I3616_SQSUB = 0x0e202c00,
583 I3616_UMAX = 0x2e206400,
584 I3616_UMIN = 0x2e206c00,
585 I3616_UQADD = 0x2e200c00,
586 I3616_UQSUB = 0x2e202c00,
587 I3616_USHL = 0x2e204400,
588
589 /* AdvSIMD two-reg misc. */
590 I3617_CMGT0 = 0x0e208800,
591 I3617_CMEQ0 = 0x0e209800,
592 I3617_CMLT0 = 0x0e20a800,
593 I3617_CMGE0 = 0x2e208800,
594 I3617_CMLE0 = 0x2e209800,
595 I3617_NOT = 0x2e205800,
596 I3617_ABS = 0x0e20b800,
597 I3617_NEG = 0x2e20b800,
598
599 /* System instructions. */
600 NOP = 0xd503201f,
601 DMB_ISH = 0xd50338bf,
602 DMB_LD = 0x00000100,
603 DMB_ST = 0x00000200,
604 } AArch64Insn;
605
606 static inline uint32_t tcg_in32(TCGContext *s)
607 {
608 uint32_t v = *(uint32_t *)s->code_ptr;
609 return v;
610 }
611
612 /* Emit an opcode with "type-checking" of the format. */
613 #define tcg_out_insn(S, FMT, OP, ...) \
614 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
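/*
 * For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
 * tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm); a mismatched
 * format/opcode pair names a nonexistent I<fmt>_<op> constant and fails
 * to compile, which is the "typecheck" mentioned above.
 */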
615
616 static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
617 TCGReg rt, TCGReg rn, unsigned size)
618 {
619 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
620 }
621
622 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
623 int imm19, TCGReg rt)
624 {
625 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
626 }
627
628 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
629 TCGReg rt, int imm19)
630 {
631 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
632 }
633
634 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
635 TCGCond c, int imm19)
636 {
637 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
638 }
639
640 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
641 {
642 tcg_out32(s, insn | (imm26 & 0x03ffffff));
643 }
644
645 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
646 {
647 tcg_out32(s, insn | rn << 5);
648 }
649
650 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
651 TCGReg r1, TCGReg r2, TCGReg rn,
652 tcg_target_long ofs, bool pre, bool w)
653 {
654 insn |= 1u << 31; /* ext */
655 insn |= pre << 24;
656 insn |= w << 23;
657
658 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
659 insn |= (ofs & (0x7f << 3)) << (15 - 3);
660
661 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
662 }
663
664 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
665 TCGReg rd, TCGReg rn, uint64_t aimm)
666 {
667 if (aimm > 0xfff) {
668 tcg_debug_assert((aimm & 0xfff) == 0);
669 aimm >>= 12;
670 tcg_debug_assert(aimm <= 0xfff);
671 aimm |= 1 << 12; /* apply LSL 12 */
672 }
673 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
674 }
675
676 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
677 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
678 that feed the DecodeBitMasks pseudo function. */
679 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
680 TCGReg rd, TCGReg rn, int n, int immr, int imms)
681 {
682 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
683 | rn << 5 | rd);
684 }
685
686 #define tcg_out_insn_3404 tcg_out_insn_3402
687
688 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
689 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
690 {
691 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
692 | rn << 5 | rd);
693 }
694
695 /* This function is used for the Move (wide immediate) instruction group.
696 Note that SHIFT is a full shift count, not the 2 bit HW field. */
697 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
698 TCGReg rd, uint16_t half, unsigned shift)
699 {
700 tcg_debug_assert((shift & ~0x30) == 0);
701 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
702 }
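/*
 * E.g. a shift of 48 becomes hw = 3: the multiple-of-16 shift count is
 * divided by 16 and placed in bits [22:21] by the "shift << (21 - 4)" above.
 */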
703
704 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
705 TCGReg rd, int64_t disp)
706 {
707 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
708 }
709
710 /* This function is for 3.5.2 (Add/subtract shifted register), for
711 the rare occasion when we actually want to supply a shift amount. */
712 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
713 TCGType ext, TCGReg rd, TCGReg rn,
714 TCGReg rm, int imm6)
715 {
716 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
717 }
718
719 /* This function is for 3.5.2 (Add/subtract shifted register),
720 and 3.5.10 (Logical shifted register), for the vast majority of cases
721 when we don't want to apply a shift. Thus it can also be used for
722 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
723 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
724 TCGReg rd, TCGReg rn, TCGReg rm)
725 {
726 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
727 }
728
729 #define tcg_out_insn_3503 tcg_out_insn_3502
730 #define tcg_out_insn_3508 tcg_out_insn_3502
731 #define tcg_out_insn_3510 tcg_out_insn_3502
732
733 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
734 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
735 {
736 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
737 | tcg_cond_to_aarch64[c] << 12);
738 }
739
740 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
741 TCGReg rd, TCGReg rn)
742 {
743 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
744 }
745
746 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
747 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
748 {
749 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
750 }
751
752 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
753 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
754 {
755 /* Note that bit 11 set means general register input. Therefore
756 we can handle both register sets with one function. */
757 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
758 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
759 }
760
761 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
762 TCGReg rd, bool op, int cmode, uint8_t imm8)
763 {
764 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
765 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
766 }
767
768 static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
769 TCGReg rd, TCGReg rn, unsigned immhb)
770 {
771 tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
772 }
773
774 static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
775 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
776 {
777 tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
778 | (rn & 0x1f) << 5 | (rd & 0x1f));
779 }
780
781 static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
782 unsigned size, TCGReg rd, TCGReg rn)
783 {
784 tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
785 }
786
787 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
788 TCGReg rd, TCGReg rn, unsigned immhb)
789 {
790 tcg_out32(s, insn | q << 30 | immhb << 16
791 | (rn & 0x1f) << 5 | (rd & 0x1f));
792 }
793
794 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
795 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
796 {
797 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
798 | (rn & 0x1f) << 5 | (rd & 0x1f));
799 }
800
801 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
802 unsigned size, TCGReg rd, TCGReg rn)
803 {
804 tcg_out32(s, insn | q << 30 | (size << 22)
805 | (rn & 0x1f) << 5 | (rd & 0x1f));
806 }
807
808 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
809 TCGReg rd, TCGReg base, TCGType ext,
810 TCGReg regoff)
811 {
812 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
813 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
814 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
815 }
816
817 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
818 TCGReg rd, TCGReg rn, intptr_t offset)
819 {
820 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
821 }
822
823 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
824 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
825 {
826 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
827 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
828 | rn << 5 | (rd & 0x1f));
829 }
830
831 /* Register to register move using ORR (shifted register with no shift). */
832 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
833 {
834 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
835 }
836
837 /* Register to register move using ADDI (move to/from SP). */
838 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
839 {
840 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
841 }
842
843 /* This function is used for the Logical (immediate) instruction group.
844 The value of LIMM must satisfy IS_LIMM. See the comment above about
845 only supporting simplified logical immediates. */
846 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
847 TCGReg rd, TCGReg rn, uint64_t limm)
848 {
849 unsigned h, l, r, c;
850
851 tcg_debug_assert(is_limm(limm));
852
853 h = clz64(limm);
854 l = ctz64(limm);
855 if (l == 0) {
856 r = 0; /* form 0....01....1 */
857 c = ctz64(~limm) - 1;
858 if (h == 0) {
859 r = clz64(~limm); /* form 1..10..01..1 */
860 c += r;
861 }
862 } else {
863 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
864 c = r - h - 1;
865 }
866 if (ext == TCG_TYPE_I32) {
867 r &= 31;
868 c &= 31;
869 }
870
871 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
872 }
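/*
 * Worked example: limm = 0xff0 gives l = 4, h = 52, hence r = 60 and
 * c = 7, i.e. "8 consecutive ones, rotated right by 60", which
 * DecodeBitMasks expands back to 0xff0.
 */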
873
874 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
875 TCGReg rd, int64_t v64)
876 {
877 bool q = type == TCG_TYPE_V128;
878 int cmode, imm8, i;
879
880 /* Test all bytes equal first. */
881 if (vece == MO_8) {
882 imm8 = (uint8_t)v64;
883 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
884 return;
885 }
886
887 /*
888 * Test all bytes 0x00 or 0xff second. This can match cases that
889 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
890 */
891 for (i = imm8 = 0; i < 8; i++) {
892 uint8_t byte = v64 >> (i * 8);
893 if (byte == 0xff) {
894 imm8 |= 1 << i;
895 } else if (byte != 0) {
896 goto fail_bytes;
897 }
898 }
899 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
900 return;
901 fail_bytes:
902
903 /*
904 * Tests for various replications. For each element width, if we
905 * cannot find an expansion there's no point checking a larger
906 * width because we already know by replication it cannot match.
907 */
908 if (vece == MO_16) {
909 uint16_t v16 = v64;
910
911 if (is_shimm16(v16, &cmode, &imm8)) {
912 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
913 return;
914 }
915 if (is_shimm16(~v16, &cmode, &imm8)) {
916 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
917 return;
918 }
919
920 /*
921 * Otherwise, all remaining constants can be loaded in two insns:
922 * rd = v16 & 0xff, rd |= v16 & 0xff00.
923 */
924 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
925 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
926 return;
927 } else if (vece == MO_32) {
928 uint32_t v32 = v64;
929 uint32_t n32 = ~v32;
930
931 if (is_shimm32(v32, &cmode, &imm8) ||
932 is_soimm32(v32, &cmode, &imm8) ||
933 is_fimm32(v32, &cmode, &imm8)) {
934 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
935 return;
936 }
937 if (is_shimm32(n32, &cmode, &imm8) ||
938 is_soimm32(n32, &cmode, &imm8)) {
939 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
940 return;
941 }
942
943 /*
944 * Restrict the set of constants to those we can load with
945 * two instructions. Others we load from the pool.
946 */
947 i = is_shimm32_pair(v32, &cmode, &imm8);
948 if (i) {
949 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
950 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
951 return;
952 }
953 i = is_shimm32_pair(n32, &cmode, &imm8);
954 if (i) {
955 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
956 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
957 return;
958 }
959 } else if (is_fimm64(v64, &cmode, &imm8)) {
960 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
961 return;
962 }
963
964 /*
965 * As a last resort, load from the constant pool. Sadly there
966 * is no LD1R (literal), so store the full 16-byte vector.
967 */
968 if (type == TCG_TYPE_V128) {
969 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
970 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
971 } else {
972 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
973 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
974 }
975 }
976
977 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
978 TCGReg rd, TCGReg rs)
979 {
980 int is_q = type - TCG_TYPE_V64;
981 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
982 return true;
983 }
984
985 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
986 TCGReg r, TCGReg base, intptr_t offset)
987 {
988 TCGReg temp = TCG_REG_TMP;
989
990 if (offset < -0xffffff || offset > 0xffffff) {
991 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
992 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
993 base = temp;
994 } else {
995 AArch64Insn add_insn = I3401_ADDI;
996
997 if (offset < 0) {
998 add_insn = I3401_SUBI;
999 offset = -offset;
1000 }
1001 if (offset & 0xfff000) {
1002 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1003 base = temp;
1004 }
1005 if (offset & 0xfff) {
1006 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1007 base = temp;
1008 }
1009 }
1010 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1011 return true;
1012 }
1013
1014 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1015 tcg_target_long value)
1016 {
1017 tcg_target_long svalue = value;
1018 tcg_target_long ivalue = ~value;
1019 tcg_target_long t0, t1, t2;
1020 int s0, s1;
1021 AArch64Insn opc;
1022
1023 switch (type) {
1024 case TCG_TYPE_I32:
1025 case TCG_TYPE_I64:
1026 tcg_debug_assert(rd < 32);
1027 break;
1028 default:
1029 g_assert_not_reached();
1030 }
1031
1032 /* For 32-bit values, discard potential garbage in value. For 64-bit
1033 values within [2**31, 2**32-1], we can create smaller sequences by
1034 interpreting this as a negative 32-bit number, while ensuring that
1035 the high 32 bits are cleared by setting SF=0. */
1036 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1037 svalue = (int32_t)value;
1038 value = (uint32_t)value;
1039 ivalue = (uint32_t)ivalue;
1040 type = TCG_TYPE_I32;
1041 }
1042
1043 /* Speed things up by handling the common case of small positive
1044 and negative values specially. */
1045 if ((value & ~0xffffull) == 0) {
1046 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1047 return;
1048 } else if ((ivalue & ~0xffffull) == 0) {
1049 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1050 return;
1051 }
1052
1053 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
1054 use the sign-extended value. That lets us match rotated values such
1055 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1056 if (is_limm(svalue)) {
1057 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1058 return;
1059 }
1060
1061 /* Look for host pointer values within 4G of the PC. This happens
1062 often when loading pointers to QEMU's own data structures. */
1063 if (type == TCG_TYPE_I64) {
1064 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1065 tcg_target_long disp = value - src_rx;
1066 if (disp == sextract64(disp, 0, 21)) {
1067 tcg_out_insn(s, 3406, ADR, rd, disp);
1068 return;
1069 }
1070 disp = (value >> 12) - (src_rx >> 12);
1071 if (disp == sextract64(disp, 0, 21)) {
1072 tcg_out_insn(s, 3406, ADRP, rd, disp);
1073 if (value & 0xfff) {
1074 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1075 }
1076 return;
1077 }
1078 }
1079
1080 /* Would it take fewer insns to begin with MOVN? */
1081 if (ctpop64(value) >= 32) {
1082 t0 = ivalue;
1083 opc = I3405_MOVN;
1084 } else {
1085 t0 = value;
1086 opc = I3405_MOVZ;
1087 }
1088 s0 = ctz64(t0) & (63 & -16);
1089 t1 = t0 & ~(0xffffull << s0);
1090 s1 = ctz64(t1) & (63 & -16);
1091 t2 = t1 & ~(0xffffull << s1);
1092 if (t2 == 0) {
1093 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1094 if (t1 != 0) {
1095 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1096 }
1097 return;
1098 }
1099
1100 /* For more than 2 insns, dump it into the constant pool. */
1101 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1102 tcg_out_insn(s, 3305, LDR, 0, rd);
1103 }
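/*
 * Example of the MOVZ/MOVK path above: 0x12340000abcd has fewer than 32
 * set bits, so (assuming it is not within 4GiB of the code buffer, where
 * ADRP would win) it is built as MOVZ rd, #0xabcd followed by
 * MOVK rd, #0x1234, lsl #32 -- two insns, no constant pool needed.
 */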
1104
1105 /* Define something more legible for general use. */
1106 #define tcg_out_ldst_r tcg_out_insn_3310
1107
1108 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1109 TCGReg rn, intptr_t offset, int lgsize)
1110 {
1111 /* If the offset is naturally aligned and in range, then we can
1112 use the scaled uimm12 encoding */
1113 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1114 uintptr_t scaled_uimm = offset >> lgsize;
1115 if (scaled_uimm <= 0xfff) {
1116 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1117 return;
1118 }
1119 }
1120
1121 /* Small signed offsets can use the unscaled encoding. */
1122 if (offset >= -256 && offset < 256) {
1123 tcg_out_insn_3312(s, insn, rd, rn, offset);
1124 return;
1125 }
1126
1127 /* Worst-case scenario, move offset to temp register, use reg offset. */
1128 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1129 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1130 }
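/*
 * E.g. for an 8-byte LDR: offset 32760 (= 4095 * 8) still fits the scaled
 * uimm12 form, offset -8 falls back to the unscaled signed 9-bit form, and
 * anything else (say 0x123456) is materialized in TMP and uses the
 * register-offset form.
 */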
1131
1132 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1133 {
1134 if (ret == arg) {
1135 return true;
1136 }
1137 switch (type) {
1138 case TCG_TYPE_I32:
1139 case TCG_TYPE_I64:
1140 if (ret < 32 && arg < 32) {
1141 tcg_out_movr(s, type, ret, arg);
1142 break;
1143 } else if (ret < 32) {
1144 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1145 break;
1146 } else if (arg < 32) {
1147 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1148 break;
1149 }
1150 /* FALLTHRU */
1151
1152 case TCG_TYPE_V64:
1153 tcg_debug_assert(ret >= 32 && arg >= 32);
1154 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1155 break;
1156 case TCG_TYPE_V128:
1157 tcg_debug_assert(ret >= 32 && arg >= 32);
1158 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1159 break;
1160
1161 default:
1162 g_assert_not_reached();
1163 }
1164 return true;
1165 }
1166
1167 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1168 TCGReg base, intptr_t ofs)
1169 {
1170 AArch64Insn insn;
1171 int lgsz;
1172
1173 switch (type) {
1174 case TCG_TYPE_I32:
1175 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1176 lgsz = 2;
1177 break;
1178 case TCG_TYPE_I64:
1179 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1180 lgsz = 3;
1181 break;
1182 case TCG_TYPE_V64:
1183 insn = I3312_LDRVD;
1184 lgsz = 3;
1185 break;
1186 case TCG_TYPE_V128:
1187 insn = I3312_LDRVQ;
1188 lgsz = 4;
1189 break;
1190 default:
1191 g_assert_not_reached();
1192 }
1193 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1194 }
1195
1196 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1197 TCGReg base, intptr_t ofs)
1198 {
1199 AArch64Insn insn;
1200 int lgsz;
1201
1202 switch (type) {
1203 case TCG_TYPE_I32:
1204 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1205 lgsz = 2;
1206 break;
1207 case TCG_TYPE_I64:
1208 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1209 lgsz = 3;
1210 break;
1211 case TCG_TYPE_V64:
1212 insn = I3312_STRVD;
1213 lgsz = 3;
1214 break;
1215 case TCG_TYPE_V128:
1216 insn = I3312_STRVQ;
1217 lgsz = 4;
1218 break;
1219 default:
1220 g_assert_not_reached();
1221 }
1222 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1223 }
1224
1225 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1226 TCGReg base, intptr_t ofs)
1227 {
1228 if (type <= TCG_TYPE_I64 && val == 0) {
1229 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1230 return true;
1231 }
1232 return false;
1233 }
1234
1235 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1236 TCGReg rn, unsigned int a, unsigned int b)
1237 {
1238 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1239 }
1240
1241 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1242 TCGReg rn, unsigned int a, unsigned int b)
1243 {
1244 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1245 }
1246
1247 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1248 TCGReg rn, unsigned int a, unsigned int b)
1249 {
1250 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1251 }
1252
1253 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1254 TCGReg rn, TCGReg rm, unsigned int a)
1255 {
1256 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1257 }
1258
1259 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1260 TCGReg rd, TCGReg rn, unsigned int m)
1261 {
1262 int bits = ext ? 64 : 32;
1263 int max = bits - 1;
1264 tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1265 }
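/*
 * This is the standard LSL alias: e.g. a 64-bit shift left by 3 becomes
 * UBFM rd, rn, #61, #60 (immr = 64 - 3, imms = 63 - 3).
 */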
1266
1267 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1268 TCGReg rd, TCGReg rn, unsigned int m)
1269 {
1270 int max = ext ? 63 : 31;
1271 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1272 }
1273
1274 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1275 TCGReg rd, TCGReg rn, unsigned int m)
1276 {
1277 int max = ext ? 63 : 31;
1278 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1279 }
1280
1281 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1282 TCGReg rd, TCGReg rn, unsigned int m)
1283 {
1284 int max = ext ? 63 : 31;
1285 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1286 }
1287
1288 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1289 TCGReg rd, TCGReg rn, unsigned int m)
1290 {
1291 int max = ext ? 63 : 31;
1292 tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1293 }
1294
1295 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1296 TCGReg rn, unsigned lsb, unsigned width)
1297 {
1298 unsigned size = ext ? 64 : 32;
1299 unsigned a = (size - lsb) & (size - 1);
1300 unsigned b = width - 1;
1301 tcg_out_bfm(s, ext, rd, rn, a, b);
1302 }
1303
1304 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1305 tcg_target_long b, bool const_b)
1306 {
1307 if (const_b) {
1308 /* Using CMP or CMN aliases. */
1309 if (b >= 0) {
1310 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1311 } else {
1312 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1313 }
1314 } else {
1315 /* Using CMP alias SUBS wzr, Wn, Wm */
1316 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1317 }
1318 }
1319
1320 static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1321 {
1322 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1323 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1324 tcg_out_insn(s, 3206, B, offset);
1325 }
1326
1327 static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1328 {
1329 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1330 if (offset == sextract64(offset, 0, 26)) {
1331 tcg_out_insn(s, 3206, B, offset);
1332 } else {
1333 /* Choose X9 as a call-clobbered non-LR temporary. */
1334 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1335 tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1336 }
1337 }
1338
1339 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1340 {
1341 tcg_out_insn(s, 3207, BLR, reg);
1342 }
1343
1344 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
1345 {
1346 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1347 if (offset == sextract64(offset, 0, 26)) {
1348 tcg_out_insn(s, 3206, BL, offset);
1349 } else {
1350 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1351 tcg_out_callr(s, TCG_REG_TMP);
1352 }
1353 }
1354
1355 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
1356 uintptr_t jmp_rw, uintptr_t addr)
1357 {
1358 tcg_insn_unit i1, i2;
1359 TCGType rt = TCG_TYPE_I64;
1360 TCGReg rd = TCG_REG_TMP;
1361 uint64_t pair;
1362
1363 ptrdiff_t offset = addr - jmp_rx;
1364
1365 if (offset == sextract64(offset, 0, 26)) {
1366 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1367 i2 = NOP;
1368 } else {
1369 offset = (addr >> 12) - (jmp_rx >> 12);
1370
1371 /* patch ADRP */
1372 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1373 /* patch ADDI */
1374 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1375 }
1376 pair = (uint64_t)i2 << 32 | i1;
1377 qatomic_set((uint64_t *)jmp_rw, pair);
1378 flush_idcache_range(jmp_rx, jmp_rw, 8);
1379 }
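/*
 * The pair patched above corresponds to the two insns reserved by
 * INDEX_op_goto_tb below: either a direct B plus a NOP when the target is
 * within +/-128MiB, or ADRP+ADD loading the target into TMP for the
 * indirect branch through TMP that the goto_tb expansion emits after them.
 */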
1380
1381 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1382 {
1383 if (!l->has_value) {
1384 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1385 tcg_out_insn(s, 3206, B, 0);
1386 } else {
1387 tcg_out_goto(s, l->u.value_ptr);
1388 }
1389 }
1390
1391 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1392 TCGArg b, bool b_const, TCGLabel *l)
1393 {
1394 intptr_t offset;
1395 bool need_cmp;
1396
1397 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1398 need_cmp = false;
1399 } else {
1400 need_cmp = true;
1401 tcg_out_cmp(s, ext, a, b, b_const);
1402 }
1403
1404 if (!l->has_value) {
1405 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1406 offset = tcg_in32(s) >> 5;
1407 } else {
1408 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1409 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1410 }
1411
1412 if (need_cmp) {
1413 tcg_out_insn(s, 3202, B_C, c, offset);
1414 } else if (c == TCG_COND_EQ) {
1415 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1416 } else {
1417 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1418 }
1419 }
1420
1421 static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1422 TCGReg rd, TCGReg rn)
1423 {
1424 /* REV, REV16, REV32 */
1425 tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1426 }
1427
1428 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1429 TCGReg rd, TCGReg rn)
1430 {
1431 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1432 int bits = (8 << s_bits) - 1;
1433 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1434 }
1435
1436 static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1437 TCGReg rd, TCGReg rn)
1438 {
1439 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1440 int bits = (8 << s_bits) - 1;
1441 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1442 }
1443
1444 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1445 TCGReg rn, int64_t aimm)
1446 {
1447 if (aimm >= 0) {
1448 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1449 } else {
1450 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1451 }
1452 }
1453
1454 static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1455 TCGReg rh, TCGReg al, TCGReg ah,
1456 tcg_target_long bl, tcg_target_long bh,
1457 bool const_bl, bool const_bh, bool sub)
1458 {
1459 TCGReg orig_rl = rl;
1460 AArch64Insn insn;
1461
1462 if (rl == ah || (!const_bh && rl == bh)) {
1463 rl = TCG_REG_TMP;
1464 }
1465
1466 if (const_bl) {
1467 if (bl < 0) {
1468 bl = -bl;
1469 insn = sub ? I3401_ADDSI : I3401_SUBSI;
1470 } else {
1471 insn = sub ? I3401_SUBSI : I3401_ADDSI;
1472 }
1473
1474 if (unlikely(al == TCG_REG_XZR)) {
1475 /* ??? We want to allow al to be zero for the benefit of
1476 negation via subtraction. However, that leaves open the
1477 possibility of adding 0+const in the low part, and the
1478 immediate add instructions encode XSP not XZR. Don't try
1479 anything more elaborate here than loading another zero. */
1480 al = TCG_REG_TMP;
1481 tcg_out_movi(s, ext, al, 0);
1482 }
1483 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1484 } else {
1485 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1486 }
1487
1488 insn = I3503_ADC;
1489 if (const_bh) {
1490 /* Note that the only two constants we support are 0 and -1, and
1491 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1492 if ((bh != 0) ^ sub) {
1493 insn = I3503_SBC;
1494 }
1495 bh = TCG_REG_XZR;
1496 } else if (sub) {
1497 insn = I3503_SBC;
1498 }
1499 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1500
1501 tcg_out_mov(s, ext, orig_rl, rl);
1502 }
1503
1504 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1505 {
1506 static const uint32_t sync[] = {
1507 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1508 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1509 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1510 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1511 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1512 };
1513 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1514 }
1515
1516 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1517 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1518 {
1519 TCGReg a1 = a0;
1520 if (is_ctz) {
1521 a1 = TCG_REG_TMP;
1522 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1523 }
1524 if (const_b && b == (ext ? 64 : 32)) {
1525 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1526 } else {
1527 AArch64Insn sel = I3506_CSEL;
1528
1529 tcg_out_cmp(s, ext, a0, 0, 1);
1530 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1531
1532 if (const_b) {
1533 if (b == -1) {
1534 b = TCG_REG_XZR;
1535 sel = I3506_CSINV;
1536 } else if (b == 0) {
1537 b = TCG_REG_XZR;
1538 } else {
1539 tcg_out_movi(s, ext, d, b);
1540 b = d;
1541 }
1542 }
1543 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1544 }
1545 }
1546
1547 static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1548 {
1549 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1550 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1551 tcg_out_insn(s, 3406, ADR, rd, offset);
1552 }
1553
1554 #ifdef CONFIG_SOFTMMU
1555 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1556 * MemOpIdx oi, uintptr_t ra)
1557 */
1558 static void * const qemu_ld_helpers[MO_SIZE + 1] = {
1559 [MO_8] = helper_ret_ldub_mmu,
1560 #if HOST_BIG_ENDIAN
1561 [MO_16] = helper_be_lduw_mmu,
1562 [MO_32] = helper_be_ldul_mmu,
1563 [MO_64] = helper_be_ldq_mmu,
1564 #else
1565 [MO_16] = helper_le_lduw_mmu,
1566 [MO_32] = helper_le_ldul_mmu,
1567 [MO_64] = helper_le_ldq_mmu,
1568 #endif
1569 };
1570
1571 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1572 * uintxx_t val, MemOpIdx oi,
1573 * uintptr_t ra)
1574 */
1575 static void * const qemu_st_helpers[MO_SIZE + 1] = {
1576 [MO_8] = helper_ret_stb_mmu,
1577 #if HOST_BIG_ENDIAN
1578 [MO_16] = helper_be_stw_mmu,
1579 [MO_32] = helper_be_stl_mmu,
1580 [MO_64] = helper_be_stq_mmu,
1581 #else
1582 [MO_16] = helper_le_stw_mmu,
1583 [MO_32] = helper_le_stl_mmu,
1584 [MO_64] = helper_le_stq_mmu,
1585 #endif
1586 };
1587
1588 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1589 {
1590 MemOpIdx oi = lb->oi;
1591 MemOp opc = get_memop(oi);
1592 MemOp size = opc & MO_SIZE;
1593
1594 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1595 return false;
1596 }
1597
1598 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1599 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1600 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1601 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1602 tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]);
1603 if (opc & MO_SIGN) {
1604 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1605 } else {
1606 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1607 }
1608
1609 tcg_out_goto(s, lb->raddr);
1610 return true;
1611 }
1612
1613 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1614 {
1615 MemOpIdx oi = lb->oi;
1616 MemOp opc = get_memop(oi);
1617 MemOp size = opc & MO_SIZE;
1618
1619 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1620 return false;
1621 }
1622
1623 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1624 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1625 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1626 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1627 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1628 tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]);
1629 tcg_out_goto(s, lb->raddr);
1630 return true;
1631 }
1632
1633 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1634 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1635 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1636 {
1637 TCGLabelQemuLdst *label = new_ldst_label(s);
1638
1639 label->is_ld = is_ld;
1640 label->oi = oi;
1641 label->type = ext;
1642 label->datalo_reg = data_reg;
1643 label->addrlo_reg = addr_reg;
1644 label->raddr = tcg_splitwx_to_rx(raddr);
1645 label->label_ptr[0] = label_ptr;
1646 }
1647
1648 /* We expect to use a 7-bit scaled negative offset from ENV. */
1649 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1650 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1651
1652 /* These offsets are built into the LDP below. */
1653 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1654 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1655
1656 /* Load and compare a TLB entry, emitting the conditional jump to the
1657 slow path for the failure case, which will be patched later when finalizing
1658 the slow path. Generated code returns the host addend in X1,
1659 clobbers X0,X2,X3,TMP. */
1660 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1661 tcg_insn_unit **label_ptr, int mem_index,
1662 bool is_read)
1663 {
1664 unsigned a_bits = get_alignment_bits(opc);
1665 unsigned s_bits = opc & MO_SIZE;
1666 unsigned a_mask = (1u << a_bits) - 1;
1667 unsigned s_mask = (1u << s_bits) - 1;
1668 TCGReg x3;
1669 TCGType mask_type;
1670 uint64_t compare_mask;
1671
1672 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1673 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1674
1675 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
1676 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1677 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1678
1679 /* Extract the TLB index from the address into X0. */
1680 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1681 TCG_REG_X0, TCG_REG_X0, addr_reg,
1682 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1683
1684 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
1685 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1686
1687 /* Load the tlb comparator into X0, and the fast path addend into X1. */
1688 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1689 ? offsetof(CPUTLBEntry, addr_read)
1690 : offsetof(CPUTLBEntry, addr_write));
1691 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1692 offsetof(CPUTLBEntry, addend));
1693
1694 /* For aligned accesses, we check the first byte and include the alignment
1695 bits within the address. For unaligned access, we check that we don't
1696 cross pages using the address of the last byte of the access. */
1697 if (a_bits >= s_bits) {
1698 x3 = addr_reg;
1699 } else {
1700 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1701 TCG_REG_X3, addr_reg, s_mask - a_mask);
1702 x3 = TCG_REG_X3;
1703 }
1704 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1705
1706 /* Store the page mask part of the address into X3. */
1707 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1708 TCG_REG_X3, x3, compare_mask);
1709
1710 /* Perform the address comparison. */
1711 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1712
1713 /* If not equal, we jump to the slow path. */
1714 *label_ptr = s->code_ptr;
1715 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1716 }
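/*
 * Roughly, the sequence emitted above is:
 *   ldp  x0, x1, [env, #tlb_ofs]       ; mask and table
 *   and  x0, x0, addr, lsr #shift      ; index into the table
 *   add  x1, x1, x0                    ; &CPUTLBEntry
 *   ldr  x0, [x1, #addr_read/write]    ; comparator
 *   ldr  x1, [x1, #addend]             ; host addend
 *   and  x3, addr(+align), #page_mask  ; page+alignment bits of the address
 *   cmp  x0, x3
 *   b.ne slow_path
 */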
1717
1718 #else
1719 static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
1720 unsigned a_bits)
1721 {
1722 unsigned a_mask = (1 << a_bits) - 1;
1723 TCGLabelQemuLdst *label = new_ldst_label(s);
1724
1725 label->is_ld = is_ld;
1726 label->addrlo_reg = addr_reg;
1727
1728 /* tst addr, #mask */
1729 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1730
1731 label->label_ptr[0] = s->code_ptr;
1732
1733 /* b.ne slow_path */
1734 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1735
1736 label->raddr = tcg_splitwx_to_rx(s->code_ptr);
1737 }
1738
1739 static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1740 {
1741 if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1742 return false;
1743 }
1744
1745 tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
1746 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1747
1748 /* "Tail call" to the helper, with the return address back inline. */
1749 tcg_out_adr(s, TCG_REG_LR, l->raddr);
1750 tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
1751 : helper_unaligned_st));
1752 return true;
1753 }
1754
1755 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1756 {
1757 return tcg_out_fail_alignment(s, l);
1758 }
1759
1760 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1761 {
1762 return tcg_out_fail_alignment(s, l);
1763 }
1764 #endif /* CONFIG_SOFTMMU */
1765
1766 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1767 TCGReg data_r, TCGReg addr_r,
1768 TCGType otype, TCGReg off_r)
1769 {
1770 switch (memop & MO_SSIZE) {
1771 case MO_UB:
1772 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1773 break;
1774 case MO_SB:
1775 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1776 data_r, addr_r, otype, off_r);
1777 break;
1778 case MO_UW:
1779 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1780 break;
1781 case MO_SW:
1782 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1783 data_r, addr_r, otype, off_r);
1784 break;
1785 case MO_UL:
1786 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1787 break;
1788 case MO_SL:
1789 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1790 break;
1791 case MO_UQ:
1792 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1793 break;
1794 default:
1795 tcg_abort();
1796 }
1797 }
1798
1799 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1800 TCGReg data_r, TCGReg addr_r,
1801 TCGType otype, TCGReg off_r)
1802 {
1803 switch (memop & MO_SIZE) {
1804 case MO_8:
1805 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1806 break;
1807 case MO_16:
1808 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1809 break;
1810 case MO_32:
1811 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1812 break;
1813 case MO_64:
1814 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1815 break;
1816 default:
1817 tcg_abort();
1818 }
1819 }
1820
1821 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1822 MemOpIdx oi, TCGType ext)
1823 {
1824 MemOp memop = get_memop(oi);
1825 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1826
1827 /* Byte swapping is left to middle-end expansion. */
1828 tcg_debug_assert((memop & MO_BSWAP) == 0);
1829
1830 #ifdef CONFIG_SOFTMMU
1831 unsigned mem_index = get_mmuidx(oi);
1832 tcg_insn_unit *label_ptr;
1833
1834 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1835 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1836 TCG_REG_X1, otype, addr_reg);
1837 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1838 s->code_ptr, label_ptr);
1839 #else /* !CONFIG_SOFTMMU */
1840 unsigned a_bits = get_alignment_bits(memop);
1841 if (a_bits) {
1842 tcg_out_test_alignment(s, true, addr_reg, a_bits);
1843 }
1844 if (USE_GUEST_BASE) {
1845 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1846 TCG_REG_GUEST_BASE, otype, addr_reg);
1847 } else {
1848 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1849 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1850 }
1851 #endif /* CONFIG_SOFTMMU */
1852 }
1853
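/* Emit a guest store; the structure mirrors tcg_out_qemu_ld above. */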
1854 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1855 MemOpIdx oi)
1856 {
1857 MemOp memop = get_memop(oi);
1858 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1859
1860 /* Byte swapping is left to middle-end expansion. */
1861 tcg_debug_assert((memop & MO_BSWAP) == 0);
1862
1863 #ifdef CONFIG_SOFTMMU
1864 unsigned mem_index = get_mmuidx(oi);
1865 tcg_insn_unit *label_ptr;
1866
1867 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1868 tcg_out_qemu_st_direct(s, memop, data_reg,
1869 TCG_REG_X1, otype, addr_reg);
1870 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1871 data_reg, addr_reg, s->code_ptr, label_ptr);
1872 #else /* !CONFIG_SOFTMMU */
1873 unsigned a_bits = get_alignment_bits(memop);
1874 if (a_bits) {
1875 tcg_out_test_alignment(s, false, addr_reg, a_bits);
1876 }
1877 if (USE_GUEST_BASE) {
1878 tcg_out_qemu_st_direct(s, memop, data_reg,
1879 TCG_REG_GUEST_BASE, otype, addr_reg);
1880 } else {
1881 tcg_out_qemu_st_direct(s, memop, data_reg,
1882 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1883 }
1884 #endif /* CONFIG_SOFTMMU */
1885 }
1886
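/* Address of the common TB epilogue; set in tcg_target_qemu_prologue below
   and used as the branch target for INDEX_op_exit_tb. */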
1887 static const tcg_insn_unit *tb_ret_addr;
1888
1889 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1890 const TCGArg args[TCG_MAX_OP_ARGS],
1891 const int const_args[TCG_MAX_OP_ARGS])
1892 {
1893 /* 99% of the time, we can signal the use of extension registers
1894 by looking to see if the opcode handles 64-bit data. */
1895 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1896
1897 /* Hoist the loads of the most common arguments. */
1898 TCGArg a0 = args[0];
1899 TCGArg a1 = args[1];
1900 TCGArg a2 = args[2];
1901 int c2 = const_args[2];
1902
1903 /* Some operands are defined with "rZ" constraint, a register or
1904 the zero register. These need not actually test args[I] == 0. */
1905 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1906
1907 switch (opc) {
1908 case INDEX_op_exit_tb:
1909 /* Reuse the zeroing that exists for goto_ptr. */
1910 if (a0 == 0) {
1911 tcg_out_goto_long(s, tcg_code_gen_epilogue);
1912 } else {
1913 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1914 tcg_out_goto_long(s, tb_ret_addr);
1915 }
1916 break;
1917
1918 case INDEX_op_goto_tb:
1919 if (s->tb_jmp_insn_offset != NULL) {
1920 /* TCG_TARGET_HAS_direct_jump */
1921 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1922 write can be used to patch the target address. */
1923 if ((uintptr_t)s->code_ptr & 7) {
1924 tcg_out32(s, NOP);
1925 }
1926 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1927 /* actual branch destination will be patched by
1928 tb_target_set_jmp_target later. */
1929 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1930 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1931 } else {
1932 /* !TCG_TARGET_HAS_direct_jump */
1933 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1934 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1935 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1936 }
1937 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1938 set_jmp_reset_offset(s, a0);
1939 break;
1940
1941 case INDEX_op_goto_ptr:
1942 tcg_out_insn(s, 3207, BR, a0);
1943 break;
1944
1945 case INDEX_op_br:
1946 tcg_out_goto_label(s, arg_label(a0));
1947 break;
1948
1949 case INDEX_op_ld8u_i32:
1950 case INDEX_op_ld8u_i64:
1951 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1952 break;
1953 case INDEX_op_ld8s_i32:
1954 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1955 break;
1956 case INDEX_op_ld8s_i64:
1957 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1958 break;
1959 case INDEX_op_ld16u_i32:
1960 case INDEX_op_ld16u_i64:
1961 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1962 break;
1963 case INDEX_op_ld16s_i32:
1964 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1965 break;
1966 case INDEX_op_ld16s_i64:
1967 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1968 break;
1969 case INDEX_op_ld_i32:
1970 case INDEX_op_ld32u_i64:
1971 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1972 break;
1973 case INDEX_op_ld32s_i64:
1974 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1975 break;
1976 case INDEX_op_ld_i64:
1977 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1978 break;
1979
1980 case INDEX_op_st8_i32:
1981 case INDEX_op_st8_i64:
1982 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1983 break;
1984 case INDEX_op_st16_i32:
1985 case INDEX_op_st16_i64:
1986 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1987 break;
1988 case INDEX_op_st_i32:
1989 case INDEX_op_st32_i64:
1990 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1991 break;
1992 case INDEX_op_st_i64:
1993 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1994 break;
1995
1996 case INDEX_op_add_i32:
1997 a2 = (int32_t)a2;
1998 /* FALLTHRU */
1999 case INDEX_op_add_i64:
2000 if (c2) {
2001 tcg_out_addsubi(s, ext, a0, a1, a2);
2002 } else {
2003 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2004 }
2005 break;
2006
2007 case INDEX_op_sub_i32:
2008 a2 = (int32_t)a2;
2009 /* FALLTHRU */
2010 case INDEX_op_sub_i64:
2011 if (c2) {
2012 tcg_out_addsubi(s, ext, a0, a1, -a2);
2013 } else {
2014 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2015 }
2016 break;
2017
2018 case INDEX_op_neg_i64:
2019 case INDEX_op_neg_i32:
2020 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2021 break;
2022
2023 case INDEX_op_and_i32:
2024 a2 = (int32_t)a2;
2025 /* FALLTHRU */
2026 case INDEX_op_and_i64:
2027 if (c2) {
2028 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2029 } else {
2030 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2031 }
2032 break;
2033
2034 case INDEX_op_andc_i32:
2035 a2 = (int32_t)a2;
2036 /* FALLTHRU */
2037 case INDEX_op_andc_i64:
2038 if (c2) {
2039 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2040 } else {
2041 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2042 }
2043 break;
2044
2045 case INDEX_op_or_i32:
2046 a2 = (int32_t)a2;
2047 /* FALLTHRU */
2048 case INDEX_op_or_i64:
2049 if (c2) {
2050 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2051 } else {
2052 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2053 }
2054 break;
2055
2056 case INDEX_op_orc_i32:
2057 a2 = (int32_t)a2;
2058 /* FALLTHRU */
2059 case INDEX_op_orc_i64:
2060 if (c2) {
2061 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2062 } else {
2063 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2064 }
2065 break;
2066
2067 case INDEX_op_xor_i32:
2068 a2 = (int32_t)a2;
2069 /* FALLTHRU */
2070 case INDEX_op_xor_i64:
2071 if (c2) {
2072 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2073 } else {
2074 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2075 }
2076 break;
2077
2078 case INDEX_op_eqv_i32:
2079 a2 = (int32_t)a2;
2080 /* FALLTHRU */
2081 case INDEX_op_eqv_i64:
2082 if (c2) {
2083 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2084 } else {
2085 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2086 }
2087 break;
2088
2089 case INDEX_op_not_i64:
2090 case INDEX_op_not_i32:
2091 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2092 break;
2093
2094 case INDEX_op_mul_i64:
2095 case INDEX_op_mul_i32:
2096 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2097 break;
2098
2099 case INDEX_op_div_i64:
2100 case INDEX_op_div_i32:
2101 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2102 break;
2103 case INDEX_op_divu_i64:
2104 case INDEX_op_divu_i32:
2105 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2106 break;
2107
2108 case INDEX_op_rem_i64:
2109 case INDEX_op_rem_i32:
2110 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2111 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2112 break;
2113 case INDEX_op_remu_i64:
2114 case INDEX_op_remu_i32:
2115 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2116 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2117 break;
2118
2119 case INDEX_op_shl_i64:
2120 case INDEX_op_shl_i32:
2121 if (c2) {
2122 tcg_out_shl(s, ext, a0, a1, a2);
2123 } else {
2124 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2125 }
2126 break;
2127
2128 case INDEX_op_shr_i64:
2129 case INDEX_op_shr_i32:
2130 if (c2) {
2131 tcg_out_shr(s, ext, a0, a1, a2);
2132 } else {
2133 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2134 }
2135 break;
2136
2137 case INDEX_op_sar_i64:
2138 case INDEX_op_sar_i32:
2139 if (c2) {
2140 tcg_out_sar(s, ext, a0, a1, a2);
2141 } else {
2142 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2143 }
2144 break;
2145
2146 case INDEX_op_rotr_i64:
2147 case INDEX_op_rotr_i32:
2148 if (c2) {
2149 tcg_out_rotr(s, ext, a0, a1, a2);
2150 } else {
2151 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2152 }
2153 break;
2154
2155 case INDEX_op_rotl_i64:
2156 case INDEX_op_rotl_i32:
2157 if (c2) {
2158 tcg_out_rotl(s, ext, a0, a1, a2);
2159 } else {
2160 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2161 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2162 }
2163 break;
2164
2165 case INDEX_op_clz_i64:
2166 case INDEX_op_clz_i32:
2167 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2168 break;
2169 case INDEX_op_ctz_i64:
2170 case INDEX_op_ctz_i32:
2171 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2172 break;
2173
2174 case INDEX_op_brcond_i32:
2175 a1 = (int32_t)a1;
2176 /* FALLTHRU */
2177 case INDEX_op_brcond_i64:
2178 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2179 break;
2180
2181 case INDEX_op_setcond_i32:
2182 a2 = (int32_t)a2;
2183 /* FALLTHRU */
2184 case INDEX_op_setcond_i64:
2185 tcg_out_cmp(s, ext, a1, a2, c2);
2186 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
2187 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2188 TCG_REG_XZR, tcg_invert_cond(args[3]));
2189 break;
2190
2191 case INDEX_op_movcond_i32:
2192 a2 = (int32_t)a2;
2193 /* FALLTHRU */
2194 case INDEX_op_movcond_i64:
2195 tcg_out_cmp(s, ext, a1, a2, c2);
2196 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2197 break;
2198
2199 case INDEX_op_qemu_ld_i32:
2200 case INDEX_op_qemu_ld_i64:
2201 tcg_out_qemu_ld(s, a0, a1, a2, ext);
2202 break;
2203 case INDEX_op_qemu_st_i32:
2204 case INDEX_op_qemu_st_i64:
2205 tcg_out_qemu_st(s, REG0(0), a1, a2);
2206 break;
2207
2208 case INDEX_op_bswap64_i64:
2209 tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2210 break;
2211 case INDEX_op_bswap32_i64:
2212 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2213 if (a2 & TCG_BSWAP_OS) {
2214 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a0);
2215 }
2216 break;
2217 case INDEX_op_bswap32_i32:
2218 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2219 break;
2220 case INDEX_op_bswap16_i64:
2221 case INDEX_op_bswap16_i32:
2222 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2223 if (a2 & TCG_BSWAP_OS) {
2224 /* Output must be sign-extended. */
2225 tcg_out_sxt(s, ext, MO_16, a0, a0);
2226 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2227 /* Output must be zero-extended, but input isn't. */
2228 tcg_out_uxt(s, MO_16, a0, a0);
2229 }
2230 break;
2231
2232 case INDEX_op_ext8s_i64:
2233 case INDEX_op_ext8s_i32:
2234 tcg_out_sxt(s, ext, MO_8, a0, a1);
2235 break;
2236 case INDEX_op_ext16s_i64:
2237 case INDEX_op_ext16s_i32:
2238 tcg_out_sxt(s, ext, MO_16, a0, a1);
2239 break;
2240 case INDEX_op_ext_i32_i64:
2241 case INDEX_op_ext32s_i64:
2242 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2243 break;
2244 case INDEX_op_ext8u_i64:
2245 case INDEX_op_ext8u_i32:
2246 tcg_out_uxt(s, MO_8, a0, a1);
2247 break;
2248 case INDEX_op_ext16u_i64:
2249 case INDEX_op_ext16u_i32:
2250 tcg_out_uxt(s, MO_16, a0, a1);
2251 break;
2252 case INDEX_op_extu_i32_i64:
2253 case INDEX_op_ext32u_i64:
2254 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2255 break;
2256
2257 case INDEX_op_deposit_i64:
2258 case INDEX_op_deposit_i32:
2259 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2260 break;
2261
2262 case INDEX_op_extract_i64:
2263 case INDEX_op_extract_i32:
2264 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2265 break;
2266
2267 case INDEX_op_sextract_i64:
2268 case INDEX_op_sextract_i32:
2269 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2270 break;
2271
2272 case INDEX_op_extract2_i64:
2273 case INDEX_op_extract2_i32:
2274 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2275 break;
2276
2277 case INDEX_op_add2_i32:
2278 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2279 (int32_t)args[4], args[5], const_args[4],
2280 const_args[5], false);
2281 break;
2282 case INDEX_op_add2_i64:
2283 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2284 args[5], const_args[4], const_args[5], false);
2285 break;
2286 case INDEX_op_sub2_i32:
2287 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2288 (int32_t)args[4], args[5], const_args[4],
2289 const_args[5], true);
2290 break;
2291 case INDEX_op_sub2_i64:
2292 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2293 args[5], const_args[4], const_args[5], true);
2294 break;
2295
2296 case INDEX_op_muluh_i64:
2297 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2298 break;
2299 case INDEX_op_mulsh_i64:
2300 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2301 break;
2302
2303 case INDEX_op_mb:
2304 tcg_out_mb(s, a0);
2305 break;
2306
2307 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2308 case INDEX_op_mov_i64:
2309 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2310 default:
2311 g_assert_not_reached();
2312 }
2313
2314 #undef REG0
2315 }
2316
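/*
 * Emit one AdvSIMD operation.  VECL selects between V64 and V128 (the Q
 * bit), VECE gives the element size; V64 operations on 64-bit elements are
 * emitted with the scalar encodings instead of the vector ones.
 */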
2317 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2318 unsigned vecl, unsigned vece,
2319 const TCGArg args[TCG_MAX_OP_ARGS],
2320 const int const_args[TCG_MAX_OP_ARGS])
2321 {
2322 static const AArch64Insn cmp_vec_insn[16] = {
2323 [TCG_COND_EQ] = I3616_CMEQ,
2324 [TCG_COND_GT] = I3616_CMGT,
2325 [TCG_COND_GE] = I3616_CMGE,
2326 [TCG_COND_GTU] = I3616_CMHI,
2327 [TCG_COND_GEU] = I3616_CMHS,
2328 };
2329 static const AArch64Insn cmp_scalar_insn[16] = {
2330 [TCG_COND_EQ] = I3611_CMEQ,
2331 [TCG_COND_GT] = I3611_CMGT,
2332 [TCG_COND_GE] = I3611_CMGE,
2333 [TCG_COND_GTU] = I3611_CMHI,
2334 [TCG_COND_GEU] = I3611_CMHS,
2335 };
2336 static const AArch64Insn cmp0_vec_insn[16] = {
2337 [TCG_COND_EQ] = I3617_CMEQ0,
2338 [TCG_COND_GT] = I3617_CMGT0,
2339 [TCG_COND_GE] = I3617_CMGE0,
2340 [TCG_COND_LT] = I3617_CMLT0,
2341 [TCG_COND_LE] = I3617_CMLE0,
2342 };
2343 static const AArch64Insn cmp0_scalar_insn[16] = {
2344 [TCG_COND_EQ] = I3612_CMEQ0,
2345 [TCG_COND_GT] = I3612_CMGT0,
2346 [TCG_COND_GE] = I3612_CMGE0,
2347 [TCG_COND_LT] = I3612_CMLT0,
2348 [TCG_COND_LE] = I3612_CMLE0,
2349 };
2350
2351 TCGType type = vecl + TCG_TYPE_V64;
2352 unsigned is_q = vecl;
2353 bool is_scalar = !is_q && vece == MO_64;
2354 TCGArg a0, a1, a2, a3;
2355 int cmode, imm8;
2356
2357 a0 = args[0];
2358 a1 = args[1];
2359 a2 = args[2];
2360
2361 switch (opc) {
2362 case INDEX_op_ld_vec:
2363 tcg_out_ld(s, type, a0, a1, a2);
2364 break;
2365 case INDEX_op_st_vec:
2366 tcg_out_st(s, type, a0, a1, a2);
2367 break;
2368 case INDEX_op_dupm_vec:
2369 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2370 break;
2371 case INDEX_op_add_vec:
2372 if (is_scalar) {
2373 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2374 } else {
2375 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2376 }
2377 break;
2378 case INDEX_op_sub_vec:
2379 if (is_scalar) {
2380 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2381 } else {
2382 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2383 }
2384 break;
2385 case INDEX_op_mul_vec:
2386 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2387 break;
2388 case INDEX_op_neg_vec:
2389 if (is_scalar) {
2390 tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2391 } else {
2392 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2393 }
2394 break;
2395 case INDEX_op_abs_vec:
2396 if (is_scalar) {
2397 tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2398 } else {
2399 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2400 }
2401 break;
2402 case INDEX_op_and_vec:
2403 if (const_args[2]) {
2404 is_shimm1632(~a2, &cmode, &imm8);
2405 if (a0 == a1) {
2406 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2407 return;
2408 }
2409 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2410 a2 = a0;
2411 }
2412 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2413 break;
2414 case INDEX_op_or_vec:
2415 if (const_args[2]) {
2416 is_shimm1632(a2, &cmode, &imm8);
2417 if (a0 == a1) {
2418 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2419 return;
2420 }
2421 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2422 a2 = a0;
2423 }
2424 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2425 break;
2426 case INDEX_op_andc_vec:
2427 if (const_args[2]) {
2428 is_shimm1632(a2, &cmode, &imm8);
2429 if (a0 == a1) {
2430 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2431 return;
2432 }
2433 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2434 a2 = a0;
2435 }
2436 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2437 break;
2438 case INDEX_op_orc_vec:
2439 if (const_args[2]) {
2440 is_shimm1632(~a2, &cmode, &imm8);
2441 if (a0 == a1) {
2442 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2443 return;
2444 }
2445 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2446 a2 = a0;
2447 }
2448 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2449 break;
2450 case INDEX_op_xor_vec:
2451 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2452 break;
2453 case INDEX_op_ssadd_vec:
2454 if (is_scalar) {
2455 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2456 } else {
2457 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2458 }
2459 break;
2460 case INDEX_op_sssub_vec:
2461 if (is_scalar) {
2462 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2463 } else {
2464 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2465 }
2466 break;
2467 case INDEX_op_usadd_vec:
2468 if (is_scalar) {
2469 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2470 } else {
2471 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2472 }
2473 break;
2474 case INDEX_op_ussub_vec:
2475 if (is_scalar) {
2476 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2477 } else {
2478 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2479 }
2480 break;
2481 case INDEX_op_smax_vec:
2482 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2483 break;
2484 case INDEX_op_smin_vec:
2485 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2486 break;
2487 case INDEX_op_umax_vec:
2488 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2489 break;
2490 case INDEX_op_umin_vec:
2491 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2492 break;
2493 case INDEX_op_not_vec:
2494 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2495 break;
2496 case INDEX_op_shli_vec:
2497 if (is_scalar) {
2498 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2499 } else {
2500 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2501 }
2502 break;
2503 case INDEX_op_shri_vec:
2504 if (is_scalar) {
2505 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2506 } else {
2507 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2508 }
2509 break;
2510 case INDEX_op_sari_vec:
2511 if (is_scalar) {
2512 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2513 } else {
2514 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2515 }
2516 break;
2517 case INDEX_op_aa64_sli_vec:
2518 if (is_scalar) {
2519 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2520 } else {
2521 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2522 }
2523 break;
2524 case INDEX_op_shlv_vec:
2525 if (is_scalar) {
2526 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2527 } else {
2528 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2529 }
2530 break;
2531 case INDEX_op_aa64_sshl_vec:
2532 if (is_scalar) {
2533 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2534 } else {
2535 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2536 }
2537 break;
2538 case INDEX_op_cmp_vec:
2539 {
2540 TCGCond cond = args[3];
2541 AArch64Insn insn;
2542
2543 if (cond == TCG_COND_NE) {
2544 if (const_args[2]) {
2545 if (is_scalar) {
2546 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2547 } else {
2548 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2549 }
2550 } else {
2551 if (is_scalar) {
2552 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2553 } else {
2554 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2555 }
2556 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2557 }
2558 } else {
2559 if (const_args[2]) {
2560 if (is_scalar) {
2561 insn = cmp0_scalar_insn[cond];
2562 if (insn) {
2563 tcg_out_insn_3612(s, insn, vece, a0, a1);
2564 break;
2565 }
2566 } else {
2567 insn = cmp0_vec_insn[cond];
2568 if (insn) {
2569 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2570 break;
2571 }
2572 }
2573 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2574 a2 = TCG_VEC_TMP;
2575 }
2576 if (is_scalar) {
2577 insn = cmp_scalar_insn[cond];
2578 if (insn == 0) {
2579 TCGArg t;
2580 t = a1, a1 = a2, a2 = t;
2581 cond = tcg_swap_cond(cond);
2582 insn = cmp_scalar_insn[cond];
2583 tcg_debug_assert(insn != 0);
2584 }
2585 tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2586 } else {
2587 insn = cmp_vec_insn[cond];
2588 if (insn == 0) {
2589 TCGArg t;
2590 t = a1, a1 = a2, a2 = t;
2591 cond = tcg_swap_cond(cond);
2592 insn = cmp_vec_insn[cond];
2593 tcg_debug_assert(insn != 0);
2594 }
2595 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2596 }
2597 }
2598 }
2599 break;
2600
2601 case INDEX_op_bitsel_vec:
2602 a3 = args[3];
2603 if (a0 == a3) {
2604 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2605 } else if (a0 == a2) {
2606 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2607 } else {
2608 if (a0 != a1) {
2609 tcg_out_mov(s, type, a0, a1);
2610 }
2611 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2612 }
2613 break;
2614
2615 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
2616 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
2617 default:
2618 g_assert_not_reached();
2619 }
2620 }
2621
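/*
 * Report vector op support to the middle end: 1 if the op is emitted
 * directly, -1 if it can be expanded by tcg_expand_vec_op below,
 * 0 if unsupported.
 */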
2622 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2623 {
2624 switch (opc) {
2625 case INDEX_op_add_vec:
2626 case INDEX_op_sub_vec:
2627 case INDEX_op_and_vec:
2628 case INDEX_op_or_vec:
2629 case INDEX_op_xor_vec:
2630 case INDEX_op_andc_vec:
2631 case INDEX_op_orc_vec:
2632 case INDEX_op_neg_vec:
2633 case INDEX_op_abs_vec:
2634 case INDEX_op_not_vec:
2635 case INDEX_op_cmp_vec:
2636 case INDEX_op_shli_vec:
2637 case INDEX_op_shri_vec:
2638 case INDEX_op_sari_vec:
2639 case INDEX_op_ssadd_vec:
2640 case INDEX_op_sssub_vec:
2641 case INDEX_op_usadd_vec:
2642 case INDEX_op_ussub_vec:
2643 case INDEX_op_shlv_vec:
2644 case INDEX_op_bitsel_vec:
2645 return 1;
2646 case INDEX_op_rotli_vec:
2647 case INDEX_op_shrv_vec:
2648 case INDEX_op_sarv_vec:
2649 case INDEX_op_rotlv_vec:
2650 case INDEX_op_rotrv_vec:
2651 return -1;
2652 case INDEX_op_mul_vec:
2653 case INDEX_op_smax_vec:
2654 case INDEX_op_smin_vec:
2655 case INDEX_op_umax_vec:
2656 case INDEX_op_umin_vec:
2657 return vece < MO_64;
2658
2659 default:
2660 return 0;
2661 }
2662 }
2663
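/*
 * Expand the vector ops reported as -1 above.  Variable right shifts become
 * left shifts by the negated count, and rotates are built from pairs of
 * shifts (or a shift plus an SLI bit-insert for the immediate form).
 */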
2664 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2665 TCGArg a0, ...)
2666 {
2667 va_list va;
2668 TCGv_vec v0, v1, v2, t1, t2, c1;
2669 TCGArg a2;
2670
2671 va_start(va, a0);
2672 v0 = temp_tcgv_vec(arg_temp(a0));
2673 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2674 a2 = va_arg(va, TCGArg);
2675 va_end(va);
2676
2677 switch (opc) {
2678 case INDEX_op_rotli_vec:
2679 t1 = tcg_temp_new_vec(type);
2680 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2681 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2682 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2683 tcg_temp_free_vec(t1);
2684 break;
2685
2686 case INDEX_op_shrv_vec:
2687 case INDEX_op_sarv_vec:
2688 /* Right shifts are negative left shifts for AArch64. */
2689 v2 = temp_tcgv_vec(arg_temp(a2));
2690 t1 = tcg_temp_new_vec(type);
2691 tcg_gen_neg_vec(vece, t1, v2);
2692 opc = (opc == INDEX_op_shrv_vec
2693 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2694 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2695 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2696 tcg_temp_free_vec(t1);
2697 break;
2698
2699 case INDEX_op_rotlv_vec:
2700 v2 = temp_tcgv_vec(arg_temp(a2));
2701 t1 = tcg_temp_new_vec(type);
2702 c1 = tcg_constant_vec(type, vece, 8 << vece);
2703 tcg_gen_sub_vec(vece, t1, v2, c1);
2704 /* Right shifts are negative left shifts for AArch64. */
2705 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2706 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2707 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2708 tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2709 tcg_gen_or_vec(vece, v0, v0, t1);
2710 tcg_temp_free_vec(t1);
2711 break;
2712
2713 case INDEX_op_rotrv_vec:
2714 v2 = temp_tcgv_vec(arg_temp(a2));
2715 t1 = tcg_temp_new_vec(type);
2716 t2 = tcg_temp_new_vec(type);
2717 c1 = tcg_constant_vec(type, vece, 8 << vece);
2718 tcg_gen_neg_vec(vece, t1, v2);
2719 tcg_gen_sub_vec(vece, t2, c1, v2);
2720 /* Right shifts are negative left shifts for AArch64. */
2721 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2722 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2723 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2724 tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2725 tcg_gen_or_vec(vece, v0, t1, t2);
2726 tcg_temp_free_vec(t1);
2727 tcg_temp_free_vec(t2);
2728 break;
2729
2730 default:
2731 g_assert_not_reached();
2732 }
2733 }
2734
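/* Return the operand constraint set for each opcode. */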
2735 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2736 {
2737 switch (op) {
2738 case INDEX_op_goto_ptr:
2739 return C_O0_I1(r);
2740
2741 case INDEX_op_ld8u_i32:
2742 case INDEX_op_ld8s_i32:
2743 case INDEX_op_ld16u_i32:
2744 case INDEX_op_ld16s_i32:
2745 case INDEX_op_ld_i32:
2746 case INDEX_op_ld8u_i64:
2747 case INDEX_op_ld8s_i64:
2748 case INDEX_op_ld16u_i64:
2749 case INDEX_op_ld16s_i64:
2750 case INDEX_op_ld32u_i64:
2751 case INDEX_op_ld32s_i64:
2752 case INDEX_op_ld_i64:
2753 case INDEX_op_neg_i32:
2754 case INDEX_op_neg_i64:
2755 case INDEX_op_not_i32:
2756 case INDEX_op_not_i64:
2757 case INDEX_op_bswap16_i32:
2758 case INDEX_op_bswap32_i32:
2759 case INDEX_op_bswap16_i64:
2760 case INDEX_op_bswap32_i64:
2761 case INDEX_op_bswap64_i64:
2762 case INDEX_op_ext8s_i32:
2763 case INDEX_op_ext16s_i32:
2764 case INDEX_op_ext8u_i32:
2765 case INDEX_op_ext16u_i32:
2766 case INDEX_op_ext8s_i64:
2767 case INDEX_op_ext16s_i64:
2768 case INDEX_op_ext32s_i64:
2769 case INDEX_op_ext8u_i64:
2770 case INDEX_op_ext16u_i64:
2771 case INDEX_op_ext32u_i64:
2772 case INDEX_op_ext_i32_i64:
2773 case INDEX_op_extu_i32_i64:
2774 case INDEX_op_extract_i32:
2775 case INDEX_op_extract_i64:
2776 case INDEX_op_sextract_i32:
2777 case INDEX_op_sextract_i64:
2778 return C_O1_I1(r, r);
2779
2780 case INDEX_op_st8_i32:
2781 case INDEX_op_st16_i32:
2782 case INDEX_op_st_i32:
2783 case INDEX_op_st8_i64:
2784 case INDEX_op_st16_i64:
2785 case INDEX_op_st32_i64:
2786 case INDEX_op_st_i64:
2787 return C_O0_I2(rZ, r);
2788
2789 case INDEX_op_add_i32:
2790 case INDEX_op_add_i64:
2791 case INDEX_op_sub_i32:
2792 case INDEX_op_sub_i64:
2793 case INDEX_op_setcond_i32:
2794 case INDEX_op_setcond_i64:
2795 return C_O1_I2(r, r, rA);
2796
2797 case INDEX_op_mul_i32:
2798 case INDEX_op_mul_i64:
2799 case INDEX_op_div_i32:
2800 case INDEX_op_div_i64:
2801 case INDEX_op_divu_i32:
2802 case INDEX_op_divu_i64:
2803 case INDEX_op_rem_i32:
2804 case INDEX_op_rem_i64:
2805 case INDEX_op_remu_i32:
2806 case INDEX_op_remu_i64:
2807 case INDEX_op_muluh_i64:
2808 case INDEX_op_mulsh_i64:
2809 return C_O1_I2(r, r, r);
2810
2811 case INDEX_op_and_i32:
2812 case INDEX_op_and_i64:
2813 case INDEX_op_or_i32:
2814 case INDEX_op_or_i64:
2815 case INDEX_op_xor_i32:
2816 case INDEX_op_xor_i64:
2817 case INDEX_op_andc_i32:
2818 case INDEX_op_andc_i64:
2819 case INDEX_op_orc_i32:
2820 case INDEX_op_orc_i64:
2821 case INDEX_op_eqv_i32:
2822 case INDEX_op_eqv_i64:
2823 return C_O1_I2(r, r, rL);
2824
2825 case INDEX_op_shl_i32:
2826 case INDEX_op_shr_i32:
2827 case INDEX_op_sar_i32:
2828 case INDEX_op_rotl_i32:
2829 case INDEX_op_rotr_i32:
2830 case INDEX_op_shl_i64:
2831 case INDEX_op_shr_i64:
2832 case INDEX_op_sar_i64:
2833 case INDEX_op_rotl_i64:
2834 case INDEX_op_rotr_i64:
2835 return C_O1_I2(r, r, ri);
2836
2837 case INDEX_op_clz_i32:
2838 case INDEX_op_ctz_i32:
2839 case INDEX_op_clz_i64:
2840 case INDEX_op_ctz_i64:
2841 return C_O1_I2(r, r, rAL);
2842
2843 case INDEX_op_brcond_i32:
2844 case INDEX_op_brcond_i64:
2845 return C_O0_I2(r, rA);
2846
2847 case INDEX_op_movcond_i32:
2848 case INDEX_op_movcond_i64:
2849 return C_O1_I4(r, r, rA, rZ, rZ);
2850
2851 case INDEX_op_qemu_ld_i32:
2852 case INDEX_op_qemu_ld_i64:
2853 return C_O1_I1(r, l);
2854 case INDEX_op_qemu_st_i32:
2855 case INDEX_op_qemu_st_i64:
2856 return C_O0_I2(lZ, l);
2857
2858 case INDEX_op_deposit_i32:
2859 case INDEX_op_deposit_i64:
2860 return C_O1_I2(r, 0, rZ);
2861
2862 case INDEX_op_extract2_i32:
2863 case INDEX_op_extract2_i64:
2864 return C_O1_I2(r, rZ, rZ);
2865
2866 case INDEX_op_add2_i32:
2867 case INDEX_op_add2_i64:
2868 case INDEX_op_sub2_i32:
2869 case INDEX_op_sub2_i64:
2870 return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2871
2872 case INDEX_op_add_vec:
2873 case INDEX_op_sub_vec:
2874 case INDEX_op_mul_vec:
2875 case INDEX_op_xor_vec:
2876 case INDEX_op_ssadd_vec:
2877 case INDEX_op_sssub_vec:
2878 case INDEX_op_usadd_vec:
2879 case INDEX_op_ussub_vec:
2880 case INDEX_op_smax_vec:
2881 case INDEX_op_smin_vec:
2882 case INDEX_op_umax_vec:
2883 case INDEX_op_umin_vec:
2884 case INDEX_op_shlv_vec:
2885 case INDEX_op_shrv_vec:
2886 case INDEX_op_sarv_vec:
2887 case INDEX_op_aa64_sshl_vec:
2888 return C_O1_I2(w, w, w);
2889 case INDEX_op_not_vec:
2890 case INDEX_op_neg_vec:
2891 case INDEX_op_abs_vec:
2892 case INDEX_op_shli_vec:
2893 case INDEX_op_shri_vec:
2894 case INDEX_op_sari_vec:
2895 return C_O1_I1(w, w);
2896 case INDEX_op_ld_vec:
2897 case INDEX_op_dupm_vec:
2898 return C_O1_I1(w, r);
2899 case INDEX_op_st_vec:
2900 return C_O0_I2(w, r);
2901 case INDEX_op_dup_vec:
2902 return C_O1_I1(w, wr);
2903 case INDEX_op_or_vec:
2904 case INDEX_op_andc_vec:
2905 return C_O1_I2(w, w, wO);
2906 case INDEX_op_and_vec:
2907 case INDEX_op_orc_vec:
2908 return C_O1_I2(w, w, wN);
2909 case INDEX_op_cmp_vec:
2910 return C_O1_I2(w, w, wZ);
2911 case INDEX_op_bitsel_vec:
2912 return C_O1_I3(w, w, w, w);
2913 case INDEX_op_aa64_sli_vec:
2914 return C_O1_I2(w, 0, w);
2915
2916 default:
2917 g_assert_not_reached();
2918 }
2919 }
2920
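/*
 * Describe the register file to the allocator: the registers usable for each
 * type, those clobbered across calls (everything except the AAPCS64
 * callee-saved registers), and those reserved for fixed purposes.
 */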
2921 static void tcg_target_init(TCGContext *s)
2922 {
2923 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2924 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2925 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2926 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2927
2928 tcg_target_call_clobber_regs = -1ull;
2929 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2930 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2931 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2932 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2933 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2934 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2935 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2936 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2937 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2938 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2939 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2940 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2941 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2942 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2943 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2944 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2945 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2946 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2947 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2948
2949 s->reserved_regs = 0;
2950 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2951 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2952 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2953 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2954 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2955 }
2956
2957 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2958 #define PUSH_SIZE ((30 - 19 + 1) * 8)
2959
2960 #define FRAME_SIZE \
2961 ((PUSH_SIZE \
2962 + TCG_STATIC_CALL_ARGS_SIZE \
2963 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2964 + TCG_TARGET_STACK_ALIGN - 1) \
2965 & ~(TCG_TARGET_STACK_ALIGN - 1))
2966
2967 /* We're expecting a 2-byte uleb128 encoded value. */
2968 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2969
2970 /* We're expecting to use a single ADDI insn. */
2971 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2972
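/*
 * Entry prologue for translated code: push FP/LR (allocating the whole save
 * area), store the callee-saved pairs x19..x28, reserve the static call-args
 * and temp-buffer area, load AREG0 from the first C argument, then jump to
 * the translation whose address is in the second.  The epilogue that follows
 * serves both the goto_ptr return path (tcg_code_gen_epilogue) and exit_tb
 * (tb_ret_addr).
 */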
2973 static void tcg_target_qemu_prologue(TCGContext *s)
2974 {
2975 TCGReg r;
2976
2977 /* Push (FP, LR) and allocate space for all saved registers. */
2978 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2979 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2980
2981 /* Set up frame pointer for canonical unwinding. */
2982 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2983
2984 /* Store callee-preserved regs x19..x28. */
2985 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2986 int ofs = (r - TCG_REG_X19 + 2) * 8;
2987 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2988 }
2989
2990 /* Make stack space for TCG locals. */
2991 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2992 FRAME_SIZE - PUSH_SIZE);
2993
2994 /* Inform TCG about how to find TCG locals with register, offset, size. */
2995 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2996 CPU_TEMP_BUF_NLONGS * sizeof(long));
2997
2998 #if !defined(CONFIG_SOFTMMU)
2999 if (USE_GUEST_BASE) {
3000 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3001 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3002 }
3003 #endif
3004
3005 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3006 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3007
3008 /*
3009 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3010 * and fall through to the rest of the epilogue.
3011 */
3012 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3013 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3014
3015 /* TB epilogue */
3016 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3017
3018 /* Remove TCG locals stack space. */
3019 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3020 FRAME_SIZE - PUSH_SIZE);
3021
3022 /* Restore registers x19..x28. */
3023 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3024 int ofs = (r - TCG_REG_X19 + 2) * 8;
3025 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3026 }
3027
3028 /* Pop (FP, LR), restore SP to previous frame. */
3029 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3030 TCG_REG_SP, PUSH_SIZE, 0, 1);
3031 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3032 }
3033
3034 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3035 {
3036 int i;
3037 for (i = 0; i < count; ++i) {
3038 p[i] = NOP;
3039 }
3040 }
3041
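/*
 * Minimal DWARF CFI for the frame laid out above, handed to
 * tcg_register_jit_int so debuggers can unwind through generated code.
 */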
3042 typedef struct {
3043 DebugFrameHeader h;
3044 uint8_t fde_def_cfa[4];
3045 uint8_t fde_reg_ofs[24];
3046 } DebugFrame;
3047
3048 #define ELF_HOST_MACHINE EM_AARCH64
3049
3050 static const DebugFrame debug_frame = {
3051 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3052 .h.cie.id = -1,
3053 .h.cie.version = 1,
3054 .h.cie.code_align = 1,
3055 .h.cie.data_align = 0x78, /* sleb128 -8 */
3056 .h.cie.return_column = TCG_REG_LR,
3057
3058 /* Total FDE size does not include the "len" member. */
3059 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3060
3061 .fde_def_cfa = {
3062 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
3063 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
3064 (FRAME_SIZE >> 7)
3065 },
3066 .fde_reg_ofs = {
3067 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
3068 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
3069 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
3070 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
3071 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
3072 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
3073 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
3074 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
3075 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
3076 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
3077 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
3078 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
3079 }
3080 };
3081
3082 void tcg_register_jit(const void *buf, size_t buf_size)
3083 {
3084 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3085 }