2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "../tcg-ldst.c.inc"
14 #include "../tcg-pool.c.inc"
15 #include "qemu/bitops.h"
17 /* We're going to re-use TCGType in setting of the SF bit, which controls
18 the size of the operation performed. If we know the values match, it
19 makes things much cleaner. */
20 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
#ifdef CONFIG_DEBUG_TCG
/* Register names for debug dumps.  Note that x29 and x31 are shown by
   their ABI aliases "fp" and "sp".  The vector row previously showed
   "fp" for V29 — a copy-paste from the general-register row above;
   V29 is an ordinary vector register with no such alias, so name it
   "v29".  */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */
36 static const int tcg_target_reg_alloc_order[] = {
37 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39 TCG_REG_X28, /* we will reserve this for guest_base if configured */
41 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47 /* X16 reserved as temporary */
48 /* X17 reserved as temporary */
49 /* X18 reserved by system */
50 /* X19 reserved for AREG0 */
51 /* X29 reserved as fp */
52 /* X30 reserved as temporary */
54 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
55 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
56 /* V8 - V15 are call-saved, and skipped. */
57 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
58 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
59 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
60 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
63 static const int tcg_target_call_iarg_regs[8] = {
64 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
65 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
68 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
70 tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
71 tcg_debug_assert(slot >= 0 && slot <= 1);
72 return TCG_REG_X0 + slot;
/* Scratch registers reserved for the backend's own use.  */
#define TCG_REG_TMP0 TCG_REG_X16
#define TCG_REG_TMP1 TCG_REG_X17
#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31

#ifndef CONFIG_SOFTMMU
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif
84 static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
86 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
87 ptrdiff_t offset = target - src_rx;
89 if (offset == sextract64(offset, 0, 26)) {
90 /* read instruction, mask away previous PC_REL26 parameter contents,
91 set the proper offset, then write back the instruction. */
92 *src_rw = deposit32(*src_rw, 0, 26, offset);
98 static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
100 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
101 ptrdiff_t offset = target - src_rx;
103 if (offset == sextract64(offset, 0, 19)) {
104 *src_rw = deposit32(*src_rw, 5, 19, offset);
110 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
111 intptr_t value, intptr_t addend)
113 tcg_debug_assert(addend == 0);
115 case R_AARCH64_JUMP26:
116 case R_AARCH64_CALL26:
117 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
118 case R_AARCH64_CONDBR19:
119 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
121 g_assert_not_reached();
/* Constant-operand constraint flags, beyond the generic TCG_CT_CONST.  */
#define TCG_CT_CONST_AIMM 0x100     /* valid arithmetic immediate */
#define TCG_CT_CONST_LIMM 0x200     /* valid logical immediate */
#define TCG_CT_CONST_ZERO 0x400     /* exactly zero */
#define TCG_CT_CONST_MONE 0x800     /* exactly minus one */
#define TCG_CT_CONST_ORRI 0x1000    /* valid vector ORR immediate */
#define TCG_CT_CONST_ANDI 0x2000    /* valid vector AND immediate */

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull
/* Match a constant valid for addition (12-bit, optionally shifted by 12).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    /* Adding the low set bit turns a contiguous run of ones into a
       single bit; the result is then a power of two exactly when the
       original run was contiguous.  */
    val += val & -val;
    return (val & (val - 1)) == 0;
}
/* Return true if v16 is a valid 16-bit shifted immediate,
   filling in the MOVI cmode and imm8 fields.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;               /* imm8, LSL #0 */
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;               /* imm8, LSL #8 */
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
/* Return true if v32 is a valid 32-bit shifted immediate,
   filling in the MOVI cmode and imm8 fields.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;               /* imm8, LSL #0 */
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;               /* imm8, LSL #8 */
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;               /* imm8, LSL #16 */
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;               /* imm8, LSL #24 */
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}
/* Return true if v32 is a valid 32-bit shifting-ones immediate,
   filling in the MOVI cmode and imm8 fields.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;               /* imm8, MSL #8 */
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;               /* imm8, MSL #16 */
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
/* Return true if v32 is a valid float32 immediate (FMOV-encodable),
   filling in the MOVI cmode and imm8 fields.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}
/* Return true if v64 is a valid float64 immediate (FMOV-encodable),
   filling in the MOVI cmode and imm8 fields.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}
/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    /* If the low half replicates into the high half, treat as 16-bit.  */
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}
275 static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
277 if (ct & TCG_CT_CONST) {
280 if (type == TCG_TYPE_I32) {
283 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
286 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
289 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
292 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
296 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
299 case TCG_CT_CONST_ANDI:
302 case TCG_CT_CONST_ORRI:
303 if (val == deposit64(val, 32, 32, val)) {
305 return is_shimm1632(val, &cmode, &imm8);
309 /* Both bits should not be set for the same insn. */
310 g_assert_not_reached();
/* A64 condition codes, as encoded in the cond field (Arm ARM C1.2.4).  */
enum aarch64_cond_code {
    COND_EQ = 0x0,          /* Equal */
    COND_NE = 0x1,          /* Not equal */
    COND_CS = 0x2,          /* Unsigned greater or equal */
    COND_HS = COND_CS,      /* ALIAS greater or equal */
    COND_CC = 0x3,          /* Unsigned less than */
    COND_LO = COND_CC,      /* ALIAS Lower */
    COND_MI = 0x4,          /* Negative */
    COND_PL = 0x5,          /* Zero or greater */
    COND_VS = 0x6,          /* Overflow */
    COND_VC = 0x7,          /* No overflow */
    COND_HI = 0x8,          /* Unsigned greater than */
    COND_LS = 0x9,          /* Unsigned less or equal */
    COND_GE = 0xa,          /* Signed greater or equal */
    COND_LT = 0xb,          /* Signed less than */
    COND_GT = 0xc,          /* Signed greater than */
    COND_LE = 0xd,          /* Signed less or equal */
    COND_AL = 0xe,          /* Always */
    COND_NV = 0xf,          /* behaves like COND_AL here */
};
337 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
338 [TCG_COND_EQ] = COND_EQ,
339 [TCG_COND_NE] = COND_NE,
340 [TCG_COND_LT] = COND_LT,
341 [TCG_COND_GE] = COND_GE,
342 [TCG_COND_LE] = COND_LE,
343 [TCG_COND_GT] = COND_GT,
345 [TCG_COND_LTU] = COND_LO,
346 [TCG_COND_GTU] = COND_HI,
347 [TCG_COND_GEU] = COND_HS,
348 [TCG_COND_LEU] = COND_LS,
/* Load/store "opc" field values for the I3312 encodings.  */
typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;
358 /* We encode the format of the insn into the beginning of the name, so that
359 we can have the preprocessor help "typecheck" the insn vs the output
360 function. Arm didn't provide us with nice names for the formats, so we
361 use the section number of the architecture reference manual in which the
362 instruction group is described. */
364 /* Compare and branch (immediate). */
365 I3201_CBZ = 0x34000000,
366 I3201_CBNZ = 0x35000000,
368 /* Conditional branch (immediate). */
369 I3202_B_C = 0x54000000,
371 /* Unconditional branch (immediate). */
372 I3206_B = 0x14000000,
373 I3206_BL = 0x94000000,
375 /* Unconditional branch (register). */
376 I3207_BR = 0xd61f0000,
377 I3207_BLR = 0xd63f0000,
378 I3207_RET = 0xd65f0000,
380 /* AdvSIMD load/store single structure. */
381 I3303_LD1R = 0x0d40c000,
383 /* Load literal for loading the address at pc-relative offset */
384 I3305_LDR = 0x58000000,
385 I3305_LDR_v64 = 0x5c000000,
386 I3305_LDR_v128 = 0x9c000000,
388 /* Load/store exclusive. */
389 I3306_LDXP = 0xc8600000,
390 I3306_STXP = 0xc8200000,
392 /* Load/store register. Described here as 3.3.12, but the helper
393 that emits them can transform to 3.3.10 or 3.3.13. */
394 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
395 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
396 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
397 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
399 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
400 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
401 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
402 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
404 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
405 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
407 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
408 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
409 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
411 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
412 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
414 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
415 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
417 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
418 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
420 I3312_TO_I3310 = 0x00200800,
421 I3312_TO_I3313 = 0x01000000,
423 /* Load/store register pair instructions. */
424 I3314_LDP = 0x28400000,
425 I3314_STP = 0x28000000,
427 /* Add/subtract immediate instructions. */
428 I3401_ADDI = 0x11000000,
429 I3401_ADDSI = 0x31000000,
430 I3401_SUBI = 0x51000000,
431 I3401_SUBSI = 0x71000000,
433 /* Bitfield instructions. */
434 I3402_BFM = 0x33000000,
435 I3402_SBFM = 0x13000000,
436 I3402_UBFM = 0x53000000,
438 /* Extract instruction. */
439 I3403_EXTR = 0x13800000,
441 /* Logical immediate instructions. */
442 I3404_ANDI = 0x12000000,
443 I3404_ORRI = 0x32000000,
444 I3404_EORI = 0x52000000,
445 I3404_ANDSI = 0x72000000,
447 /* Move wide immediate instructions. */
448 I3405_MOVN = 0x12800000,
449 I3405_MOVZ = 0x52800000,
450 I3405_MOVK = 0x72800000,
452 /* PC relative addressing instructions. */
453 I3406_ADR = 0x10000000,
454 I3406_ADRP = 0x90000000,
456 /* Add/subtract extended register instructions. */
457 I3501_ADD = 0x0b200000,
459 /* Add/subtract shifted register instructions (without a shift). */
460 I3502_ADD = 0x0b000000,
461 I3502_ADDS = 0x2b000000,
462 I3502_SUB = 0x4b000000,
463 I3502_SUBS = 0x6b000000,
465 /* Add/subtract shifted register instructions (with a shift). */
466 I3502S_ADD_LSL = I3502_ADD,
468 /* Add/subtract with carry instructions. */
469 I3503_ADC = 0x1a000000,
470 I3503_SBC = 0x5a000000,
472 /* Conditional select instructions. */
473 I3506_CSEL = 0x1a800000,
474 I3506_CSINC = 0x1a800400,
475 I3506_CSINV = 0x5a800000,
476 I3506_CSNEG = 0x5a800400,
478 /* Data-processing (1 source) instructions. */
479 I3507_CLZ = 0x5ac01000,
480 I3507_RBIT = 0x5ac00000,
481 I3507_REV = 0x5ac00000, /* + size << 10 */
483 /* Data-processing (2 source) instructions. */
484 I3508_LSLV = 0x1ac02000,
485 I3508_LSRV = 0x1ac02400,
486 I3508_ASRV = 0x1ac02800,
487 I3508_RORV = 0x1ac02c00,
488 I3508_SMULH = 0x9b407c00,
489 I3508_UMULH = 0x9bc07c00,
490 I3508_UDIV = 0x1ac00800,
491 I3508_SDIV = 0x1ac00c00,
493 /* Data-processing (3 source) instructions. */
494 I3509_MADD = 0x1b000000,
495 I3509_MSUB = 0x1b008000,
497 /* Logical shifted register instructions (without a shift). */
498 I3510_AND = 0x0a000000,
499 I3510_BIC = 0x0a200000,
500 I3510_ORR = 0x2a000000,
501 I3510_ORN = 0x2a200000,
502 I3510_EOR = 0x4a000000,
503 I3510_EON = 0x4a200000,
504 I3510_ANDS = 0x6a000000,
506 /* Logical shifted register instructions (with a shift). */
507 I3502S_AND_LSR = I3510_AND | (1 << 22),
510 I3605_DUP = 0x0e000400,
511 I3605_INS = 0x4e001c00,
512 I3605_UMOV = 0x0e003c00,
514 /* AdvSIMD modified immediate */
515 I3606_MOVI = 0x0f000400,
516 I3606_MVNI = 0x2f000400,
517 I3606_BIC = 0x2f001400,
518 I3606_ORR = 0x0f001400,
520 /* AdvSIMD scalar shift by immediate */
521 I3609_SSHR = 0x5f000400,
522 I3609_SSRA = 0x5f001400,
523 I3609_SHL = 0x5f005400,
524 I3609_USHR = 0x7f000400,
525 I3609_USRA = 0x7f001400,
526 I3609_SLI = 0x7f005400,
528 /* AdvSIMD scalar three same */
529 I3611_SQADD = 0x5e200c00,
530 I3611_SQSUB = 0x5e202c00,
531 I3611_CMGT = 0x5e203400,
532 I3611_CMGE = 0x5e203c00,
533 I3611_SSHL = 0x5e204400,
534 I3611_ADD = 0x5e208400,
535 I3611_CMTST = 0x5e208c00,
536 I3611_UQADD = 0x7e200c00,
537 I3611_UQSUB = 0x7e202c00,
538 I3611_CMHI = 0x7e203400,
539 I3611_CMHS = 0x7e203c00,
540 I3611_USHL = 0x7e204400,
541 I3611_SUB = 0x7e208400,
542 I3611_CMEQ = 0x7e208c00,
544 /* AdvSIMD scalar two-reg misc */
545 I3612_CMGT0 = 0x5e208800,
546 I3612_CMEQ0 = 0x5e209800,
547 I3612_CMLT0 = 0x5e20a800,
548 I3612_ABS = 0x5e20b800,
549 I3612_CMGE0 = 0x7e208800,
550 I3612_CMLE0 = 0x7e209800,
551 I3612_NEG = 0x7e20b800,
553 /* AdvSIMD shift by immediate */
554 I3614_SSHR = 0x0f000400,
555 I3614_SSRA = 0x0f001400,
556 I3614_SHL = 0x0f005400,
557 I3614_SLI = 0x2f005400,
558 I3614_USHR = 0x2f000400,
559 I3614_USRA = 0x2f001400,
561 /* AdvSIMD three same. */
562 I3616_ADD = 0x0e208400,
563 I3616_AND = 0x0e201c00,
564 I3616_BIC = 0x0e601c00,
565 I3616_BIF = 0x2ee01c00,
566 I3616_BIT = 0x2ea01c00,
567 I3616_BSL = 0x2e601c00,
568 I3616_EOR = 0x2e201c00,
569 I3616_MUL = 0x0e209c00,
570 I3616_ORR = 0x0ea01c00,
571 I3616_ORN = 0x0ee01c00,
572 I3616_SUB = 0x2e208400,
573 I3616_CMGT = 0x0e203400,
574 I3616_CMGE = 0x0e203c00,
575 I3616_CMTST = 0x0e208c00,
576 I3616_CMHI = 0x2e203400,
577 I3616_CMHS = 0x2e203c00,
578 I3616_CMEQ = 0x2e208c00,
579 I3616_SMAX = 0x0e206400,
580 I3616_SMIN = 0x0e206c00,
581 I3616_SSHL = 0x0e204400,
582 I3616_SQADD = 0x0e200c00,
583 I3616_SQSUB = 0x0e202c00,
584 I3616_UMAX = 0x2e206400,
585 I3616_UMIN = 0x2e206c00,
586 I3616_UQADD = 0x2e200c00,
587 I3616_UQSUB = 0x2e202c00,
588 I3616_USHL = 0x2e204400,
590 /* AdvSIMD two-reg misc. */
591 I3617_CMGT0 = 0x0e208800,
592 I3617_CMEQ0 = 0x0e209800,
593 I3617_CMLT0 = 0x0e20a800,
594 I3617_CMGE0 = 0x2e208800,
595 I3617_CMLE0 = 0x2e209800,
596 I3617_NOT = 0x2e205800,
597 I3617_ABS = 0x0e20b800,
598 I3617_NEG = 0x2e20b800,
600 /* System instructions. */
602 DMB_ISH = 0xd50338bf,
611 static inline uint32_t tcg_in32(TCGContext *s)
613 uint32_t v = *(uint32_t *)s->code_ptr;
/* Emit an opcode with "type-checking" of the format: pastes the format
   number into both the emitter name and the insn constant, so mismatched
   pairs fail to compile.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
621 static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
622 TCGReg rt, TCGReg rn, unsigned size)
624 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
627 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
628 int imm19, TCGReg rt)
630 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
633 static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
634 TCGReg rt, TCGReg rt2, TCGReg rn)
636 tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
639 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
640 TCGReg rt, int imm19)
642 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
645 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
646 TCGCond c, int imm19)
648 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
651 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
653 tcg_out32(s, insn | (imm26 & 0x03ffffff));
656 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
658 tcg_out32(s, insn | rn << 5);
661 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
662 TCGReg r1, TCGReg r2, TCGReg rn,
663 tcg_target_long ofs, bool pre, bool w)
665 insn |= 1u << 31; /* ext */
669 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
670 insn |= (ofs & (0x7f << 3)) << (15 - 3);
672 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
675 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
676 TCGReg rd, TCGReg rn, uint64_t aimm)
679 tcg_debug_assert((aimm & 0xfff) == 0);
681 tcg_debug_assert(aimm <= 0xfff);
682 aimm |= 1 << 12; /* apply LSL 12 */
684 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
687 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
688 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
689 that feed the DecodeBitMasks pseudo function. */
690 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
691 TCGReg rd, TCGReg rn, int n, int immr, int imms)
693 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
697 #define tcg_out_insn_3404 tcg_out_insn_3402
699 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
700 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
702 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
706 /* This function is used for the Move (wide immediate) instruction group.
707 Note that SHIFT is a full shift count, not the 2 bit HW field. */
708 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
709 TCGReg rd, uint16_t half, unsigned shift)
711 tcg_debug_assert((shift & ~0x30) == 0);
712 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
715 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
716 TCGReg rd, int64_t disp)
718 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
721 static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
722 TCGType sf, TCGReg rd, TCGReg rn,
723 TCGReg rm, int opt, int imm3)
725 tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
726 imm3 << 10 | rn << 5 | rd);
729 /* This function is for both 3.5.2 (Add/Subtract shifted register), for
730 the rare occasion when we actually want to supply a shift amount. */
731 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
732 TCGType ext, TCGReg rd, TCGReg rn,
735 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
738 /* This function is for 3.5.2 (Add/subtract shifted register),
739 and 3.5.10 (Logical shifted register), for the vast majorty of cases
740 when we don't want to apply a shift. Thus it can also be used for
741 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
742 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
743 TCGReg rd, TCGReg rn, TCGReg rm)
745 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
748 #define tcg_out_insn_3503 tcg_out_insn_3502
749 #define tcg_out_insn_3508 tcg_out_insn_3502
750 #define tcg_out_insn_3510 tcg_out_insn_3502
752 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
753 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
755 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
756 | tcg_cond_to_aarch64[c] << 12);
759 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
760 TCGReg rd, TCGReg rn)
762 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
765 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
766 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
768 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
771 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
772 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
774 /* Note that bit 11 set means general register input. Therefore
775 we can handle both register sets with one function. */
776 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
777 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
780 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
781 TCGReg rd, bool op, int cmode, uint8_t imm8)
783 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
784 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
787 static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
788 TCGReg rd, TCGReg rn, unsigned immhb)
790 tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
793 static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
794 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
796 tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
797 | (rn & 0x1f) << 5 | (rd & 0x1f));
800 static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
801 unsigned size, TCGReg rd, TCGReg rn)
803 tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
806 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
807 TCGReg rd, TCGReg rn, unsigned immhb)
809 tcg_out32(s, insn | q << 30 | immhb << 16
810 | (rn & 0x1f) << 5 | (rd & 0x1f));
813 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
814 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
816 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
817 | (rn & 0x1f) << 5 | (rd & 0x1f));
820 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
821 unsigned size, TCGReg rd, TCGReg rn)
823 tcg_out32(s, insn | q << 30 | (size << 22)
824 | (rn & 0x1f) << 5 | (rd & 0x1f));
827 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
828 TCGReg rd, TCGReg base, TCGType ext,
831 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
832 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
833 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
836 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
837 TCGReg rd, TCGReg rn, intptr_t offset)
839 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
842 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
843 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
845 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
846 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
847 | rn << 5 | (rd & 0x1f));
850 static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
853 * While BTI insns are nops on hosts without FEAT_BTI,
854 * there is no point in emitting them in that case either.
856 if (cpuinfo & CPUINFO_BTI) {
861 /* Register to register move using ORR (shifted register with no shift). */
862 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
864 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
867 /* Register to register move using ADDI (move to/from SP). */
868 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
870 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
873 /* This function is used for the Logical (immediate) instruction group.
874 The value of LIMM must satisfy IS_LIMM. See the comment above about
875 only supporting simplified logical immediates. */
876 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
877 TCGReg rd, TCGReg rn, uint64_t limm)
881 tcg_debug_assert(is_limm(limm));
886 r = 0; /* form 0....01....1 */
887 c = ctz64(~limm) - 1;
889 r = clz64(~limm); /* form 1..10..01..1 */
893 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
896 if (ext == TCG_TYPE_I32) {
901 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
904 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
905 TCGReg rd, int64_t v64)
907 bool q = type == TCG_TYPE_V128;
910 /* Test all bytes equal first. */
913 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
918 * Test all bytes 0x00 or 0xff second. This can match cases that
919 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
921 for (i = imm8 = 0; i < 8; i++) {
922 uint8_t byte = v64 >> (i * 8);
925 } else if (byte != 0) {
929 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
934 * Tests for various replications. For each element width, if we
935 * cannot find an expansion there's no point checking a larger
936 * width because we already know by replication it cannot match.
941 if (is_shimm16(v16, &cmode, &imm8)) {
942 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
945 if (is_shimm16(~v16, &cmode, &imm8)) {
946 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
951 * Otherwise, all remaining constants can be loaded in two insns:
952 * rd = v16 & 0xff, rd |= v16 & 0xff00.
954 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
955 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
957 } else if (vece == MO_32) {
961 if (is_shimm32(v32, &cmode, &imm8) ||
962 is_soimm32(v32, &cmode, &imm8) ||
963 is_fimm32(v32, &cmode, &imm8)) {
964 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
967 if (is_shimm32(n32, &cmode, &imm8) ||
968 is_soimm32(n32, &cmode, &imm8)) {
969 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
974 * Restrict the set of constants to those we can load with
975 * two instructions. Others we load from the pool.
977 i = is_shimm32_pair(v32, &cmode, &imm8);
979 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
980 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
983 i = is_shimm32_pair(n32, &cmode, &imm8);
985 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
986 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
989 } else if (is_fimm64(v64, &cmode, &imm8)) {
990 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
995 * As a last resort, load from the constant pool. Sadly there
996 * is no LD1R (literal), so store the full 16-byte vector.
998 if (type == TCG_TYPE_V128) {
999 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
1000 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
1002 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
1003 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
1007 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
1008 TCGReg rd, TCGReg rs)
1010 int is_q = type - TCG_TYPE_V64;
1011 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
1015 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
1016 TCGReg r, TCGReg base, intptr_t offset)
1018 TCGReg temp = TCG_REG_TMP0;
1020 if (offset < -0xffffff || offset > 0xffffff) {
1021 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
1022 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
1025 AArch64Insn add_insn = I3401_ADDI;
1028 add_insn = I3401_SUBI;
1031 if (offset & 0xfff000) {
1032 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1035 if (offset & 0xfff) {
1036 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1040 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1044 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1045 tcg_target_long value)
1047 tcg_target_long svalue = value;
1048 tcg_target_long ivalue = ~value;
1049 tcg_target_long t0, t1, t2;
1056 tcg_debug_assert(rd < 32);
1059 g_assert_not_reached();
1062 /* For 32-bit values, discard potential garbage in value. For 64-bit
1063 values within [2**31, 2**32-1], we can create smaller sequences by
1064 interpreting this as a negative 32-bit number, while ensuring that
1065 the high 32 bits are cleared by setting SF=0. */
1066 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1067 svalue = (int32_t)value;
1068 value = (uint32_t)value;
1069 ivalue = (uint32_t)ivalue;
1070 type = TCG_TYPE_I32;
1073 /* Speed things up by handling the common case of small positive
1074 and negative values specially. */
1075 if ((value & ~0xffffull) == 0) {
1076 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1078 } else if ((ivalue & ~0xffffull) == 0) {
1079 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1083 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
1084 use the sign-extended value. That lets us match rotated values such
1085 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1086 if (is_limm(svalue)) {
1087 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1091 /* Look for host pointer values within 4G of the PC. This happens
1092 often when loading pointers to QEMU's own data structures. */
1093 if (type == TCG_TYPE_I64) {
1094 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1095 tcg_target_long disp = value - src_rx;
1096 if (disp == sextract64(disp, 0, 21)) {
1097 tcg_out_insn(s, 3406, ADR, rd, disp);
1100 disp = (value >> 12) - (src_rx >> 12);
1101 if (disp == sextract64(disp, 0, 21)) {
1102 tcg_out_insn(s, 3406, ADRP, rd, disp);
1103 if (value & 0xfff) {
1104 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1110 /* Would it take fewer insns to begin with MOVN? */
1111 if (ctpop64(value) >= 32) {
1118 s0 = ctz64(t0) & (63 & -16);
1119 t1 = t0 & ~(0xffffull << s0);
1120 s1 = ctz64(t1) & (63 & -16);
1121 t2 = t1 & ~(0xffffull << s1);
1123 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1125 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1130 /* For more than 2 insns, dump it into the constant pool. */
1131 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1132 tcg_out_insn(s, 3305, LDR, 0, rd);
1135 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1140 static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1141 tcg_target_long imm)
1143 /* This function is only used for passing structs by reference. */
1144 g_assert_not_reached();
1147 /* Define something more legible for general use. */
1148 #define tcg_out_ldst_r tcg_out_insn_3310
1150 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1151 TCGReg rn, intptr_t offset, int lgsize)
1153 /* If the offset is naturally aligned and in range, then we can
1154 use the scaled uimm12 encoding */
1155 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1156 uintptr_t scaled_uimm = offset >> lgsize;
1157 if (scaled_uimm <= 0xfff) {
1158 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1163 /* Small signed offsets can use the unscaled encoding. */
1164 if (offset >= -256 && offset < 256) {
1165 tcg_out_insn_3312(s, insn, rd, rn, offset);
1169 /* Worst-case scenario, move offset to temp register, use reg offset. */
1170 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
1171 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
/*
 * Register-to-register move, handling all four combinations of core
 * (register number < 32) and vector (>= 32) registers:
 * core->core MOV, vector->core UMOV, core->vector INS, and
 * vector->vector via ORR (vector).
 */
1174 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1182 if (ret < 32 && arg < 32) {
1183 tcg_out_movr(s, type, ret, arg);
1185 } else if (ret < 32) {
/* Vector -> core: UMOV lane 0. */
1186 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1188 } else if (arg < 32) {
/* Core -> vector: INS into lane 0; 4 << type selects 32/64-bit lane. */
1189 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1195 tcg_debug_assert(ret >= 32 && arg >= 32);
/* Vector -> vector: MOV alias of ORR Vd, Vn, Vn (64-bit, then 128-bit). */
1196 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1199 tcg_debug_assert(ret >= 32 && arg >= 32);
1200 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1204 g_assert_not_reached();
/* Load @ret from base + ofs, choosing core vs vector load by register
   number (< 32 is a core register).  Size class comes from @type; some
   cases of the switch are elided from this extract. */
1209 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1210 TCGReg base, intptr_t ofs)
1217 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1221 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1233 g_assert_not_reached();
1235 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
/* Store @src to base + ofs; mirror of tcg_out_ld, selecting core vs
   vector store by register number. */
1238 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1239 TCGReg base, intptr_t ofs)
1246 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1250 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1262 g_assert_not_reached();
1264 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
/* Store an immediate: only zero is handled directly, by storing XZR.
   Any other constant must go through a register (return false). */
1267 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1268 TCGReg base, intptr_t ofs)
1270 if (type <= TCG_TYPE_I64 && val == 0) {
1271 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
/* Bitfield move: BFM rd, rn, #a, #b (insert bits without clearing rd). */
1277 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1278 TCGReg rn, unsigned int a, unsigned int b)
1280 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
/* Unsigned bitfield move: UBFM rd, rn, #a, #b (zero-extending). */
1283 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1284 TCGReg rn, unsigned int a, unsigned int b)
1286 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
/* Signed bitfield move: SBFM rd, rn, #a, #b (sign-extending). */
1289 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1290 TCGReg rn, unsigned int a, unsigned int b)
1292 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
/* Extract: EXTR rd, rn, rm, #a -- bits from the rn:rm pair at offset a. */
1295 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1296 TCGReg rn, TCGReg rm, unsigned int a)
1298 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
/* Shift left by immediate, via the LSL alias of UBFM.
   NOTE(review): 'max' is defined on a line elided from this extract
   (presumably bits - 1); confirm against the full source. */
1301 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1302 TCGReg rd, TCGReg rn, unsigned int m)
1304 int bits = ext ? 64 : 32;
1306 tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
/* Logical shift right by immediate, via the LSR alias of UBFM. */
1309 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1310 TCGReg rd, TCGReg rn, unsigned int m)
1312 int max = ext ? 63 : 31;
1313 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
/* Arithmetic shift right by immediate, via the ASR alias of SBFM. */
1316 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1317 TCGReg rd, TCGReg rn, unsigned int m)
1319 int max = ext ? 63 : 31;
1320 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
/* Rotate right by immediate, via the ROR alias EXTR rd, rn, rn, #m. */
1323 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1324 TCGReg rd, TCGReg rn, unsigned int m)
1326 int max = ext ? 63 : 31;
1327 tcg_out_extr(s, ext, rd, rn, rn, m & max);
/* Rotate left by immediate: rotate right by the complementary amount,
   -m & max == (width - m) mod width. */
1330 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1331 TCGReg rd, TCGReg rn, unsigned int m)
1333 int max = ext ? 63 : 31;
1334 tcg_out_extr(s, ext, rd, rn, rn, -m & max);
/* Deposit @width bits of rn into rd at bit position @lsb, using the
   BFI alias of BFM: immr = (size - lsb) mod size, imms = width - 1. */
1337 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1338 TCGReg rn, unsigned lsb, unsigned width)
1340 unsigned size = ext ? 64 : 32;
1341 unsigned a = (size - lsb) & (size - 1);
1342 unsigned b = width - 1;
1343 tcg_out_bfm(s, ext, rd, rn, a, b);
/* Compare @a against @b (register or constant), setting flags.
   Constants use CMP/CMN (SUBS/ADDS with destination XZR); a negative
   immediate is negated and emitted as CMN. */
1346 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1347 tcg_target_long b, bool const_b)
1350 /* Using CMP or CMN aliases. */
1352 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1354 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1357 /* Using CMP alias SUBS wzr, Wn, Wm */
1358 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
/* Unconditional direct branch to @target; the pc-relative offset
   must fit in B's signed 26-bit field (asserted). */
1362 static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1364 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1365 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1366 tcg_out_insn(s, 3206, B, offset);
/* Call @target: BL when the 26-bit pc-relative offset reaches,
   otherwise materialize the address in TMP0 and BLR. */
1369 static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1371 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1372 if (offset == sextract64(offset, 0, 26)) {
1373 tcg_out_insn(s, 3206, BL, offset);
1375 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1376 tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
/* Public call entry point; @info (the helper ABI description) is not
   needed on this backend, so simply forward to tcg_out_call_int. */
1380 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1381 const TCGHelperInfo *info)
1383 tcg_out_call_int(s, target);
/* Branch to a TCG label: if not yet resolved, emit B 0 with a JUMP26
   relocation to be patched later; otherwise branch directly. */
1386 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1388 if (!l->has_value) {
1389 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1390 tcg_out_insn(s, 3206, B, 0);
1392 tcg_out_goto(s, l->u.value_ptr);
/*
 * Conditional branch: compare @a with @b and branch to @l.
 * EQ/NE against constant 0 skip the compare and use CBZ/CBNZ;
 * everything else emits a compare followed by B.cond with a
 * 19-bit pc-relative (CONDBR19) offset.
 */
1396 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1397 TCGArg b, bool b_const, TCGLabel *l)
1402 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1406 tcg_out_cmp(s, ext, a, b, b_const);
1409 if (!l->has_value) {
1410 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
/* Unresolved label: reuse whatever is in the offset field for now. */
1411 offset = tcg_in32(s) >> 5;
1413 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1414 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1418 tcg_out_insn(s, 3202, B_C, c, offset);
1419 } else if (c == TCG_COND_EQ) {
1420 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1422 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
/* Byte-reverse: selects REV16/REV32/REV by encoding s_bits into
   bits [11:10] of the base REV opcode. */
1426 static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1427 TCGReg rd, TCGReg rn)
1429 /* REV, REV16, REV32 */
1430 tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
/* Sign-extend a (8 << s_bits)-bit value from rn into rd. */
1433 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1434 TCGReg rd, TCGReg rn)
1436 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1437 int bits = (8 << s_bits) - 1;
1438 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
/* Sign-extend the low 8 bits of rn into rd. */
1441 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1443 tcg_out_sxt(s, type, MO_8, rd, rn);
/* Sign-extend the low 16 bits of rn into rd. */
1446 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1448 tcg_out_sxt(s, type, MO_16, rd, rn);
/* Sign-extend the low 32 bits of rn into 64-bit rd. */
1451 static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1453 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
/* i32 -> i64 signed extension; same operation as ext32s. */
1456 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1458 tcg_out_ext32s(s, rd, rn);
/* Zero-extend a (8 << s_bits)-bit value; always emitted as a 32-bit
   UBFM since the upper 32 bits are cleared anyway. */
1461 static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1462 TCGReg rd, TCGReg rn)
1464 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1465 int bits = (8 << s_bits) - 1;
1466 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
/* Zero-extend the low 8 bits of rn into rd. */
1469 static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1471 tcg_out_uxt(s, MO_8, rd, rn);
/* Zero-extend the low 16 bits of rn into rd. */
1474 static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1476 tcg_out_uxt(s, MO_16, rd, rn);
/* Zero-extend the low 32 bits: a 32-bit MOV clears the high half. */
1479 static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
1481 tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
/* i32 -> i64 zero extension; same operation as ext32u. */
1484 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1486 tcg_out_ext32u(s, rd, rn);
/* Truncate i64 to i32: a 32-bit register move suffices. */
1489 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
1491 tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
/* Add a signed immediate: ADDI for non-negative values, otherwise
   SUBI of the negated value. */
1494 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1495 TCGReg rn, int64_t aimm)
1498 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1500 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
/*
 * Double-word add/sub with carry: low part via ADDS/SUBS (setting
 * flags), high part via the carry-consuming form.  If the output low
 * register would clobber a still-needed high input, compute into a
 * temporary and move at the end.
 */
1504 static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1505 TCGReg rh, TCGReg al, TCGReg ah,
1506 tcg_target_long bl, tcg_target_long bh,
1507 bool const_bl, bool const_bh, bool sub)
1509 TCGReg orig_rl = rl;
1512 if (rl == ah || (!const_bh && rl == bh)) {
/* For a constant low part: negative immediates flip add<->sub. */
1519 insn = sub ? I3401_ADDSI : I3401_SUBSI;
1521 insn = sub ? I3401_SUBSI : I3401_ADDSI;
1524 if (unlikely(al == TCG_REG_XZR)) {
1525 /* ??? We want to allow al to be zero for the benefit of
1526 negation via subtraction. However, that leaves open the
1527 possibility of adding 0+const in the low part, and the
1528 immediate add instructions encode XSP not XZR. Don't try
1529 anything more elaborate here than loading another zero. */
1531 tcg_out_movi(s, ext, al, 0);
1533 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1535 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1540 /* Note that the only two constants we support are 0 and -1, and
1541 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1542 if ((bh != 0) ^ sub) {
1549 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
/* If we computed into a temporary above, move the result home. */
1551 tcg_out_mov(s, ext, orig_rl, rl);
/*
 * Emit a memory barrier.  The table maps each TCG memory-order subset
 * to the weakest sufficient DMB ISH variant; unlisted combinations
 * default to the full load+store barrier via the [0 ... TCG_MO_ALL]
 * range initializer.
 */
1554 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1556 static const uint32_t sync[] = {
1557 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1558 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1559 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1560 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1561 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1563 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
/*
 * Count leading/trailing zeros of a0 into d, with @b the value to
 * produce when a0 == 0.  CTZ is implemented as RBIT + CLZ.  When b
 * equals the operand width the CLZ result is already correct; other
 * fallback values need a compare and CSEL/CSINC-style select.
 * NOTE(review): several branch lines are elided from this extract;
 * intermediate register choices (a1, TMP0) follow the visible code.
 */
1566 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1567 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1572 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1574 if (const_b && b == (ext ? 64 : 32)) {
1575 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1577 AArch64Insn sel = I3506_CSEL;
1579 tcg_out_cmp(s, ext, a0, 0, 1);
1580 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
1586 } else if (b == 0) {
1589 tcg_out_movi(s, ext, d, b);
1593 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
/* NOTE(review): body elided from this extract -- reports whether the
   backend can byte-swap within memory operations; confirm the return
   value against the full source. */
1604 bool tcg_target_has_memory_bswap(MemOp memop)
/* Helper-call parameter description shared by the ld/st slow paths:
   one scratch register, TMP0. */
1609 static const TCGLdstHelperParam ldst_helper_param = {
1610 .ntmp = 1, .tmp = { TCG_REG_TMP0 }
/*
 * Slow-path tail for a guest load: patch the forward branch from the
 * fast path to land here, marshal arguments, call the sized load
 * helper, move the result into place, and jump back to the fast path.
 */
1613 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1615 MemOp opc = get_memop(lb->oi);
1617 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1621 tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1622 tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1623 tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1624 tcg_out_goto(s, lb->raddr);
/* Slow-path tail for a guest store; mirror of the load slow path,
   minus the return-value handling. */
1628 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1630 MemOp opc = get_memop(lb->oi);
1632 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1636 tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1637 tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1638 tcg_out_goto(s, lb->raddr);
1642 /* We expect to use a 7-bit scaled negative offset from ENV. */
1643 #define MIN_TLB_MASK_TABLE_OFS -512
1646 * For system-mode, perform the TLB load and compare.
1647 * For user-mode, perform any required alignment tests.
1648 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1649 * is required and fill in @h with the host address for the fast path.
1651 static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1652 TCGReg addr_reg, MemOpIdx oi,
1655 TCGType addr_type = s->addr_type;
1656 TCGLabelQemuLdst *ldst = NULL;
1657 MemOp opc = get_memop(oi);
1658 MemOp s_bits = opc & MO_SIZE;
/* LSE2 guarantees single-copy atomicity within a 16-byte granule. */
1661 h->aa = atom_and_align_for_opc(s, opc,
1662 have_lse2 ? MO_ATOM_WITHIN16
1665 a_mask = (1 << h->aa.align) - 1;
1667 #ifdef CONFIG_SOFTMMU
1668 unsigned s_mask = (1u << s_bits) - 1;
1669 unsigned mem_index = get_mmuidx(oi);
1672 uint64_t compare_mask;
1674 ldst = new_ldst_label(s);
1675 ldst->is_ld = is_ld;
1677 ldst->addrlo_reg = addr_reg;
/* 32-bit ops suffice unless page bits + dynamic TLB bits overflow. */
1679 mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
1680 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1682 /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
1683 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1684 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1685 tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
1686 tlb_mask_table_ofs(s, mem_index), 1, 0);
1688 /* Extract the TLB index from the address into X0. */
1689 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1690 TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
1691 s->page_bits - CPU_TLB_ENTRY_BITS)_
1693 /* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */
1694 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
1696 /* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */
1697 QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1698 tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
1699 is_ld ? offsetof(CPUTLBEntry, addr_read)
1700 : offsetof(CPUTLBEntry, addr_write));
1701 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
1702 offsetof(CPUTLBEntry, addend));
1705 * For aligned accesses, we check the first byte and include the alignment
1706 * bits within the address. For unaligned access, we check that we don't
1707 * cross pages using the address of the last byte of the access.
1709 if (a_mask >= s_mask) {
1710 addr_adj = addr_reg;
1712 addr_adj = TCG_REG_TMP2;
1713 tcg_out_insn(s, 3401, ADDI, addr_type,
1714 addr_adj, addr_reg, s_mask - a_mask);
1716 compare_mask = (uint64_t)s->page_mask | a_mask;
1718 /* Store the page mask part of the address into TMP2. */
1719 tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
1720 addr_adj, compare_mask);
1722 /* Perform the address comparison. */
1723 tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
1725 /* If not equal, we jump to the slow path. */
1726 ldst->label_ptr[0] = s->code_ptr;
1727 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
/* Fast path: host address = TLB addend (TMP1) + guest address. */
1729 h->base = TCG_REG_TMP1;
1730 h->index = addr_reg;
1731 h->index_ext = addr_type;
/* User-mode: no TLB; only an alignment check may be required. */
1734 ldst = new_ldst_label(s);
1736 ldst->is_ld = is_ld;
1738 ldst->addrlo_reg = addr_reg;
1740 /* tst addr, #mask */
1741 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1743 /* b.ne slow_path */
1744 ldst->label_ptr[0] = s->code_ptr;
1745 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1748 if (guest_base || addr_type == TCG_TYPE_I32) {
1749 h->base = TCG_REG_GUEST_BASE;
1750 h->index = addr_reg;
1751 h->index_ext = addr_type;
1754 h->index = TCG_REG_XZR;
1755 h->index_ext = TCG_TYPE_I64;
/* Fast-path guest load: pick the register-offset load matching the
   memop's size and signedness (@ext selects 32- vs 64-bit extension
   targets for the signed forms). */
1762 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1763 TCGReg data_r, HostAddress h)
1765 switch (memop & MO_SSIZE) {
1767 tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1770 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1771 data_r, h.base, h.index_ext, h.index);
1774 tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1777 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1778 data_r, h.base, h.index_ext, h.index);
1781 tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1784 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1787 tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1790 g_assert_not_reached();
/* Fast-path guest store: pick the register-offset store matching the
   memop's size (stores need no sign distinction). */
1794 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1795 TCGReg data_r, HostAddress h)
1797 switch (memop & MO_SIZE) {
1799 tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1802 tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1805 tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1808 tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1811 g_assert_not_reached();
/* Guest load entry point: resolve the host address (possibly creating
   a slow-path label), emit the fast-path load, then record the return
   address and result register on the slow-path label, if any. */
1815 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1816 MemOpIdx oi, TCGType data_type)
1818 TCGLabelQemuLdst *ldst;
1821 ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1822 tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1825 ldst->type = data_type;
1826 ldst->datalo_reg = data_reg;
1827 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
/* Guest store entry point; mirror of tcg_out_qemu_ld. */
1831 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1832 MemOpIdx oi, TCGType data_type)
1834 TCGLabelQemuLdst *ldst;
1837 ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1838 tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1841 ldst->type = data_type;
1842 ldst->datalo_reg = data_reg;
1843 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
/*
 * 128-bit guest load/store.  LDP/STP have no register-index form, so
 * the host address is first composed into a single base register.
 * With LSE2 (or when atomicity <= 64 bits) a plain LDP/STP suffices;
 * otherwise an LDXP/STXP exclusive loop provides 16-byte atomicity,
 * guarded by a runtime 16-byte-alignment test when needed.
 */
1847 static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1848 TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1850 TCGLabelQemuLdst *ldst;
1855 ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1857 /* Compose the final address, as LDP/STP have no indexing. */
1858 if (h.index == TCG_REG_XZR) {
1861 base = TCG_REG_TMP2;
1862 if (h.index_ext == TCG_TYPE_I32) {
1863 /* add base, base, index, uxtw */
1864 tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
1865 h.base, h.index, MO_32, 0);
1867 /* add base, base, index */
1868 tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
1872 use_pair = h.aa.atom < MO_128 || have_lse2;
1875 tcg_insn_unit *branch = NULL;
1876 TCGReg ll, lh, sl, sh;
1879 * If we have already checked for 16-byte alignment, that's all
1880 * we need. Otherwise we have determined that misaligned atomicity
1881 * may be handled with two 8-byte loads.
1883 if (h.aa.align < MO_128) {
1885 * TODO: align should be MO_64, so we only need test bit 3,
1886 * which means we could use TBNZ instead of ANDS+B_C.
1888 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
1889 branch = s->code_ptr;
1890 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1896 * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1897 * ldxp lo, hi, [base]
1898 * stxp t0, lo, hi, [base]
1900 * Require no overlap between data{lo,hi} and base.
1902 if (datalo == base || datahi == base) {
1903 tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
1904 base = TCG_REG_TMP2;
1910 * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1911 * 1: ldxp t0, t1, [base]
1912 * stxp t0, lo, hi, [base]
1915 tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
/* CBNZ -2 insns: retry the exclusive pair until STXP succeeds. */
1922 tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
1923 tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
1924 tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
1927 /* "b .+8", branching across the one insn of use_pair. */
1928 tcg_out_insn(s, 3206, B, 2);
1929 reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
1935 tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
1937 tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
1942 ldst->type = TCG_TYPE_I128;
1943 ldst->datalo_reg = datalo;
1944 ldst->datahi_reg = datahi;
1945 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
/* Address of the common TB epilogue, set when the prologue is emitted. */
1949 static const tcg_insn_unit *tb_ret_addr;
/*
 * Exit the translation block with return value @a0.  a0 == 0 reuses
 * the zeroing in the goto_ptr epilogue; otherwise load a0 into X0 and
 * branch to the TB return address, directly if in B range, else via
 * TMP0 (x16, chosen for its BTI Jump type).
 */
1951 static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1953 const tcg_insn_unit *target;
1956 /* Reuse the zeroing that exists for goto_ptr. */
1958 target = tcg_code_gen_epilogue;
1960 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1961 target = tb_ret_addr;
1964 offset = tcg_pcrel_diff(s, target) >> 2;
1965 if (offset == sextract64(offset, 0, 26)) {
1966 tcg_out_insn(s, 3206, B, offset);
1969 * Only x16/x17 generate BTI type Jump (2),
1970 * other registers generate BTI type Jump|Call (3).
1972 QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
1973 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1974 tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
/*
 * Emit the patchable goto_tb sequence: a B (or, after patching, an
 * LDR of the target into TMP0) followed by BR TMP0, then the reset
 * point marked with BTI J as an indirect-branch landing pad.
 */
1978 static void tcg_out_goto_tb(TCGContext *s, int which)
1981 * Direct branch, or indirect address load, will be patched
1982 * by tb_target_set_jmp_target. Assert indirect load offset
1983 * in range early, regardless of direct branch distance.
1985 intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1986 tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1988 set_jmp_insn_offset(s, which);
1989 tcg_out32(s, I3206_B);
1990 tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
1991 set_jmp_reset_offset(s, which);
1992 tcg_out_bti(s, BTI_J);
/*
 * Patch the goto_tb jump of TB @tb, slot @n, to the current target:
 * a direct B when the 28-bit byte offset reaches, otherwise an LDR
 * of the target address (pc-relative literal) into TMP0 for the
 * following BR.  The patch is a single atomic 32-bit store followed
 * by an icache flush.
 */
1995 void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1996 uintptr_t jmp_rx, uintptr_t jmp_rw)
1998 uintptr_t d_addr = tb->jmp_target_addr[n];
1999 ptrdiff_t d_offset = d_addr - jmp_rx;
2002 /* Either directly branch, or indirect branch load. */
2003 if (d_offset == sextract64(d_offset, 0, 28)) {
2004 insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
2006 uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
2007 ptrdiff_t i_offset = i_addr - jmp_rx;
2009 /* Note that we asserted this in range in tcg_out_goto_tb. */
2010 insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
2012 qatomic_set((uint32_t *)jmp_rw, insn);
2013 flush_idcache_range(jmp_rx, jmp_rw, 4);
/*
 * Main scalar opcode dispatcher: translate one TCG op into one or a
 * few AArch64 instructions.  Immediate-capable ops test const_args[]
 * and fall back to the register form; "rZ"-constrained operands map
 * constants to XZR via REG0().
 */
2016 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2017 const TCGArg args[TCG_MAX_OP_ARGS],
2018 const int const_args[TCG_MAX_OP_ARGS])
2020 /* 99% of the time, we can signal the use of extension registers
2021 by looking to see if the opcode handles 64-bit data. */
2022 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
2024 /* Hoist the loads of the most common arguments. */
2025 TCGArg a0 = args[0];
2026 TCGArg a1 = args[1];
2027 TCGArg a2 = args[2];
2028 int c2 = const_args[2];
2030 /* Some operands are defined with "rZ" constraint, a register or
2031 the zero register. These need not actually test args[I] == 0. */
2032 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
2035 case INDEX_op_goto_ptr:
2036 tcg_out_insn(s, 3207, BR, a0);
2040 tcg_out_goto_label(s, arg_label(a0));
/* Host loads: the final argument is log2 of the access size. */
2043 case INDEX_op_ld8u_i32:
2044 case INDEX_op_ld8u_i64:
2045 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
2047 case INDEX_op_ld8s_i32:
2048 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
2050 case INDEX_op_ld8s_i64:
2051 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
2053 case INDEX_op_ld16u_i32:
2054 case INDEX_op_ld16u_i64:
2055 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
2057 case INDEX_op_ld16s_i32:
2058 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
2060 case INDEX_op_ld16s_i64:
2061 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
2063 case INDEX_op_ld_i32:
2064 case INDEX_op_ld32u_i64:
2065 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
2067 case INDEX_op_ld32s_i64:
2068 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
2070 case INDEX_op_ld_i64:
2071 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2074 case INDEX_op_st8_i32:
2075 case INDEX_op_st8_i64:
2076 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
2078 case INDEX_op_st16_i32:
2079 case INDEX_op_st16_i64:
2080 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
2082 case INDEX_op_st_i32:
2083 case INDEX_op_st32_i64:
2084 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
2086 case INDEX_op_st_i64:
2087 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
/* Arithmetic: immediate forms where the constraint permits. */
2090 case INDEX_op_add_i32:
2093 case INDEX_op_add_i64:
2095 tcg_out_addsubi(s, ext, a0, a1, a2);
2097 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2101 case INDEX_op_sub_i32:
2104 case INDEX_op_sub_i64:
2106 tcg_out_addsubi(s, ext, a0, a1, -a2);
2108 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2112 case INDEX_op_neg_i64:
2113 case INDEX_op_neg_i32:
2114 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2117 case INDEX_op_and_i32:
2120 case INDEX_op_and_i64:
2122 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2124 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2128 case INDEX_op_andc_i32:
2131 case INDEX_op_andc_i64:
2133 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2135 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2139 case INDEX_op_or_i32:
2142 case INDEX_op_or_i64:
2144 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2146 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2150 case INDEX_op_orc_i32:
2153 case INDEX_op_orc_i64:
2155 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2157 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2161 case INDEX_op_xor_i32:
2164 case INDEX_op_xor_i64:
2166 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2168 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2172 case INDEX_op_eqv_i32:
2175 case INDEX_op_eqv_i64:
2177 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2179 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2183 case INDEX_op_not_i64:
2184 case INDEX_op_not_i32:
2185 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2188 case INDEX_op_mul_i64:
2189 case INDEX_op_mul_i32:
2190 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2193 case INDEX_op_div_i64:
2194 case INDEX_op_div_i32:
2195 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2197 case INDEX_op_divu_i64:
2198 case INDEX_op_divu_i32:
2199 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
/* No hardware remainder: rem = a1 - (a1 / a2) * a2 via DIV + MSUB. */
2202 case INDEX_op_rem_i64:
2203 case INDEX_op_rem_i32:
2204 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
2205 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2207 case INDEX_op_remu_i64:
2208 case INDEX_op_remu_i32:
2209 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
2210 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2213 case INDEX_op_shl_i64:
2214 case INDEX_op_shl_i32:
2216 tcg_out_shl(s, ext, a0, a1, a2);
2218 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2222 case INDEX_op_shr_i64:
2223 case INDEX_op_shr_i32:
2225 tcg_out_shr(s, ext, a0, a1, a2);
2227 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2231 case INDEX_op_sar_i64:
2232 case INDEX_op_sar_i32:
2234 tcg_out_sar(s, ext, a0, a1, a2);
2236 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2240 case INDEX_op_rotr_i64:
2241 case INDEX_op_rotr_i32:
2243 tcg_out_rotr(s, ext, a0, a1, a2);
2245 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2249 case INDEX_op_rotl_i64:
2250 case INDEX_op_rotl_i32:
2252 tcg_out_rotl(s, ext, a0, a1, a2);
/* No ROL instruction: rotate right by the negated count. */
2254 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
2255 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
2259 case INDEX_op_clz_i64:
2260 case INDEX_op_clz_i32:
2261 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2263 case INDEX_op_ctz_i64:
2264 case INDEX_op_ctz_i32:
2265 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2268 case INDEX_op_brcond_i32:
2271 case INDEX_op_brcond_i64:
2272 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2275 case INDEX_op_setcond_i32:
2278 case INDEX_op_setcond_i64:
2279 tcg_out_cmp(s, ext, a1, a2, c2);
2280 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
2281 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2282 TCG_REG_XZR, tcg_invert_cond(args[3]));
2285 case INDEX_op_negsetcond_i32:
2288 case INDEX_op_negsetcond_i64:
2289 tcg_out_cmp(s, ext, a1, a2, c2);
2290 /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). */
2291 tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR,
2292 TCG_REG_XZR, tcg_invert_cond(args[3]));
2295 case INDEX_op_movcond_i32:
2298 case INDEX_op_movcond_i64:
2299 tcg_out_cmp(s, ext, a1, a2, c2);
2300 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2303 case INDEX_op_qemu_ld_a32_i32:
2304 case INDEX_op_qemu_ld_a64_i32:
2305 case INDEX_op_qemu_ld_a32_i64:
2306 case INDEX_op_qemu_ld_a64_i64:
2307 tcg_out_qemu_ld(s, a0, a1, a2, ext);
2309 case INDEX_op_qemu_st_a32_i32:
2310 case INDEX_op_qemu_st_a64_i32:
2311 case INDEX_op_qemu_st_a32_i64:
2312 case INDEX_op_qemu_st_a64_i64:
2313 tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2315 case INDEX_op_qemu_ld_a32_i128:
2316 case INDEX_op_qemu_ld_a64_i128:
2317 tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
2319 case INDEX_op_qemu_st_a32_i128:
2320 case INDEX_op_qemu_st_a64_i128:
2321 tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
2324 case INDEX_op_bswap64_i64:
2325 tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2327 case INDEX_op_bswap32_i64:
2328 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2329 if (a2 & TCG_BSWAP_OS) {
2330 tcg_out_ext32s(s, a0, a0);
2333 case INDEX_op_bswap32_i32:
2334 tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2336 case INDEX_op_bswap16_i64:
2337 case INDEX_op_bswap16_i32:
2338 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2339 if (a2 & TCG_BSWAP_OS) {
2340 /* Output must be sign-extended. */
2341 tcg_out_ext16s(s, ext, a0, a0);
2342 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2343 /* Output must be zero-extended, but input isn't. */
2344 tcg_out_ext16u(s, a0, a0);
2348 case INDEX_op_deposit_i64:
2349 case INDEX_op_deposit_i32:
2350 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2353 case INDEX_op_extract_i64:
2354 case INDEX_op_extract_i32:
2355 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2358 case INDEX_op_sextract_i64:
2359 case INDEX_op_sextract_i32:
2360 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2363 case INDEX_op_extract2_i64:
2364 case INDEX_op_extract2_i32:
2365 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2368 case INDEX_op_add2_i32:
2369 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2370 (int32_t)args[4], args[5], const_args[4],
2371 const_args[5], false);
2373 case INDEX_op_add2_i64:
2374 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2375 args[5], const_args[4], const_args[5], false);
2377 case INDEX_op_sub2_i32:
2378 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2379 (int32_t)args[4], args[5], const_args[4],
2380 const_args[5], true);
2382 case INDEX_op_sub2_i64:
2383 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2384 args[5], const_args[4], const_args[5], true);
2387 case INDEX_op_muluh_i64:
2388 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2390 case INDEX_op_mulsh_i64:
2391 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
/* Ops below are handled by dedicated emitters and must never land here. */
2398 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2399 case INDEX_op_mov_i64:
2400 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2401 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */
2402 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */
2403 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */
2404 case INDEX_op_ext8s_i64:
2405 case INDEX_op_ext8u_i32:
2406 case INDEX_op_ext8u_i64:
2407 case INDEX_op_ext16s_i64:
2408 case INDEX_op_ext16s_i32:
2409 case INDEX_op_ext16u_i64:
2410 case INDEX_op_ext16u_i32:
2411 case INDEX_op_ext32s_i64:
2412 case INDEX_op_ext32u_i64:
2413 case INDEX_op_ext_i32_i64:
2414 case INDEX_op_extu_i32_i64:
2415 case INDEX_op_extrl_i64_i32:
2417 g_assert_not_reached();
2423 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2424 unsigned vecl, unsigned vece,
2425 const TCGArg args[TCG_MAX_OP_ARGS],
2426 const int const_args[TCG_MAX_OP_ARGS])
2428 static const AArch64Insn cmp_vec_insn[16] = {
2429 [TCG_COND_EQ] = I3616_CMEQ,
2430 [TCG_COND_GT] = I3616_CMGT,
2431 [TCG_COND_GE] = I3616_CMGE,
2432 [TCG_COND_GTU] = I3616_CMHI,
2433 [TCG_COND_GEU] = I3616_CMHS,
2435 static const AArch64Insn cmp_scalar_insn[16] = {
2436 [TCG_COND_EQ] = I3611_CMEQ,
2437 [TCG_COND_GT] = I3611_CMGT,
2438 [TCG_COND_GE] = I3611_CMGE,
2439 [TCG_COND_GTU] = I3611_CMHI,
2440 [TCG_COND_GEU] = I3611_CMHS,
2442 static const AArch64Insn cmp0_vec_insn[16] = {
2443 [TCG_COND_EQ] = I3617_CMEQ0,
2444 [TCG_COND_GT] = I3617_CMGT0,
2445 [TCG_COND_GE] = I3617_CMGE0,
2446 [TCG_COND_LT] = I3617_CMLT0,
2447 [TCG_COND_LE] = I3617_CMLE0,
2449 static const AArch64Insn cmp0_scalar_insn[16] = {
2450 [TCG_COND_EQ] = I3612_CMEQ0,
2451 [TCG_COND_GT] = I3612_CMGT0,
2452 [TCG_COND_GE] = I3612_CMGE0,
2453 [TCG_COND_LT] = I3612_CMLT0,
2454 [TCG_COND_LE] = I3612_CMLE0,
2457 TCGType type = vecl + TCG_TYPE_V64;
2458 unsigned is_q = vecl;
2459 bool is_scalar = !is_q && vece == MO_64;
2460 TCGArg a0, a1, a2, a3;
2468 case INDEX_op_ld_vec:
2469 tcg_out_ld(s, type, a0, a1, a2);
2471 case INDEX_op_st_vec:
2472 tcg_out_st(s, type, a0, a1, a2);
2474 case INDEX_op_dupm_vec:
2475 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2477 case INDEX_op_add_vec:
2479 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2481 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2484 case INDEX_op_sub_vec:
2486 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2488 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2491 case INDEX_op_mul_vec:
2492 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2494 case INDEX_op_neg_vec:
2496 tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2498 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2501 case INDEX_op_abs_vec:
2503 tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2505 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2508 case INDEX_op_and_vec:
2509 if (const_args[2]) {
2510 is_shimm1632(~a2, &cmode, &imm8);
2512 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2515 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2518 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2520 case INDEX_op_or_vec:
2521 if (const_args[2]) {
2522 is_shimm1632(a2, &cmode, &imm8);
2524 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2527 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2530 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2532 case INDEX_op_andc_vec:
2533 if (const_args[2]) {
2534 is_shimm1632(a2, &cmode, &imm8);
2536 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2539 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2542 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2544 case INDEX_op_orc_vec:
2545 if (const_args[2]) {
2546 is_shimm1632(~a2, &cmode, &imm8);
2548 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2551 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2554 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2556 case INDEX_op_xor_vec:
2557 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2559 case INDEX_op_ssadd_vec:
2561 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2563 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2566 case INDEX_op_sssub_vec:
2568 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2570 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2573 case INDEX_op_usadd_vec:
2575 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2577 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2580 case INDEX_op_ussub_vec:
2582 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2584 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2587 case INDEX_op_smax_vec:
2588 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2590 case INDEX_op_smin_vec:
2591 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2593 case INDEX_op_umax_vec:
2594 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2596 case INDEX_op_umin_vec:
2597 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2599 case INDEX_op_not_vec:
2600 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2602 case INDEX_op_shli_vec:
2604 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2606 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2609 case INDEX_op_shri_vec:
2611 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2613 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2616 case INDEX_op_sari_vec:
2618 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2620 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2623 case INDEX_op_aa64_sli_vec:
2625 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2627 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2630 case INDEX_op_shlv_vec:
2632 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2634 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2637 case INDEX_op_aa64_sshl_vec:
2639 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2641 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2644 case INDEX_op_cmp_vec:
2646 TCGCond cond = args[3];
2649 if (cond == TCG_COND_NE) {
2650 if (const_args[2]) {
2652 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2654 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2658 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2660 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2662 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2665 if (const_args[2]) {
2667 insn = cmp0_scalar_insn[cond];
2669 tcg_out_insn_3612(s, insn, vece, a0, a1);
2673 insn = cmp0_vec_insn[cond];
2675 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2679 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
2683 insn = cmp_scalar_insn[cond];
2686 t = a1, a1 = a2, a2 = t;
2687 cond = tcg_swap_cond(cond);
2688 insn = cmp_scalar_insn[cond];
2689 tcg_debug_assert(insn != 0);
2691 tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2693 insn = cmp_vec_insn[cond];
2696 t = a1, a1 = a2, a2 = t;
2697 cond = tcg_swap_cond(cond);
2698 insn = cmp_vec_insn[cond];
2699 tcg_debug_assert(insn != 0);
2701 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2707 case INDEX_op_bitsel_vec:
2710 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2711 } else if (a0 == a2) {
2712 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2715 tcg_out_mov(s, type, a0, a1);
2717 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2721 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
2722 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
2724 g_assert_not_reached();
/*
 * Report whether the backend can emit a given vector opcode natively
 * for element size VECE, or whether tcg_expand_vec_op must expand it.
 * NOTE(review): the switch scaffolding and the return values for the
 * first two groups are elided in this view — presumably "return 1"
 * (native) and a negative value (expandable); confirm against the
 * full source.
 */
2728 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
/* Directly supported for all element sizes. */
2731 case INDEX_op_add_vec:
2732 case INDEX_op_sub_vec:
2733 case INDEX_op_and_vec:
2734 case INDEX_op_or_vec:
2735 case INDEX_op_xor_vec:
2736 case INDEX_op_andc_vec:
2737 case INDEX_op_orc_vec:
2738 case INDEX_op_neg_vec:
2739 case INDEX_op_abs_vec:
2740 case INDEX_op_not_vec:
2741 case INDEX_op_cmp_vec:
2742 case INDEX_op_shli_vec:
2743 case INDEX_op_shri_vec:
2744 case INDEX_op_sari_vec:
2745 case INDEX_op_ssadd_vec:
2746 case INDEX_op_sssub_vec:
2747 case INDEX_op_usadd_vec:
2748 case INDEX_op_ussub_vec:
2749 case INDEX_op_shlv_vec:
2750 case INDEX_op_bitsel_vec:
/* Not native, but expandable by tcg_expand_vec_op (see below). */
2752 case INDEX_op_rotli_vec:
2753 case INDEX_op_shrv_vec:
2754 case INDEX_op_sarv_vec:
2755 case INDEX_op_rotlv_vec:
2756 case INDEX_op_rotrv_vec:
/*
 * AdvSIMD has no 64-bit-element forms of these (e.g. no MUL Vd.2D),
 * so they are only supported for sub-64-bit elements.
 */
2758 case INDEX_op_mul_vec:
2759 case INDEX_op_smax_vec:
2760 case INDEX_op_smin_vec:
2761 case INDEX_op_umax_vec:
2762 case INDEX_op_umin_vec:
2763 return vece < MO_64;
/*
 * Expand vector opcodes that the backend cannot emit directly
 * (see tcg_can_emit_vec_op) into sequences of supported ops.
 * Variadic args: output temp, then input temps/immediates per opcode.
 */
2770 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2774     TCGv_vec v0, v1, v2, t1, t2, c1;
2778     v0 = temp_tcgv_vec(arg_temp(a0));
2779     v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2780     a2 = va_arg(va, TCGArg);
/*
 * rotli: unsigned shift right by (width - a2), then SLI (shift left
 * and insert) the low bits back in.  The immediate encoding for SLI
 * folds the shift count into a2 + element width (8 << vece).
 */
2784     case INDEX_op_rotli_vec:
2785         t1 = tcg_temp_new_vec(type);
2786         tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2787         vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2788                   tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2789         tcg_temp_free_vec(t1);
2792     case INDEX_op_shrv_vec:
2793     case INDEX_op_sarv_vec:
2794         /* Right shifts are negative left shifts for AArch64.  */
2795         v2 = temp_tcgv_vec(arg_temp(a2));
2796         t1 = tcg_temp_new_vec(type);
2797         tcg_gen_neg_vec(vece, t1, v2);
2798         opc = (opc == INDEX_op_shrv_vec
2799                ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2800         vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2801                   tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2802         tcg_temp_free_vec(t1);
/*
 * rotlv: v0 = (v1 << v2) | (v1 >> (width - v2)), where the right
 * shift is emitted as a left shift by the negative count (t1 holds
 * v2 - width, which is negative for in-range counts).
 */
2805     case INDEX_op_rotlv_vec:
2806         v2 = temp_tcgv_vec(arg_temp(a2));
2807         t1 = tcg_temp_new_vec(type);
2808         c1 = tcg_constant_vec(type, vece, 8 << vece);
2809         tcg_gen_sub_vec(vece, t1, v2, c1);
2810         /* Right shifts are negative left shifts for AArch64.  */
2811         vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2812                   tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2813         vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2814                   tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2815         tcg_gen_or_vec(vece, v0, v0, t1);
2816         tcg_temp_free_vec(t1);
/*
 * rotrv: v0 = (v1 >> v2) | (v1 << (width - v2)); t1 = -v2 for the
 * right shift, t2 = width - v2 for the left shift.
 */
2819     case INDEX_op_rotrv_vec:
2820         v2 = temp_tcgv_vec(arg_temp(a2));
2821         t1 = tcg_temp_new_vec(type);
2822         t2 = tcg_temp_new_vec(type);
2823         c1 = tcg_constant_vec(type, vece, 8 << vece);
2824         tcg_gen_neg_vec(vece, t1, v2);
2825         tcg_gen_sub_vec(vece, t2, c1, v2);
2826         /* Right shifts are negative left shifts for AArch64.  */
2827         vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2828                   tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2829         vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2830                   tcgv_vec_arg(v1), tcgv_vec_arg(t2))
2831         tcg_gen_or_vec(vece, v0, t1, t2);
2832         tcg_temp_free_vec(t1);
2833         tcg_temp_free_vec(t2);
2837         g_assert_not_reached();
/*
 * Map each TCG opcode to its register-constraint set for this target.
 * Constraint mnemonics (defined in tcg-target-con-str.h): r = general
 * reg, w = vector reg, and the letter suffixes (A, L, M, N, O, Z, i)
 * select immediate/zero-register alternatives.
 */
2841 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2844     case INDEX_op_goto_ptr:
/* Loads and unary integer ops: one output reg, one input reg. */
2847     case INDEX_op_ld8u_i32:
2848     case INDEX_op_ld8s_i32:
2849     case INDEX_op_ld16u_i32:
2850     case INDEX_op_ld16s_i32:
2851     case INDEX_op_ld_i32:
2852     case INDEX_op_ld8u_i64:
2853     case INDEX_op_ld8s_i64:
2854     case INDEX_op_ld16u_i64:
2855     case INDEX_op_ld16s_i64:
2856     case INDEX_op_ld32u_i64:
2857     case INDEX_op_ld32s_i64:
2858     case INDEX_op_ld_i64:
2859     case INDEX_op_neg_i32:
2860     case INDEX_op_neg_i64:
2861     case INDEX_op_not_i32:
2862     case INDEX_op_not_i64:
2863     case INDEX_op_bswap16_i32:
2864     case INDEX_op_bswap32_i32:
2865     case INDEX_op_bswap16_i64:
2866     case INDEX_op_bswap32_i64:
2867     case INDEX_op_bswap64_i64:
2868     case INDEX_op_ext8s_i32:
2869     case INDEX_op_ext16s_i32:
2870     case INDEX_op_ext8u_i32:
2871     case INDEX_op_ext16u_i32:
2872     case INDEX_op_ext8s_i64:
2873     case INDEX_op_ext16s_i64:
2874     case INDEX_op_ext32s_i64:
2875     case INDEX_op_ext8u_i64:
2876     case INDEX_op_ext16u_i64:
2877     case INDEX_op_ext32u_i64:
2878     case INDEX_op_ext_i32_i64:
2879     case INDEX_op_extu_i32_i64:
2880     case INDEX_op_extract_i32:
2881     case INDEX_op_extract_i64:
2882     case INDEX_op_sextract_i32:
2883     case INDEX_op_sextract_i64:
2884         return C_O1_I1(r, r);
/* Stores: value may be the zero register (rZ). */
2886     case INDEX_op_st8_i32:
2887     case INDEX_op_st16_i32:
2888     case INDEX_op_st_i32:
2889     case INDEX_op_st8_i64:
2890     case INDEX_op_st16_i64:
2891     case INDEX_op_st32_i64:
2892     case INDEX_op_st_i64:
2893         return C_O0_I2(rZ, r);
/* Second operand may be an arithmetic immediate (rA). */
2895     case INDEX_op_add_i32:
2896     case INDEX_op_add_i64:
2897     case INDEX_op_sub_i32:
2898     case INDEX_op_sub_i64:
2899     case INDEX_op_setcond_i32:
2900     case INDEX_op_setcond_i64:
2901     case INDEX_op_negsetcond_i32:
2902     case INDEX_op_negsetcond_i64:
2903         return C_O1_I2(r, r, rA);
/* No immediate forms: both inputs must be registers. */
2905     case INDEX_op_mul_i32:
2906     case INDEX_op_mul_i64:
2907     case INDEX_op_div_i32:
2908     case INDEX_op_div_i64:
2909     case INDEX_op_divu_i32:
2910     case INDEX_op_divu_i64:
2911     case INDEX_op_rem_i32:
2912     case INDEX_op_rem_i64:
2913     case INDEX_op_remu_i32:
2914     case INDEX_op_remu_i64:
2915     case INDEX_op_muluh_i64:
2916     case INDEX_op_mulsh_i64:
2917         return C_O1_I2(r, r, r);
/* Logical ops: second operand may be a logical immediate (rL). */
2919     case INDEX_op_and_i32:
2920     case INDEX_op_and_i64:
2921     case INDEX_op_or_i32:
2922     case INDEX_op_or_i64:
2923     case INDEX_op_xor_i32:
2924     case INDEX_op_xor_i64:
2925     case INDEX_op_andc_i32:
2926     case INDEX_op_andc_i64:
2927     case INDEX_op_orc_i32:
2928     case INDEX_op_orc_i64:
2929     case INDEX_op_eqv_i32:
2930     case INDEX_op_eqv_i64:
2931         return C_O1_I2(r, r, rL);
/* Shifts/rotates: count may be register or immediate (ri). */
2933     case INDEX_op_shl_i32:
2934     case INDEX_op_shr_i32:
2935     case INDEX_op_sar_i32:
2936     case INDEX_op_rotl_i32:
2937     case INDEX_op_rotr_i32:
2938     case INDEX_op_shl_i64:
2939     case INDEX_op_shr_i64:
2940     case INDEX_op_sar_i64:
2941     case INDEX_op_rotl_i64:
2942     case INDEX_op_rotr_i64:
2943         return C_O1_I2(r, r, ri);
2945     case INDEX_op_clz_i32:
2946     case INDEX_op_ctz_i32:
2947     case INDEX_op_clz_i64:
2948     case INDEX_op_ctz_i64:
2949         return C_O1_I2(r, r, rAL);
2951     case INDEX_op_brcond_i32:
2952     case INDEX_op_brcond_i64:
2953         return C_O0_I2(r, rA);
2955     case INDEX_op_movcond_i32:
2956     case INDEX_op_movcond_i64:
2957         return C_O1_I4(r, r, rA, rZ, rZ);
/* Guest memory accesses; i128 uses a register pair. */
2959     case INDEX_op_qemu_ld_a32_i32:
2960     case INDEX_op_qemu_ld_a64_i32:
2961     case INDEX_op_qemu_ld_a32_i64:
2962     case INDEX_op_qemu_ld_a64_i64:
2963         return C_O1_I1(r, r);
2964     case INDEX_op_qemu_ld_a32_i128:
2965     case INDEX_op_qemu_ld_a64_i128:
2966         return C_O2_I1(r, r, r);
2967     case INDEX_op_qemu_st_a32_i32:
2968     case INDEX_op_qemu_st_a64_i32:
2969     case INDEX_op_qemu_st_a32_i64:
2970     case INDEX_op_qemu_st_a64_i64:
2971         return C_O0_I2(rZ, r);
2972     case INDEX_op_qemu_st_a32_i128:
2973     case INDEX_op_qemu_st_a64_i128:
2974         return C_O0_I3(rZ, rZ, r);
/* Deposit overwrites its first input in place ("0" ties output). */
2976     case INDEX_op_deposit_i32:
2977     case INDEX_op_deposit_i64:
2978         return C_O1_I2(r, 0, rZ);
2980     case INDEX_op_extract2_i32:
2981     case INDEX_op_extract2_i64:
2982         return C_O1_I2(r, rZ, rZ);
2984     case INDEX_op_add2_i32:
2985     case INDEX_op_add2_i64:
2986     case INDEX_op_sub2_i32:
2987     case INDEX_op_sub2_i64:
2988         return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
/* Three-operand vector ops: all in vector registers. */
2990     case INDEX_op_add_vec:
2991     case INDEX_op_sub_vec:
2992     case INDEX_op_mul_vec:
2993     case INDEX_op_xor_vec:
2994     case INDEX_op_ssadd_vec:
2995     case INDEX_op_sssub_vec:
2996     case INDEX_op_usadd_vec:
2997     case INDEX_op_ussub_vec:
2998     case INDEX_op_smax_vec:
2999     case INDEX_op_smin_vec:
3000     case INDEX_op_umax_vec:
3001     case INDEX_op_umin_vec:
3002     case INDEX_op_shlv_vec:
3003     case INDEX_op_shrv_vec:
3004     case INDEX_op_sarv_vec:
3005     case INDEX_op_aa64_sshl_vec:
3006         return C_O1_I2(w, w, w);
3007     case INDEX_op_not_vec:
3008     case INDEX_op_neg_vec:
3009     case INDEX_op_abs_vec:
3010     case INDEX_op_shli_vec:
3011     case INDEX_op_shri_vec:
3012     case INDEX_op_sari_vec:
3013         return C_O1_I1(w, w);
3014     case INDEX_op_ld_vec:
3015     case INDEX_op_dupm_vec:
3016         return C_O1_I1(w, r);
3017     case INDEX_op_st_vec:
3018         return C_O0_I2(w, r);
3019     case INDEX_op_dup_vec:
3020         return C_O1_I1(w, wr);
/* wO / wN: vector-immediate alternatives (ORR-imm / BIC-imm forms). */
3021     case INDEX_op_or_vec:
3022     case INDEX_op_andc_vec:
3023         return C_O1_I2(w, w, wO);
3024     case INDEX_op_and_vec:
3025     case INDEX_op_orc_vec:
3026         return C_O1_I2(w, w, wN);
3027     case INDEX_op_cmp_vec:
3028         return C_O1_I2(w, w, wZ);
3029     case INDEX_op_bitsel_vec:
3030         return C_O1_I3(w, w, w, w);
/* SLI inserts into its destination, so output is tied to input 1. */
3031     case INDEX_op_aa64_sli_vec:
3032         return C_O1_I2(w, 0, w);
3035         g_assert_not_reached();
/*
 * One-time target initialization: declare which registers exist for
 * each TCG type, which are clobbered by calls (AAPCS64), and which
 * are permanently reserved and unavailable to the allocator.
 */
3039 static void tcg_target_init(TCGContext *s)
/* x0-x31 for integers; v0-v31 (upper 32 bits of the set) for vectors. */
3041     tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
3042     tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
3043     tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3044     tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
/* Start with everything clobbered, then clear the callee-saved set:
   x19-x29 and the low halves of v8-v15 survive a call under AAPCS64. */
3046     tcg_target_call_clobber_regs = -1ull;
3047     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
3048     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
3049     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
3050     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
3051     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
3052     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
3053     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
3054     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
3055     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
3056     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
3057     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
3058     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3059     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3060     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3061     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3062     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3063     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3064     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3065     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
/* Reserved: stack pointer, frame pointer, the platform register x18,
   and the backend's scratch temporaries. */
3067     s->reserved_regs = 0;
3068     tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3069     tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
3070     tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
3071     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
3072     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3073     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
3074     tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
3077 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
/* 12 registers saved in pairs = 96 bytes of register-save area. */
3078 #define PUSH_SIZE ((30 - 19 + 1) * 8)
/* Total frame: save area + outgoing call args + TCG temp buffer,
   rounded up to the target stack alignment. */
3080 #define FRAME_SIZE \
3082      + TCG_STATIC_CALL_ARGS_SIZE \
3083      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
3084      + TCG_TARGET_STACK_ALIGN - 1) \
3085     & ~(TCG_TARGET_STACK_ALIGN - 1))
3087 /* We're expecting a 2 byte uleb128 encoded value.  */
3088 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3090 /* We're expecting to use a single ADDI insn.  */
3091 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
/*
 * Emit the prologue that enters generated code and the matching
 * epilogue (exit_tb / goto_ptr return path).  The exact instruction
 * order here must mirror the DWARF unwind info in debug_frame below.
 */
3093 static void tcg_target_qemu_prologue(TCGContext *s)
3097     tcg_out_bti(s, BTI_C);
3099     /* Push (FP, LR) and allocate space for all saved registers.  */
3100     tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
3101                  TCG_REG_SP, -PUSH_SIZE, 1, 1);
3103     /* Set up frame pointer for canonical unwinding.  */
3104     tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
3106     /* Store callee-preserved regs x19..x28.  */
3107     for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3108         int ofs = (r - TCG_REG_X19 + 2) * 8;
3109         tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3112     /* Make stack space for TCG locals.  */
3113     tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3114                  FRAME_SIZE - PUSH_SIZE);
3116     /* Inform TCG about how to find TCG locals with register, offset, size.  */
3117     tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3118                   CPU_TEMP_BUF_NLONGS * sizeof(long));
3120 #if !defined(CONFIG_SOFTMMU)
3122      * Note that XZR cannot be encoded in the address base register slot,
3123      * as that actually encodes SP.  Depending on the guest, we may need
3124      * to zero-extend the guest address via the address index register slot,
3125      * therefore we need to load even a zero guest base into a register.
3127     tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3128     tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
/* Enter the translation block: env pointer in AREG0, jump to TB code. */
3131     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3132     tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3135      * Return path for goto_ptr.  Set return value to 0, a-la exit_tb,
3136      * and fall through to the rest of the epilogue.
3138     tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3139     tcg_out_bti(s, BTI_J);
3140     tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
/* exit_tb lands here; x0 already holds the return value. */
3143     tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3144     tcg_out_bti(s, BTI_J);
3146     /* Remove TCG locals stack space.  */
3147     tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3148                  FRAME_SIZE - PUSH_SIZE);
3150     /* Restore registers x19..x28.  */
3151     for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3152         int ofs = (r - TCG_REG_X19 + 2) * 8;
3153         tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3156     /* Pop (FP, LR), restore SP to previous frame.  */
3157     tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3158                  TCG_REG_SP, PUSH_SIZE, 0, 1);
3159     tcg_out_insn(s, 3207, RET, TCG_REG_LR);
/*
 * Emitted at the start of every translation block: a BTI landing pad
 * so indirect branches into TB code are valid under ARMv8.5 BTI.
 */
3162 static void tcg_out_tb_start(TCGContext *s)
3164     tcg_out_bti(s, BTI_J);
/* Fill COUNT instruction slots at P with no-ops (used for padding). */
3167 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3170     for (i = 0; i < count; ++i) {
/* DWARF CFA program bytes: 4 for the def_cfa, 24 for register offsets. */
3177     uint8_t fde_def_cfa[4];
3178     uint8_t fde_reg_ofs[24];
3181 #define ELF_HOST_MACHINE EM_AARCH64
/*
 * Static unwind info registered with GDB for the prologue above.
 * Offsets must track the STP layout in tcg_target_qemu_prologue.
 */
3183 static const DebugFrame debug_frame = {
3184     .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3187     .h.cie.code_align = 1,
3188     .h.cie.data_align = 0x78,             /* sleb128 -8 */
3189     .h.cie.return_column = TCG_REG_LR,
3191     /* Total FDE size does not include the "len" member.  */
3192     .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3195         12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3196         (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3200         0x80 + 28, 1,                   /* DW_CFA_offset, x28, -8 */
3201         0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3202         0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3203         0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3204         0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3205         0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3206         0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3207         0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3208         0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3209         0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
3210         0x80 + 30, 11,                  /* DW_CFA_offset, lr, -88 */
3211         0x80 + 29, 12,                  /* DW_CFA_offset, fp, -96 */
/* Register the generated-code buffer and its unwind info with GDB. */
3215 void tcg_register_jit(const void *buf, size_t buf_size)
3217     tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));