1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-pool.inc.c"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType in setting of the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
27
28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
32 };
33 #endif /* CONFIG_DEBUG_TCG */
34
35 static const int tcg_target_reg_alloc_order[] = {
36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38 TCG_REG_X28, /* we will reserve this for guest_base if configured */
39
40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42 TCG_REG_X16, TCG_REG_X17,
43
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47 /* X18 reserved by system */
48 /* X19 reserved for AREG0 */
49 /* X29 reserved as fp */
50 /* X30 reserved as temporary */
51
52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54 /* V8 - V15 are call-saved, and skipped. */
55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
59 };
60
61 static const int tcg_target_call_iarg_regs[8] = {
62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
64 };
65 static const int tcg_target_call_oarg_regs[1] = {
66 TCG_REG_X0
67 };
68
69 #define TCG_REG_TMP TCG_REG_X30
70 #define TCG_VEC_TMP TCG_REG_V31
71
72 #ifndef CONFIG_SOFTMMU
73 /* Note that XZR cannot be encoded in the address base register slot,
74 as that actually encodes SP. So if we need to zero-extend the guest
75 address, via the address index register slot, we need to load even
76 a zero guest base into a register. */
77 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
78 #define TCG_REG_GUEST_BASE TCG_REG_X28
79 #endif
80
81 static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
82 {
83 ptrdiff_t offset = target - code_ptr;
84 if (offset == sextract64(offset, 0, 26)) {
85 /* read instruction, mask away previous PC_REL26 parameter contents,
86 set the proper offset, then write back the instruction. */
87 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
88 return true;
89 }
90 return false;
91 }
92
93 static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
94 {
95 ptrdiff_t offset = target - code_ptr;
96 if (offset == sextract64(offset, 0, 19)) {
97 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
98 return true;
99 }
100 return false;
101 }
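/* Range note: with 4-byte tcg_insn_units, the 26-bit offset above reaches
   roughly +-128 MiB from the branch and the 19-bit offset roughly +-1 MiB,
   since both are counted in instructions rather than bytes. */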
102
103 static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
104 intptr_t value, intptr_t addend)
105 {
106 tcg_debug_assert(addend == 0);
107 switch (type) {
108 case R_AARCH64_JUMP26:
109 case R_AARCH64_CALL26:
110 return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
111 case R_AARCH64_CONDBR19:
112 return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
113 default:
114 g_assert_not_reached();
115 }
116 }
117
118 #define TCG_CT_CONST_AIMM 0x100
119 #define TCG_CT_CONST_LIMM 0x200
120 #define TCG_CT_CONST_ZERO 0x400
121 #define TCG_CT_CONST_MONE 0x800
122 #define TCG_CT_CONST_ORRI 0x1000
123 #define TCG_CT_CONST_ANDI 0x2000
124
125 /* parse target specific constraints */
126 static const char *target_parse_constraint(TCGArgConstraint *ct,
127 const char *ct_str, TCGType type)
128 {
129 switch (*ct_str++) {
130 case 'r': /* general registers */
131 ct->ct |= TCG_CT_REG;
132 ct->u.regs |= 0xffffffffu;
133 break;
134 case 'w': /* advsimd registers */
135 ct->ct |= TCG_CT_REG;
136 ct->u.regs |= 0xffffffff00000000ull;
137 break;
138 case 'l': /* qemu_ld / qemu_st address, data_reg */
139 ct->ct |= TCG_CT_REG;
140 ct->u.regs = 0xffffffffu;
141 #ifdef CONFIG_SOFTMMU
142 /* x0 and x1 will be overwritten when reading the tlb entry,
143 and x2 and x3 for helper args; better to avoid using them. */
144 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
145 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
146 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
147 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
148 #endif
149 break;
150 case 'A': /* Valid for arithmetic immediate (positive or negative). */
151 ct->ct |= TCG_CT_CONST_AIMM;
152 break;
153 case 'L': /* Valid for logical immediate. */
154 ct->ct |= TCG_CT_CONST_LIMM;
155 break;
156 case 'M': /* minus one */
157 ct->ct |= TCG_CT_CONST_MONE;
158 break;
159 case 'O': /* vector orr/bic immediate */
160 ct->ct |= TCG_CT_CONST_ORRI;
161 break;
162 case 'N': /* vector orr/bic immediate, inverted */
163 ct->ct |= TCG_CT_CONST_ANDI;
164 break;
165 case 'Z': /* zero */
166 ct->ct |= TCG_CT_CONST_ZERO;
167 break;
168 default:
169 return NULL;
170 }
171 return ct_str;
172 }
173
174 /* Match a constant valid for addition (12-bit, optionally shifted). */
175 static inline bool is_aimm(uint64_t val)
176 {
177 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
178 }
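/* E.g. 0x123 and 0x123000 are both valid aimm values (the latter via
   LSL #12), while 0x123456 is not, since it has bits set in both the
   low and the shifted 12-bit fields. */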
179
180 /* Match a constant valid for logical operations. */
181 static inline bool is_limm(uint64_t val)
182 {
183 /* Taking a simplified view of the logical immediates for now, ignoring
184 the replication that can happen across the field. Match bit patterns
185 of the forms
186 0....01....1
187 0..01..10..0
188 and their inverses. */
189
190 /* Make things easier below, by testing the form with msb clear. */
191 if ((int64_t)val < 0) {
192 val = ~val;
193 }
194 if (val == 0) {
195 return false;
196 }
197 val += val & -val;
198 return (val & (val - 1)) == 0;
199 }
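/* Worked example of the test above: 0x0ff0 has its msb clear; adding its
   lowest set bit (0x10) yields 0x1000, a power of two, so it matches.
   0x0f0f fails: 0x0f0f + 1 = 0x0f10, which still has several bits set. */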
200
201 /* Return true if v16 is a valid 16-bit shifted immediate. */
202 static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
203 {
204 if (v16 == (v16 & 0xff)) {
205 *cmode = 0x8;
206 *imm8 = v16 & 0xff;
207 return true;
208 } else if (v16 == (v16 & 0xff00)) {
209 *cmode = 0xa;
210 *imm8 = v16 >> 8;
211 return true;
212 }
213 return false;
214 }
215
216 /* Return true if v32 is a valid 32-bit shifted immediate. */
217 static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
218 {
219 if (v32 == (v32 & 0xff)) {
220 *cmode = 0x0;
221 *imm8 = v32 & 0xff;
222 return true;
223 } else if (v32 == (v32 & 0xff00)) {
224 *cmode = 0x2;
225 *imm8 = (v32 >> 8) & 0xff;
226 return true;
227 } else if (v32 == (v32 & 0xff0000)) {
228 *cmode = 0x4;
229 *imm8 = (v32 >> 16) & 0xff;
230 return true;
231 } else if (v32 == (v32 & 0xff000000)) {
232 *cmode = 0x6;
233 *imm8 = v32 >> 24;
234 return true;
235 }
236 return false;
237 }
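/* E.g. v32 == 0x0000ab00 matches with cmode 0x2 and imm8 0xab,
   i.e. the byte shifted left by 8. */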
238
239 /* Return true if v32 is a valid 32-bit shifting ones immediate. */
240 static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
241 {
242 if ((v32 & 0xffff00ff) == 0xff) {
243 *cmode = 0xc;
244 *imm8 = (v32 >> 8) & 0xff;
245 return true;
246 } else if ((v32 & 0xff00ffff) == 0xffff) {
247 *cmode = 0xd;
248 *imm8 = (v32 >> 16) & 0xff;
249 return true;
250 }
251 return false;
252 }
253
254 /* Return true if v32 is a valid float32 immediate. */
255 static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
256 {
257 if (extract32(v32, 0, 19) == 0
258 && (extract32(v32, 25, 6) == 0x20
259 || extract32(v32, 25, 6) == 0x1f)) {
260 *cmode = 0xf;
261 *imm8 = (extract32(v32, 31, 1) << 7)
262 | (extract32(v32, 25, 1) << 6)
263 | extract32(v32, 19, 6);
264 return true;
265 }
266 return false;
267 }
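/* E.g. 1.0f == 0x3f800000 qualifies: the low 19 bits are zero and
   bits [30:25] are 0x1f, giving imm8 == 0x70. */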
268
269 /* Return true if v64 is a valid float64 immediate. */
270 static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
271 {
272 if (extract64(v64, 0, 48) == 0
273 && (extract64(v64, 54, 9) == 0x100
274 || extract64(v64, 54, 9) == 0x0ff)) {
275 *cmode = 0xf;
276 *imm8 = (extract64(v64, 63, 1) << 7)
277 | (extract64(v64, 54, 1) << 6)
278 | extract64(v64, 48, 6);
279 return true;
280 }
281 return false;
282 }
283
284 /*
285 * Return non-zero if v32 can be formed by MOVI+ORR.
286 * Place the parameters for MOVI in (cmode, imm8).
287 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
288 */
289 static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
290 {
291 int i;
292
293 for (i = 6; i > 0; i -= 2) {
294 /* Mask out one byte we can add with ORR. */
295 uint32_t tmp = v32 & ~(0xffu << (i * 4));
296 if (is_shimm32(tmp, cmode, imm8) ||
297 is_soimm32(tmp, cmode, imm8)) {
298 break;
299 }
300 }
301 return i;
302 }
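/* E.g. v32 == 0x00ab00cd: masking out the byte at bits [23:16] (i == 4)
   leaves 0x000000cd, which is a valid MOVI (cmode 0x0, imm8 0xcd);
   the function returns 4, and the caller ORRs in 0xab with cmode 4. */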
303
304 /* Return true if V is a valid 16-bit or 32-bit shifted immediate. */
305 static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
306 {
307 if (v32 == deposit32(v32, 16, 16, v32)) {
308 return is_shimm16(v32, cmode, imm8);
309 } else {
310 return is_shimm32(v32, cmode, imm8);
311 }
312 }
313
314 static int tcg_target_const_match(tcg_target_long val, TCGType type,
315 const TCGArgConstraint *arg_ct)
316 {
317 int ct = arg_ct->ct;
318
319 if (ct & TCG_CT_CONST) {
320 return 1;
321 }
322 if (type == TCG_TYPE_I32) {
323 val = (int32_t)val;
324 }
325 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
326 return 1;
327 }
328 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
329 return 1;
330 }
331 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
332 return 1;
333 }
334 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
335 return 1;
336 }
337
338 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
339 case 0:
340 break;
341 case TCG_CT_CONST_ANDI:
342 val = ~val;
343 /* fallthru */
344 case TCG_CT_CONST_ORRI:
345 if (val == deposit64(val, 32, 32, val)) {
346 int cmode, imm8;
347 return is_shimm1632(val, &cmode, &imm8);
348 }
349 break;
350 default:
351 /* Both bits should not be set for the same insn. */
352 g_assert_not_reached();
353 }
354
355 return 0;
356 }
357
358 enum aarch64_cond_code {
359 COND_EQ = 0x0,
360 COND_NE = 0x1,
361 COND_CS = 0x2, /* Unsigned greater or equal */
362 COND_HS = COND_CS, /* ALIAS greater or equal */
363 COND_CC = 0x3, /* Unsigned less than */
364 COND_LO = COND_CC, /* ALIAS Lower */
365 COND_MI = 0x4, /* Negative */
366 COND_PL = 0x5, /* Zero or greater */
367 COND_VS = 0x6, /* Overflow */
368 COND_VC = 0x7, /* No overflow */
369 COND_HI = 0x8, /* Unsigned greater than */
370 COND_LS = 0x9, /* Unsigned less or equal */
371 COND_GE = 0xa,
372 COND_LT = 0xb,
373 COND_GT = 0xc,
374 COND_LE = 0xd,
375 COND_AL = 0xe,
376 COND_NV = 0xf, /* behaves like COND_AL here */
377 };
378
379 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
380 [TCG_COND_EQ] = COND_EQ,
381 [TCG_COND_NE] = COND_NE,
382 [TCG_COND_LT] = COND_LT,
383 [TCG_COND_GE] = COND_GE,
384 [TCG_COND_LE] = COND_LE,
385 [TCG_COND_GT] = COND_GT,
386 /* unsigned */
387 [TCG_COND_LTU] = COND_LO,
388 [TCG_COND_GTU] = COND_HI,
389 [TCG_COND_GEU] = COND_HS,
390 [TCG_COND_LEU] = COND_LS,
391 };
392
393 typedef enum {
394 LDST_ST = 0, /* store */
395 LDST_LD = 1, /* load */
396 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
397 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
398 } AArch64LdstType;
399
400 /* We encode the format of the insn into the beginning of the name, so that
401 we can have the preprocessor help "typecheck" the insn vs the output
402 function. Arm didn't provide us with nice names for the formats, so we
403 use the section number of the architecture reference manual in which the
404 instruction group is described. */
405 typedef enum {
406 /* Compare and branch (immediate). */
407 I3201_CBZ = 0x34000000,
408 I3201_CBNZ = 0x35000000,
409
410 /* Conditional branch (immediate). */
411 I3202_B_C = 0x54000000,
412
413 /* Unconditional branch (immediate). */
414 I3206_B = 0x14000000,
415 I3206_BL = 0x94000000,
416
417 /* Unconditional branch (register). */
418 I3207_BR = 0xd61f0000,
419 I3207_BLR = 0xd63f0000,
420 I3207_RET = 0xd65f0000,
421
422 /* AdvSIMD load/store single structure. */
423 I3303_LD1R = 0x0d40c000,
424
425 /* Load literal for loading the address at pc-relative offset */
426 I3305_LDR = 0x58000000,
427 I3305_LDR_v64 = 0x5c000000,
428 I3305_LDR_v128 = 0x9c000000,
429
430 /* Load/store register. Described here as 3.3.12, but the helper
431 that emits them can transform to 3.3.10 or 3.3.13. */
432 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
433 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
434 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
435 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
436
437 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
438 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
439 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
440 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
441
442 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
443 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
444
445 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
446 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
447 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
448
449 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
450 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
451
452 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
453 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
454
455 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
456 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
457
458 I3312_TO_I3310 = 0x00200800,
459 I3312_TO_I3313 = 0x01000000,
460
461 /* Load/store register pair instructions. */
462 I3314_LDP = 0x28400000,
463 I3314_STP = 0x28000000,
464
465 /* Add/subtract immediate instructions. */
466 I3401_ADDI = 0x11000000,
467 I3401_ADDSI = 0x31000000,
468 I3401_SUBI = 0x51000000,
469 I3401_SUBSI = 0x71000000,
470
471 /* Bitfield instructions. */
472 I3402_BFM = 0x33000000,
473 I3402_SBFM = 0x13000000,
474 I3402_UBFM = 0x53000000,
475
476 /* Extract instruction. */
477 I3403_EXTR = 0x13800000,
478
479 /* Logical immediate instructions. */
480 I3404_ANDI = 0x12000000,
481 I3404_ORRI = 0x32000000,
482 I3404_EORI = 0x52000000,
483
484 /* Move wide immediate instructions. */
485 I3405_MOVN = 0x12800000,
486 I3405_MOVZ = 0x52800000,
487 I3405_MOVK = 0x72800000,
488
489 /* PC relative addressing instructions. */
490 I3406_ADR = 0x10000000,
491 I3406_ADRP = 0x90000000,
492
493 /* Add/subtract shifted register instructions (without a shift). */
494 I3502_ADD = 0x0b000000,
495 I3502_ADDS = 0x2b000000,
496 I3502_SUB = 0x4b000000,
497 I3502_SUBS = 0x6b000000,
498
499 /* Add/subtract shifted register instructions (with a shift). */
500 I3502S_ADD_LSL = I3502_ADD,
501
502 /* Add/subtract with carry instructions. */
503 I3503_ADC = 0x1a000000,
504 I3503_SBC = 0x5a000000,
505
506 /* Conditional select instructions. */
507 I3506_CSEL = 0x1a800000,
508 I3506_CSINC = 0x1a800400,
509 I3506_CSINV = 0x5a800000,
510 I3506_CSNEG = 0x5a800400,
511
512 /* Data-processing (1 source) instructions. */
513 I3507_CLZ = 0x5ac01000,
514 I3507_RBIT = 0x5ac00000,
515 I3507_REV16 = 0x5ac00400,
516 I3507_REV32 = 0x5ac00800,
517 I3507_REV64 = 0x5ac00c00,
518
519 /* Data-processing (2 source) instructions. */
520 I3508_LSLV = 0x1ac02000,
521 I3508_LSRV = 0x1ac02400,
522 I3508_ASRV = 0x1ac02800,
523 I3508_RORV = 0x1ac02c00,
524 I3508_SMULH = 0x9b407c00,
525 I3508_UMULH = 0x9bc07c00,
526 I3508_UDIV = 0x1ac00800,
527 I3508_SDIV = 0x1ac00c00,
528
529 /* Data-processing (3 source) instructions. */
530 I3509_MADD = 0x1b000000,
531 I3509_MSUB = 0x1b008000,
532
533 /* Logical shifted register instructions (without a shift). */
534 I3510_AND = 0x0a000000,
535 I3510_BIC = 0x0a200000,
536 I3510_ORR = 0x2a000000,
537 I3510_ORN = 0x2a200000,
538 I3510_EOR = 0x4a000000,
539 I3510_EON = 0x4a200000,
540 I3510_ANDS = 0x6a000000,
541
542 /* Logical shifted register instructions (with a shift). */
543 I3502S_AND_LSR = I3510_AND | (1 << 22),
544
545 /* AdvSIMD copy */
546 I3605_DUP = 0x0e000400,
547 I3605_INS = 0x4e001c00,
548 I3605_UMOV = 0x0e003c00,
549
550 /* AdvSIMD modified immediate */
551 I3606_MOVI = 0x0f000400,
552 I3606_MVNI = 0x2f000400,
553 I3606_BIC = 0x2f001400,
554 I3606_ORR = 0x0f001400,
555
556 /* AdvSIMD shift by immediate */
557 I3614_SSHR = 0x0f000400,
558 I3614_SSRA = 0x0f001400,
559 I3614_SHL = 0x0f005400,
560 I3614_USHR = 0x2f000400,
561 I3614_USRA = 0x2f001400,
562
563 /* AdvSIMD three same. */
564 I3616_ADD = 0x0e208400,
565 I3616_AND = 0x0e201c00,
566 I3616_BIC = 0x0e601c00,
567 I3616_BIF = 0x2ee01c00,
568 I3616_BIT = 0x2ea01c00,
569 I3616_BSL = 0x2e601c00,
570 I3616_EOR = 0x2e201c00,
571 I3616_MUL = 0x0e209c00,
572 I3616_ORR = 0x0ea01c00,
573 I3616_ORN = 0x0ee01c00,
574 I3616_SUB = 0x2e208400,
575 I3616_CMGT = 0x0e203400,
576 I3616_CMGE = 0x0e203c00,
577 I3616_CMTST = 0x0e208c00,
578 I3616_CMHI = 0x2e203400,
579 I3616_CMHS = 0x2e203c00,
580 I3616_CMEQ = 0x2e208c00,
581 I3616_SMAX = 0x0e206400,
582 I3616_SMIN = 0x0e206c00,
583 I3616_SSHL = 0x0e204400,
584 I3616_SQADD = 0x0e200c00,
585 I3616_SQSUB = 0x0e202c00,
586 I3616_UMAX = 0x2e206400,
587 I3616_UMIN = 0x2e206c00,
588 I3616_UQADD = 0x2e200c00,
589 I3616_UQSUB = 0x2e202c00,
590 I3616_USHL = 0x2e204400,
591
592 /* AdvSIMD two-reg misc. */
593 I3617_CMGT0 = 0x0e208800,
594 I3617_CMEQ0 = 0x0e209800,
595 I3617_CMLT0 = 0x0e20a800,
596 I3617_CMGE0 = 0x2e208800,
597 I3617_CMLE0 = 0x2e20a800,
598 I3617_NOT = 0x2e205800,
599 I3617_ABS = 0x0e20b800,
600 I3617_NEG = 0x2e20b800,
601
602 /* System instructions. */
603 NOP = 0xd503201f,
604 DMB_ISH = 0xd50338bf,
605 DMB_LD = 0x00000100,
606 DMB_ST = 0x00000200,
607 } AArch64Insn;
608
609 static inline uint32_t tcg_in32(TCGContext *s)
610 {
611 uint32_t v = *(uint32_t *)s->code_ptr;
612 return v;
613 }
614
615 /* Emit an opcode with "type-checking" of the format. */
616 #define tcg_out_insn(S, FMT, OP, ...) \
617 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
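/* E.g. tcg_out_insn(s, 3401, ADDI, ...) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ...), so using an opcode with the
   wrong format fails to compile. */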
618
619 static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
620 TCGReg rt, TCGReg rn, unsigned size)
621 {
622 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
623 }
624
625 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
626 int imm19, TCGReg rt)
627 {
628 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
629 }
630
631 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
632 TCGReg rt, int imm19)
633 {
634 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
635 }
636
637 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
638 TCGCond c, int imm19)
639 {
640 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
641 }
642
643 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
644 {
645 tcg_out32(s, insn | (imm26 & 0x03ffffff));
646 }
647
648 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
649 {
650 tcg_out32(s, insn | rn << 5);
651 }
652
653 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
654 TCGReg r1, TCGReg r2, TCGReg rn,
655 tcg_target_long ofs, bool pre, bool w)
656 {
657 insn |= 1u << 31; /* ext */
658 insn |= pre << 24;
659 insn |= w << 23;
660
661 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
662 insn |= (ofs & (0x7f << 3)) << (15 - 3);
663
664 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
665 }
666
667 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
668 TCGReg rd, TCGReg rn, uint64_t aimm)
669 {
670 if (aimm > 0xfff) {
671 tcg_debug_assert((aimm & 0xfff) == 0);
672 aimm >>= 12;
673 tcg_debug_assert(aimm <= 0xfff);
674 aimm |= 1 << 12; /* apply LSL 12 */
675 }
676 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
677 }
678
679 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
680 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
681 that feed the DecodeBitMasks pseudo function. */
682 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
683 TCGReg rd, TCGReg rn, int n, int immr, int imms)
684 {
685 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
686 | rn << 5 | rd);
687 }
688
689 #define tcg_out_insn_3404 tcg_out_insn_3402
690
691 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
692 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
693 {
694 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
695 | rn << 5 | rd);
696 }
697
698 /* This function is used for the Move (wide immediate) instruction group.
699 Note that SHIFT is a full shift count, not the 2-bit HW field. */
700 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
701 TCGReg rd, uint16_t half, unsigned shift)
702 {
703 tcg_debug_assert((shift & ~0x30) == 0);
704 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
705 }
706
707 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
708 TCGReg rd, int64_t disp)
709 {
710 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
711 }
712
713 /* This function is for 3.5.2 (Add/subtract shifted register), for
714 the rare occasion when we actually want to supply a shift amount. */
715 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
716 TCGType ext, TCGReg rd, TCGReg rn,
717 TCGReg rm, int imm6)
718 {
719 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
720 }
721
722 /* This function is for 3.5.2 (Add/subtract shifted register),
723 and 3.5.10 (Logical shifted register), for the vast majority of cases
724 when we don't want to apply a shift. Thus it can also be used for
725 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
726 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
727 TCGReg rd, TCGReg rn, TCGReg rm)
728 {
729 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
730 }
731
732 #define tcg_out_insn_3503 tcg_out_insn_3502
733 #define tcg_out_insn_3508 tcg_out_insn_3502
734 #define tcg_out_insn_3510 tcg_out_insn_3502
735
736 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
737 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
738 {
739 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
740 | tcg_cond_to_aarch64[c] << 12);
741 }
742
743 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
744 TCGReg rd, TCGReg rn)
745 {
746 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
747 }
748
749 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
750 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
751 {
752 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
753 }
754
755 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
756 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
757 {
758 /* Note that bit 11 set means general register input. Therefore
759 we can handle both register sets with one function. */
760 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
761 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
762 }
763
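/* In the modified-immediate format emitted here, the 8-bit immediate is
   split across the instruction: bits abc land in [18:16] and defgh
   in [9:5]. */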
764 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
765 TCGReg rd, bool op, int cmode, uint8_t imm8)
766 {
767 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
768 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
769 }
770
771 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
772 TCGReg rd, TCGReg rn, unsigned immhb)
773 {
774 tcg_out32(s, insn | q << 30 | immhb << 16
775 | (rn & 0x1f) << 5 | (rd & 0x1f));
776 }
777
778 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
779 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
780 {
781 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
782 | (rn & 0x1f) << 5 | (rd & 0x1f));
783 }
784
785 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
786 unsigned size, TCGReg rd, TCGReg rn)
787 {
788 tcg_out32(s, insn | q << 30 | (size << 22)
789 | (rn & 0x1f) << 5 | (rd & 0x1f));
790 }
791
792 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
793 TCGReg rd, TCGReg base, TCGType ext,
794 TCGReg regoff)
795 {
796 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
797 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
798 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
799 }
800
801 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
802 TCGReg rd, TCGReg rn, intptr_t offset)
803 {
804 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
805 }
806
807 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
808 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
809 {
810 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
811 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
812 | rn << 5 | (rd & 0x1f));
813 }
814
815 /* Register to register move using ORR (shifted register with no shift). */
816 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
817 {
818 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
819 }
820
821 /* Register to register move using ADDI (move to/from SP). */
822 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
823 {
824 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
825 }
826
827 /* This function is used for the Logical (immediate) instruction group.
828 The value of LIMM must satisfy IS_LIMM. See the comment above about
829 only supporting simplified logical immediates. */
830 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
831 TCGReg rd, TCGReg rn, uint64_t limm)
832 {
833 unsigned h, l, r, c;
834
835 tcg_debug_assert(is_limm(limm));
836
837 h = clz64(limm);
838 l = ctz64(limm);
839 if (l == 0) {
840 r = 0; /* form 0....01....1 */
841 c = ctz64(~limm) - 1;
842 if (h == 0) {
843 r = clz64(~limm); /* form 1..10..01..1 */
844 c += r;
845 }
846 } else {
847 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
848 c = r - h - 1;
849 }
850 if (ext == TCG_TYPE_I32) {
851 r &= 31;
852 c &= 31;
853 }
854
855 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
856 }
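/* Worked example: limm == 0x0000000000ff0000 gives h == 40, l == 16,
   hence r == 48 and c == 7, i.e. an 8-bit run of ones rotated into
   bits [23:16] by DecodeBitMasks. */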
857
858 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
859 TCGReg rd, tcg_target_long v64)
860 {
861 bool q = type == TCG_TYPE_V128;
862 int cmode, imm8, i;
863
864 /* Test all bytes equal first. */
865 if (v64 == dup_const(MO_8, v64)) {
866 imm8 = (uint8_t)v64;
867 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
868 return;
869 }
870
871 /*
872 * Test all bytes 0x00 or 0xff second. This can match cases that
873 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
874 */
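/* E.g. v64 == 0x00ff0000ff0000ff sets imm8 bits 0, 3 and 6 (0x49),
   one bit per 0xff byte, counting from the least significant byte. */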
875 for (i = imm8 = 0; i < 8; i++) {
876 uint8_t byte = v64 >> (i * 8);
877 if (byte == 0xff) {
878 imm8 |= 1 << i;
879 } else if (byte != 0) {
880 goto fail_bytes;
881 }
882 }
883 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
884 return;
885 fail_bytes:
886
887 /*
888 * Tests for various replications. For each element width, if we
889 * cannot find an expansion there's no point checking a larger
890 * width because we already know by replication it cannot match.
891 */
892 if (v64 == dup_const(MO_16, v64)) {
893 uint16_t v16 = v64;
894
895 if (is_shimm16(v16, &cmode, &imm8)) {
896 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
897 return;
898 }
899 if (is_shimm16(~v16, &cmode, &imm8)) {
900 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
901 return;
902 }
903
904 /*
905 * Otherwise, all remaining constants can be loaded in two insns:
906 * rd = v16 & 0xff, rd |= v16 & 0xff00.
907 */
908 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
909 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
910 return;
911 } else if (v64 == dup_const(MO_32, v64)) {
912 uint32_t v32 = v64;
913 uint32_t n32 = ~v32;
914
915 if (is_shimm32(v32, &cmode, &imm8) ||
916 is_soimm32(v32, &cmode, &imm8) ||
917 is_fimm32(v32, &cmode, &imm8)) {
918 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
919 return;
920 }
921 if (is_shimm32(n32, &cmode, &imm8) ||
922 is_soimm32(n32, &cmode, &imm8)) {
923 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
924 return;
925 }
926
927 /*
928 * Restrict the set of constants to those we can load with
929 * two instructions. Others we load from the pool.
930 */
931 i = is_shimm32_pair(v32, &cmode, &imm8);
932 if (i) {
933 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
934 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
935 return;
936 }
937 i = is_shimm32_pair(n32, &cmode, &imm8);
938 if (i) {
939 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
940 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
941 return;
942 }
943 } else if (is_fimm64(v64, &cmode, &imm8)) {
944 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
945 return;
946 }
947
948 /*
949 * As a last resort, load from the constant pool. Sadly there
950 * is no LD1R (literal), so store the full 16-byte vector.
951 */
952 if (type == TCG_TYPE_V128) {
953 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
954 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
955 } else {
956 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
957 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
958 }
959 }
960
961 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
962 TCGReg rd, TCGReg rs)
963 {
964 int is_q = type - TCG_TYPE_V64;
965 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
966 return true;
967 }
968
969 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
970 TCGReg r, TCGReg base, intptr_t offset)
971 {
972 TCGReg temp = TCG_REG_TMP;
973
974 if (offset < -0xffffff || offset > 0xffffff) {
975 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
976 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
977 base = temp;
978 } else {
979 AArch64Insn add_insn = I3401_ADDI;
980
981 if (offset < 0) {
982 add_insn = I3401_SUBI;
983 offset = -offset;
984 }
985 if (offset & 0xfff000) {
986 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
987 base = temp;
988 }
989 if (offset & 0xfff) {
990 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
991 base = temp;
992 }
993 }
994 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
995 return true;
996 }
997
998 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
999 tcg_target_long value)
1000 {
1001 tcg_target_long svalue = value;
1002 tcg_target_long ivalue = ~value;
1003 tcg_target_long t0, t1, t2;
1004 int s0, s1;
1005 AArch64Insn opc;
1006
1007 switch (type) {
1008 case TCG_TYPE_I32:
1009 case TCG_TYPE_I64:
1010 tcg_debug_assert(rd < 32);
1011 break;
1012
1013 case TCG_TYPE_V64:
1014 case TCG_TYPE_V128:
1015 tcg_debug_assert(rd >= 32);
1016 tcg_out_dupi_vec(s, type, rd, value);
1017 return;
1018
1019 default:
1020 g_assert_not_reached();
1021 }
1022
1023 /* For 32-bit values, discard potential garbage in value. For 64-bit
1024 values within [2**31, 2**32-1], we can create smaller sequences by
1025 interpreting this as a negative 32-bit number, while ensuring that
1026 the high 32 bits are cleared by setting SF=0. */
1027 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1028 svalue = (int32_t)value;
1029 value = (uint32_t)value;
1030 ivalue = (uint32_t)ivalue;
1031 type = TCG_TYPE_I32;
1032 }
1033
1034 /* Speed things up by handling the common case of small positive
1035 and negative values specially. */
1036 if ((value & ~0xffffull) == 0) {
1037 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1038 return;
1039 } else if ((ivalue & ~0xffffull) == 0) {
1040 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1041 return;
1042 }
1043
1044 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
1045 use the sign-extended value. That lets us match rotated values such
1046 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1047 if (is_limm(svalue)) {
1048 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1049 return;
1050 }
1051
1052 /* Look for host pointer values within 4G of the PC. This happens
1053 often when loading pointers to QEMU's own data structures. */
1054 if (type == TCG_TYPE_I64) {
1055 tcg_target_long disp = value - (intptr_t)s->code_ptr;
1056 if (disp == sextract64(disp, 0, 21)) {
1057 tcg_out_insn(s, 3406, ADR, rd, disp);
1058 return;
1059 }
1060 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
1061 if (disp == sextract64(disp, 0, 21)) {
1062 tcg_out_insn(s, 3406, ADRP, rd, disp);
1063 if (value & 0xfff) {
1064 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1065 }
1066 return;
1067 }
1068 }
1069
1070 /* Would it take fewer insns to begin with MOVN? */
1071 if (ctpop64(value) >= 32) {
1072 t0 = ivalue;
1073 opc = I3405_MOVN;
1074 } else {
1075 t0 = value;
1076 opc = I3405_MOVZ;
1077 }
1078 s0 = ctz64(t0) & (63 & -16);
1079 t1 = t0 & ~(0xffffUL << s0);
1080 s1 = ctz64(t1) & (63 & -16);
1081 t2 = t1 & ~(0xffffUL << s1);
1082 if (t2 == 0) {
1083 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1084 if (t1 != 0) {
1085 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1086 }
1087 return;
1088 }
1089
1090 /* For more than 2 insns, dump it into the constant pool. */
1091 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1092 tcg_out_insn(s, 3305, LDR, 0, rd);
1093 }
1094
1095 /* Define something more legible for general use. */
1096 #define tcg_out_ldst_r tcg_out_insn_3310
1097
1098 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1099 TCGReg rn, intptr_t offset, int lgsize)
1100 {
1101 /* If the offset is naturally aligned and in range, then we can
1102 use the scaled uimm12 encoding */
1103 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1104 uintptr_t scaled_uimm = offset >> lgsize;
1105 if (scaled_uimm <= 0xfff) {
1106 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1107 return;
1108 }
1109 }
1110
1111 /* Small signed offsets can use the unscaled encoding. */
1112 if (offset >= -256 && offset < 256) {
1113 tcg_out_insn_3312(s, insn, rd, rn, offset);
1114 return;
1115 }
1116
1117 /* Worst-case scenario, move offset to temp register, use reg offset. */
1118 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1119 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1120 }
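/* E.g. an 8-byte load at offset 0x7ff8 uses the scaled form
   (0x7ff8 >> 3 == 0xfff), offset -8 uses the unscaled form, and
   offset 0x8000 is first moved into TCG_REG_TMP. */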
1121
1122 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1123 {
1124 if (ret == arg) {
1125 return true;
1126 }
1127 switch (type) {
1128 case TCG_TYPE_I32:
1129 case TCG_TYPE_I64:
1130 if (ret < 32 && arg < 32) {
1131 tcg_out_movr(s, type, ret, arg);
1132 break;
1133 } else if (ret < 32) {
1134 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1135 break;
1136 } else if (arg < 32) {
1137 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1138 break;
1139 }
1140 /* FALLTHRU */
1141
1142 case TCG_TYPE_V64:
1143 tcg_debug_assert(ret >= 32 && arg >= 32);
1144 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1145 break;
1146 case TCG_TYPE_V128:
1147 tcg_debug_assert(ret >= 32 && arg >= 32);
1148 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1149 break;
1150
1151 default:
1152 g_assert_not_reached();
1153 }
1154 return true;
1155 }
1156
1157 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1158 TCGReg base, intptr_t ofs)
1159 {
1160 AArch64Insn insn;
1161 int lgsz;
1162
1163 switch (type) {
1164 case TCG_TYPE_I32:
1165 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1166 lgsz = 2;
1167 break;
1168 case TCG_TYPE_I64:
1169 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1170 lgsz = 3;
1171 break;
1172 case TCG_TYPE_V64:
1173 insn = I3312_LDRVD;
1174 lgsz = 3;
1175 break;
1176 case TCG_TYPE_V128:
1177 insn = I3312_LDRVQ;
1178 lgsz = 4;
1179 break;
1180 default:
1181 g_assert_not_reached();
1182 }
1183 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1184 }
1185
1186 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1187 TCGReg base, intptr_t ofs)
1188 {
1189 AArch64Insn insn;
1190 int lgsz;
1191
1192 switch (type) {
1193 case TCG_TYPE_I32:
1194 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1195 lgsz = 2;
1196 break;
1197 case TCG_TYPE_I64:
1198 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1199 lgsz = 3;
1200 break;
1201 case TCG_TYPE_V64:
1202 insn = I3312_STRVD;
1203 lgsz = 3;
1204 break;
1205 case TCG_TYPE_V128:
1206 insn = I3312_STRVQ;
1207 lgsz = 4;
1208 break;
1209 default:
1210 g_assert_not_reached();
1211 }
1212 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1213 }
1214
1215 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1216 TCGReg base, intptr_t ofs)
1217 {
1218 if (type <= TCG_TYPE_I64 && val == 0) {
1219 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1220 return true;
1221 }
1222 return false;
1223 }
1224
1225 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1226 TCGReg rn, unsigned int a, unsigned int b)
1227 {
1228 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1229 }
1230
1231 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1232 TCGReg rn, unsigned int a, unsigned int b)
1233 {
1234 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1235 }
1236
1237 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1238 TCGReg rn, unsigned int a, unsigned int b)
1239 {
1240 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1241 }
1242
1243 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1244 TCGReg rn, TCGReg rm, unsigned int a)
1245 {
1246 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1247 }
1248
1249 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1250 TCGReg rd, TCGReg rn, unsigned int m)
1251 {
1252 int bits = ext ? 64 : 32;
1253 int max = bits - 1;
1254 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1255 }
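/* E.g. a 32-bit shift left by 3 becomes UBFM Wd, Wn, #29, #28,
   which is exactly the LSL #3 alias. */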
1256
1257 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1258 TCGReg rd, TCGReg rn, unsigned int m)
1259 {
1260 int max = ext ? 63 : 31;
1261 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1262 }
1263
1264 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1265 TCGReg rd, TCGReg rn, unsigned int m)
1266 {
1267 int max = ext ? 63 : 31;
1268 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1269 }
1270
1271 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1272 TCGReg rd, TCGReg rn, unsigned int m)
1273 {
1274 int max = ext ? 63 : 31;
1275 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1276 }
1277
1278 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1279 TCGReg rd, TCGReg rn, unsigned int m)
1280 {
1281 int bits = ext ? 64 : 32;
1282 int max = bits - 1;
1283 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1284 }
1285
1286 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1287 TCGReg rn, unsigned lsb, unsigned width)
1288 {
1289 unsigned size = ext ? 64 : 32;
1290 unsigned a = (size - lsb) & (size - 1);
1291 unsigned b = width - 1;
1292 tcg_out_bfm(s, ext, rd, rn, a, b);
1293 }
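/* E.g. a 32-bit deposit at lsb 8, width 8 becomes BFM Wd, Wn, #24, #7,
   i.e. the BFI Wd, Wn, #8, #8 alias. */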
1294
1295 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1296 tcg_target_long b, bool const_b)
1297 {
1298 if (const_b) {
1299 /* Using CMP or CMN aliases. */
1300 if (b >= 0) {
1301 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1302 } else {
1303 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1304 }
1305 } else {
1306 /* Using CMP alias SUBS wzr, Wn, Wm */
1307 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1308 }
1309 }
1310
1311 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
1312 {
1313 ptrdiff_t offset = target - s->code_ptr;
1314 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1315 tcg_out_insn(s, 3206, B, offset);
1316 }
1317
1318 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
1319 {
1320 ptrdiff_t offset = target - s->code_ptr;
1321 if (offset == sextract64(offset, 0, 26)) {
1322 tcg_out_insn(s, 3206, BL, offset);
1323 } else {
1324 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1325 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1326 }
1327 }
1328
1329 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1330 {
1331 tcg_out_insn(s, 3207, BLR, reg);
1332 }
1333
1334 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1335 {
1336 ptrdiff_t offset = target - s->code_ptr;
1337 if (offset == sextract64(offset, 0, 26)) {
1338 tcg_out_insn(s, 3206, BL, offset);
1339 } else {
1340 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1341 tcg_out_callr(s, TCG_REG_TMP);
1342 }
1343 }
1344
1345 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1346 uintptr_t addr)
1347 {
1348 tcg_insn_unit i1, i2;
1349 TCGType rt = TCG_TYPE_I64;
1350 TCGReg rd = TCG_REG_TMP;
1351 uint64_t pair;
1352
1353 ptrdiff_t offset = addr - jmp_addr;
1354
1355 if (offset == sextract64(offset, 0, 26)) {
1356 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1357 i2 = NOP;
1358 } else {
1359 offset = (addr >> 12) - (jmp_addr >> 12);
1360
1361 /* patch ADRP */
1362 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1363 /* patch ADDI */
1364 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1365 }
1366 pair = (uint64_t)i2 << 32 | i1;
1367 atomic_set((uint64_t *)jmp_addr, pair);
1368 flush_icache_range(jmp_addr, jmp_addr + 8);
1369 }
1370
1371 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1372 {
1373 if (!l->has_value) {
1374 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1375 tcg_out_insn(s, 3206, B, 0);
1376 } else {
1377 tcg_out_goto(s, l->u.value_ptr);
1378 }
1379 }
1380
1381 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1382 TCGArg b, bool b_const, TCGLabel *l)
1383 {
1384 intptr_t offset;
1385 bool need_cmp;
1386
1387 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1388 need_cmp = false;
1389 } else {
1390 need_cmp = true;
1391 tcg_out_cmp(s, ext, a, b, b_const);
1392 }
1393
1394 if (!l->has_value) {
1395 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1396 offset = tcg_in32(s) >> 5;
1397 } else {
1398 offset = l->u.value_ptr - s->code_ptr;
1399 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1400 }
1401
1402 if (need_cmp) {
1403 tcg_out_insn(s, 3202, B_C, c, offset);
1404 } else if (c == TCG_COND_EQ) {
1405 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1406 } else {
1407 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1408 }
1409 }
1410
1411 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1412 {
1413 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1414 }
1415
1416 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1417 {
1418 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1419 }
1420
1421 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1422 {
1423 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1424 }
1425
1426 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
1427 TCGReg rd, TCGReg rn)
1428 {
1429 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1430 int bits = (8 << s_bits) - 1;
1431 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1432 }
1433
1434 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
1435 TCGReg rd, TCGReg rn)
1436 {
1437 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1438 int bits = (8 << s_bits) - 1;
1439 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1440 }
1441
1442 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1443 TCGReg rn, int64_t aimm)
1444 {
1445 if (aimm >= 0) {
1446 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1447 } else {
1448 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1449 }
1450 }
1451
1452 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1453 TCGReg rh, TCGReg al, TCGReg ah,
1454 tcg_target_long bl, tcg_target_long bh,
1455 bool const_bl, bool const_bh, bool sub)
1456 {
1457 TCGReg orig_rl = rl;
1458 AArch64Insn insn;
1459
1460 if (rl == ah || (!const_bh && rl == bh)) {
1461 rl = TCG_REG_TMP;
1462 }
1463
1464 if (const_bl) {
1465 insn = I3401_ADDSI;
1466 if ((bl < 0) ^ sub) {
1467 insn = I3401_SUBSI;
1468 bl = -bl;
1469 }
1470 if (unlikely(al == TCG_REG_XZR)) {
1471 /* ??? We want to allow al to be zero for the benefit of
1472 negation via subtraction. However, that leaves open the
1473 possibility of adding 0+const in the low part, and the
1474 immediate add instructions encode XSP not XZR. Don't try
1475 anything more elaborate here than loading another zero. */
1476 al = TCG_REG_TMP;
1477 tcg_out_movi(s, ext, al, 0);
1478 }
1479 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1480 } else {
1481 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1482 }
1483
1484 insn = I3503_ADC;
1485 if (const_bh) {
1486 /* Note that the only two constants we support are 0 and -1, and
1487 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1488 if ((bh != 0) ^ sub) {
1489 insn = I3503_SBC;
1490 }
1491 bh = TCG_REG_XZR;
1492 } else if (sub) {
1493 insn = I3503_SBC;
1494 }
1495 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1496
1497 tcg_out_mov(s, ext, orig_rl, rl);
1498 }
1499
1500 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1501 {
1502 static const uint32_t sync[] = {
1503 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1504 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1505 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1506 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1507 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1508 };
1509 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1510 }
1511
1512 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1513 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1514 {
1515 TCGReg a1 = a0;
1516 if (is_ctz) {
1517 a1 = TCG_REG_TMP;
1518 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1519 }
1520 if (const_b && b == (ext ? 64 : 32)) {
1521 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1522 } else {
1523 AArch64Insn sel = I3506_CSEL;
1524
1525 tcg_out_cmp(s, ext, a0, 0, 1);
1526 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1527
1528 if (const_b) {
1529 if (b == -1) {
1530 b = TCG_REG_XZR;
1531 sel = I3506_CSINV;
1532 } else if (b == 0) {
1533 b = TCG_REG_XZR;
1534 } else {
1535 tcg_out_movi(s, ext, d, b);
1536 b = d;
1537 }
1538 }
1539 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1540 }
1541 }
1542
1543 #ifdef CONFIG_SOFTMMU
1544 #include "tcg-ldst.inc.c"
1545
1546 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1547 * TCGMemOpIdx oi, uintptr_t ra)
1548 */
1549 static void * const qemu_ld_helpers[16] = {
1550 [MO_UB] = helper_ret_ldub_mmu,
1551 [MO_LEUW] = helper_le_lduw_mmu,
1552 [MO_LEUL] = helper_le_ldul_mmu,
1553 [MO_LEQ] = helper_le_ldq_mmu,
1554 [MO_BEUW] = helper_be_lduw_mmu,
1555 [MO_BEUL] = helper_be_ldul_mmu,
1556 [MO_BEQ] = helper_be_ldq_mmu,
1557 };
1558
1559 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1560 * uintxx_t val, TCGMemOpIdx oi,
1561 * uintptr_t ra)
1562 */
1563 static void * const qemu_st_helpers[16] = {
1564 [MO_UB] = helper_ret_stb_mmu,
1565 [MO_LEUW] = helper_le_stw_mmu,
1566 [MO_LEUL] = helper_le_stl_mmu,
1567 [MO_LEQ] = helper_le_stq_mmu,
1568 [MO_BEUW] = helper_be_stw_mmu,
1569 [MO_BEUL] = helper_be_stl_mmu,
1570 [MO_BEQ] = helper_be_stq_mmu,
1571 };
1572
1573 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1574 {
1575 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1576 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1577 tcg_out_insn(s, 3406, ADR, rd, offset);
1578 }
1579
1580 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1581 {
1582 TCGMemOpIdx oi = lb->oi;
1583 TCGMemOp opc = get_memop(oi);
1584 TCGMemOp size = opc & MO_SIZE;
1585
1586 if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1587 return false;
1588 }
1589
1590 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1591 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1592 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1593 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1594 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1595 if (opc & MO_SIGN) {
1596 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1597 } else {
1598 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1599 }
1600
1601 tcg_out_goto(s, lb->raddr);
1602 return true;
1603 }
1604
1605 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1606 {
1607 TCGMemOpIdx oi = lb->oi;
1608 TCGMemOp opc = get_memop(oi);
1609 TCGMemOp size = opc & MO_SIZE;
1610
1611 if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1612 return false;
1613 }
1614
1615 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1616 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1617 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1618 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1619 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1620 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1621 tcg_out_goto(s, lb->raddr);
1622 return true;
1623 }
1624
1625 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1626 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1627 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1628 {
1629 TCGLabelQemuLdst *label = new_ldst_label(s);
1630
1631 label->is_ld = is_ld;
1632 label->oi = oi;
1633 label->type = ext;
1634 label->datalo_reg = data_reg;
1635 label->addrlo_reg = addr_reg;
1636 label->raddr = raddr;
1637 label->label_ptr[0] = label_ptr;
1638 }
1639
1640 /* We expect tlb_mask to be before tlb_table. */
1641 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
1642 offsetof(CPUArchState, tlb_mask));
1643
1644 /* We expect to use a 24-bit unsigned offset from ENV. */
1645 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
1646 > 0xffffff);
1647
1648 /* Load and compare a TLB entry, emitting the conditional jump to the
1649 slow path for the failure case, which will be patched later when finalizing
1650 the slow path. Generated code returns the host addend in X1,
1651 clobbers X0,X2,X3,TMP. */
1652 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1653 tcg_insn_unit **label_ptr, int mem_index,
1654 bool is_read)
1655 {
1656 int mask_ofs = offsetof(CPUArchState, tlb_mask[mem_index]);
1657 int table_ofs = offsetof(CPUArchState, tlb_table[mem_index]);
1658 unsigned a_bits = get_alignment_bits(opc);
1659 unsigned s_bits = opc & MO_SIZE;
1660 unsigned a_mask = (1u << a_bits) - 1;
1661 unsigned s_mask = (1u << s_bits) - 1;
1662 TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0, x3;
1663 TCGType mask_type;
1664 uint64_t compare_mask;
1665
1666 if (table_ofs > 0xfff) {
1667 int table_hi = table_ofs & ~0xfff;
1668 int mask_hi = mask_ofs & ~0xfff;
1669
1670 table_base = TCG_REG_X1;
1671 if (mask_hi == table_hi) {
1672 mask_base = table_base;
1673 } else if (mask_hi) {
1674 mask_base = TCG_REG_X0;
1675 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
1676 mask_base, TCG_AREG0, mask_hi);
1677 }
1678 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
1679 table_base, TCG_AREG0, table_hi);
1680 mask_ofs -= mask_hi;
1681 table_ofs -= table_hi;
1682 }
1683
1684 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1685 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1686
1687 /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
1688 tcg_out_ld(s, mask_type, TCG_REG_X0, mask_base, mask_ofs);
1689 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, table_base, table_ofs);
1690
1691 /* Extract the TLB index from the address into X0. */
1692 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1693 TCG_REG_X0, TCG_REG_X0, addr_reg,
1694 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1695
1696 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
1697 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1698
1699 /* Load the tlb comparator into X0, and the fast path addend into X1. */
1700 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1701 ? offsetof(CPUTLBEntry, addr_read)
1702 : offsetof(CPUTLBEntry, addr_write));
1703 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1704 offsetof(CPUTLBEntry, addend));
1705
1706 /* For aligned accesses, we check the first byte and include the alignment
1707 bits within the address. For unaligned access, we check that we don't
1708 cross pages using the address of the last byte of the access. */
1709 if (a_bits >= s_bits) {
1710 x3 = addr_reg;
1711 } else {
1712 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1713 TCG_REG_X3, addr_reg, s_mask - a_mask);
1714 x3 = TCG_REG_X3;
1715 }
1716 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
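/* E.g. for a 64-bit guest with 4 KiB pages and a 4-byte access that
   must be 4-byte aligned (a_bits == 2), compare_mask is
   0xfffffffffffff003; a misaligned address leaves low bits set and
   forces the comparison below to fail, taking the slow path. */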
1717
1718 /* Store the page mask part of the address into X3. */
1719 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1720 TCG_REG_X3, x3, compare_mask);
1721
1722 /* Perform the address comparison. */
1723 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1724
1725 /* If not equal, we jump to the slow path. */
1726 *label_ptr = s->code_ptr;
1727 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1728 }
1729
1730 #endif /* CONFIG_SOFTMMU */
1731
1732 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1733 TCGReg data_r, TCGReg addr_r,
1734 TCGType otype, TCGReg off_r)
1735 {
1736 const TCGMemOp bswap = memop & MO_BSWAP;
1737
1738 switch (memop & MO_SSIZE) {
1739 case MO_UB:
1740 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1741 break;
1742 case MO_SB:
1743 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1744 data_r, addr_r, otype, off_r);
1745 break;
1746 case MO_UW:
1747 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1748 if (bswap) {
1749 tcg_out_rev16(s, data_r, data_r);
1750 }
1751 break;
1752 case MO_SW:
1753 if (bswap) {
1754 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1755 tcg_out_rev16(s, data_r, data_r);
1756 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1757 } else {
1758 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1759 data_r, addr_r, otype, off_r);
1760 }
1761 break;
1762 case MO_UL:
1763 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1764 if (bswap) {
1765 tcg_out_rev32(s, data_r, data_r);
1766 }
1767 break;
1768 case MO_SL:
1769 if (bswap) {
1770 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1771 tcg_out_rev32(s, data_r, data_r);
1772 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1773 } else {
1774 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1775 }
1776 break;
1777 case MO_Q:
1778 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1779 if (bswap) {
1780 tcg_out_rev64(s, data_r, data_r);
1781 }
1782 break;
1783 default:
1784 tcg_abort();
1785 }
1786 }
1787
1788 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1789 TCGReg data_r, TCGReg addr_r,
1790 TCGType otype, TCGReg off_r)
1791 {
1792 const TCGMemOp bswap = memop & MO_BSWAP;
1793
1794 switch (memop & MO_SIZE) {
1795 case MO_8:
1796 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1797 break;
1798 case MO_16:
1799 if (bswap && data_r != TCG_REG_XZR) {
1800 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1801 data_r = TCG_REG_TMP;
1802 }
1803 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1804 break;
1805 case MO_32:
1806 if (bswap && data_r != TCG_REG_XZR) {
1807 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1808 data_r = TCG_REG_TMP;
1809 }
1810 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1811 break;
1812 case MO_64:
1813 if (bswap && data_r != TCG_REG_XZR) {
1814 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1815 data_r = TCG_REG_TMP;
1816 }
1817 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1818 break;
1819 default:
1820 tcg_abort();
1821 }
1822 }
1823
1824 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1825 TCGMemOpIdx oi, TCGType ext)
1826 {
1827 TCGMemOp memop = get_memop(oi);
1828 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1829 #ifdef CONFIG_SOFTMMU
1830 unsigned mem_index = get_mmuidx(oi);
1831 tcg_insn_unit *label_ptr;
1832
1833 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1834 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1835 TCG_REG_X1, otype, addr_reg);
1836 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1837 s->code_ptr, label_ptr);
1838 #else /* !CONFIG_SOFTMMU */
1839 if (USE_GUEST_BASE) {
1840 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1841 TCG_REG_GUEST_BASE, otype, addr_reg);
1842 } else {
1843 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1844 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1845 }
1846 #endif /* CONFIG_SOFTMMU */
1847 }
1848
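/* Emit a full guest store; the structure mirrors tcg_out_qemu_ld above. */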
1849 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1850 TCGMemOpIdx oi)
1851 {
1852 TCGMemOp memop = get_memop(oi);
1853 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1854 #ifdef CONFIG_SOFTMMU
1855 unsigned mem_index = get_mmuidx(oi);
1856 tcg_insn_unit *label_ptr;
1857
1858 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1859 tcg_out_qemu_st_direct(s, memop, data_reg,
1860 TCG_REG_X1, otype, addr_reg);
1861 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,

1862 data_reg, addr_reg, s->code_ptr, label_ptr);
1863 #else /* !CONFIG_SOFTMMU */
1864 if (USE_GUEST_BASE) {
1865 tcg_out_qemu_st_direct(s, memop, data_reg,
1866 TCG_REG_GUEST_BASE, otype, addr_reg);
1867 } else {
1868 tcg_out_qemu_st_direct(s, memop, data_reg,
1869 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1870 }
1871 #endif /* CONFIG_SOFTMMU */
1872 }
1873
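/* Entry point of the TB epilogue, set up in tcg_target_qemu_prologue;
   exit_tb with a non-zero argument branches here after loading X0. */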
1874 static tcg_insn_unit *tb_ret_addr;
1875
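/* Emit host code for one scalar TCG opcode.  Most integer operations share
   a single encoder for the 32- and 64-bit variants, selected by 'ext'. */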
1876 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1877 const TCGArg args[TCG_MAX_OP_ARGS],
1878 const int const_args[TCG_MAX_OP_ARGS])
1879 {
1880 /* 99% of the time, we can signal the use of extension registers
1881 by looking to see if the opcode handles 64-bit data. */
1882 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1883
1884 /* Hoist the loads of the most common arguments. */
1885 TCGArg a0 = args[0];
1886 TCGArg a1 = args[1];
1887 TCGArg a2 = args[2];
1888 int c2 = const_args[2];
1889
1890 /* Some operands are defined with the "rZ" constraint: a register or
1891 the zero register.  REG0 need not test args[I] == 0; const_args[I]
already tells us the operand is the constant zero. */
1892 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1893
1894 switch (opc) {
1895 case INDEX_op_exit_tb:
1896 /* Reuse the zeroing that exists for goto_ptr. */
1897 if (a0 == 0) {
1898 tcg_out_goto_long(s, s->code_gen_epilogue);
1899 } else {
1900 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1901 tcg_out_goto_long(s, tb_ret_addr);
1902 }
1903 break;
1904
1905 case INDEX_op_goto_tb:
1906 if (s->tb_jmp_insn_offset != NULL) {
1907 /* TCG_TARGET_HAS_direct_jump */
1908 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1909 write can be used to patch the target address. */
1910 if ((uintptr_t)s->code_ptr & 7) {
1911 tcg_out32(s, NOP);
1912 }
1913 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1914 /* The actual branch destination will be patched later by
1915 tb_target_set_jmp_target. */
1916 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1917 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1918 } else {
1919 /* !TCG_TARGET_HAS_direct_jump */
1920 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1921 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1922 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1923 }
1924 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1925 set_jmp_reset_offset(s, a0);
1926 break;
1927
1928 case INDEX_op_goto_ptr:
1929 tcg_out_insn(s, 3207, BR, a0);
1930 break;
1931
1932 case INDEX_op_br:
1933 tcg_out_goto_label(s, arg_label(a0));
1934 break;
1935
1936 case INDEX_op_ld8u_i32:
1937 case INDEX_op_ld8u_i64:
1938 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1939 break;
1940 case INDEX_op_ld8s_i32:
1941 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1942 break;
1943 case INDEX_op_ld8s_i64:
1944 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1945 break;
1946 case INDEX_op_ld16u_i32:
1947 case INDEX_op_ld16u_i64:
1948 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1949 break;
1950 case INDEX_op_ld16s_i32:
1951 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1952 break;
1953 case INDEX_op_ld16s_i64:
1954 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1955 break;
1956 case INDEX_op_ld_i32:
1957 case INDEX_op_ld32u_i64:
1958 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1959 break;
1960 case INDEX_op_ld32s_i64:
1961 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1962 break;
1963 case INDEX_op_ld_i64:
1964 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1965 break;
1966
1967 case INDEX_op_st8_i32:
1968 case INDEX_op_st8_i64:
1969 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1970 break;
1971 case INDEX_op_st16_i32:
1972 case INDEX_op_st16_i64:
1973 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1974 break;
1975 case INDEX_op_st_i32:
1976 case INDEX_op_st32_i64:
1977 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1978 break;
1979 case INDEX_op_st_i64:
1980 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1981 break;
1982
1983 case INDEX_op_add_i32:
1984 a2 = (int32_t)a2;
1985 /* FALLTHRU */
1986 case INDEX_op_add_i64:
1987 if (c2) {
1988 tcg_out_addsubi(s, ext, a0, a1, a2);
1989 } else {
1990 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1991 }
1992 break;
1993
1994 case INDEX_op_sub_i32:
1995 a2 = (int32_t)a2;
1996 /* FALLTHRU */
1997 case INDEX_op_sub_i64:
1998 if (c2) {
1999 tcg_out_addsubi(s, ext, a0, a1, -a2);
2000 } else {
2001 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2002 }
2003 break;
2004
2005 case INDEX_op_neg_i64:
2006 case INDEX_op_neg_i32:
2007 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2008 break;
2009
2010 case INDEX_op_and_i32:
2011 a2 = (int32_t)a2;
2012 /* FALLTHRU */
2013 case INDEX_op_and_i64:
2014 if (c2) {
2015 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2016 } else {
2017 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2018 }
2019 break;
2020
2021 case INDEX_op_andc_i32:
2022 a2 = (int32_t)a2;
2023 /* FALLTHRU */
2024 case INDEX_op_andc_i64:
2025 if (c2) {
2026 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2027 } else {
2028 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2029 }
2030 break;
2031
2032 case INDEX_op_or_i32:
2033 a2 = (int32_t)a2;
2034 /* FALLTHRU */
2035 case INDEX_op_or_i64:
2036 if (c2) {
2037 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2038 } else {
2039 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2040 }
2041 break;
2042
2043 case INDEX_op_orc_i32:
2044 a2 = (int32_t)a2;
2045 /* FALLTHRU */
2046 case INDEX_op_orc_i64:
2047 if (c2) {
2048 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2049 } else {
2050 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2051 }
2052 break;
2053
2054 case INDEX_op_xor_i32:
2055 a2 = (int32_t)a2;
2056 /* FALLTHRU */
2057 case INDEX_op_xor_i64:
2058 if (c2) {
2059 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2060 } else {
2061 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2062 }
2063 break;
2064
2065 case INDEX_op_eqv_i32:
2066 a2 = (int32_t)a2;
2067 /* FALLTHRU */
2068 case INDEX_op_eqv_i64:
2069 if (c2) {
2070 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2071 } else {
2072 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2073 }
2074 break;
2075
2076 case INDEX_op_not_i64:
2077 case INDEX_op_not_i32:
2078 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2079 break;
2080
2081 case INDEX_op_mul_i64:
2082 case INDEX_op_mul_i32:
2083 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2084 break;
2085
2086 case INDEX_op_div_i64:
2087 case INDEX_op_div_i32:
2088 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2089 break;
2090 case INDEX_op_divu_i64:
2091 case INDEX_op_divu_i32:
2092 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2093 break;
2094
2095 case INDEX_op_rem_i64:
2096 case INDEX_op_rem_i32:
2097 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2098 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2099 break;
2100 case INDEX_op_remu_i64:
2101 case INDEX_op_remu_i32:
2102 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2103 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2104 break;
2105
2106 case INDEX_op_shl_i64:
2107 case INDEX_op_shl_i32:
2108 if (c2) {
2109 tcg_out_shl(s, ext, a0, a1, a2);
2110 } else {
2111 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2112 }
2113 break;
2114
2115 case INDEX_op_shr_i64:
2116 case INDEX_op_shr_i32:
2117 if (c2) {
2118 tcg_out_shr(s, ext, a0, a1, a2);
2119 } else {
2120 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2121 }
2122 break;
2123
2124 case INDEX_op_sar_i64:
2125 case INDEX_op_sar_i32:
2126 if (c2) {
2127 tcg_out_sar(s, ext, a0, a1, a2);
2128 } else {
2129 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2130 }
2131 break;
2132
2133 case INDEX_op_rotr_i64:
2134 case INDEX_op_rotr_i32:
2135 if (c2) {
2136 tcg_out_rotr(s, ext, a0, a1, a2);
2137 } else {
2138 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2139 }
2140 break;
2141
2142 case INDEX_op_rotl_i64:
2143 case INDEX_op_rotl_i32:
2144 if (c2) {
2145 tcg_out_rotl(s, ext, a0, a1, a2);
2146 } else {
2147 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2148 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2149 }
2150 break;
2151
2152 case INDEX_op_clz_i64:
2153 case INDEX_op_clz_i32:
2154 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2155 break;
2156 case INDEX_op_ctz_i64:
2157 case INDEX_op_ctz_i32:
2158 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2159 break;
2160
2161 case INDEX_op_brcond_i32:
2162 a1 = (int32_t)a1;
2163 /* FALLTHRU */
2164 case INDEX_op_brcond_i64:
2165 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2166 break;
2167
2168 case INDEX_op_setcond_i32:
2169 a2 = (int32_t)a2;
2170 /* FALLTHRU */
2171 case INDEX_op_setcond_i64:
2172 tcg_out_cmp(s, ext, a1, a2, c2);
2173 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
2174 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2175 TCG_REG_XZR, tcg_invert_cond(args[3]));
2176 break;
2177
2178 case INDEX_op_movcond_i32:
2179 a2 = (int32_t)a2;
2180 /* FALLTHRU */
2181 case INDEX_op_movcond_i64:
2182 tcg_out_cmp(s, ext, a1, a2, c2);
2183 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2184 break;
2185
2186 case INDEX_op_qemu_ld_i32:
2187 case INDEX_op_qemu_ld_i64:
2188 tcg_out_qemu_ld(s, a0, a1, a2, ext);
2189 break;
2190 case INDEX_op_qemu_st_i32:
2191 case INDEX_op_qemu_st_i64:
2192 tcg_out_qemu_st(s, REG0(0), a1, a2);
2193 break;
2194
2195 case INDEX_op_bswap64_i64:
2196 tcg_out_rev64(s, a0, a1);
2197 break;
2198 case INDEX_op_bswap32_i64:
2199 case INDEX_op_bswap32_i32:
2200 tcg_out_rev32(s, a0, a1);
2201 break;
2202 case INDEX_op_bswap16_i64:
2203 case INDEX_op_bswap16_i32:
2204 tcg_out_rev16(s, a0, a1);
2205 break;
2206
2207 case INDEX_op_ext8s_i64:
2208 case INDEX_op_ext8s_i32:
2209 tcg_out_sxt(s, ext, MO_8, a0, a1);
2210 break;
2211 case INDEX_op_ext16s_i64:
2212 case INDEX_op_ext16s_i32:
2213 tcg_out_sxt(s, ext, MO_16, a0, a1);
2214 break;
2215 case INDEX_op_ext_i32_i64:
2216 case INDEX_op_ext32s_i64:
2217 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2218 break;
2219 case INDEX_op_ext8u_i64:
2220 case INDEX_op_ext8u_i32:
2221 tcg_out_uxt(s, MO_8, a0, a1);
2222 break;
2223 case INDEX_op_ext16u_i64:
2224 case INDEX_op_ext16u_i32:
2225 tcg_out_uxt(s, MO_16, a0, a1);
2226 break;
2227 case INDEX_op_extu_i32_i64:
2228 case INDEX_op_ext32u_i64:
2229 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2230 break;
2231
2232 case INDEX_op_deposit_i64:
2233 case INDEX_op_deposit_i32:
2234 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2235 break;
2236
2237 case INDEX_op_extract_i64:
2238 case INDEX_op_extract_i32:
2239 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2240 break;
2241
2242 case INDEX_op_sextract_i64:
2243 case INDEX_op_sextract_i32:
2244 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2245 break;
2246
2247 case INDEX_op_extract2_i64:
2248 case INDEX_op_extract2_i32:
2249 tcg_out_extr(s, ext, a0, a1, a2, args[3]);
2250 break;
2251
2252 case INDEX_op_add2_i32:
2253 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2254 (int32_t)args[4], args[5], const_args[4],
2255 const_args[5], false);
2256 break;
2257 case INDEX_op_add2_i64:
2258 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2259 args[5], const_args[4], const_args[5], false);
2260 break;
2261 case INDEX_op_sub2_i32:
2262 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2263 (int32_t)args[4], args[5], const_args[4],
2264 const_args[5], true);
2265 break;
2266 case INDEX_op_sub2_i64:
2267 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2268 args[5], const_args[4], const_args[5], true);
2269 break;
2270
2271 case INDEX_op_muluh_i64:
2272 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2273 break;
2274 case INDEX_op_mulsh_i64:
2275 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2276 break;
2277
2278 case INDEX_op_mb:
2279 tcg_out_mb(s, a0);
2280 break;
2281
2282 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2283 case INDEX_op_mov_i64:
2284 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2285 case INDEX_op_movi_i64:
2286 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2287 default:
2288 g_assert_not_reached();
2289 }
2290
2291 #undef REG0
2292 }
2293
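/* Emit host code for one vector TCG opcode using the AdvSIMD encodings;
   vecl selects the 64- vs 128-bit form (the Q bit) and vece gives the
   log2 of the element size. */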
2294 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2295 unsigned vecl, unsigned vece,
2296 const TCGArg *args, const int *const_args)
2297 {
2298 static const AArch64Insn cmp_insn[16] = {
2299 [TCG_COND_EQ] = I3616_CMEQ,
2300 [TCG_COND_GT] = I3616_CMGT,
2301 [TCG_COND_GE] = I3616_CMGE,
2302 [TCG_COND_GTU] = I3616_CMHI,
2303 [TCG_COND_GEU] = I3616_CMHS,
2304 };
2305 static const AArch64Insn cmp0_insn[16] = {
2306 [TCG_COND_EQ] = I3617_CMEQ0,
2307 [TCG_COND_GT] = I3617_CMGT0,
2308 [TCG_COND_GE] = I3617_CMGE0,
2309 [TCG_COND_LT] = I3617_CMLT0,
2310 [TCG_COND_LE] = I3617_CMLE0,
2311 };
2312
2313 TCGType type = vecl + TCG_TYPE_V64;
2314 unsigned is_q = vecl;
2315 TCGArg a0, a1, a2, a3;
2316 int cmode, imm8;
2317
2318 a0 = args[0];
2319 a1 = args[1];
2320 a2 = args[2];
2321
2322 switch (opc) {
2323 case INDEX_op_ld_vec:
2324 tcg_out_ld(s, type, a0, a1, a2);
2325 break;
2326 case INDEX_op_st_vec:
2327 tcg_out_st(s, type, a0, a1, a2);
2328 break;
2329 case INDEX_op_dupm_vec:
2330 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2331 break;
2332 case INDEX_op_add_vec:
2333 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2334 break;
2335 case INDEX_op_sub_vec:
2336 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2337 break;
2338 case INDEX_op_mul_vec:
2339 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2340 break;
2341 case INDEX_op_neg_vec:
2342 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2343 break;
2344 case INDEX_op_abs_vec:
2345 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2346 break;
2347 case INDEX_op_and_vec:
2348 if (const_args[2]) {
2349 is_shimm1632(~a2, &cmode, &imm8);
2350 if (a0 == a1) {
2351 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2352 return;
2353 }
2354 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2355 a2 = a0;
2356 }
2357 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2358 break;
2359 case INDEX_op_or_vec:
2360 if (const_args[2]) {
2361 is_shimm1632(a2, &cmode, &imm8);
2362 if (a0 == a1) {
2363 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2364 return;
2365 }
2366 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2367 a2 = a0;
2368 }
2369 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2370 break;
2371 case INDEX_op_andc_vec:
2372 if (const_args[2]) {
2373 is_shimm1632(a2, &cmode, &imm8);
2374 if (a0 == a1) {
2375 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2376 return;
2377 }
2378 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2379 a2 = a0;
2380 }
2381 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2382 break;
2383 case INDEX_op_orc_vec:
2384 if (const_args[2]) {
2385 is_shimm1632(~a2, &cmode, &imm8);
2386 if (a0 == a1) {
2387 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2388 return;
2389 }
2390 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2391 a2 = a0;
2392 }
2393 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2394 break;
2395 case INDEX_op_xor_vec:
2396 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2397 break;
2398 case INDEX_op_ssadd_vec:
2399 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2400 break;
2401 case INDEX_op_sssub_vec:
2402 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2403 break;
2404 case INDEX_op_usadd_vec:
2405 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2406 break;
2407 case INDEX_op_ussub_vec:
2408 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2409 break;
2410 case INDEX_op_smax_vec:
2411 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2412 break;
2413 case INDEX_op_smin_vec:
2414 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2415 break;
2416 case INDEX_op_umax_vec:
2417 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2418 break;
2419 case INDEX_op_umin_vec:
2420 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2421 break;
2422 case INDEX_op_not_vec:
2423 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2424 break;
2425 case INDEX_op_shli_vec:
2426 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2427 break;
2428 case INDEX_op_shri_vec:
2429 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2430 break;
2431 case INDEX_op_sari_vec:
2432 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2433 break;
2434 case INDEX_op_shlv_vec:
2435 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2436 break;
2437 case INDEX_op_aa64_sshl_vec:
2438 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2439 break;
2440 case INDEX_op_cmp_vec:
2441 {
2442 TCGCond cond = args[3];
2443 AArch64Insn insn;
2444
2445 if (cond == TCG_COND_NE) {
2446 if (const_args[2]) {
2447 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2448 } else {
2449 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2450 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2451 }
2452 } else {
2453 if (const_args[2]) {
2454 insn = cmp0_insn[cond];
2455 if (insn) {
2456 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2457 break;
2458 }
2459 tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2460 a2 = TCG_VEC_TMP;
2461 }
2462 insn = cmp_insn[cond];
2463 if (insn == 0) {
2464 TCGArg t;
2465 t = a1, a1 = a2, a2 = t;
2466 cond = tcg_swap_cond(cond);
2467 insn = cmp_insn[cond];
2468 tcg_debug_assert(insn != 0);
2469 }
2470 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2471 }
2472 }
2473 break;
2474
2475 case INDEX_op_bitsel_vec:
2476 a3 = args[3];
2477 if (a0 == a3) {
2478 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2479 } else if (a0 == a2) {
2480 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2481 } else {
2482 if (a0 != a1) {
2483 tcg_out_mov(s, type, a0, a1);
2484 }
2485 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2486 }
2487 break;
2488
2489 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
2490 case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
2491 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
2492 default:
2493 g_assert_not_reached();
2494 }
2495 }
2496
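/* Report how a vector opcode can be handled for a given element size:
   1 means emitted directly, -1 means supported via tcg_expand_vec_op,
   0 means unsupported. */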
2497 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2498 {
2499 switch (opc) {
2500 case INDEX_op_add_vec:
2501 case INDEX_op_sub_vec:
2502 case INDEX_op_and_vec:
2503 case INDEX_op_or_vec:
2504 case INDEX_op_xor_vec:
2505 case INDEX_op_andc_vec:
2506 case INDEX_op_orc_vec:
2507 case INDEX_op_neg_vec:
2508 case INDEX_op_abs_vec:
2509 case INDEX_op_not_vec:
2510 case INDEX_op_cmp_vec:
2511 case INDEX_op_shli_vec:
2512 case INDEX_op_shri_vec:
2513 case INDEX_op_sari_vec:
2514 case INDEX_op_ssadd_vec:
2515 case INDEX_op_sssub_vec:
2516 case INDEX_op_usadd_vec:
2517 case INDEX_op_ussub_vec:
2518 case INDEX_op_shlv_vec:
2519 case INDEX_op_bitsel_vec:
2520 return 1;
2521 case INDEX_op_shrv_vec:
2522 case INDEX_op_sarv_vec:
2523 return -1;
2524 case INDEX_op_mul_vec:
2525 case INDEX_op_smax_vec:
2526 case INDEX_op_smin_vec:
2527 case INDEX_op_umax_vec:
2528 case INDEX_op_umin_vec:
2529 return vece < MO_64;
2530
2531 default:
2532 return 0;
2533 }
2534 }
2535
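/* Expand the vector opcodes for which tcg_can_emit_vec_op returned -1.
   Only the variable right shifts need this: they are rewritten as left
   shifts by a negated count, since USHL/SSHL shift right for negative
   shift amounts. */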
2536 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2537 TCGArg a0, ...)
2538 {
2539 va_list va;
2540 TCGv_vec v0, v1, v2, t1;
2541
2542 va_start(va, a0);
2543 v0 = temp_tcgv_vec(arg_temp(a0));
2544 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2545 v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2546
2547 switch (opc) {
2548 case INDEX_op_shrv_vec:
2549 case INDEX_op_sarv_vec:
2550 /* Right shifts are negative left shifts for AArch64. */
2551 t1 = tcg_temp_new_vec(type);
2552 tcg_gen_neg_vec(vece, t1, v2);
2553 opc = (opc == INDEX_op_shrv_vec
2554 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2555 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2556 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2557 tcg_temp_free_vec(t1);
2558 break;
2559
2560 default:
2561 g_assert_not_reached();
2562 }
2563
2564 va_end(va);
2565 }
2566
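/* Map each opcode to its operand constraints.  'r' is a general register,
   'w' a vector register, 'l' a register usable as a qemu_ld/st address,
   and 'Z' additionally admits the constant zero; the other capital letters
   select the immediate classes accepted by target_parse_constraint
   (e.g. 'A' arithmetic and 'L' logical immediates). */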
2567 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2568 {
2569 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2570 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2571 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2572 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2573 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2574 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2575 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2576 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2577 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2578 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2579 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2580 static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
2581 static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
2582 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2583 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2584 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2585 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2586 static const TCGTargetOpDef r_r_rAL
2587 = { .args_ct_str = { "r", "r", "rAL" } };
2588 static const TCGTargetOpDef dep
2589 = { .args_ct_str = { "r", "0", "rZ" } };
2590 static const TCGTargetOpDef ext2
2591 = { .args_ct_str = { "r", "rZ", "rZ" } };
2592 static const TCGTargetOpDef movc
2593 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2594 static const TCGTargetOpDef add2
2595 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2596 static const TCGTargetOpDef w_w_w_w
2597 = { .args_ct_str = { "w", "w", "w", "w" } };
2598
2599 switch (op) {
2600 case INDEX_op_goto_ptr:
2601 return &r;
2602
2603 case INDEX_op_ld8u_i32:
2604 case INDEX_op_ld8s_i32:
2605 case INDEX_op_ld16u_i32:
2606 case INDEX_op_ld16s_i32:
2607 case INDEX_op_ld_i32:
2608 case INDEX_op_ld8u_i64:
2609 case INDEX_op_ld8s_i64:
2610 case INDEX_op_ld16u_i64:
2611 case INDEX_op_ld16s_i64:
2612 case INDEX_op_ld32u_i64:
2613 case INDEX_op_ld32s_i64:
2614 case INDEX_op_ld_i64:
2615 case INDEX_op_neg_i32:
2616 case INDEX_op_neg_i64:
2617 case INDEX_op_not_i32:
2618 case INDEX_op_not_i64:
2619 case INDEX_op_bswap16_i32:
2620 case INDEX_op_bswap32_i32:
2621 case INDEX_op_bswap16_i64:
2622 case INDEX_op_bswap32_i64:
2623 case INDEX_op_bswap64_i64:
2624 case INDEX_op_ext8s_i32:
2625 case INDEX_op_ext16s_i32:
2626 case INDEX_op_ext8u_i32:
2627 case INDEX_op_ext16u_i32:
2628 case INDEX_op_ext8s_i64:
2629 case INDEX_op_ext16s_i64:
2630 case INDEX_op_ext32s_i64:
2631 case INDEX_op_ext8u_i64:
2632 case INDEX_op_ext16u_i64:
2633 case INDEX_op_ext32u_i64:
2634 case INDEX_op_ext_i32_i64:
2635 case INDEX_op_extu_i32_i64:
2636 case INDEX_op_extract_i32:
2637 case INDEX_op_extract_i64:
2638 case INDEX_op_sextract_i32:
2639 case INDEX_op_sextract_i64:
2640 return &r_r;
2641
2642 case INDEX_op_st8_i32:
2643 case INDEX_op_st16_i32:
2644 case INDEX_op_st_i32:
2645 case INDEX_op_st8_i64:
2646 case INDEX_op_st16_i64:
2647 case INDEX_op_st32_i64:
2648 case INDEX_op_st_i64:
2649 return &rZ_r;
2650
2651 case INDEX_op_add_i32:
2652 case INDEX_op_add_i64:
2653 case INDEX_op_sub_i32:
2654 case INDEX_op_sub_i64:
2655 case INDEX_op_setcond_i32:
2656 case INDEX_op_setcond_i64:
2657 return &r_r_rA;
2658
2659 case INDEX_op_mul_i32:
2660 case INDEX_op_mul_i64:
2661 case INDEX_op_div_i32:
2662 case INDEX_op_div_i64:
2663 case INDEX_op_divu_i32:
2664 case INDEX_op_divu_i64:
2665 case INDEX_op_rem_i32:
2666 case INDEX_op_rem_i64:
2667 case INDEX_op_remu_i32:
2668 case INDEX_op_remu_i64:
2669 case INDEX_op_muluh_i64:
2670 case INDEX_op_mulsh_i64:
2671 return &r_r_r;
2672
2673 case INDEX_op_and_i32:
2674 case INDEX_op_and_i64:
2675 case INDEX_op_or_i32:
2676 case INDEX_op_or_i64:
2677 case INDEX_op_xor_i32:
2678 case INDEX_op_xor_i64:
2679 case INDEX_op_andc_i32:
2680 case INDEX_op_andc_i64:
2681 case INDEX_op_orc_i32:
2682 case INDEX_op_orc_i64:
2683 case INDEX_op_eqv_i32:
2684 case INDEX_op_eqv_i64:
2685 return &r_r_rL;
2686
2687 case INDEX_op_shl_i32:
2688 case INDEX_op_shr_i32:
2689 case INDEX_op_sar_i32:
2690 case INDEX_op_rotl_i32:
2691 case INDEX_op_rotr_i32:
2692 case INDEX_op_shl_i64:
2693 case INDEX_op_shr_i64:
2694 case INDEX_op_sar_i64:
2695 case INDEX_op_rotl_i64:
2696 case INDEX_op_rotr_i64:
2697 return &r_r_ri;
2698
2699 case INDEX_op_clz_i32:
2700 case INDEX_op_ctz_i32:
2701 case INDEX_op_clz_i64:
2702 case INDEX_op_ctz_i64:
2703 return &r_r_rAL;
2704
2705 case INDEX_op_brcond_i32:
2706 case INDEX_op_brcond_i64:
2707 return &r_rA;
2708
2709 case INDEX_op_movcond_i32:
2710 case INDEX_op_movcond_i64:
2711 return &movc;
2712
2713 case INDEX_op_qemu_ld_i32:
2714 case INDEX_op_qemu_ld_i64:
2715 return &r_l;
2716 case INDEX_op_qemu_st_i32:
2717 case INDEX_op_qemu_st_i64:
2718 return &lZ_l;
2719
2720 case INDEX_op_deposit_i32:
2721 case INDEX_op_deposit_i64:
2722 return &dep;
2723
2724 case INDEX_op_extract2_i32:
2725 case INDEX_op_extract2_i64:
2726 return &ext2;
2727
2728 case INDEX_op_add2_i32:
2729 case INDEX_op_add2_i64:
2730 case INDEX_op_sub2_i32:
2731 case INDEX_op_sub2_i64:
2732 return &add2;
2733
2734 case INDEX_op_add_vec:
2735 case INDEX_op_sub_vec:
2736 case INDEX_op_mul_vec:
2737 case INDEX_op_xor_vec:
2738 case INDEX_op_ssadd_vec:
2739 case INDEX_op_sssub_vec:
2740 case INDEX_op_usadd_vec:
2741 case INDEX_op_ussub_vec:
2742 case INDEX_op_smax_vec:
2743 case INDEX_op_smin_vec:
2744 case INDEX_op_umax_vec:
2745 case INDEX_op_umin_vec:
2746 case INDEX_op_shlv_vec:
2747 case INDEX_op_shrv_vec:
2748 case INDEX_op_sarv_vec:
2749 case INDEX_op_aa64_sshl_vec:
2750 return &w_w_w;
2751 case INDEX_op_not_vec:
2752 case INDEX_op_neg_vec:
2753 case INDEX_op_abs_vec:
2754 case INDEX_op_shli_vec:
2755 case INDEX_op_shri_vec:
2756 case INDEX_op_sari_vec:
2757 return &w_w;
2758 case INDEX_op_ld_vec:
2759 case INDEX_op_st_vec:
2760 case INDEX_op_dupm_vec:
2761 return &w_r;
2762 case INDEX_op_dup_vec:
2763 return &w_wr;
2764 case INDEX_op_or_vec:
2765 case INDEX_op_andc_vec:
2766 return &w_w_wO;
2767 case INDEX_op_and_vec:
2768 case INDEX_op_orc_vec:
2769 return &w_w_wN;
2770 case INDEX_op_cmp_vec:
2771 return &w_w_wZ;
2772 case INDEX_op_bitsel_vec:
2773 return &w_w_w_w;
2774
2775 default:
2776 return NULL;
2777 }
2778 }
2779
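/* Describe the register file to the allocator: register numbers 0-31 are
   the general registers (I32/I64), 32-63 the vector registers (V64/V128).
   Call-clobbered and reserved registers are then carved out below. */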
2780 static void tcg_target_init(TCGContext *s)
2781 {
2782 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2783 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2784 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2785 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2786
2787 tcg_target_call_clobber_regs = -1ull;
2788 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2789 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2790 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2791 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2792 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2793 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2794 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2795 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2796 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2797 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2798 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2799 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2800 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2801 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2802 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2803 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2804 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2805 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2806 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2807
2808 s->reserved_regs = 0;
2809 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2810 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2811 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2812 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2813 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2814 }
2815
2816 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2817 #define PUSH_SIZE ((30 - 19 + 1) * 8)
2818
2819 #define FRAME_SIZE \
2820 ((PUSH_SIZE \
2821 + TCG_STATIC_CALL_ARGS_SIZE \
2822 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2823 + TCG_TARGET_STACK_ALIGN - 1) \
2824 & ~(TCG_TARGET_STACK_ALIGN - 1))
2825
2826 /* We're expecting a 2 byte uleb128 encoded value. */
2827 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2828
2829 /* We're expecting to use a single ADDI insn. */
2830 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2831
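/* Frame layout: FP/LR are pushed at the bottom of the PUSH_SIZE area with
   x19..x28 stored in pairs above them, then SP is dropped by a further
   FRAME_SIZE - PUSH_SIZE bytes for outgoing stack arguments and the TCG
   temporary buffer. */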
2832 static void tcg_target_qemu_prologue(TCGContext *s)
2833 {
2834 TCGReg r;
2835
2836 /* Push (FP, LR) and allocate space for all saved registers. */
2837 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2838 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2839
2840 /* Set up frame pointer for canonical unwinding. */
2841 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2842
2843 /* Store callee-preserved regs x19..x28. */
2844 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2845 int ofs = (r - TCG_REG_X19 + 2) * 8;
2846 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2847 }
2848
2849 /* Make stack space for TCG locals. */
2850 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2851 FRAME_SIZE - PUSH_SIZE);
2852
2853 /* Inform TCG about how to find TCG locals with register, offset, size. */
2854 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2855 CPU_TEMP_BUF_NLONGS * sizeof(long));
2856
2857 #if !defined(CONFIG_SOFTMMU)
2858 if (USE_GUEST_BASE) {
2859 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2860 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2861 }
2862 #endif
2863
2864 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2865 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2866
2867 /*
2868 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2869 * and fall through to the rest of the epilogue.
2870 */
2871 s->code_gen_epilogue = s->code_ptr;
2872 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2873
2874 /* TB epilogue */
2875 tb_ret_addr = s->code_ptr;
2876
2877 /* Remove TCG locals stack space. */
2878 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2879 FRAME_SIZE - PUSH_SIZE);
2880
2881 /* Restore registers x19..x28. */
2882 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2883 int ofs = (r - TCG_REG_X19 + 2) * 8;
2884 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2885 }
2886
2887 /* Pop (FP, LR), restore SP to previous frame. */
2888 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2889 TCG_REG_SP, PUSH_SIZE, 0, 1);
2890 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2891 }
2892
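/* Pad a region of generated code with NOP instructions. */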
2893 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2894 {
2895 int i;
2896 for (i = 0; i < count; ++i) {
2897 p[i] = NOP;
2898 }
2899 }
2900
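/* Pre-built DWARF unwind information (one CIE and one FDE) matching the
   frame laid out by tcg_target_qemu_prologue; tcg_register_jit passes it
   to the host debugger hooks so that unwinding through generated code
   works. */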
2901 typedef struct {
2902 DebugFrameHeader h;
2903 uint8_t fde_def_cfa[4];
2904 uint8_t fde_reg_ofs[24];
2905 } DebugFrame;
2906
2907 #define ELF_HOST_MACHINE EM_AARCH64
2908
2909 static const DebugFrame debug_frame = {
2910 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2911 .h.cie.id = -1,
2912 .h.cie.version = 1,
2913 .h.cie.code_align = 1,
2914 .h.cie.data_align = 0x78, /* sleb128 -8 */
2915 .h.cie.return_column = TCG_REG_LR,
2916
2917 /* Total FDE size does not include the "len" member. */
2918 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2919
2920 .fde_def_cfa = {
2921 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2922 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2923 (FRAME_SIZE >> 7)
2924 },
2925 .fde_reg_ofs = {
2926 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2927 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2928 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2929 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2930 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2931 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2932 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2933 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2934 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2935 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2936 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2937 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2938 }
2939 };
2940
2941 void tcg_register_jit(void *buf, size_t buf_size)
2942 {
2943 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2944 }