target/arm/tcg/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20
  21 #include "exec/exec-all.h"
  22 #include "translate.h"
  23 #include "translate-a64.h"
  24 #include "qemu/log.h"
  25 #include "disas/disas.h"
  26 #include "arm_ldst.h"
  27 #include "semihosting/semihost.h"
  28 #include "cpregs.h"
  29
  30 static TCGv_i64 cpu_X[32];
  31 static TCGv_i64 cpu_pc;
  32
  33 /* Load/store exclusive handling */
  34 static TCGv_i64 cpu_exclusive_high;
  35
  36 static const char *regnames[] = {
  37     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  38     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  39     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  40     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  41 };
  42
  43 enum a64_shift_type {
  44     A64_SHIFT_TYPE_LSL = 0,
  45     A64_SHIFT_TYPE_LSR = 1,
  46     A64_SHIFT_TYPE_ASR = 2,
  47     A64_SHIFT_TYPE_ROR = 3
  48 };
  49
  50 /*
  51  * Helpers for extracting complex instruction fields
  52  */
  53
  54 /*
  55  * For load/store with an unsigned 12 bit immediate scaled by the element
  56  * size. The input has the immediate field in bits [14:3] and the element
  57  * size in [2:0].
  58  */
  59 static int uimm_scaled(DisasContext *s, int x)
  60 {
  61     unsigned imm = x >> 3;
  62     unsigned scale = extract32(x, 0, 3);
  63     return imm << scale;
  64 }
  65
  66 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
  67 static int scale_by_log2_tag_granule(DisasContext *s, int x)
  68 {
  69     return x << LOG2_TAG_GRANULE;
  70 }
  71
  72 /*
  73  * Include the generated decoders.
  74  */
  75
  76 #include "decode-sme-fa64.c.inc"
  77 #include "decode-a64.c.inc"
  78
  79 /* Table based decoder typedefs - used when the relevant bits for decode
  80  * are too awkwardly scattered across the instruction (eg SIMD).
  81  */
  82 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
  83
  84 typedef struct AArch64DecodeTable {
  85     uint32_t pattern;
  86     uint32_t mask;
  87     AArch64DecodeFn *disas_fn;
  88 } AArch64DecodeTable;
  89
  90 /* initialize TCG globals.  */
  91 void a64_translate_init(void)
  92 {
  93     int i;
  94
  95     cpu_pc = tcg_global_mem_new_i64(tcg_env,
  96                                     offsetof(CPUARMState, pc),
  97                                     "pc");
  98     for (i = 0; i < 32; i++) {
  99         cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
 100                                           offsetof(CPUARMState, xregs[i]),
 101                                           regnames[i]);
 102     }
 103
 104     cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
 105         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 106 }
 107
 108 /*
 109  * Return the core mmu_idx to use for A64 load/store insns which
 110  * have a "unprivileged load/store" variant. Those insns access
 111  * EL0 if executed from an EL which has control over EL0 (usually
 112  * EL1) but behave like normal loads and stores if executed from
 113  * elsewhere (eg EL3).
 114  *
 115  * @unpriv : true for the unprivileged encoding; false for the
 116  *           normal encoding (in which case we will return the same
 117  *           thing as get_mem_index().
 118  */
 119 static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
 120 {
 121     /*
 122      * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
 123      * which is the usual mmu_idx for this cpu state.
 124      */
 125     ARMMMUIdx useridx = s->mmu_idx;
 126
 127     if (unpriv && s->unpriv) {
 128         /*
 129          * We have pre-computed the condition for AccType_UNPRIV.
 130          * Therefore we should never get here with a mmu_idx for
 131          * which we do not know the corresponding user mmu_idx.
 132          */
 133         switch (useridx) {
 134         case ARMMMUIdx_E10_1:
 135         case ARMMMUIdx_E10_1_PAN:
 136             useridx = ARMMMUIdx_E10_0;
 137             break;
 138         case ARMMMUIdx_E20_2:
 139         case ARMMMUIdx_E20_2_PAN:
 140             useridx = ARMMMUIdx_E20_0;
 141             break;
 142         default:
 143             g_assert_not_reached();
 144         }
 145     }
 146     return arm_to_core_mmu_idx(useridx);
 147 }
 148
 149 static void set_btype_raw(int val)
 150 {
 151     tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
 152                    offsetof(CPUARMState, btype));
 153 }
 154
 155 static void set_btype(DisasContext *s, int val)
 156 {
 157     /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
 158     tcg_debug_assert(val >= 1 && val <= 3);
 159     set_btype_raw(val);
 160     s->btype = -1;
 161 }
 162
 163 static void reset_btype(DisasContext *s)
 164 {
 165     if (s->btype != 0) {
 166         set_btype_raw(0);
 167         s->btype = 0;
 168     }
 169 }
 170
 171 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
 172 {
 173     assert(s->pc_save != -1);
 174     if (tb_cflags(s->base.tb) & CF_PCREL) {
 175         tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
 176     } else {
 177         tcg_gen_movi_i64(dest, s->pc_curr + diff);
 178     }
 179 }
 180
 181 void gen_a64_update_pc(DisasContext *s, target_long diff)
 182 {
 183     gen_pc_plus_diff(s, cpu_pc, diff);
 184     s->pc_save = s->pc_curr + diff;
 185 }
 186
 187 /*
 188  * Handle Top Byte Ignore (TBI) bits.
 189  *
 190  * If address tagging is enabled via the TCR TBI bits:
 191  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 192  *    then the address is zero-extended, clearing bits [63:56]
 193  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 194  *    and TBI1 controls addresses with bit 55 == 1.
 195  *    If the appropriate TBI bit is set for the address then
 196  *    the address is sign-extended from bit 55 into bits [63:56]
 197  *
 198  * Here We have concatenated TBI{1,0} into tbi.
 199  */
 200 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
 201                                 TCGv_i64 src, int tbi)
 202 {
 203     if (tbi == 0) {
 204         /* Load unmodified address */
 205         tcg_gen_mov_i64(dst, src);
 206     } else if (!regime_has_2_ranges(s->mmu_idx)) {
 207         /* Force tag byte to all zero */
 208         tcg_gen_extract_i64(dst, src, 0, 56);
 209     } else {
 210         /* Sign-extend from bit 55.  */
 211         tcg_gen_sextract_i64(dst, src, 0, 56);
 212
 213         switch (tbi) {
 214         case 1:
 215             /* tbi0 but !tbi1: only use the extension if positive */
 216             tcg_gen_and_i64(dst, dst, src);
 217             break;
 218         case 2:
 219             /* !tbi0 but tbi1: only use the extension if negative */
 220             tcg_gen_or_i64(dst, dst, src);
 221             break;
 222         case 3:
 223             /* tbi0 and tbi1: always use the extension */
 224             break;
 225         default:
 226             g_assert_not_reached();
 227         }
 228     }
 229 }
 230
 231 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
 232 {
 233     /*
 234      * If address tagging is enabled for instructions via the TCR TBI bits,
 235      * then loading an address into the PC will clear out any tag.
 236      */
 237     gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
 238     s->pc_save = -1;
 239 }
 240
 241 /*
 242  * Handle MTE and/or TBI.
 243  *
 244  * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 245  * for the tag to be present in the FAR_ELx register.  But for user-only
 246  * mode we do not have a TLB with which to implement this, so we must
 247  * remove the top byte now.
 248  *
 249  * Always return a fresh temporary that we can increment independently
 250  * of the write-back address.
 251  */
 252
 253 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
 254 {
 255     TCGv_i64 clean = tcg_temp_new_i64();
 256 #ifdef CONFIG_USER_ONLY
 257     gen_top_byte_ignore(s, clean, addr, s->tbid);
 258 #else
 259     tcg_gen_mov_i64(clean, addr);
 260 #endif
 261     return clean;
 262 }
 263
 264 /* Insert a zero tag into src, with the result at dst. */
 265 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
 266 {
 267     tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
 268 }
 269
 270 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
 271                              MMUAccessType acc, int log2_size)
 272 {
 273     gen_helper_probe_access(tcg_env, ptr,
 274                             tcg_constant_i32(acc),
 275                             tcg_constant_i32(get_mem_index(s)),
 276                             tcg_constant_i32(1 << log2_size));
 277 }
 278
 279 /*
 280  * For MTE, check a single logical or atomic access.  This probes a single
 281  * address, the exact one specified.  The size and alignment of the access
 282  * is not relevant to MTE, per se, but watchpoints do require the size,
 283  * and we want to recognize those before making any other changes to state.
 284  */
 285 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
 286                                       bool is_write, bool tag_checked,
 287                                       MemOp memop, bool is_unpriv,
 288                                       int core_idx)
 289 {
 290     if (tag_checked && s->mte_active[is_unpriv]) {
 291         TCGv_i64 ret;
 292         int desc = 0;
 293
 294         desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
 295         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
 296         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
 297         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
 298         desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop));
 299         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
 300
 301         ret = tcg_temp_new_i64();
 302         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
 303
 304         return ret;
 305     }
 306     return clean_data_tbi(s, addr);
 307 }
 308
 309 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
 310                         bool tag_checked, MemOp memop)
 311 {
 312     return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
 313                                  false, get_mem_index(s));
 314 }
 315
 316 /*
 317  * For MTE, check multiple logical sequential accesses.
 318  */
 319 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
 320                         bool tag_checked, int total_size, MemOp single_mop)
 321 {
 322     if (tag_checked && s->mte_active[0]) {
 323         TCGv_i64 ret;
 324         int desc = 0;
 325
 326         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
 327         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
 328         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
 329         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
 330         desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop));
 331         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
 332
 333         ret = tcg_temp_new_i64();
 334         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
 335
 336         return ret;
 337     }
 338     return clean_data_tbi(s, addr);
 339 }
 340
 341 /*
 342  * Generate the special alignment check that applies to AccType_ATOMIC
 343  * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
 344  * naturally aligned, but it must not cross a 16-byte boundary.
 345  * See AArch64.CheckAlignment().
 346  */
 347 static void check_lse2_align(DisasContext *s, int rn, int imm,
 348                              bool is_write, MemOp mop)
 349 {
 350     TCGv_i32 tmp;
 351     TCGv_i64 addr;
 352     TCGLabel *over_label;
 353     MMUAccessType type;
 354     int mmu_idx;
 355
 356     tmp = tcg_temp_new_i32();
 357     tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
 358     tcg_gen_addi_i32(tmp, tmp, imm & 15);
 359     tcg_gen_andi_i32(tmp, tmp, 15);
 360     tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
 361
 362     over_label = gen_new_label();
 363     tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
 364
 365     addr = tcg_temp_new_i64();
 366     tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
 367
 368     type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD,
 369     mmu_idx = get_mem_index(s);
 370     gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
 371                                 tcg_constant_i32(mmu_idx));
 372
 373     gen_set_label(over_label);
 374
 375 }
 376
 377 /* Handle the alignment check for AccType_ATOMIC instructions. */
 378 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
 379 {
 380     MemOp size = mop & MO_SIZE;
 381
 382     if (size == MO_8) {
 383         return mop;
 384     }
 385
 386     /*
 387      * If size == MO_128, this is a LDXP, and the operation is single-copy
 388      * atomic for each doubleword, not the entire quadword; it still must
 389      * be quadword aligned.
 390      */
 391     if (size == MO_128) {
 392         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
 393                                    MO_ATOM_IFALIGN_PAIR);
 394     }
 395     if (dc_isar_feature(aa64_lse2, s)) {
 396         check_lse2_align(s, rn, 0, true, mop);
 397     } else {
 398         mop |= MO_ALIGN;
 399     }
 400     return finalize_memop(s, mop);
 401 }
 402
 403 /* Handle the alignment check for AccType_ORDERED instructions. */
 404 static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
 405                                  bool is_write, MemOp mop)
 406 {
 407     MemOp size = mop & MO_SIZE;
 408
 409     if (size == MO_8) {
 410         return mop;
 411     }
 412     if (size == MO_128) {
 413         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
 414                                    MO_ATOM_IFALIGN_PAIR);
 415     }
 416     if (!dc_isar_feature(aa64_lse2, s)) {
 417         mop |= MO_ALIGN;
 418     } else if (!s->naa) {
 419         check_lse2_align(s, rn, imm, is_write, mop);
 420     }
 421     return finalize_memop(s, mop);
 422 }
 423
 424 typedef struct DisasCompare64 {
 425     TCGCond cond;
 426     TCGv_i64 value;
 427 } DisasCompare64;
 428
 429 static void a64_test_cc(DisasCompare64 *c64, int cc)
 430 {
 431     DisasCompare c32;
 432
 433     arm_test_cc(&c32, cc);
 434
 435     /*
 436      * Sign-extend the 32-bit value so that the GE/LT comparisons work
 437      * properly.  The NE/EQ comparisons are also fine with this choice.
 438       */
 439     c64->cond = c32.cond;
 440     c64->value = tcg_temp_new_i64();
 441     tcg_gen_ext_i32_i64(c64->value, c32.value);
 442 }
 443
 444 static void gen_rebuild_hflags(DisasContext *s)
 445 {
 446     gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
 447 }
 448
 449 static void gen_exception_internal(int excp)
 450 {
 451     assert(excp_is_internal(excp));
 452     gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
 453 }
 454
 455 static void gen_exception_internal_insn(DisasContext *s, int excp)
 456 {
 457     gen_a64_update_pc(s, 0);
 458     gen_exception_internal(excp);
 459     s->base.is_jmp = DISAS_NORETURN;
 460 }
 461
 462 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
 463 {
 464     gen_a64_update_pc(s, 0);
 465     gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
 466     s->base.is_jmp = DISAS_NORETURN;
 467 }
 468
 469 static void gen_step_complete_exception(DisasContext *s)
 470 {
 471     /* We just completed step of an insn. Move from Active-not-pending
 472      * to Active-pending, and then also take the swstep exception.
 473      * This corresponds to making the (IMPDEF) choice to prioritize
 474      * swstep exceptions over asynchronous exceptions taken to an exception
 475      * level where debug is disabled. This choice has the advantage that
 476      * we do not need to maintain internal state corresponding to the
 477      * ISV/EX syndrome bits between completion of the step and generation
 478      * of the exception, and our syndrome information is always correct.
 479      */
 480     gen_ss_advance(s);
 481     gen_swstep_exception(s, 1, s->is_ldex);
 482     s->base.is_jmp = DISAS_NORETURN;
 483 }
 484
 485 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
 486 {
 487     if (s->ss_active) {
 488         return false;
 489     }
 490     return translator_use_goto_tb(&s->base, dest);
 491 }
 492
 493 static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
 494 {
 495     if (use_goto_tb(s, s->pc_curr + diff)) {
 496         /*
 497          * For pcrel, the pc must always be up-to-date on entry to
 498          * the linked TB, so that it can use simple additions for all
 499          * further adjustments.  For !pcrel, the linked TB is compiled
 500          * to know its full virtual address, so we can delay the
 501          * update to pc to the unlinked path.  A long chain of links
 502          * can thus avoid many updates to the PC.
 503          */
 504         if (tb_cflags(s->base.tb) & CF_PCREL) {
 505             gen_a64_update_pc(s, diff);
 506             tcg_gen_goto_tb(n);
 507         } else {
 508             tcg_gen_goto_tb(n);
 509             gen_a64_update_pc(s, diff);
 510         }
 511         tcg_gen_exit_tb(s->base.tb, n);
 512         s->base.is_jmp = DISAS_NORETURN;
 513     } else {
 514         gen_a64_update_pc(s, diff);
 515         if (s->ss_active) {
 516             gen_step_complete_exception(s);
 517         } else {
 518             tcg_gen_lookup_and_goto_ptr();
 519             s->base.is_jmp = DISAS_NORETURN;
 520         }
 521     }
 522 }
 523
 524 /*
 525  * Register access functions
 526  *
 527  * These functions are used for directly accessing a register in where
 528  * changes to the final register value are likely to be made. If you
 529  * need to use a register for temporary calculation (e.g. index type
 530  * operations) use the read_* form.
 531  *
 532  * B1.2.1 Register mappings
 533  *
 534  * In instruction register encoding 31 can refer to ZR (zero register) or
 535  * the SP (stack pointer) depending on context. In QEMU's case we map SP
 536  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 537  * This is the point of the _sp forms.
 538  */
 539 TCGv_i64 cpu_reg(DisasContext *s, int reg)
 540 {
 541     if (reg == 31) {
 542         TCGv_i64 t = tcg_temp_new_i64();
 543         tcg_gen_movi_i64(t, 0);
 544         return t;
 545     } else {
 546         return cpu_X[reg];
 547     }
 548 }
 549
 550 /* register access for when 31 == SP */
 551 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
 552 {
 553     return cpu_X[reg];
 554 }
 555
 556 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 557  * representing the register contents. This TCGv is an auto-freed
 558  * temporary so it need not be explicitly freed, and may be modified.
 559  */
 560 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 561 {
 562     TCGv_i64 v = tcg_temp_new_i64();
 563     if (reg != 31) {
 564         if (sf) {
 565             tcg_gen_mov_i64(v, cpu_X[reg]);
 566         } else {
 567             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 568         }
 569     } else {
 570         tcg_gen_movi_i64(v, 0);
 571     }
 572     return v;
 573 }
 574
 575 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 576 {
 577     TCGv_i64 v = tcg_temp_new_i64();
 578     if (sf) {
 579         tcg_gen_mov_i64(v, cpu_X[reg]);
 580     } else {
 581         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 582     }
 583     return v;
 584 }
 585
 586 /* Return the offset into CPUARMState of a slice (from
 587  * the least significant end) of FP register Qn (ie
 588  * Dn, Sn, Hn or Bn).
 589  * (Note that this is not the same mapping as for A32; see cpu.h)
 590  */
 591 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
 592 {
 593     return vec_reg_offset(s, regno, 0, size);
 594 }
 595
 596 /* Offset of the high half of the 128 bit vector Qn */
 597 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 598 {
 599     return vec_reg_offset(s, regno, 1, MO_64);
 600 }
 601
 602 /* Convenience accessors for reading and writing single and double
 603  * FP registers. Writing clears the upper parts of the associated
 604  * 128 bit vector register, as required by the architecture.
 605  * Note that unlike the GP register accessors, the values returned
 606  * by the read functions must be manually freed.
 607  */
 608 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 609 {
 610     TCGv_i64 v = tcg_temp_new_i64();
 611
 612     tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
 613     return v;
 614 }
 615
 616 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 617 {
 618     TCGv_i32 v = tcg_temp_new_i32();
 619
 620     tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
 621     return v;
 622 }
 623
 624 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
 625 {
 626     TCGv_i32 v = tcg_temp_new_i32();
 627
 628     tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
 629     return v;
 630 }
 631
 632 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 633  * If SVE is not enabled, then there are only 128 bits in the vector.
 634  */
 635 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
 636 {
 637     unsigned ofs = fp_reg_offset(s, rd, MO_64);
 638     unsigned vsz = vec_full_reg_size(s);
 639
 640     /* Nop move, with side effect of clearing the tail. */
 641     tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
 642 }
 643
 644 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 645 {
 646     unsigned ofs = fp_reg_offset(s, reg, MO_64);
 647
 648     tcg_gen_st_i64(v, tcg_env, ofs);
 649     clear_vec_high(s, false, reg);
 650 }
 651
 652 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 653 {
 654     TCGv_i64 tmp = tcg_temp_new_i64();
 655
 656     tcg_gen_extu_i32_i64(tmp, v);
 657     write_fp_dreg(s, reg, tmp);
 658 }
 659
 660 /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
 661 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
 662                          GVecGen2Fn *gvec_fn, int vece)
 663 {
 664     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 665             is_q ? 16 : 8, vec_full_reg_size(s));
 666 }
 667
 668 /* Expand a 2-operand + immediate AdvSIMD vector operation using
 669  * an expander function.
 670  */
 671 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
 672                           int64_t imm, GVecGen2iFn *gvec_fn, int vece)
 673 {
 674     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 675             imm, is_q ? 16 : 8, vec_full_reg_size(s));
 676 }
 677
 678 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
 679 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
 680                          GVecGen3Fn *gvec_fn, int vece)
 681 {
 682     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 683             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
 684 }
 685
 686 /* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
 687 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
 688                          int rx, GVecGen4Fn *gvec_fn, int vece)
 689 {
 690     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 691             vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
 692             is_q ? 16 : 8, vec_full_reg_size(s));
 693 }
 694
 695 /* Expand a 2-operand operation using an out-of-line helper.  */
 696 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
 697                              int rn, int data, gen_helper_gvec_2 *fn)
 698 {
 699     tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 700                        vec_full_reg_offset(s, rn),
 701                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 702 }
 703
 704 /* Expand a 3-operand operation using an out-of-line helper.  */
 705 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
 706                              int rn, int rm, int data, gen_helper_gvec_3 *fn)
 707 {
 708     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 709                        vec_full_reg_offset(s, rn),
 710                        vec_full_reg_offset(s, rm),
 711                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 712 }
 713
 714 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
 715  * an out-of-line helper.
 716  */
 717 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
 718                               int rm, bool is_fp16, int data,
 719                               gen_helper_gvec_3_ptr *fn)
 720 {
 721     TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
 722     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
 723                        vec_full_reg_offset(s, rn),
 724                        vec_full_reg_offset(s, rm), fpst,
 725                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 726 }
 727
 728 /* Expand a 3-operand + qc + operation using an out-of-line helper.  */
 729 static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
 730                             int rm, gen_helper_gvec_3_ptr *fn)
 731 {
 732     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
 733
 734     tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
 735     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
 736                        vec_full_reg_offset(s, rn),
 737                        vec_full_reg_offset(s, rm), qc_ptr,
 738                        is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
 739 }
 740
 741 /* Expand a 4-operand operation using an out-of-line helper.  */
 742 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
 743                              int rm, int ra, int data, gen_helper_gvec_4 *fn)
 744 {
 745     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 746                        vec_full_reg_offset(s, rn),
 747                        vec_full_reg_offset(s, rm),
 748                        vec_full_reg_offset(s, ra),
 749                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 750 }
 751
 752 /*
 753  * Expand a 4-operand + fpstatus pointer + simd data value operation using
 754  * an out-of-line helper.
 755  */
 756 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
 757                               int rm, int ra, bool is_fp16, int data,
 758                               gen_helper_gvec_4_ptr *fn)
 759 {
 760     TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
 761     tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
 762                        vec_full_reg_offset(s, rn),
 763                        vec_full_reg_offset(s, rm),
 764                        vec_full_reg_offset(s, ra), fpst,
 765                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 766 }
 767
 768 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 769  * than the 32 bit equivalent.
 770  */
 771 static inline void gen_set_NZ64(TCGv_i64 result)
 772 {
 773     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
 774     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
 775 }
 776
 777 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 778 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 779 {
 780     if (sf) {
 781         gen_set_NZ64(result);
 782     } else {
 783         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 784         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 785     }
 786     tcg_gen_movi_i32(cpu_CF, 0);
 787     tcg_gen_movi_i32(cpu_VF, 0);
 788 }
 789
 790 /* dest = T0 + T1; compute C, N, V and Z flags */
 791 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 792 {
 793     TCGv_i64 result, flag, tmp;
 794     result = tcg_temp_new_i64();
 795     flag = tcg_temp_new_i64();
 796     tmp = tcg_temp_new_i64();
 797
 798     tcg_gen_movi_i64(tmp, 0);
 799     tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 800
 801     tcg_gen_extrl_i64_i32(cpu_CF, flag);
 802
 803     gen_set_NZ64(result);
 804
 805     tcg_gen_xor_i64(flag, result, t0);
 806     tcg_gen_xor_i64(tmp, t0, t1);
 807     tcg_gen_andc_i64(flag, flag, tmp);
 808     tcg_gen_extrh_i64_i32(cpu_VF, flag);
 809
 810     tcg_gen_mov_i64(dest, result);
 811 }
 812
 813 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 814 {
 815     TCGv_i32 t0_32 = tcg_temp_new_i32();
 816     TCGv_i32 t1_32 = tcg_temp_new_i32();
 817     TCGv_i32 tmp = tcg_temp_new_i32();
 818
 819     tcg_gen_movi_i32(tmp, 0);
 820     tcg_gen_extrl_i64_i32(t0_32, t0);
 821     tcg_gen_extrl_i64_i32(t1_32, t1);
 822     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 823     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 824     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 825     tcg_gen_xor_i32(tmp, t0_32, t1_32);
 826     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 827     tcg_gen_extu_i32_i64(dest, cpu_NF);
 828 }
 829
 830 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 831 {
 832     if (sf) {
 833         gen_add64_CC(dest, t0, t1);
 834     } else {
 835         gen_add32_CC(dest, t0, t1);
 836     }
 837 }
 838
 839 /* dest = T0 - T1; compute C, N, V and Z flags */
 840 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 841 {
 842     /* 64 bit arithmetic */
 843     TCGv_i64 result, flag, tmp;
 844
 845     result = tcg_temp_new_i64();
 846     flag = tcg_temp_new_i64();
 847     tcg_gen_sub_i64(result, t0, t1);
 848
 849     gen_set_NZ64(result);
 850
 851     tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 852     tcg_gen_extrl_i64_i32(cpu_CF, flag);
 853
 854     tcg_gen_xor_i64(flag, result, t0);
 855     tmp = tcg_temp_new_i64();
 856     tcg_gen_xor_i64(tmp, t0, t1);
 857     tcg_gen_and_i64(flag, flag, tmp);
 858     tcg_gen_extrh_i64_i32(cpu_VF, flag);
 859     tcg_gen_mov_i64(dest, result);
 860 }
 861
 862 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 863 {
 864     /* 32 bit arithmetic */
 865     TCGv_i32 t0_32 = tcg_temp_new_i32();
 866     TCGv_i32 t1_32 = tcg_temp_new_i32();
 867     TCGv_i32 tmp;
 868
 869     tcg_gen_extrl_i64_i32(t0_32, t0);
 870     tcg_gen_extrl_i64_i32(t1_32, t1);
 871     tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 872     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 873     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 874     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 875     tmp = tcg_temp_new_i32();
 876     tcg_gen_xor_i32(tmp, t0_32, t1_32);
 877     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 878     tcg_gen_extu_i32_i64(dest, cpu_NF);
 879 }
 880
 881 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 882 {
 883     if (sf) {
 884         gen_sub64_CC(dest, t0, t1);
 885     } else {
 886         gen_sub32_CC(dest, t0, t1);
 887     }
 888 }
 889
 890 /* dest = T0 + T1 + CF; do not compute flags. */
 891 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 892 {
 893     TCGv_i64 flag = tcg_temp_new_i64();
 894     tcg_gen_extu_i32_i64(flag, cpu_CF);
 895     tcg_gen_add_i64(dest, t0, t1);
 896     tcg_gen_add_i64(dest, dest, flag);
 897
 898     if (!sf) {
 899         tcg_gen_ext32u_i64(dest, dest);
 900     }
 901 }
 902
 903 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 904 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 905 {
 906     if (sf) {
 907         TCGv_i64 result = tcg_temp_new_i64();
 908         TCGv_i64 cf_64 = tcg_temp_new_i64();
 909         TCGv_i64 vf_64 = tcg_temp_new_i64();
 910         TCGv_i64 tmp = tcg_temp_new_i64();
 911         TCGv_i64 zero = tcg_constant_i64(0);
 912
 913         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 914         tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
 915         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
 916         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 917         gen_set_NZ64(result);
 918
 919         tcg_gen_xor_i64(vf_64, result, t0);
 920         tcg_gen_xor_i64(tmp, t0, t1);
 921         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 922         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 923
 924         tcg_gen_mov_i64(dest, result);
 925     } else {
 926         TCGv_i32 t0_32 = tcg_temp_new_i32();
 927         TCGv_i32 t1_32 = tcg_temp_new_i32();
 928         TCGv_i32 tmp = tcg_temp_new_i32();
 929         TCGv_i32 zero = tcg_constant_i32(0);
 930
 931         tcg_gen_extrl_i64_i32(t0_32, t0);
 932         tcg_gen_extrl_i64_i32(t1_32, t1);
 933         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
 934         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);
 935
 936         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 937         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 938         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 939         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 940         tcg_gen_extu_i32_i64(dest, cpu_NF);
 941     }
 942 }
 943
 944 /*
 945  * Load/Store generators
 946  */
 947
 948 /*
 949  * Store from GPR register to memory.
 950  */
 951 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 952                              TCGv_i64 tcg_addr, MemOp memop, int memidx,
 953                              bool iss_valid,
 954                              unsigned int iss_srt,
 955                              bool iss_sf, bool iss_ar)
 956 {
 957     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
 958
 959     if (iss_valid) {
 960         uint32_t syn;
 961
 962         syn = syn_data_abort_with_iss(0,
 963                                       (memop & MO_SIZE),
 964                                       false,
 965                                       iss_srt,
 966                                       iss_sf,
 967                                       iss_ar,
 968                                       0, 0, 0, 0, 0, false);
 969         disas_set_insn_syndrome(s, syn);
 970     }
 971 }
 972
 973 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 974                       TCGv_i64 tcg_addr, MemOp memop,
 975                       bool iss_valid,
 976                       unsigned int iss_srt,
 977                       bool iss_sf, bool iss_ar)
 978 {
 979     do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
 980                      iss_valid, iss_srt, iss_sf, iss_ar);
 981 }
 982
 983 /*
 984  * Load from memory to GPR register
 985  */
 986 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 987                              MemOp memop, bool extend, int memidx,
 988                              bool iss_valid, unsigned int iss_srt,
 989                              bool iss_sf, bool iss_ar)
 990 {
 991     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 992
 993     if (extend && (memop & MO_SIGN)) {
 994         g_assert((memop & MO_SIZE) <= MO_32);
 995         tcg_gen_ext32u_i64(dest, dest);
 996     }
 997
 998     if (iss_valid) {
 999         uint32_t syn;
1000
1001         syn = syn_data_abort_with_iss(0,
1002                                       (memop & MO_SIZE),
1003                                       (memop & MO_SIGN) != 0,
1004                                       iss_srt,
1005                                       iss_sf,
1006                                       iss_ar,
1007                                       0, 0, 0, 0, 0, false);
1008         disas_set_insn_syndrome(s, syn);
1009     }
1010 }
1011
1012 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1013                       MemOp memop, bool extend,
1014                       bool iss_valid, unsigned int iss_srt,
1015                       bool iss_sf, bool iss_ar)
1016 {
1017     do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
1018                      iss_valid, iss_srt, iss_sf, iss_ar);
1019 }
1020
1021 /*
1022  * Store from FP register to memory
1023  */
1024 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
1025 {
1026     /* This writes the bottom N bits of a 128 bit wide vector to memory */
1027     TCGv_i64 tmplo = tcg_temp_new_i64();
1028
1029     tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));
1030
1031     if ((mop & MO_SIZE) < MO_128) {
1032         tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1033     } else {
1034         TCGv_i64 tmphi = tcg_temp_new_i64();
1035         TCGv_i128 t16 = tcg_temp_new_i128();
1036
1037         tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
1038         tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
1039
1040         tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
1041     }
1042 }
1043
1044 /*
1045  * Load from memory to FP register
1046  */
1047 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
1048 {
1049     /* This always zero-extends and writes to a full 128 bit wide vector */
1050     TCGv_i64 tmplo = tcg_temp_new_i64();
1051     TCGv_i64 tmphi = NULL;
1052
1053     if ((mop & MO_SIZE) < MO_128) {
1054         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1055     } else {
1056         TCGv_i128 t16 = tcg_temp_new_i128();
1057
1058         tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
1059
1060         tmphi = tcg_temp_new_i64();
1061         tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
1062     }
1063
1064     tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));
1065
1066     if (tmphi) {
1067         tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
1068     }
1069     clear_vec_high(s, tmphi != NULL, destidx);
1070 }
1071
1072 /*
1073  * Vector load/store helpers.
1074  *
1075  * The principal difference between this and a FP load is that we don't
1076  * zero extend as we are filling a partial chunk of the vector register.
1077  * These functions don't support 128 bit loads/stores, which would be
1078  * normal load/store operations.
1079  *
1080  * The _i32 versions are useful when operating on 32 bit quantities
1081  * (eg for floating point single or using Neon helper functions).
1082  */
1083
1084 /* Get value of an element within a vector register */
1085 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1086                              int element, MemOp memop)
1087 {
1088     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1089     switch ((unsigned)memop) {
1090     case MO_8:
1091         tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
1092         break;
1093     case MO_16:
1094         tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
1095         break;
1096     case MO_32:
1097         tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
1098         break;
1099     case MO_8|MO_SIGN:
1100         tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
1101         break;
1102     case MO_16|MO_SIGN:
1103         tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
1104         break;
1105     case MO_32|MO_SIGN:
1106         tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
1107         break;
1108     case MO_64:
1109     case MO_64|MO_SIGN:
1110         tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
1111         break;
1112     default:
1113         g_assert_not_reached();
1114     }
1115 }
1116
1117 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1118                                  int element, MemOp memop)
1119 {
1120     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1121     switch (memop) {
1122     case MO_8:
1123         tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
1124         break;
1125     case MO_16:
1126         tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
1127         break;
1128     case MO_8|MO_SIGN:
1129         tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
1130         break;
1131     case MO_16|MO_SIGN:
1132         tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
1133         break;
1134     case MO_32:
1135     case MO_32|MO_SIGN:
1136         tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
1137         break;
1138     default:
1139         g_assert_not_reached();
1140     }
1141 }
1142
1143 /* Set value of an element within a vector register */
1144 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1145                               int element, MemOp memop)
1146 {
1147     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1148     switch (memop) {
1149     case MO_8:
1150         tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
1151         break;
1152     case MO_16:
1153         tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
1154         break;
1155     case MO_32:
1156         tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
1157         break;
1158     case MO_64:
1159         tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
1160         break;
1161     default:
1162         g_assert_not_reached();
1163     }
1164 }
1165
1166 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1167                                   int destidx, int element, MemOp memop)
1168 {
1169     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1170     switch (memop) {
1171     case MO_8:
1172         tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
1173         break;
1174     case MO_16:
1175         tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
1176         break;
1177     case MO_32:
1178         tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
1179         break;
1180     default:
1181         g_assert_not_reached();
1182     }
1183 }
1184
1185 /* Store from vector register to memory */
1186 static void do_vec_st(DisasContext *s, int srcidx, int element,
1187                       TCGv_i64 tcg_addr, MemOp mop)
1188 {
1189     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1190
1191     read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1192     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1193 }
1194
1195 /* Load from memory to vector register */
1196 static void do_vec_ld(DisasContext *s, int destidx, int element,
1197                       TCGv_i64 tcg_addr, MemOp mop)
1198 {
1199     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1200
1201     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1202     write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1203 }
1204
1205 /* Check that FP/Neon access is enabled. If it is, return
1206  * true. If not, emit code to generate an appropriate exception,
1207  * and return false; the caller should not emit any code for
1208  * the instruction. Note that this check must happen after all
1209  * unallocated-encoding checks (otherwise the syndrome information
1210  * for the resulting exception will be incorrect).
1211  */
1212 static bool fp_access_check_only(DisasContext *s)
1213 {
1214     if (s->fp_excp_el) {
1215         assert(!s->fp_access_checked);
1216         s->fp_access_checked = true;
1217
1218         gen_exception_insn_el(s, 0, EXCP_UDEF,
1219                               syn_fp_access_trap(1, 0xe, false, 0),
1220                               s->fp_excp_el);
1221         return false;
1222     }
1223     s->fp_access_checked = true;
1224     return true;
1225 }
1226
1227 static bool fp_access_check(DisasContext *s)
1228 {
1229     if (!fp_access_check_only(s)) {
1230         return false;
1231     }
1232     if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1233         gen_exception_insn(s, 0, EXCP_UDEF,
1234                            syn_smetrap(SME_ET_Streaming, false));
1235         return false;
1236     }
1237     return true;
1238 }
1239
1240 /*
1241  * Check that SVE access is enabled.  If it is, return true.
1242  * If not, emit code to generate an appropriate exception and return false.
1243  * This function corresponds to CheckSVEEnabled().
1244  */
1245 bool sve_access_check(DisasContext *s)
1246 {
1247     if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
1248         assert(dc_isar_feature(aa64_sme, s));
1249         if (!sme_sm_enabled_check(s)) {
1250             goto fail_exit;
1251         }
1252     } else if (s->sve_excp_el) {
1253         gen_exception_insn_el(s, 0, EXCP_UDEF,
1254                               syn_sve_access_trap(), s->sve_excp_el);
1255         goto fail_exit;
1256     }
1257     s->sve_access_checked = true;
1258     return fp_access_check(s);
1259
1260  fail_exit:
1261     /* Assert that we only raise one exception per instruction. */
1262     assert(!s->sve_access_checked);
1263     s->sve_access_checked = true;
1264     return false;
1265 }
1266
1267 /*
1268  * Check that SME access is enabled, raise an exception if not.
1269  * Note that this function corresponds to CheckSMEAccess and is
1270  * only used directly for cpregs.
1271  */
1272 static bool sme_access_check(DisasContext *s)
1273 {
1274     if (s->sme_excp_el) {
1275         gen_exception_insn_el(s, 0, EXCP_UDEF,
1276                               syn_smetrap(SME_ET_AccessTrap, false),
1277                               s->sme_excp_el);
1278         return false;
1279     }
1280     return true;
1281 }
1282
1283 /* This function corresponds to CheckSMEEnabled. */
1284 bool sme_enabled_check(DisasContext *s)
1285 {
1286     /*
1287      * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1288      * to be zero when fp_excp_el has priority.  This is because we need
1289      * sme_excp_el by itself for cpregs access checks.
1290      */
1291     if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
1292         s->fp_access_checked = true;
1293         return sme_access_check(s);
1294     }
1295     return fp_access_check_only(s);
1296 }
1297
1298 /* Common subroutine for CheckSMEAnd*Enabled. */
1299 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1300 {
1301     if (!sme_enabled_check(s)) {
1302         return false;
1303     }
1304     if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1305         gen_exception_insn(s, 0, EXCP_UDEF,
1306                            syn_smetrap(SME_ET_NotStreaming, false));
1307         return false;
1308     }
1309     if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1310         gen_exception_insn(s, 0, EXCP_UDEF,
1311                            syn_smetrap(SME_ET_InactiveZA, false));
1312         return false;
1313     }
1314     return true;
1315 }
1316
1317 /*
1318  * This utility function is for doing register extension with an
1319  * optional shift. You will likely want to pass a temporary for the
1320  * destination register. See DecodeRegExtend() in the ARM ARM.
1321  */
1322 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1323                               int option, unsigned int shift)
1324 {
1325     int extsize = extract32(option, 0, 2);
1326     bool is_signed = extract32(option, 2, 1);
1327
1328     tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
1329     tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1330 }
1331
/* Placeholder hook for SP alignment checking; currently emits nothing. */
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in the future (possibly as a "favour catching guest program
     * bugs over speed" user selectable option).
     */
}
1344
1345 /*
1346  * This provides a simple table based table lookup decoder. It is
1347  * intended to be used when the relevant bits for decode are too
1348  * awkwardly placed and switch/if based logic would be confusing and
1349  * deeply nested. Since it's a linear search through the table, tables
1350  * should be kept small.
1351  *
1352  * It returns the first handler where insn & mask == pattern, or
1353  * NULL if there is no match.
1354  * The table is terminated by an empty mask (i.e. 0)
1355  */
1356 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1357                                                uint32_t insn)
1358 {
1359     const AArch64DecodeTable *tptr = table;
1360
1361     while (tptr->mask) {
1362         if ((insn & tptr->mask) == tptr->pattern) {
1363             return tptr->disas_fn;
1364         }
1365         tptr++;
1366     }
1367     return NULL;
1368 }
1369
1370 /*
1371  * The instruction disassembly implemented here matches
1372  * the instruction encoding classifications in chapter C4
1373  * of the ARM Architecture Reference Manual (DDI0487B_a);
1374  * classification names and decode diagrams here should generally
1375  * match up with those in the manual.
1376  */
1377
/*
 * B: unconditional immediate branch.  Resets BTYPE and ends the TB
 * with a goto_tb (slot 0) to which a->imm is passed.
 */
static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}
1384
/*
 * BL: branch with link.  X30 is written with the PC plus the length of
 * the current instruction before the branch itself is generated.
 */
static bool trans_BL(DisasContext *s, arg_i *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}
1391 }
1392
1393
1394 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1395 {
1396     DisasLabel match;
1397     TCGv_i64 tcg_cmp;
1398
1399     tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1400     reset_btype(s);
1401
1402     match = gen_disas_label(s);
1403     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1404                         tcg_cmp, 0, match.label);
1405     gen_goto_tb(s, 0, 4);
1406     set_disas_label(s, match);
1407     gen_goto_tb(s, 1, a->imm);
1408     return true;
1409 }
1410
1411 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1412 {
1413     DisasLabel match;
1414     TCGv_i64 tcg_cmp;
1415
1416     tcg_cmp = tcg_temp_new_i64();
1417     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1418
1419     reset_btype(s);
1420
1421     match = gen_disas_label(s);
1422     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1423                         tcg_cmp, 0, match.label);
1424     gen_goto_tb(s, 0, 4);
1425     set_disas_label(s, match);
1426     gen_goto_tb(s, 1, a->imm);
1427     return true;
1428 }
1429
1430 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1431 {
1432     /* BC.cond is only present with FEAT_HBC */
1433     if (a->c && !dc_isar_feature(aa64_hbc, s)) {
1434         return false;
1435     }
1436     reset_btype(s);
1437     if (a->cond < 0x0e) {
1438         /* genuinely conditional branches */
1439         DisasLabel match = gen_disas_label(s);
1440         arm_gen_test_cc(a->cond, match.label);
1441         gen_goto_tb(s, 0, 4);
1442         set_disas_label(s, match);
1443         gen_goto_tb(s, 1, a->imm);
1444     } else {
1445         /* 0xe and 0xf are both "always" conditions */
1446         gen_goto_tb(s, 0, a->imm);
1447     }
1448     return true;
1449 }
1450
1451 static void set_btype_for_br(DisasContext *s, int rn)
1452 {
1453     if (dc_isar_feature(aa64_bti, s)) {
1454         /* BR to {x16,x17} or !guard -> 1, else 3.  */
1455         set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
1456     }
1457 }
1458
1459 static void set_btype_for_blr(DisasContext *s)
1460 {
1461     if (dc_isar_feature(aa64_bti, s)) {
1462         /* BLR sets BTYPE to 2, regardless of source guarded page.  */
1463         set_btype(s, 2);
1464     }
1465 }
1466
/*
 * BR: indirect branch to the address in register a->rn.
 * Sets the PC, updates BTYPE for a BR, and ends the TB with DISAS_JUMP.
 */
static bool trans_BR(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}
1474
1475 static bool trans_BLR(DisasContext *s, arg_r *a)
1476 {
1477     TCGv_i64 dst = cpu_reg(s, a->rn);
1478     TCGv_i64 lr = cpu_reg(s, 30);
1479     if (dst == lr) {
1480         TCGv_i64 tmp = tcg_temp_new_i64();
1481         tcg_gen_mov_i64(tmp, dst);
1482         dst = tmp;
1483     }
1484     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1485     gen_a64_set_pc(s, dst);
1486     set_btype_for_blr(s);
1487     s->base.is_jmp = DISAS_JUMP;
1488     return true;
1489 }
1490
/*
 * RET: return to the address in register a->rn.
 * Sets the PC and ends the TB with DISAS_JUMP; BTYPE is not modified here.
 */
static bool trans_RET(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}
1497
1498 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1499                                    TCGv_i64 modifier, bool use_key_a)
1500 {
1501     TCGv_i64 truedst;
1502     /*
1503      * Return the branch target for a BRAA/RETA/etc, which is either
1504      * just the destination dst, or that value with the pauth check
1505      * done and the code removed from the high bits.
1506      */
1507     if (!s->pauth_active) {
1508         return dst;
1509     }
1510
1511     truedst = tcg_temp_new_i64();
1512     if (use_key_a) {
1513         gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1514     } else {
1515         gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1516     }
1517     return truedst;
1518 }
1519
1520 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1521 {
1522     TCGv_i64 dst;
1523
1524     if (!dc_isar_feature(aa64_pauth, s)) {
1525         return false;
1526     }
1527
1528     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1529     gen_a64_set_pc(s, dst);
1530     set_btype_for_br(s, a->rn);
1531     s->base.is_jmp = DISAS_JUMP;
1532     return true;
1533 }
1534
1535 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1536 {
1537     TCGv_i64 dst, lr;
1538
1539     if (!dc_isar_feature(aa64_pauth, s)) {
1540         return false;
1541     }
1542
1543     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1544     lr = cpu_reg(s, 30);
1545     if (dst == lr) {
1546         TCGv_i64 tmp = tcg_temp_new_i64();
1547         tcg_gen_mov_i64(tmp, dst);
1548         dst = tmp;
1549     }
1550     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1551     gen_a64_set_pc(s, dst);
1552     set_btype_for_blr(s);
1553     s->base.is_jmp = DISAS_JUMP;
1554     return true;
1555 }
1556
1557 static bool trans_RETA(DisasContext *s, arg_reta *a)
1558 {
1559     TCGv_i64 dst;
1560
1561     dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1562     gen_a64_set_pc(s, dst);
1563     s->base.is_jmp = DISAS_JUMP;
1564     return true;
1565 }
1566
1567 static bool trans_BRA(DisasContext *s, arg_bra *a)
1568 {
1569     TCGv_i64 dst;
1570
1571     if (!dc_isar_feature(aa64_pauth, s)) {
1572         return false;
1573     }
1574     dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m);
1575     gen_a64_set_pc(s, dst);
1576     set_btype_for_br(s, a->rn);
1577     s->base.is_jmp = DISAS_JUMP;
1578     return true;
1579 }
1580
1581 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1582 {
1583     TCGv_i64 dst, lr;
1584
1585     if (!dc_isar_feature(aa64_pauth, s)) {
1586         return false;
1587     }
1588     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1589     lr = cpu_reg(s, 30);
1590     if (dst == lr) {
1591         TCGv_i64 tmp = tcg_temp_new_i64();
1592         tcg_gen_mov_i64(tmp, dst);
1593         dst = tmp;
1594     }
1595     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1596     gen_a64_set_pc(s, dst);
1597     set_btype_for_blr(s);
1598     s->base.is_jmp = DISAS_JUMP;
1599     return true;
1600 }
1601
1602 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1603 {
1604     TCGv_i64 dst;
1605
1606     if (s->current_el == 0) {
1607         return false;
1608     }
1609     if (s->trap_eret) {
1610         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1611         return true;
1612     }
1613     dst = tcg_temp_new_i64();
1614     tcg_gen_ld_i64(dst, tcg_env,
1615                    offsetof(CPUARMState, elr_el[s->current_el]));
1616
1617     translator_io_start(&s->base);
1618
1619     gen_helper_exception_return(tcg_env, dst);
1620     /* Must exit loop to check un-masked IRQs */
1621     s->base.is_jmp = DISAS_EXIT;
1622     return true;
1623 }
1624
1625 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1626 {
1627     TCGv_i64 dst;
1628
1629     if (!dc_isar_feature(aa64_pauth, s)) {
1630         return false;
1631     }
1632     if (s->current_el == 0) {
1633         return false;
1634     }
1635     /* The FGT trap takes precedence over an auth trap. */
1636     if (s->trap_eret) {
1637         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1638         return true;
1639     }
1640     dst = tcg_temp_new_i64();
1641     tcg_gen_ld_i64(dst, tcg_env,
1642                    offsetof(CPUARMState, elr_el[s->current_el]));
1643
1644     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1645
1646     translator_io_start(&s->base);
1647
1648     gen_helper_exception_return(tcg_env, dst);
1649     /* Must exit loop to check un-masked IRQs */
1650     s->base.is_jmp = DISAS_EXIT;
1651     return true;
1652 }
1653
/* NOP: accept the instruction and generate no code. */
static bool trans_NOP(DisasContext *s, arg_NOP *a)
{
    return true;
}
1658
1659 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1660 {
1661     /*
1662      * When running in MTTCG we don't generate jumps to the yield and
1663      * WFE helpers as it won't affect the scheduling of other vCPUs.
1664      * If we wanted to more completely model WFE/SEV so we don't busy
1665      * spin unnecessarily we would need to do something more involved.
1666      */
1667     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1668         s->base.is_jmp = DISAS_YIELD;
1669     }
1670     return true;
1671 }
1672
/* WFI: end the TB with DISAS_WFI; no code is emitted here. */
static bool trans_WFI(DisasContext *s, arg_WFI *a)
{
    s->base.is_jmp = DISAS_WFI;
    return true;
}
1678
1679 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1680 {
1681     /*
1682      * When running in MTTCG we don't generate jumps to the yield and
1683      * WFE helpers as it won't affect the scheduling of other vCPUs.
1684      * If we wanted to more completely model WFE/SEV so we don't busy
1685      * spin unnecessarily we would need to do something more involved.
1686      */
1687     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1688         s->base.is_jmp = DISAS_WFE;
1689     }
1690     return true;
1691 }
1692
1693 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1694 {
1695     if (s->pauth_active) {
1696         gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
1697     }
1698     return true;
1699 }
1700
1701 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
1702 {
1703     if (s->pauth_active) {
1704         gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1705     }
1706     return true;
1707 }
1708
1709 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
1710 {
1711     if (s->pauth_active) {
1712         gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1713     }
1714     return true;
1715 }
1716
1717 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
1718 {
1719     if (s->pauth_active) {
1720         gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1721     }
1722     return true;
1723 }
1724
1725 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
1726 {
1727     if (s->pauth_active) {
1728         gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1729     }
1730     return true;
1731 }
1732
1733 static bool trans_ESB(DisasContext *s, arg_ESB *a)
1734 {
1735     /* Without RAS, we must implement this as NOP. */
1736     if (dc_isar_feature(aa64_ras, s)) {
1737         /*
1738          * QEMU does not have a source of physical SErrors,
1739          * so we are only concerned with virtual SErrors.
1740          * The pseudocode in the ARM for this case is
1741          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1742          *      AArch64.vESBOperation();
1743          * Most of the condition can be evaluated at translation time.
1744          * Test for EL2 present, and defer test for SEL2 to runtime.
1745          */
1746         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1747             gen_helper_vesb(tcg_env);
1748         }
1749     }
1750     return true;
1751 }
1752
1753 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
1754 {
1755     if (s->pauth_active) {
1756         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1757     }
1758     return true;
1759 }
1760
1761 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
1762 {
1763     if (s->pauth_active) {
1764         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1765     }
1766     return true;
1767 }
1768
1769 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
1770 {
1771     if (s->pauth_active) {
1772         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1773     }
1774     return true;
1775 }
1776
1777 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
1778 {
1779     if (s->pauth_active) {
1780         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1781     }
1782     return true;
1783 }
1784
1785 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
1786 {
1787     if (s->pauth_active) {
1788         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1789     }
1790     return true;
1791 }
1792
1793 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
1794 {
1795     if (s->pauth_active) {
1796         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1797     }
1798     return true;
1799 }
1800
1801 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
1802 {
1803     if (s->pauth_active) {
1804         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1805     }
1806     return true;
1807 }
1808
1809 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
1810 {
1811     if (s->pauth_active) {
1812         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1813     }
1814     return true;
1815 }
1816
/* CLREX: set cpu_exclusive_addr to -1. */
static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
    return true;
}
1822
1823 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
1824 {
1825     /* We handle DSB and DMB the same way */
1826     TCGBar bar;
1827
1828     switch (a->types) {
1829     case 1: /* MBReqTypes_Reads */
1830         bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1831         break;
1832     case 2: /* MBReqTypes_Writes */
1833         bar = TCG_BAR_SC | TCG_MO_ST_ST;
1834         break;
1835     default: /* MBReqTypes_All */
1836         bar = TCG_BAR_SC | TCG_MO_ALL;
1837         break;
1838     }
1839     tcg_gen_mb(bar);
1840     return true;
1841 }
1842
1843 static bool trans_ISB(DisasContext *s, arg_ISB *a)
1844 {
1845     /*
1846      * We need to break the TB after this insn to execute
1847      * self-modifying code correctly and also to take
1848      * any pending interrupts immediately.
1849      */
1850     reset_btype(s);
1851     gen_goto_tb(s, 0, 4);
1852     return true;
1853 }
1854
1855 static bool trans_SB(DisasContext *s, arg_SB *a)
1856 {
1857     if (!dc_isar_feature(aa64_sb, s)) {
1858         return false;
1859     }
1860     /*
1861      * TODO: There is no speculation barrier opcode for TCG;
1862      * MB and end the TB instead.
1863      */
1864     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1865     gen_goto_tb(s, 0, 4);
1866     return true;
1867 }
1868
1869 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
1870 {
1871     if (!dc_isar_feature(aa64_condm_4, s)) {
1872         return false;
1873     }
1874     tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1875     return true;
1876 }
1877
1878 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
1879 {
1880     TCGv_i32 z;
1881
1882     if (!dc_isar_feature(aa64_condm_5, s)) {
1883         return false;
1884     }
1885
1886     z = tcg_temp_new_i32();
1887
1888     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1889
1890     /*
1891      * (!C & !Z) << 31
1892      * (!(C | Z)) << 31
1893      * ~((C | Z) << 31)
1894      * ~-(C | Z)
1895      * (C | Z) - 1
1896      */
1897     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1898     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1899
1900     /* !(Z & C) */
1901     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1902     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1903
1904     /* (!C & Z) << 31 -> -(Z & ~C) */
1905     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1906     tcg_gen_neg_i32(cpu_VF, cpu_VF);
1907
1908     /* C | Z */
1909     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1910
1911     return true;
1912 }
1913
1914 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
1915 {
1916     if (!dc_isar_feature(aa64_condm_5, s)) {
1917         return false;
1918     }
1919
1920     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
1921     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
1922
1923     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1924     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1925
1926     tcg_gen_movi_i32(cpu_NF, 0);
1927     tcg_gen_movi_i32(cpu_VF, 0);
1928
1929     return true;
1930 }
1931
1932 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
1933 {
1934     if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
1935         return false;
1936     }
1937     if (a->imm & 1) {
1938         set_pstate_bits(PSTATE_UAO);
1939     } else {
1940         clear_pstate_bits(PSTATE_UAO);
1941     }
1942     gen_rebuild_hflags(s);
1943     s->base.is_jmp = DISAS_TOO_MANY;
1944     return true;
1945 }
1946
1947 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
1948 {
1949     if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
1950         return false;
1951     }
1952     if (a->imm & 1) {
1953         set_pstate_bits(PSTATE_PAN);
1954     } else {
1955         clear_pstate_bits(PSTATE_PAN);
1956     }
1957     gen_rebuild_hflags(s);
1958     s->base.is_jmp = DISAS_TOO_MANY;
1959     return true;
1960 }
1961
1962 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
1963 {
1964     if (s->current_el == 0) {
1965         return false;
1966     }
1967     gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
1968     s->base.is_jmp = DISAS_TOO_MANY;
1969     return true;
1970 }
1971
1972 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
1973 {
1974     if (!dc_isar_feature(aa64_ssbs, s)) {
1975         return false;
1976     }
1977     if (a->imm & 1) {
1978         set_pstate_bits(PSTATE_SSBS);
1979     } else {
1980         clear_pstate_bits(PSTATE_SSBS);
1981     }
1982     /* Don't need to rebuild hflags since SSBS is a nop */
1983     s->base.is_jmp = DISAS_TOO_MANY;
1984     return true;
1985 }
1986
1987 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
1988 {
1989     if (!dc_isar_feature(aa64_dit, s)) {
1990         return false;
1991     }
1992     if (a->imm & 1) {
1993         set_pstate_bits(PSTATE_DIT);
1994     } else {
1995         clear_pstate_bits(PSTATE_DIT);
1996     }
1997     /* There's no need to rebuild hflags because DIT is a nop */
1998     s->base.is_jmp = DISAS_TOO_MANY;
1999     return true;
2000 }
2001
2002 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2003 {
2004     if (dc_isar_feature(aa64_mte, s)) {
2005         /* Full MTE is enabled -- set the TCO bit as directed. */
2006         if (a->imm & 1) {
2007             set_pstate_bits(PSTATE_TCO);
2008         } else {
2009             clear_pstate_bits(PSTATE_TCO);
2010         }
2011         gen_rebuild_hflags(s);
2012         /* Many factors, including TCO, go into MTE_ACTIVE. */
2013         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2014         return true;
2015     } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2016         /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
2017         return true;
2018     } else {
2019         /* Insn not present */
2020         return false;
2021     }
2022 }
2023
2024 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2025 {
2026     gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2027     s->base.is_jmp = DISAS_TOO_MANY;
2028     return true;
2029 }
2030
2031 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2032 {
2033     gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2034     /* Exit the cpu loop to re-evaluate pending IRQs. */
2035     s->base.is_jmp = DISAS_UPDATE_EXIT;
2036     return true;
2037 }
2038
2039 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2040 {
2041     if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2042         return false;
2043     }
2044     if (sme_access_check(s)) {
2045         int old = s->pstate_sm | (s->pstate_za << 1);
2046         int new = a->imm * 3;
2047
2048         if ((old ^ new) & a->mask) {
2049             /* At least one bit changes. */
2050             gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2051                                 tcg_constant_i32(a->mask));
2052             s->base.is_jmp = DISAS_TOO_MANY;
2053         }
2054     }
2055     return true;
2056 }
2057
2058 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2059 {
2060     TCGv_i32 tmp = tcg_temp_new_i32();
2061     TCGv_i32 nzcv = tcg_temp_new_i32();
2062
2063     /* build bit 31, N */
2064     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2065     /* build bit 30, Z */
2066     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2067     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2068     /* build bit 29, C */
2069     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2070     /* build bit 28, V */
2071     tcg_gen_shri_i32(tmp, cpu_VF, 31);
2072     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2073     /* generate result */
2074     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2075 }
2076
2077 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2078 {
2079     TCGv_i32 nzcv = tcg_temp_new_i32();
2080
2081     /* take NZCV from R[t] */
2082     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2083
2084     /* bit 31, N */
2085     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2086     /* bit 30, Z */
2087     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2088     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2089     /* bit 29, C */
2090     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2091     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2092     /* bit 28, V */
2093     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2094     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2095 }
2096
2097 static void gen_sysreg_undef(DisasContext *s, bool isread,
2098                              uint8_t op0, uint8_t op1, uint8_t op2,
2099                              uint8_t crn, uint8_t crm, uint8_t rt)
2100 {
2101     /*
2102      * Generate code to emit an UNDEF with correct syndrome
2103      * information for a failed system register access.
2104      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2105      * but if FEAT_IDST is implemented then read accesses to registers
2106      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2107      * syndrome.
2108      */
2109     uint32_t syndrome;
2110
2111     if (isread && dc_isar_feature(aa64_ids, s) &&
2112         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2113         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2114     } else {
2115         syndrome = syn_uncategorized();
2116     }
2117     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2118 }
2119
/*
 * MRS - move from system register
 * MSR (register) - move to system register
 * SYS
 * SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 *
 * @isread selects the direction: true reads the system register into
 * general-purpose register @rt, false writes @rt to it.  @op0, @op1,
 * @op2, @crn and @crm form the encoding that is looked up in s->cp_regs.
 *
 * The stages, in order, are: TIDCP trap check, register lookup, static
 * permission check (with the FEAT_NV/NV2 adjustments), runtime access
 * check, FP/SVE/SME access checks, NV trap or redirection, the
 * special-cased register types, and finally the generic read or write.
 */
static void handle_sys(DisasContext *s, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                      crn, crm, op0, op1, op2);
    const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
    bool need_exit_tb = false;
    bool nv_trap_to_el2 = false;
    bool nv_redirect_reg = false;
    bool skip_fp_access_checks = false;
    bool nv2_mem_redirect = false;
    TCGv_ptr tcg_ri = NULL;
    TCGv_i64 tcg_rt;
    uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);

    if (crn == 11 || crn == 15) {
        /*
         * Check for TIDCP trap, which must take precedence over
         * the UNDEF for "no such register" etc.
         */
        switch (s->current_el) {
        case 0:
            if (dc_isar_feature(aa64_tidcp1, s)) {
                gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
            }
            break;
        case 1:
            gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
            break;
        }
    }

    if (!ri) {
        /*
         * Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
        return;
    }

    if (s->nv2 && ri->nv2_redirect_offset) {
        /*
         * Some registers always redirect to memory; some only do so if
         * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
         * pairs which share an offset; see the table in R_CSRPQ).
         */
        if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
            nv2_mem_redirect = s->nv1;
        } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
            nv2_mem_redirect = !s->nv1;
        } else {
            nv2_mem_redirect = true;
        }
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        /*
         * FEAT_NV/NV2 handling does not do the usual FP access checks
         * for registers only accessible at EL2 (though it *does* do them
         * for registers accessible at EL1).
         */
        skip_fp_access_checks = true;
        if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
            /*
             * This is one of the few EL2 registers which should redirect
             * to the equivalent EL1 register. We do that after running
             * the EL2 register's accessfn.
             */
            nv_redirect_reg = true;
            assert(!nv2_mem_redirect);
        } else if (nv2_mem_redirect) {
            /*
             * NV2 redirect-to-memory takes precedence over trap to EL2 or
             * UNDEF to EL1.
             */
        } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
            /*
             * This register / instruction exists and is an EL2 register, so
             * we must trap to EL2 if accessed in nested virtualization EL1
             * instead of UNDEFing. We'll do that after the usual access checks.
             * (This makes a difference only for a couple of registers like
             * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
             * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
             * an accessfn which does nothing when called from EL1, because
             * the trap-to-EL3 controls which would apply to that register
             * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
             */
            nv_trap_to_el2 = true;
        } else {
            /* Not permitted and no NV special case applies: UNDEF. */
            gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
            return;
        }
    }

    if (ri->accessfn || (ri->fgt && s->fgt_active)) {
        /*
         * Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.  The helper's
         * result is kept in tcg_ri and reused by the generic
         * read/write path below instead of a second lookup.
         */
        gen_a64_update_pc(s, 0);
        tcg_ri = tcg_temp_new_ptr();
        gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
                                       tcg_constant_i32(key),
                                       tcg_constant_i32(syndrome),
                                       tcg_constant_i32(isread));
    } else if (ri->type & ARM_CP_RAISES_EXC) {
        /*
         * The readfn or writefn might raise an exception;
         * synchronize the CPU state in case it does.
         */
        gen_a64_update_pc(s, 0);
    }

    if (!skip_fp_access_checks) {
        /* A failed check means there is nothing more to emit here. */
        if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
            return;
        } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
            return;
        } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
            return;
        }
    }

    if (nv_trap_to_el2) {
        /* Deferred FEAT_NV trap: taken to EL2 with the sysreg syndrome. */
        gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
        return;
    }

    if (nv_redirect_reg) {
        /*
         * FEAT_NV2 redirection of an EL2 register to an EL1 register.
         * Conveniently in all cases the encoding of the EL1 register is
         * identical to the EL2 register except that opc1 is 0.
         * Get the reginfo for the EL1 register to use for the actual access.
         * We don't use the EL1 register's access function, and
         * fine-grained-traps on EL1 also do not apply here.
         */
        key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                 crn, crm, op0, 0, op2);
        ri = get_arm_cp_reginfo(s->cp_regs, key);
        assert(ri);
        assert(cp_access_ok(s->current_el, ri, isread));
        /*
         * We might not have done an update_pc earlier, so check we don't
         * need it. We could support this in future if necessary.
         */
        assert(!(ri->type & ARM_CP_RAISES_EXC));
    }

    if (nv2_mem_redirect) {
        /*
         * This system register is being redirected into an EL2 memory access.
         * This means it is not an IO operation, doesn't change hflags,
         * and need not end the TB, because it has no side effects.
         *
         * The access is 64-bit single copy atomic, guaranteed aligned because
         * of the definition of VCNR_EL2. Its endianness depends on
         * SCTLR_EL2.EE, not on the data endianness of EL1.
         * It is done under either the EL2 translation regime or the EL2&0
         * translation regime, depending on HCR_EL2.E2H. It behaves as if
         * PSTATE.PAN is 0.
         */
        TCGv_i64 ptr = tcg_temp_new_i64();
        MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
        ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
        int memidx = arm_to_core_mmu_idx(armmemidx);

        mop |= (s->nv2_mem_be ? MO_BE : MO_LE);

        /* Address = cp15.vncr_el2 + the register's offset (flag bits masked). */
        tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
        tcg_gen_addi_i64(ptr, ptr,
                         (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
        } else {
            tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
        }
        return;
    }

    /* Handle special cases first */
    switch (ri->type & ARM_CP_SPECIAL_MASK) {
    case 0:
        /* Not special: fall out to the generic read/write code below. */
        break;
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
    {
        /*
         * Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         * For nested virt we should report EL2.
         */
        int el = s->nv ? 2 : s->current_el;
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, el << 2);
        return;
    }
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        if (s->mte_active[0]) {
            int desc = 0;

            desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
            desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
            desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);

            /* With MTE active the address comes from the check helper. */
            tcg_rt = tcg_temp_new_i64();
            gen_helper_mte_check_zva(tcg_rt, tcg_env,
                                     tcg_constant_i32(desc), cpu_reg(s, rt));
        } else {
            tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
        }
        gen_helper_dc_zva(tcg_env, tcg_rt);
        return;
    case ARM_CP_DC_GVA:
        {
            TCGv_i64 clean_addr, tag;

            /*
             * DC_GVA, like DC_ZVA, requires that we supply the original
             * pointer for an invalid page.  Probe that address first.
             */
            tcg_rt = cpu_reg(s, rt);
            clean_addr = clean_data_tbi(s, tcg_rt);
            gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);

            if (s->ata[0]) {
                /* Extract the tag from the register to match STZGM.  */
                tag = tcg_temp_new_i64();
                tcg_gen_shri_i64(tag, tcg_rt, 56);
                gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
            }
        }
        return;
    case ARM_CP_DC_GZVA:
        {
            TCGv_i64 clean_addr, tag;

            /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
            tcg_rt = cpu_reg(s, rt);
            clean_addr = clean_data_tbi(s, tcg_rt);
            gen_helper_dc_zva(tcg_env, clean_addr);

            if (s->ata[0]) {
                /* Extract the tag from the register to match STZGM.  */
                tag = tcg_temp_new_i64();
                tcg_gen_shri_i64(tag, tcg_rt, 56);
                gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
            }
        }
        return;
    default:
        g_assert_not_reached();
    }

    if (ri->type & ARM_CP_IO) {
        /* I/O operations must end the TB here (whether read or write) */
        need_exit_tb = translator_io_start(&s->base);
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            /* Constant register: reads yield the reginfo's resetvalue. */
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            /* Look the register up at runtime unless already done above. */
            if (!tcg_ri) {
                tcg_ri = gen_lookup_cp_reg(key);
            }
            gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
        } else {
            /* No readfn: load straight from the CPU state field. */
            tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            /* Look the register up at runtime unless already done above. */
            if (!tcg_ri) {
                tcg_ri = gen_lookup_cp_reg(key);
            }
            gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
        } else {
            /* No writefn: store straight into the CPU state field. */
            tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
        }
    }

    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /*
         * A write to any coprocessor register that ends a TB
         * must rebuild the hflags for the next TB.
         */
        gen_rebuild_hflags(s);
        /*
         * We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        need_exit_tb = true;
    }
    if (need_exit_tb) {
        s->base.is_jmp = DISAS_UPDATE_EXIT;
    }
}
2445
2446 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2447 {
2448     handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2449     return true;
2450 }
2451
2452 static bool trans_SVC(DisasContext *s, arg_i *a)
2453 {
2454     /*
2455      * For SVC, HVC and SMC we advance the single-step state
2456      * machine before taking the exception. This is architecturally
2457      * mandated, to ensure that single-stepping a system call
2458      * instruction works properly.
2459      */
2460     uint32_t syndrome = syn_aa64_svc(a->imm);
2461     if (s->fgt_svc) {
2462         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2463         return true;
2464     }
2465     gen_ss_advance(s);
2466     gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2467     return true;
2468 }
2469
2470 static bool trans_HVC(DisasContext *s, arg_i *a)
2471 {
2472     int target_el = s->current_el == 3 ? 3 : 2;
2473
2474     if (s->current_el == 0) {
2475         unallocated_encoding(s);
2476         return true;
2477     }
2478     /*
2479      * The pre HVC helper handles cases when HVC gets trapped
2480      * as an undefined insn by runtime configuration.
2481      */
2482     gen_a64_update_pc(s, 0);
2483     gen_helper_pre_hvc(tcg_env);
2484     /* Architecture requires ss advance before we do the actual work */
2485     gen_ss_advance(s);
2486     gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2487     return true;
2488 }
2489
2490 static bool trans_SMC(DisasContext *s, arg_i *a)
2491 {
2492     if (s->current_el == 0) {
2493         unallocated_encoding(s);
2494         return true;
2495     }
2496     gen_a64_update_pc(s, 0);
2497     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2498     /* Architecture requires ss advance before we do the actual work */
2499     gen_ss_advance(s);
2500     gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2501     return true;
2502 }
2503
2504 static bool trans_BRK(DisasContext *s, arg_i *a)
2505 {
2506     gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2507     return true;
2508 }
2509
2510 static bool trans_HLT(DisasContext *s, arg_i *a)
2511 {
2512     /*
2513      * HLT. This has two purposes.
2514      * Architecturally, it is an external halting debug instruction.
2515      * Since QEMU doesn't implement external debug, we treat this as
2516      * it is required for halting debug disabled: it will UNDEF.
2517      * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2518      */
2519     if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2520         gen_exception_internal_insn(s, EXCP_SEMIHOST);
2521     } else {
2522         unallocated_encoding(s);
2523     }
2524     return true;
2525 }
2526
2527 /*
2528  * Load/Store exclusive instructions are implemented by remembering
2529  * the value/address loaded, and seeing if these are the same
2530  * when the store is performed. This is not actually the architecturally
2531  * mandated semantics, but it works for typical guest code sequences
2532  * and avoids having to monitor regular stores.
2533  *
2534  * The store exclusive uses the atomic cmpxchg primitives to avoid
2535  * races in multi-threaded linux-user and when MTTCG softmmu is
2536  * enabled.
2537  */
2538 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2539                                int size, bool is_pair)
2540 {
2541     int idx = get_mem_index(s);
2542     TCGv_i64 dirty_addr, clean_addr;
2543     MemOp memop = check_atomic_align(s, rn, size + is_pair);
2544
2545     s->is_ldex = true;
2546     dirty_addr = cpu_reg_sp(s, rn);
2547     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2548
2549     g_assert(size <= 3);
2550     if (is_pair) {
2551         g_assert(size >= 2);
2552         if (size == 2) {
2553             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2554             if (s->be_data == MO_LE) {
2555                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2556                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2557             } else {
2558                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2559                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2560             }
2561         } else {
2562             TCGv_i128 t16 = tcg_temp_new_i128();
2563
2564             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2565
2566             if (s->be_data == MO_LE) {
2567                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2568                                       cpu_exclusive_high, t16);
2569             } else {
2570                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2571                                       cpu_exclusive_val, t16);
2572             }
2573             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2574             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2575         }
2576     } else {
2577         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2578         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2579     }
2580     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2581 }
2582
/*
 * Emit the store half of a load/store-exclusive pair.
 *
 * @rd receives the status: 0 if the store was performed, 1 if not.
 * @rt (and @rt2 when @is_pair) supply the data, @rn the address.
 * @size 2 with @is_pair stores two 32-bit halves as one 64-bit value;
 * any other @size with @is_pair stores two 64-bit registers as one
 * 128-bit value.
 *
 * The store is a compare-and-swap against the value remembered by
 * gen_load_exclusive(), and cpu_exclusive_addr is reset to -1 on every
 * path.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                int rn, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    TCGLabel *fail_label = gen_new_label();
    TCGLabel *done_label = gen_new_label();
    TCGv_i64 tmp, clean_addr;
    MemOp memop;

    /*
     * FIXME: We are out of spec here.  We have recorded only the address
     * from load_exclusive, not the entire range, and we assume that the
     * size of the access on both sides match.  The architecture allows the
     * store to be smaller than the load, so long as the stored bytes are
     * within the range recorded by the load.
     */

    /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
    /* Address mismatch: skip the store entirely and report failure. */
    tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);

    /*
     * The write, and any associated faults, only happen if the virtual
     * and physical addresses pass the exclusive monitor check.  These
     * faults are exceedingly unlikely, because normally the guest uses
     * the exact same address register for the load_exclusive, and we
     * would have recognized these faults there.
     *
     * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
     * unaligned 4-byte write within the range of an aligned 8-byte load.
     * With LSE2, the store would need to cross a 16-byte boundary when the
     * load did not, which would mean the store is outside the range
     * recorded for the monitor, which would have failed a corrected monitor
     * check above.  For now, we assume no size change and retain the
     * MO_ALIGN to let tcg know what we checked in the load_exclusive.
     *
     * It is possible to trigger an MTE fault, by performing the load with
     * a virtual address with a valid tag and performing the store with the
     * same virtual address and a different invalid tag.
     */
    memop = size + is_pair;
    if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
        memop |= MO_ALIGN;
    }
    memop = finalize_memop(s, memop);
    /* Only the MTE check is wanted here; the returned address is unused. */
    gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);

    tmp = tcg_temp_new_i64();
    if (is_pair) {
        if (size == 2) {
            /* Two 32-bit registers packed into one 64-bit cmpxchg. */
            if (s->be_data == MO_LE) {
                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
            } else {
                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
            }
            tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
                                       cpu_exclusive_val, tmp,
                                       get_mem_index(s), memop);
            /* tmp = 1 if memory did not hold the remembered value. */
            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
        } else {
            /* Two 64-bit registers packed into one 128-bit cmpxchg. */
            TCGv_i128 t16 = tcg_temp_new_i128();
            TCGv_i128 c16 = tcg_temp_new_i128();
            TCGv_i64 a, b;

            if (s->be_data == MO_LE) {
                tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
                tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
                                        cpu_exclusive_high);
            } else {
                tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
                tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
                                        cpu_exclusive_val);
            }

            tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
                                        get_mem_index(s), memop);

            /* Split the old memory value back into "val" (a) and "high" (b). */
            a = tcg_temp_new_i64();
            b = tcg_temp_new_i64();
            if (s->be_data == MO_LE) {
                tcg_gen_extr_i128_i64(a, b, t16);
            } else {
                tcg_gen_extr_i128_i64(b, a, t16);
            }

            /* tmp is non-zero iff either half differs from what was remembered. */
            tcg_gen_xor_i64(a, a, cpu_exclusive_val);
            tcg_gen_xor_i64(b, b, cpu_exclusive_high);
            tcg_gen_or_i64(tmp, a, b);

            tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
        }
    } else {
        tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
                                   cpu_reg(s, rt), get_mem_index(s), memop);
        /* tmp = 1 if memory did not hold the remembered value. */
        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
    }
    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
    tcg_gen_br(done_label);

    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    /* Both paths arrive here: drop the remembered exclusive address. */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
2698
2699 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2700                                  int rn, int size)
2701 {
2702     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2703     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2704     int memidx = get_mem_index(s);
2705     TCGv_i64 clean_addr;
2706     MemOp memop;
2707
2708     if (rn == 31) {
2709         gen_check_sp_alignment(s);
2710     }
2711     memop = check_atomic_align(s, rn, size);
2712     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2713     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
2714                                memidx, memop);
2715 }
2716
2717 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2718                                       int rn, int size)
2719 {
2720     TCGv_i64 s1 = cpu_reg(s, rs);
2721     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2722     TCGv_i64 t1 = cpu_reg(s, rt);
2723     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2724     TCGv_i64 clean_addr;
2725     int memidx = get_mem_index(s);
2726     MemOp memop;
2727
2728     if (rn == 31) {
2729         gen_check_sp_alignment(s);
2730     }
2731
2732     /* This is a single atomic access, despite the "pair". */
2733     memop = check_atomic_align(s, rn, size + 1);
2734     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2735
2736     if (size == 2) {
2737         TCGv_i64 cmp = tcg_temp_new_i64();
2738         TCGv_i64 val = tcg_temp_new_i64();
2739
2740         if (s->be_data == MO_LE) {
2741             tcg_gen_concat32_i64(val, t1, t2);
2742             tcg_gen_concat32_i64(cmp, s1, s2);
2743         } else {
2744             tcg_gen_concat32_i64(val, t2, t1);
2745             tcg_gen_concat32_i64(cmp, s2, s1);
2746         }
2747
2748         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
2749
2750         if (s->be_data == MO_LE) {
2751             tcg_gen_extr32_i64(s1, s2, cmp);
2752         } else {
2753             tcg_gen_extr32_i64(s2, s1, cmp);
2754         }
2755     } else {
2756         TCGv_i128 cmp = tcg_temp_new_i128();
2757         TCGv_i128 val = tcg_temp_new_i128();
2758
2759         if (s->be_data == MO_LE) {
2760             tcg_gen_concat_i64_i128(val, t1, t2);
2761             tcg_gen_concat_i64_i128(cmp, s1, s2);
2762         } else {
2763             tcg_gen_concat_i64_i128(val, t2, t1);
2764             tcg_gen_concat_i64_i128(cmp, s2, s1);
2765         }
2766
2767         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
2768
2769         if (s->be_data == MO_LE) {
2770             tcg_gen_extr_i128_i64(s1, s2, cmp);
2771         } else {
2772             tcg_gen_extr_i128_i64(s2, s1, cmp);
2773         }
2774     }
2775 }
2776
2777 /*
2778  * Compute the ISS.SF bit for syndrome information if an exception
2779  * is taken on a load or store. This indicates whether the instruction
2780  * is accessing a 32-bit or 64-bit register. This logic is derived
2781  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2782  */
2783 static bool ldst_iss_sf(int size, bool sign, bool ext)
2784 {
2785
2786     if (sign) {
2787         /*
2788          * Signed loads are 64 bit results if we are not going to
2789          * do a zero-extend from 32 to 64 after the load.
2790          * (For a store, sign and ext are always false.)
2791          */
2792         return !ext;
2793     } else {
2794         /* Unsigned loads/stores work at the specified size */
2795         return size == MO_64;
2796     }
2797 }
2798
2799 static bool trans_STXR(DisasContext *s, arg_stxr *a)
2800 {
2801     if (a->rn == 31) {
2802         gen_check_sp_alignment(s);
2803     }
2804     if (a->lasr) {
2805         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2806     }
2807     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
2808     return true;
2809 }
2810
2811 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
2812 {
2813     if (a->rn == 31) {
2814         gen_check_sp_alignment(s);
2815     }
2816     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
2817     if (a->lasr) {
2818         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2819     }
2820     return true;
2821 }
2822
2823 static bool trans_STLR(DisasContext *s, arg_stlr *a)
2824 {
2825     TCGv_i64 clean_addr;
2826     MemOp memop;
2827     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2828
2829     /*
2830      * StoreLORelease is the same as Store-Release for QEMU, but
2831      * needs the feature-test.
2832      */
2833     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2834         return false;
2835     }
2836     /* Generate ISS for non-exclusive accesses including LASR.  */
2837     if (a->rn == 31) {
2838         gen_check_sp_alignment(s);
2839     }
2840     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2841     memop = check_ordered_align(s, a->rn, 0, true, a->sz);
2842     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
2843                                 true, a->rn != 31, memop);
2844     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
2845               iss_sf, a->lasr);
2846     return true;
2847 }
2848
2849 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
2850 {
2851     TCGv_i64 clean_addr;
2852     MemOp memop;
2853     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2854
2855     /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2856     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2857         return false;
2858     }
2859     /* Generate ISS for non-exclusive accesses including LASR.  */
2860     if (a->rn == 31) {
2861         gen_check_sp_alignment(s);
2862     }
2863     memop = check_ordered_align(s, a->rn, 0, false, a->sz);
2864     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
2865                                 false, a->rn != 31, memop);
2866     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
2867               a->rt, iss_sf, a->lasr);
2868     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2869     return true;
2870 }
2871
2872 static bool trans_STXP(DisasContext *s, arg_stxr *a)
2873 {
2874     if (a->rn == 31) {
2875         gen_check_sp_alignment(s);
2876     }
2877     if (a->lasr) {
2878         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2879     }
2880     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
2881     return true;
2882 }
2883
2884 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
2885 {
2886     if (a->rn == 31) {
2887         gen_check_sp_alignment(s);
2888     }
2889     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
2890     if (a->lasr) {
2891         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2892     }
2893     return true;
2894 }
2895
2896 static bool trans_CASP(DisasContext *s, arg_CASP *a)
2897 {
2898     if (!dc_isar_feature(aa64_atomics, s)) {
2899         return false;
2900     }
2901     if (((a->rt | a->rs) & 1) != 0) {
2902         return false;
2903     }
2904
2905     gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
2906     return true;
2907 }
2908
2909 static bool trans_CAS(DisasContext *s, arg_CAS *a)
2910 {
2911     if (!dc_isar_feature(aa64_atomics, s)) {
2912         return false;
2913     }
2914     gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
2915     return true;
2916 }
2917
2918 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
2919 {
2920     bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
2921     TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
2922     TCGv_i64 clean_addr = tcg_temp_new_i64();
2923     MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
2924
2925     gen_pc_plus_diff(s, clean_addr, a->imm);
2926     do_gpr_ld(s, tcg_rt, clean_addr, memop,
2927               false, true, a->rt, iss_sf, false);
2928     return true;
2929 }
2930
2931 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
2932 {
2933     /* Load register (literal), vector version */
2934     TCGv_i64 clean_addr;
2935     MemOp memop;
2936
2937     if (!fp_access_check(s)) {
2938         return true;
2939     }
2940     memop = finalize_memop_asimd(s, a->sz);
2941     clean_addr = tcg_temp_new_i64();
2942     gen_pc_plus_diff(s, clean_addr, a->imm);
2943     do_fp_ld(s, a->rt, clean_addr, memop);
2944     return true;
2945 }
2946
2947 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
2948                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
2949                                  uint64_t offset, bool is_store, MemOp mop)
2950 {
2951     if (a->rn == 31) {
2952         gen_check_sp_alignment(s);
2953     }
2954
2955     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
2956     if (!a->p) {
2957         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
2958     }
2959
2960     *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
2961                                  (a->w || a->rn != 31), 2 << a->sz, mop);
2962 }
2963
2964 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
2965                                   TCGv_i64 dirty_addr, uint64_t offset)
2966 {
2967     if (a->w) {
2968         if (a->p) {
2969             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2970         }
2971         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
2972     }
2973 }
2974
2975 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
2976 {
2977     uint64_t offset = a->imm << a->sz;
2978     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
2979     MemOp mop = finalize_memop(s, a->sz);
2980
2981     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
2982     tcg_rt = cpu_reg(s, a->rt);
2983     tcg_rt2 = cpu_reg(s, a->rt2);
2984     /*
2985      * We built mop above for the single logical access -- rebuild it
2986      * now for the paired operation.
2987      *
2988      * With LSE2, non-sign-extending pairs are treated atomically if
2989      * aligned, and if unaligned one of the pair will be completely
2990      * within a 16-byte block and that element will be atomic.
2991      * Otherwise each element is separately atomic.
2992      * In all cases, issue one operation with the correct atomicity.
2993      */
2994     mop = a->sz + 1;
2995     if (s->align_mem) {
2996         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
2997     }
2998     mop = finalize_memop_pair(s, mop);
2999     if (a->sz == 2) {
3000         TCGv_i64 tmp = tcg_temp_new_i64();
3001
3002         if (s->be_data == MO_LE) {
3003             tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3004         } else {
3005             tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3006         }
3007         tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3008     } else {
3009         TCGv_i128 tmp = tcg_temp_new_i128();
3010
3011         if (s->be_data == MO_LE) {
3012             tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3013         } else {
3014             tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3015         }
3016         tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3017     }
3018     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3019     return true;
3020 }
3021
3022 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3023 {
3024     uint64_t offset = a->imm << a->sz;
3025     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3026     MemOp mop = finalize_memop(s, a->sz);
3027
3028     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3029     tcg_rt = cpu_reg(s, a->rt);
3030     tcg_rt2 = cpu_reg(s, a->rt2);
3031
3032     /*
3033      * We built mop above for the single logical access -- rebuild it
3034      * now for the paired operation.
3035      *
3036      * With LSE2, non-sign-extending pairs are treated atomically if
3037      * aligned, and if unaligned one of the pair will be completely
3038      * within a 16-byte block and that element will be atomic.
3039      * Otherwise each element is separately atomic.
3040      * In all cases, issue one operation with the correct atomicity.
3041      *
3042      * This treats sign-extending loads like zero-extending loads,
3043      * since that reuses the most code below.
3044      */
3045     mop = a->sz + 1;
3046     if (s->align_mem) {
3047         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3048     }
3049     mop = finalize_memop_pair(s, mop);
3050     if (a->sz == 2) {
3051         int o2 = s->be_data == MO_LE ? 32 : 0;
3052         int o1 = o2 ^ 32;
3053
3054         tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3055         if (a->sign) {
3056             tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3057             tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3058         } else {
3059             tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3060             tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3061         }
3062     } else {
3063         TCGv_i128 tmp = tcg_temp_new_i128();
3064
3065         tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3066         if (s->be_data == MO_LE) {
3067             tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3068         } else {
3069             tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3070         }
3071     }
3072     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3073     return true;
3074 }
3075
3076 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3077 {
3078     uint64_t offset = a->imm << a->sz;
3079     TCGv_i64 clean_addr, dirty_addr;
3080     MemOp mop;
3081
3082     if (!fp_access_check(s)) {
3083         return true;
3084     }
3085
3086     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3087     mop = finalize_memop_asimd(s, a->sz);
3088     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3089     do_fp_st(s, a->rt, clean_addr, mop);
3090     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3091     do_fp_st(s, a->rt2, clean_addr, mop);
3092     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3093     return true;
3094 }
3095
3096 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3097 {
3098     uint64_t offset = a->imm << a->sz;
3099     TCGv_i64 clean_addr, dirty_addr;
3100     MemOp mop;
3101
3102     if (!fp_access_check(s)) {
3103         return true;
3104     }
3105
3106     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3107     mop = finalize_memop_asimd(s, a->sz);
3108     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3109     do_fp_ld(s, a->rt, clean_addr, mop);
3110     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3111     do_fp_ld(s, a->rt2, clean_addr, mop);
3112     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3113     return true;
3114 }
3115
3116 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3117 {
3118     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3119     uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3120     MemOp mop;
3121     TCGv_i128 tmp;
3122
3123     /* STGP only comes in one size. */
3124     tcg_debug_assert(a->sz == MO_64);
3125
3126     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3127         return false;
3128     }
3129
3130     if (a->rn == 31) {
3131         gen_check_sp_alignment(s);
3132     }
3133
3134     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3135     if (!a->p) {
3136         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3137     }
3138
3139     clean_addr = clean_data_tbi(s, dirty_addr);
3140     tcg_rt = cpu_reg(s, a->rt);
3141     tcg_rt2 = cpu_reg(s, a->rt2);
3142
3143     /*
3144      * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3145      * and one tag operation.  We implement it as one single aligned 16-byte
3146      * memory operation for convenience.  Note that the alignment ensures
3147      * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3148      */
3149     mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3150
3151     tmp = tcg_temp_new_i128();
3152     if (s->be_data == MO_LE) {
3153         tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3154     } else {
3155         tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3156     }
3157     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3158
3159     /* Perform the tag store, if tag access enabled. */
3160     if (s->ata[0]) {
3161         if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3162             gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3163         } else {
3164             gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3165         }
3166     }
3167
3168     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3169     return true;
3170 }
3171
3172 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3173                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3174                                  uint64_t offset, bool is_store, MemOp mop)
3175 {
3176     int memidx;
3177
3178     if (a->rn == 31) {
3179         gen_check_sp_alignment(s);
3180     }
3181
3182     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3183     if (!a->p) {
3184         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3185     }
3186     memidx = get_a64_user_mem_index(s, a->unpriv);
3187     *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3188                                         a->w || a->rn != 31,
3189                                         mop, a->unpriv, memidx);
3190 }
3191
3192 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3193                                   TCGv_i64 dirty_addr, uint64_t offset)
3194 {
3195     if (a->w) {
3196         if (a->p) {
3197             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3198         }
3199         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3200     }
3201 }
3202
3203 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3204 {
3205     bool iss_sf, iss_valid = !a->w;
3206     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3207     int memidx = get_a64_user_mem_index(s, a->unpriv);
3208     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3209
3210     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3211
3212     tcg_rt = cpu_reg(s, a->rt);
3213     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3214
3215     do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3216                      iss_valid, a->rt, iss_sf, false);
3217     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3218     return true;
3219 }
3220
3221 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3222 {
3223     bool iss_sf, iss_valid = !a->w;
3224     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3225     int memidx = get_a64_user_mem_index(s, a->unpriv);
3226     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3227
3228     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3229
3230     tcg_rt = cpu_reg(s, a->rt);
3231     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3232
3233     do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3234                      a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3235     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3236     return true;
3237 }
3238
3239 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3240 {
3241     TCGv_i64 clean_addr, dirty_addr;
3242     MemOp mop;
3243
3244     if (!fp_access_check(s)) {
3245         return true;
3246     }
3247     mop = finalize_memop_asimd(s, a->sz);
3248     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3249     do_fp_st(s, a->rt, clean_addr, mop);
3250     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3251     return true;
3252 }
3253
3254 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3255 {
3256     TCGv_i64 clean_addr, dirty_addr;
3257     MemOp mop;
3258
3259     if (!fp_access_check(s)) {
3260         return true;
3261     }
3262     mop = finalize_memop_asimd(s, a->sz);
3263     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3264     do_fp_ld(s, a->rt, clean_addr, mop);
3265     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3266     return true;
3267 }
3268
3269 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3270                              TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3271                              bool is_store, MemOp memop)
3272 {
3273     TCGv_i64 tcg_rm;
3274
3275     if (a->rn == 31) {
3276         gen_check_sp_alignment(s);
3277     }
3278     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3279
3280     tcg_rm = read_cpu_reg(s, a->rm, 1);
3281     ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3282
3283     tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3284     *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3285 }
3286
3287 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3288 {
3289     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3290     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3291     MemOp memop;
3292
3293     if (extract32(a->opt, 1, 1) == 0) {
3294         return false;
3295     }
3296
3297     memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3298     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3299     tcg_rt = cpu_reg(s, a->rt);
3300     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3301               a->ext, true, a->rt, iss_sf, false);
3302     return true;
3303 }
3304
3305 static bool trans_STR(DisasContext *s, arg_ldst *a)
3306 {
3307     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3308     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3309     MemOp memop;
3310
3311     if (extract32(a->opt, 1, 1) == 0) {
3312         return false;
3313     }
3314
3315     memop = finalize_memop(s, a->sz);
3316     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3317     tcg_rt = cpu_reg(s, a->rt);
3318     do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3319     return true;
3320 }
3321
3322 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3323 {
3324     TCGv_i64 clean_addr, dirty_addr;
3325     MemOp memop;
3326
3327     if (extract32(a->opt, 1, 1) == 0) {
3328         return false;
3329     }
3330
3331     if (!fp_access_check(s)) {
3332         return true;
3333     }
3334
3335     memop = finalize_memop_asimd(s, a->sz);
3336     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3337     do_fp_ld(s, a->rt, clean_addr, memop);
3338     return true;
3339 }
3340
3341 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3342 {
3343     TCGv_i64 clean_addr, dirty_addr;
3344     MemOp memop;
3345
3346     if (extract32(a->opt, 1, 1) == 0) {
3347         return false;
3348     }
3349
3350     if (!fp_access_check(s)) {
3351         return true;
3352     }
3353
3354     memop = finalize_memop_asimd(s, a->sz);
3355     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3356     do_fp_st(s, a->rt, clean_addr, memop);
3357     return true;
3358 }
3359
3360
3361 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3362                          int sign, bool invert)
3363 {
3364     MemOp mop = a->sz | sign;
3365     TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3366
3367     if (a->rn == 31) {
3368         gen_check_sp_alignment(s);
3369     }
3370     mop = check_atomic_align(s, a->rn, mop);
3371     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3372                                 a->rn != 31, mop);
3373     tcg_rs = read_cpu_reg(s, a->rs, true);
3374     tcg_rt = cpu_reg(s, a->rt);
3375     if (invert) {
3376         tcg_gen_not_i64(tcg_rs, tcg_rs);
3377     }
3378     /*
3379      * The tcg atomic primitives are all full barriers.  Therefore we
3380      * can ignore the Acquire and Release bits of this instruction.
3381      */
3382     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3383
3384     if (mop & MO_SIGN) {
3385         switch (a->sz) {
3386         case MO_8:
3387             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3388             break;
3389         case MO_16:
3390             tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3391             break;
3392         case MO_32:
3393             tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3394             break;
3395         case MO_64:
3396             break;
3397         default:
3398             g_assert_not_reached();
3399         }
3400     }
3401     return true;
3402 }
3403
3404 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3405 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3406 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3407 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3408 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3409 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3410 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3411 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3412 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3413
3414 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3415 {
3416     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3417     TCGv_i64 clean_addr;
3418     MemOp mop;
3419
3420     if (!dc_isar_feature(aa64_atomics, s) ||
3421         !dc_isar_feature(aa64_rcpc_8_3, s)) {
3422         return false;
3423     }
3424     if (a->rn == 31) {
3425         gen_check_sp_alignment(s);
3426     }
3427     mop = check_atomic_align(s, a->rn, a->sz);
3428     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3429                                 a->rn != 31, mop);
3430     /*
3431      * LDAPR* are a special case because they are a simple load, not a
3432      * fetch-and-do-something op.
3433      * The architectural consistency requirements here are weaker than
3434      * full load-acquire (we only need "load-acquire processor consistent"),
3435      * but we choose to implement them as full LDAQ.
3436      */
3437     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3438               true, a->rt, iss_sf, true);
3439     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3440     return true;
3441 }
3442
3443 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3444 {
3445     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3446     MemOp memop;
3447
3448     /* Load with pointer authentication */
3449     if (!dc_isar_feature(aa64_pauth, s)) {
3450         return false;
3451     }
3452
3453     if (a->rn == 31) {
3454         gen_check_sp_alignment(s);
3455     }
3456     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3457
3458     if (s->pauth_active) {
3459         if (!a->m) {
3460             gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3461                                       tcg_constant_i64(0));
3462         } else {
3463             gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3464                                       tcg_constant_i64(0));
3465         }
3466     }
3467
3468     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3469
3470     memop = finalize_memop(s, MO_64);
3471
3472     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3473     clean_addr = gen_mte_check1(s, dirty_addr, false,
3474                                 a->w || a->rn != 31, memop);
3475
3476     tcg_rt = cpu_reg(s, a->rt);
3477     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3478               /* extend */ false, /* iss_valid */ !a->w,
3479               /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3480
3481     if (a->w) {
3482         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3483     }
3484     return true;
3485 }
3486
3487 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3488 {
3489     TCGv_i64 clean_addr, dirty_addr;
3490     MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3491     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3492
3493     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3494         return false;
3495     }
3496
3497     if (a->rn == 31) {
3498         gen_check_sp_alignment(s);
3499     }
3500
3501     mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3502     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3503     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3504     clean_addr = clean_data_tbi(s, dirty_addr);
3505
3506     /*
3507      * Load-AcquirePC semantics; we implement as the slightly more
3508      * restrictive Load-Acquire.
3509      */
3510     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3511               a->rt, iss_sf, true);
3512     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3513     return true;
3514 }
3515
3516 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3517 {
3518     TCGv_i64 clean_addr, dirty_addr;
3519     MemOp mop = a->sz;
3520     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3521
3522     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3523         return false;
3524     }
3525
3526     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3527
3528     if (a->rn == 31) {
3529         gen_check_sp_alignment(s);
3530     }
3531
3532     mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3533     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3534     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3535     clean_addr = clean_data_tbi(s, dirty_addr);
3536
3537     /* Store-Release semantics */
3538     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3539     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3540     return true;
3541 }
3542
3543 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3544 {
3545     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3546     MemOp endian, align, mop;
3547
3548     int total;    /* total bytes */
3549     int elements; /* elements per vector */
3550     int r;
3551     int size = a->sz;
3552
3553     if (!a->p && a->rm != 0) {
3554         /* For non-postindexed accesses the Rm field must be 0 */
3555         return false;
3556     }
3557     if (size == 3 && !a->q && a->selem != 1) {
3558         return false;
3559     }
3560     if (!fp_access_check(s)) {
3561         return true;
3562     }
3563
3564     if (a->rn == 31) {
3565         gen_check_sp_alignment(s);
3566     }
3567
3568     /* For our purposes, bytes are always little-endian.  */
3569     endian = s->be_data;
3570     if (size == 0) {
3571         endian = MO_LE;
3572     }
3573
3574     total = a->rpt * a->selem * (a->q ? 16 : 8);
3575     tcg_rn = cpu_reg_sp(s, a->rn);
3576
3577     /*
3578      * Issue the MTE check vs the logical repeat count, before we
3579      * promote consecutive little-endian elements below.
3580      */
3581     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3582                                 finalize_memop_asimd(s, size));
3583
3584     /*
3585      * Consecutive little-endian elements from a single register
3586      * can be promoted to a larger little-endian operation.
3587      */
3588     align = MO_ALIGN;
3589     if (a->selem == 1 && endian == MO_LE) {
3590         align = pow2_align(size);
3591         size = 3;
3592     }
3593     if (!s->align_mem) {
3594         align = 0;
3595     }
3596     mop = endian | size | align;
3597
3598     elements = (a->q ? 16 : 8) >> size;
3599     tcg_ebytes = tcg_constant_i64(1 << size);
3600     for (r = 0; r < a->rpt; r++) {
3601         int e;
3602         for (e = 0; e < elements; e++) {
3603             int xs;
3604             for (xs = 0; xs < a->selem; xs++) {
3605                 int tt = (a->rt + r + xs) % 32;
3606                 do_vec_ld(s, tt, e, clean_addr, mop);
3607                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3608             }
3609         }
3610     }
3611
3612     /*
3613      * For non-quad operations, setting a slice of the low 64 bits of
3614      * the register clears the high 64 bits (in the ARM ARM pseudocode
3615      * this is implicit in the fact that 'rval' is a 64 bit wide
3616      * variable).  For quad operations, we might still need to zero
3617      * the high bits of SVE.
3618      */
3619     for (r = 0; r < a->rpt * a->selem; r++) {
3620         int tt = (a->rt + r) % 32;
3621         clear_vec_high(s, a->q, tt);
3622     }
3623
3624     if (a->p) {
3625         if (a->rm == 31) {
3626             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3627         } else {
3628             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3629         }
3630     }
3631     return true;
3632 }
3633
3634 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3635 {
3636     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3637     MemOp endian, align, mop;
3638
3639     int total;    /* total bytes */
3640     int elements; /* elements per vector */
3641     int r;
3642     int size = a->sz;
3643
3644     if (!a->p && a->rm != 0) {
3645         /* For non-postindexed accesses the Rm field must be 0 */
3646         return false;
3647     }
3648     if (size == 3 && !a->q && a->selem != 1) {
3649         return false;
3650     }
3651     if (!fp_access_check(s)) {
3652         return true;
3653     }
3654
3655     if (a->rn == 31) {
3656         gen_check_sp_alignment(s);
3657     }
3658
3659     /* For our purposes, bytes are always little-endian.  */
3660     endian = s->be_data;
3661     if (size == 0) {
3662         endian = MO_LE;
3663     }
3664
3665     total = a->rpt * a->selem * (a->q ? 16 : 8);
3666     tcg_rn = cpu_reg_sp(s, a->rn);
3667
3668     /*
3669      * Issue the MTE check vs the logical repeat count, before we
3670      * promote consecutive little-endian elements below.
3671      */
3672     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
3673                                 finalize_memop_asimd(s, size));
3674
3675     /*
3676      * Consecutive little-endian elements from a single register
3677      * can be promoted to a larger little-endian operation.
3678      */
3679     align = MO_ALIGN;
3680     if (a->selem == 1 && endian == MO_LE) {
3681         align = pow2_align(size);
3682         size = 3;
3683     }
3684     if (!s->align_mem) {
3685         align = 0;
3686     }
3687     mop = endian | size | align;
3688
3689     elements = (a->q ? 16 : 8) >> size;
3690     tcg_ebytes = tcg_constant_i64(1 << size);
3691     for (r = 0; r < a->rpt; r++) {
3692         int e;
3693         for (e = 0; e < elements; e++) {
3694             int xs;
3695             for (xs = 0; xs < a->selem; xs++) {
3696                 int tt = (a->rt + r + xs) % 32;
3697                 do_vec_st(s, tt, e, clean_addr, mop);
3698                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3699             }
3700         }
3701     }
3702
3703     if (a->p) {
3704         if (a->rm == 31) {
3705             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3706         } else {
3707             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3708         }
3709     }
3710     return true;
3711 }
3712
3713 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
3714 {
3715     int xs, total, rt;
3716     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3717     MemOp mop;
3718
3719     if (!a->p && a->rm != 0) {
3720         return false;
3721     }
3722     if (!fp_access_check(s)) {
3723         return true;
3724     }
3725
3726     if (a->rn == 31) {
3727         gen_check_sp_alignment(s);
3728     }
3729
3730     total = a->selem << a->scale;
3731     tcg_rn = cpu_reg_sp(s, a->rn);
3732
3733     mop = finalize_memop_asimd(s, a->scale);
3734     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
3735                                 total, mop);
3736
3737     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3738     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3739         do_vec_st(s, rt, a->index, clean_addr, mop);
3740         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3741     }
3742
3743     if (a->p) {
3744         if (a->rm == 31) {
3745             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3746         } else {
3747             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3748         }
3749     }
3750     return true;
3751 }
3752
3753 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
3754 {
3755     int xs, total, rt;
3756     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3757     MemOp mop;
3758
3759     if (!a->p && a->rm != 0) {
3760         return false;
3761     }
3762     if (!fp_access_check(s)) {
3763         return true;
3764     }
3765
3766     if (a->rn == 31) {
3767         gen_check_sp_alignment(s);
3768     }
3769
3770     total = a->selem << a->scale;
3771     tcg_rn = cpu_reg_sp(s, a->rn);
3772
3773     mop = finalize_memop_asimd(s, a->scale);
3774     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3775                                 total, mop);
3776
3777     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3778     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3779         do_vec_ld(s, rt, a->index, clean_addr, mop);
3780         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3781     }
3782
3783     if (a->p) {
3784         if (a->rm == 31) {
3785             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3786         } else {
3787             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3788         }
3789     }
3790     return true;
3791 }
3792
3793 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
3794 {
3795     int xs, total, rt;
3796     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3797     MemOp mop;
3798
3799     if (!a->p && a->rm != 0) {
3800         return false;
3801     }
3802     if (!fp_access_check(s)) {
3803         return true;
3804     }
3805
3806     if (a->rn == 31) {
3807         gen_check_sp_alignment(s);
3808     }
3809
3810     total = a->selem << a->scale;
3811     tcg_rn = cpu_reg_sp(s, a->rn);
3812
3813     mop = finalize_memop_asimd(s, a->scale);
3814     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3815                                 total, mop);
3816
3817     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3818     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3819         /* Load and replicate to all elements */
3820         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3821
3822         tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3823         tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
3824                              (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
3825         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3826     }
3827
3828     if (a->p) {
3829         if (a->rm == 31) {
3830             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3831         } else {
3832             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3833         }
3834     }
3835     return true;
3836 }
3837
3838 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
3839 {
3840     TCGv_i64 addr, clean_addr, tcg_rt;
3841     int size = 4 << s->dcz_blocksize;
3842
3843     if (!dc_isar_feature(aa64_mte, s)) {
3844         return false;
3845     }
3846     if (s->current_el == 0) {
3847         return false;
3848     }
3849
3850     if (a->rn == 31) {
3851         gen_check_sp_alignment(s);
3852     }
3853
3854     addr = read_cpu_reg_sp(s, a->rn, true);
3855     tcg_gen_addi_i64(addr, addr, a->imm);
3856     tcg_rt = cpu_reg(s, a->rt);
3857
3858     if (s->ata[0]) {
3859         gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
3860     }
3861     /*
3862      * The non-tags portion of STZGM is mostly like DC_ZVA,
3863      * except the alignment happens before the access.
3864      */
3865     clean_addr = clean_data_tbi(s, addr);
3866     tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3867     gen_helper_dc_zva(tcg_env, clean_addr);
3868     return true;
3869 }
3870
3871 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
3872 {
3873     TCGv_i64 addr, clean_addr, tcg_rt;
3874
3875     if (!dc_isar_feature(aa64_mte, s)) {
3876         return false;
3877     }
3878     if (s->current_el == 0) {
3879         return false;
3880     }
3881
3882     if (a->rn == 31) {
3883         gen_check_sp_alignment(s);
3884     }
3885
3886     addr = read_cpu_reg_sp(s, a->rn, true);
3887     tcg_gen_addi_i64(addr, addr, a->imm);
3888     tcg_rt = cpu_reg(s, a->rt);
3889
3890     if (s->ata[0]) {
3891         gen_helper_stgm(tcg_env, addr, tcg_rt);
3892     } else {
3893         MMUAccessType acc = MMU_DATA_STORE;
3894         int size = 4 << s->gm_blocksize;
3895
3896         clean_addr = clean_data_tbi(s, addr);
3897         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3898         gen_probe_access(s, clean_addr, acc, size);
3899     }
3900     return true;
3901 }
3902
3903 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
3904 {
3905     TCGv_i64 addr, clean_addr, tcg_rt;
3906
3907     if (!dc_isar_feature(aa64_mte, s)) {
3908         return false;
3909     }
3910     if (s->current_el == 0) {
3911         return false;
3912     }
3913
3914     if (a->rn == 31) {
3915         gen_check_sp_alignment(s);
3916     }
3917
3918     addr = read_cpu_reg_sp(s, a->rn, true);
3919     tcg_gen_addi_i64(addr, addr, a->imm);
3920     tcg_rt = cpu_reg(s, a->rt);
3921
3922     if (s->ata[0]) {
3923         gen_helper_ldgm(tcg_rt, tcg_env, addr);
3924     } else {
3925         MMUAccessType acc = MMU_DATA_LOAD;
3926         int size = 4 << s->gm_blocksize;
3927
3928         clean_addr = clean_data_tbi(s, addr);
3929         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3930         gen_probe_access(s, clean_addr, acc, size);
3931         /* The result tags are zeros.  */
3932         tcg_gen_movi_i64(tcg_rt, 0);
3933     }
3934     return true;
3935 }
3936
3937 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
3938 {
3939     TCGv_i64 addr, clean_addr, tcg_rt;
3940
3941     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3942         return false;
3943     }
3944
3945     if (a->rn == 31) {
3946         gen_check_sp_alignment(s);
3947     }
3948
3949     addr = read_cpu_reg_sp(s, a->rn, true);
3950     if (!a->p) {
3951         /* pre-index or signed offset */
3952         tcg_gen_addi_i64(addr, addr, a->imm);
3953     }
3954
3955     tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
3956     tcg_rt = cpu_reg(s, a->rt);
3957     if (s->ata[0]) {
3958         gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
3959     } else {
3960         /*
3961          * Tag access disabled: we must check for aborts on the load
3962          * load from [rn+offset], and then insert a 0 tag into rt.
3963          */
3964         clean_addr = clean_data_tbi(s, addr);
3965         gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
3966         gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
3967     }
3968
3969     if (a->w) {
3970         /* pre-index or post-index */
3971         if (a->p) {
3972             /* post-index */
3973             tcg_gen_addi_i64(addr, addr, a->imm);
3974         }
3975         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
3976     }
3977     return true;
3978 }
3979
3980 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
3981 {
3982     TCGv_i64 addr, tcg_rt;
3983
3984     if (a->rn == 31) {
3985         gen_check_sp_alignment(s);
3986     }
3987
3988     addr = read_cpu_reg_sp(s, a->rn, true);
3989     if (!a->p) {
3990         /* pre-index or signed offset */
3991         tcg_gen_addi_i64(addr, addr, a->imm);
3992     }
3993     tcg_rt = cpu_reg_sp(s, a->rt);
3994     if (!s->ata[0]) {
3995         /*
3996          * For STG and ST2G, we need to check alignment and probe memory.
3997          * TODO: For STZG and STZ2G, we could rely on the stores below,
3998          * at least for system mode; user-only won't enforce alignment.
3999          */
4000         if (is_pair) {
4001             gen_helper_st2g_stub(tcg_env, addr);
4002         } else {
4003             gen_helper_stg_stub(tcg_env, addr);
4004         }
4005     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4006         if (is_pair) {
4007             gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4008         } else {
4009             gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4010         }
4011     } else {
4012         if (is_pair) {
4013             gen_helper_st2g(tcg_env, addr, tcg_rt);
4014         } else {
4015             gen_helper_stg(tcg_env, addr, tcg_rt);
4016         }
4017     }
4018
4019     if (is_zero) {
4020         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4021         TCGv_i64 zero64 = tcg_constant_i64(0);
4022         TCGv_i128 zero128 = tcg_temp_new_i128();
4023         int mem_index = get_mem_index(s);
4024         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4025
4026         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4027
4028         /* This is 1 or 2 atomic 16-byte operations. */
4029         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4030         if (is_pair) {
4031             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4032             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4033         }
4034     }
4035
4036     if (a->w) {
4037         /* pre-index or post-index */
4038         if (a->p) {
4039             /* post-index */
4040             tcg_gen_addi_i64(addr, addr, a->imm);
4041         }
4042         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4043     }
4044     return true;
4045 }
4046
/*
 * Entry points for the tag-store instructions, all routed to do_STG().
 * The two trailing arguments are do_STG's (is_zero, is_pair) flags:
 *   STG   (false, false)    STZG  (true, false)
 *   ST2G  (false, true)     STZ2G (true, true)
 * NOTE(review): TRANS_FEAT is defined elsewhere; presumably it gates
 * each entry point on the aa64_mte_insn_reg feature test -- confirm.
 */
TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4051
4052 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4053
4054 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4055                    bool is_setg, SetFn fn)
4056 {
4057     int memidx;
4058     uint32_t syndrome, desc = 0;
4059
4060     if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4061         return false;
4062     }
4063
4064     /*
4065      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4066      * us to pull this check before the CheckMOPSEnabled() test
4067      * (which we do in the helper function)
4068      */
4069     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4070         a->rd == 31 || a->rn == 31) {
4071         return false;
4072     }
4073
4074     memidx = get_a64_user_mem_index(s, a->unpriv);
4075
4076     /*
4077      * We pass option_a == true, matching our implementation;
4078      * we pass wrong_option == false: helper function may set that bit.
4079      */
4080     syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4081                        is_epilogue, false, true, a->rd, a->rs, a->rn);
4082
4083     if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4084         /* We may need to do MTE tag checking, so assemble the descriptor */
4085         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4086         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4087         desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4088         /* SIZEM1 and ALIGN we leave 0 (byte write) */
4089     }
4090     /* The helper function always needs the memidx even with MTE disabled */
4091     desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4092
4093     /*
4094      * The helper needs the register numbers, but since they're in
4095      * the syndrome anyway, we let it extract them from there rather
4096      * than passing in an extra three integer arguments.
4097      */
4098     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4099     return true;
4100 }
4101
/*
 * Entry points for the memory-set instructions, all routed to do_SET().
 * The trailing arguments are do_SET's (is_epilogue, is_setg, fn):
 * only the *E forms pass is_epilogue = true, and only the SETG* forms
 * pass is_setg = true.
 * NOTE(review): TRANS_FEAT is defined elsewhere; presumably it gates
 * each entry point on the aa64_mops feature test -- confirm.
 */
TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
4108
4109 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4110
4111 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4112 {
4113     int rmemidx, wmemidx;
4114     uint32_t syndrome, rdesc = 0, wdesc = 0;
4115     bool wunpriv = extract32(a->options, 0, 1);
4116     bool runpriv = extract32(a->options, 1, 1);
4117
4118     /*
4119      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4120      * us to pull this check before the CheckMOPSEnabled() test
4121      * (which we do in the helper function)
4122      */
4123     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4124         a->rd == 31 || a->rs == 31 || a->rn == 31) {
4125         return false;
4126     }
4127
4128     rmemidx = get_a64_user_mem_index(s, runpriv);
4129     wmemidx = get_a64_user_mem_index(s, wunpriv);
4130
4131     /*
4132      * We pass option_a == true, matching our implementation;
4133      * we pass wrong_option == false: helper function may set that bit.
4134      */
4135     syndrome = syn_mop(false, false, a->options, is_epilogue,
4136                        false, true, a->rd, a->rs, a->rn);
4137
4138     /* If we need to do MTE tag checking, assemble the descriptors */
4139     if (s->mte_active[runpriv]) {
4140         rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4141         rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4142     }
4143     if (s->mte_active[wunpriv]) {
4144         wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4145         wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4146         wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4147     }
4148     /* The helper function needs these parts of the descriptor regardless */
4149     rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4150     wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4151
4152     /*
4153      * The helper needs the register numbers, but since they're in
4154      * the syndrome anyway, we let it extract them from there rather
4155      * than passing in an extra three integer arguments.
4156      */
4157     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4158        tcg_constant_i32(rdesc));
4159     return true;
4160 }
4161
/*
 * Entry points for the memory-copy instructions, all routed to do_CPY().
 * The trailing arguments are do_CPY's (is_epilogue, fn): only the *E
 * forms pass is_epilogue = true.
 * NOTE(review): TRANS_FEAT is defined elsewhere; presumably it gates
 * each entry point on the aa64_mops feature test -- confirm.
 */
TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4168
4169 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4170
4171 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4172                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4173 {
4174     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4175     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4176     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4177
4178     fn(tcg_rd, tcg_rn, tcg_imm);
4179     if (!a->sf) {
4180         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4181     }
4182     return true;
4183 }
4184
4185 /*
4186  * PC-rel. addressing
4187  */
4188
4189 static bool trans_ADR(DisasContext *s, arg_ri *a)
4190 {
4191     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4192     return true;
4193 }
4194
4195 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4196 {
4197     int64_t offset = (int64_t)a->imm << 12;
4198
4199     /* The page offset is ok for CF_PCREL. */
4200     offset -= s->pc_curr & 0xfff;
4201     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4202     return true;
4203 }
4204
4205 /*
4206  * Add/subtract (immediate)
4207  */
/*
 * All four forms go through gen_rri(); the two literal arguments are its
 * (rd_sp, rn_sp) flags.  ADD/SUB pass 1 for both; the flag-setting
 * ADDS/SUBS pass rd_sp = 0 and pick the 64- or 32-bit CC generator
 * from a->sf.
 */
TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4212
4213 /*
4214  * Add/subtract (immediate, with tags)
4215  */
4216
4217 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4218                                       bool sub_op)
4219 {
4220     TCGv_i64 tcg_rn, tcg_rd;
4221     int imm;
4222
4223     imm = a->uimm6 << LOG2_TAG_GRANULE;
4224     if (sub_op) {
4225         imm = -imm;
4226     }
4227
4228     tcg_rn = cpu_reg_sp(s, a->rn);
4229     tcg_rd = cpu_reg_sp(s, a->rd);
4230
4231     if (s->ata[0]) {
4232         gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4233                            tcg_constant_i32(imm),
4234                            tcg_constant_i32(a->uimm4));
4235     } else {
4236         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4237         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4238     }
4239     return true;
4240 }
4241
/*
 * ADDG and SUBG share gen_add_sub_imm_with_tags(); the trailing
 * argument is its sub_op flag.
 * NOTE(review): TRANS_FEAT is defined elsewhere; presumably it gates
 * each entry point on the aa64_mte_insn_reg feature test -- confirm.
 */
TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4244
/*
 * Replicate an e-bit element across a 64-bit value.
 *
 * @mask: the element, held in the bottom e bits (higher bits zero)
 * @e: element width in bits; must be non-zero
 *
 * Returns mask copied into every e-bit slot of a 64 bit integer, by
 * repeatedly OR-ing in a copy shifted by a doubling width.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    for (width = e; width < 64; width <<= 1) {
        mask |= mask << width;
    }
    return mask;
}
4258
4259 /*
4260  * Logical (immediate)
4261  */
4262
/*
 * Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask.
 *
 * @result: receives the decoded 64-bit pattern (written only on success)
 * @immn: 1-bit N field
 * @imms: 6-bit imms field
 * @immr: 6-bit immr field
 *
 * Returns false if the imms/immr/immn are a reserved value (ie should
 * cause a guest UNDEF exception), and true if they are valid.
 */
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                            unsigned int imms, unsigned int immr)
{
    unsigned esize, emask, run, rot;
    uint64_t elt;
    int log2_esize;

    assert(immn < 2 && imms < 64 && immr < 64);

    /*
     * The bit patterns we create here are 64 bit patterns which
     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
     * 64 bits each. Each element contains the same value: a run
     * of between 1 and e-1 non-zero bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * The element size and run length are encoded into immn (1 bit)
     * and imms (6 bits) as follows:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * Notice that immn = 0, imms = 11111x is the only combination
     * not covered by one of the above options; this is reserved.
     * Further, <length of run - 1> all-ones is a reserved pattern.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /*
     * First determine the element size: the position of the highest
     * set bit of immn:NOT(imms) is log2 of the element width.
     */
    log2_esize = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (log2_esize < 1) {
        /* This is the immn == 0, imms == 11111x (binary) case */
        return false;
    }
    esize = 1 << log2_esize;
    emask = esize - 1;

    run = imms & emask;
    rot = immr & emask;
    if (run == emask) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /*
     * Create the value of one element: run+1 set bits rotated
     * right by rot within the element (which is esize bits wide)...
     */
    elt = MAKE_64BIT_MASK(0, run + 1);
    if (rot != 0) {
        elt = (elt >> rot) | (elt << (esize - rot));
        elt &= MAKE_64BIT_MASK(0, esize);
    }

    /* ...then replicate the element over the whole 64 bit value */
    *result = bitfield_replicate(elt, esize);
    return true;
}
4329
4330 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4331                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4332 {
4333     TCGv_i64 tcg_rd, tcg_rn;
4334     uint64_t imm;
4335
4336     /* Some immediate field values are reserved. */
4337     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4338                                 extract32(a->dbm, 0, 6),
4339                                 extract32(a->dbm, 6, 6))) {
4340         return false;
4341     }
4342     if (!a->sf) {
4343         imm &= 0xffffffffull;
4344     }
4345
4346     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4347     tcg_rn = cpu_reg(s, a->rn);
4348
4349     fn(tcg_rd, tcg_rn, imm);
4350     if (set_cc) {
4351         gen_logic_CC(a->sf, tcg_rd);
4352     }
4353     if (!a->sf) {
4354         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4355     }
4356     return true;
4357 }
4358
/*
 * All four forms go through gen_rri_log(); the boolean argument is its
 * set_cc flag, which only ANDS sets.
 */
TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4363
4364 /*
4365  * Move wide (immediate)
4366  */
4367
4368 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4369 {
4370     int pos = a->hw << 4;
4371     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4372     return true;
4373 }
4374
4375 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4376 {
4377     int pos = a->hw << 4;
4378     uint64_t imm = a->imm;
4379
4380     imm = ~(imm << pos);
4381     if (!a->sf) {
4382         imm = (uint32_t)imm;
4383     }
4384     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4385     return true;
4386 }
4387
4388 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4389 {
4390     int pos = a->hw << 4;
4391     TCGv_i64 tcg_rd, tcg_im;
4392
4393     tcg_rd = cpu_reg(s, a->rd);
4394     tcg_im = tcg_constant_i64(a->imm);
4395     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4396     if (!a->sf) {
4397         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4398     }
4399     return true;
4400 }
4401
4402 /*
4403  * Bitfield
4404  */
4405
4406 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4407 {
4408     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4409     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4410     unsigned int bitsize = a->sf ? 64 : 32;
4411     unsigned int ri = a->immr;
4412     unsigned int si = a->imms;
4413     unsigned int pos, len;
4414
4415     if (si >= ri) {
4416         /* Wd<s-r:0> = Wn<s:r> */
4417         len = (si - ri) + 1;
4418         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4419         if (!a->sf) {
4420             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4421         }
4422     } else {
4423         /* Wd<32+s-r,32-r> = Wn<s:0> */
4424         len = si + 1;
4425         pos = (bitsize - ri) & (bitsize - 1);
4426
4427         if (len < ri) {
4428             /*
4429              * Sign extend the destination field from len to fill the
4430              * balance of the word.  Let the deposit below insert all
4431              * of those sign bits.
4432              */
4433             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4434             len = ri;
4435         }
4436
4437         /*
4438          * We start with zero, and we haven't modified any bits outside
4439          * bitsize, therefore no final zero-extension is unneeded for !sf.
4440          */
4441         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4442     }
4443     return true;
4444 }
4445
4446 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4447 {
4448     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4449     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4450     unsigned int bitsize = a->sf ? 64 : 32;
4451     unsigned int ri = a->immr;
4452     unsigned int si = a->imms;
4453     unsigned int pos, len;
4454
4455     tcg_rd = cpu_reg(s, a->rd);
4456     tcg_tmp = read_cpu_reg(s, a->rn, 1);
4457
4458     if (si >= ri) {
4459         /* Wd<s-r:0> = Wn<s:r> */
4460         len = (si - ri) + 1;
4461         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4462     } else {
4463         /* Wd<32+s-r,32-r> = Wn<s:0> */
4464         len = si + 1;
4465         pos = (bitsize - ri) & (bitsize - 1);
4466         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4467     }
4468     return true;
4469 }
4470
4471 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4472 {
4473     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4474     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4475     unsigned int bitsize = a->sf ? 64 : 32;
4476     unsigned int ri = a->immr;
4477     unsigned int si = a->imms;
4478     unsigned int pos, len;
4479
4480     tcg_rd = cpu_reg(s, a->rd);
4481     tcg_tmp = read_cpu_reg(s, a->rn, 1);
4482
4483     if (si >= ri) {
4484         /* Wd<s-r:0> = Wn<s:r> */
4485         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4486         len = (si - ri) + 1;
4487         pos = 0;
4488     } else {
4489         /* Wd<32+s-r,32-r> = Wn<s:0> */
4490         len = si + 1;
4491         pos = (bitsize - ri) & (bitsize - 1);
4492     }
4493
4494     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4495     if (!a->sf) {
4496         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4497     }
4498     return true;
4499 }
4500
4501 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4502 {
4503     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4504
4505     tcg_rd = cpu_reg(s, a->rd);
4506
4507     if (unlikely(a->imm == 0)) {
4508         /*
4509          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4510          * so an extract from bit 0 is a special case.
4511          */
4512         if (a->sf) {
4513             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4514         } else {
4515             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4516         }
4517     } else {
4518         tcg_rm = cpu_reg(s, a->rm);
4519         tcg_rn = cpu_reg(s, a->rn);
4520
4521         if (a->sf) {
4522             /* Specialization to ROR happens in EXTRACT2.  */
4523             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4524         } else {
4525             TCGv_i32 t0 = tcg_temp_new_i32();
4526
4527             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4528             if (a->rm == a->rn) {
4529                 tcg_gen_rotri_i32(t0, t0, a->imm);
4530             } else {
4531                 TCGv_i32 t1 = tcg_temp_new_i32();
4532                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4533                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4534             }
4535             tcg_gen_extu_i32_i64(tcg_rd, t0);
4536         }
4537     }
4538     return true;
4539 }
4540
4541 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
4542  * Note that it is the caller's responsibility to ensure that the
4543  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4544  * mandated semantics for out of range shifts.
4545  */
4546 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4547                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4548 {
4549     switch (shift_type) {
4550     case A64_SHIFT_TYPE_LSL:
4551         tcg_gen_shl_i64(dst, src, shift_amount);
4552         break;
4553     case A64_SHIFT_TYPE_LSR:
4554         tcg_gen_shr_i64(dst, src, shift_amount);
4555         break;
4556     case A64_SHIFT_TYPE_ASR:
4557         if (!sf) {
4558             tcg_gen_ext32s_i64(dst, src);
4559         }
4560         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4561         break;
4562     case A64_SHIFT_TYPE_ROR:
4563         if (sf) {
4564             tcg_gen_rotr_i64(dst, src, shift_amount);
4565         } else {
4566             TCGv_i32 t0, t1;
4567             t0 = tcg_temp_new_i32();
4568             t1 = tcg_temp_new_i32();
4569             tcg_gen_extrl_i64_i32(t0, src);
4570             tcg_gen_extrl_i64_i32(t1, shift_amount);
4571             tcg_gen_rotr_i32(t0, t0, t1);
4572             tcg_gen_extu_i32_i64(dst, t0);
4573         }
4574         break;
4575     default:
4576         assert(FALSE); /* all shift types should be handled */
4577         break;
4578     }
4579
4580     if (!sf) { /* zero extend final result */
4581         tcg_gen_ext32u_i64(dst, dst);
4582     }
4583 }
4584
4585 /* Shift a TCGv src by immediate, put result in dst.
4586  * The shift amount must be in range (this should always be true as the
4587  * relevant instructions will UNDEF on bad shift immediates).
4588  */
4589 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4590                           enum a64_shift_type shift_type, unsigned int shift_i)
4591 {
4592     assert(shift_i < (sf ? 64 : 32));
4593
4594     if (shift_i == 0) {
4595         tcg_gen_mov_i64(dst, src);
4596     } else {
4597         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
4598     }
4599 }
4600
4601 /* Logical (shifted register)
4602  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4603  * +----+-----+-----------+-------+---+------+--------+------+------+
4604  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4605  * +----+-----+-----------+-------+---+------+--------+------+------+
4606  */
4607 static void disas_logic_reg(DisasContext *s, uint32_t insn)
4608 {
4609     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4610     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4611
4612     sf = extract32(insn, 31, 1);
4613     opc = extract32(insn, 29, 2);
4614     shift_type = extract32(insn, 22, 2);
4615     invert = extract32(insn, 21, 1);
4616     rm = extract32(insn, 16, 5);
4617     shift_amount = extract32(insn, 10, 6);
4618     rn = extract32(insn, 5, 5);
4619     rd = extract32(insn, 0, 5);
4620
4621     if (!sf && (shift_amount & (1 << 5))) {
4622         unallocated_encoding(s);
4623         return;
4624     }
4625
4626     tcg_rd = cpu_reg(s, rd);
4627
4628     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4629         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4630          * register-register MOV and MVN, so it is worth special casing.
4631          */
4632         tcg_rm = cpu_reg(s, rm);
4633         if (invert) {
4634             tcg_gen_not_i64(tcg_rd, tcg_rm);
4635             if (!sf) {
4636                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4637             }
4638         } else {
4639             if (sf) {
4640                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
4641             } else {
4642                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4643             }
4644         }
4645         return;
4646     }
4647
4648     tcg_rm = read_cpu_reg(s, rm, sf);
4649
4650     if (shift_amount) {
4651         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4652     }
4653
4654     tcg_rn = cpu_reg(s, rn);
4655
4656     switch (opc | (invert << 2)) {
4657     case 0: /* AND */
4658     case 3: /* ANDS */
4659         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4660         break;
4661     case 1: /* ORR */
4662         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4663         break;
4664     case 2: /* EOR */
4665         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4666         break;
4667     case 4: /* BIC */
4668     case 7: /* BICS */
4669         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4670         break;
4671     case 5: /* ORN */
4672         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4673         break;
4674     case 6: /* EON */
4675         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4676         break;
4677     default:
4678         assert(FALSE);
4679         break;
4680     }
4681
4682     if (!sf) {
4683         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4684     }
4685
4686     if (opc == 3) {
4687         gen_logic_CC(sf, tcg_rd);
4688     }
4689 }
4690
4691 /*
4692  * Add/subtract (extended register)
4693  *
4694  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4695  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4696  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4697  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4698  *
4699  *  sf: 0 -> 32bit, 1 -> 64bit
4700  *  op: 0 -> add  , 1 -> sub
4701  *   S: 1 -> set flags
4702  * opt: 00
4703  * option: extension type (see DecodeRegExtend)
4704  * imm3: optional shift to Rm
4705  *
4706  * Rd = Rn + LSL(extend(Rm), amount)
4707  */
4708 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4709 {
4710     int rd = extract32(insn, 0, 5);
4711     int rn = extract32(insn, 5, 5);
4712     int imm3 = extract32(insn, 10, 3);
4713     int option = extract32(insn, 13, 3);
4714     int rm = extract32(insn, 16, 5);
4715     int opt = extract32(insn, 22, 2);
4716     bool setflags = extract32(insn, 29, 1);
4717     bool sub_op = extract32(insn, 30, 1);
4718     bool sf = extract32(insn, 31, 1);
4719
4720     TCGv_i64 tcg_rm, tcg_rn; /* temps */
4721     TCGv_i64 tcg_rd;
4722     TCGv_i64 tcg_result;
4723
4724     if (imm3 > 4 || opt != 0) {
4725         unallocated_encoding(s);
4726         return;
4727     }
4728
4729     /* non-flag setting ops may use SP */
4730     if (!setflags) {
4731         tcg_rd = cpu_reg_sp(s, rd);
4732     } else {
4733         tcg_rd = cpu_reg(s, rd);
4734     }
4735     tcg_rn = read_cpu_reg_sp(s, rn, sf);
4736
4737     tcg_rm = read_cpu_reg(s, rm, sf);
4738     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4739
4740     tcg_result = tcg_temp_new_i64();
4741
4742     if (!setflags) {
4743         if (sub_op) {
4744             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4745         } else {
4746             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4747         }
4748     } else {
4749         if (sub_op) {
4750             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4751         } else {
4752             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4753         }
4754     }
4755
4756     if (sf) {
4757         tcg_gen_mov_i64(tcg_rd, tcg_result);
4758     } else {
4759         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4760     }
4761 }
4762
4763 /*
4764  * Add/subtract (shifted register)
4765  *
4766  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4767  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4768  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4769  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4770  *
4771  *    sf: 0 -> 32bit, 1 -> 64bit
4772  *    op: 0 -> add  , 1 -> sub
4773  *     S: 1 -> set flags
4774  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4775  *  imm6: Shift amount to apply to Rm before the add/sub
4776  */
4777 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4778 {
4779     int rd = extract32(insn, 0, 5);
4780     int rn = extract32(insn, 5, 5);
4781     int imm6 = extract32(insn, 10, 6);
4782     int rm = extract32(insn, 16, 5);
4783     int shift_type = extract32(insn, 22, 2);
4784     bool setflags = extract32(insn, 29, 1);
4785     bool sub_op = extract32(insn, 30, 1);
4786     bool sf = extract32(insn, 31, 1);
4787
4788     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4789     TCGv_i64 tcg_rn, tcg_rm;
4790     TCGv_i64 tcg_result;
4791
4792     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4793         unallocated_encoding(s);
4794         return;
4795     }
4796
4797     tcg_rn = read_cpu_reg(s, rn, sf);
4798     tcg_rm = read_cpu_reg(s, rm, sf);
4799
4800     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4801
4802     tcg_result = tcg_temp_new_i64();
4803
4804     if (!setflags) {
4805         if (sub_op) {
4806             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4807         } else {
4808             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4809         }
4810     } else {
4811         if (sub_op) {
4812             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4813         } else {
4814             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4815         }
4816     }
4817
4818     if (sf) {
4819         tcg_gen_mov_i64(tcg_rd, tcg_result);
4820     } else {
4821         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4822     }
4823 }
4824
4825 /* Data-processing (3 source)
4826  *
4827  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4828  *  +--+------+-----------+------+------+----+------+------+------+
4829  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4830  *  +--+------+-----------+------+------+----+------+------+------+
4831  */
4832 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4833 {
4834     int rd = extract32(insn, 0, 5);
4835     int rn = extract32(insn, 5, 5);
4836     int ra = extract32(insn, 10, 5);
4837     int rm = extract32(insn, 16, 5);
4838     int op_id = (extract32(insn, 29, 3) << 4) |
4839         (extract32(insn, 21, 3) << 1) |
4840         extract32(insn, 15, 1);
4841     bool sf = extract32(insn, 31, 1);
4842     bool is_sub = extract32(op_id, 0, 1);
4843     bool is_high = extract32(op_id, 2, 1);
4844     bool is_signed = false;
4845     TCGv_i64 tcg_op1;
4846     TCGv_i64 tcg_op2;
4847     TCGv_i64 tcg_tmp;
4848
4849     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4850     switch (op_id) {
4851     case 0x42: /* SMADDL */
4852     case 0x43: /* SMSUBL */
4853     case 0x44: /* SMULH */
4854         is_signed = true;
4855         break;
4856     case 0x0: /* MADD (32bit) */
4857     case 0x1: /* MSUB (32bit) */
4858     case 0x40: /* MADD (64bit) */
4859     case 0x41: /* MSUB (64bit) */
4860     case 0x4a: /* UMADDL */
4861     case 0x4b: /* UMSUBL */
4862     case 0x4c: /* UMULH */
4863         break;
4864     default:
4865         unallocated_encoding(s);
4866         return;
4867     }
4868
4869     if (is_high) {
4870         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4871         TCGv_i64 tcg_rd = cpu_reg(s, rd);
4872         TCGv_i64 tcg_rn = cpu_reg(s, rn);
4873         TCGv_i64 tcg_rm = cpu_reg(s, rm);
4874
4875         if (is_signed) {
4876             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4877         } else {
4878             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4879         }
4880         return;
4881     }
4882
4883     tcg_op1 = tcg_temp_new_i64();
4884     tcg_op2 = tcg_temp_new_i64();
4885     tcg_tmp = tcg_temp_new_i64();
4886
4887     if (op_id < 0x42) {
4888         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4889         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4890     } else {
4891         if (is_signed) {
4892             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4893             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4894         } else {
4895             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4896             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4897         }
4898     }
4899
4900     if (ra == 31 && !is_sub) {
4901         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4902         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4903     } else {
4904         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4905         if (is_sub) {
4906             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4907         } else {
4908             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4909         }
4910     }
4911
4912     if (!sf) {
4913         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4914     }
4915 }
4916
4917 /* Add/subtract (with carry)
4918  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
4919  * +--+--+--+------------------------+------+-------------+------+-----+
4920  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
4921  * +--+--+--+------------------------+------+-------------+------+-----+
4922  */
4923
4924 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4925 {
4926     unsigned int sf, op, setflags, rm, rn, rd;
4927     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4928
4929     sf = extract32(insn, 31, 1);
4930     op = extract32(insn, 30, 1);
4931     setflags = extract32(insn, 29, 1);
4932     rm = extract32(insn, 16, 5);
4933     rn = extract32(insn, 5, 5);
4934     rd = extract32(insn, 0, 5);
4935
4936     tcg_rd = cpu_reg(s, rd);
4937     tcg_rn = cpu_reg(s, rn);
4938
4939     if (op) {
4940         tcg_y = tcg_temp_new_i64();
4941         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4942     } else {
4943         tcg_y = cpu_reg(s, rm);
4944     }
4945
4946     if (setflags) {
4947         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4948     } else {
4949         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4950     }
4951 }
4952
4953 /*
4954  * Rotate right into flags
4955  *  31 30 29                21       15          10      5  4      0
4956  * +--+--+--+-----------------+--------+-----------+------+--+------+
4957  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
4958  * +--+--+--+-----------------+--------+-----------+------+--+------+
4959  */
4960 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
4961 {
4962     int mask = extract32(insn, 0, 4);
4963     int o2 = extract32(insn, 4, 1);
4964     int rn = extract32(insn, 5, 5);
4965     int imm6 = extract32(insn, 15, 6);
4966     int sf_op_s = extract32(insn, 29, 3);
4967     TCGv_i64 tcg_rn;
4968     TCGv_i32 nzcv;
4969
4970     if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
4971         unallocated_encoding(s);
4972         return;
4973     }
4974
4975     tcg_rn = read_cpu_reg(s, rn, 1);
4976     tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
4977
4978     nzcv = tcg_temp_new_i32();
4979     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
4980
4981     if (mask & 8) { /* N */
4982         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
4983     }
4984     if (mask & 4) { /* Z */
4985         tcg_gen_not_i32(cpu_ZF, nzcv);
4986         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
4987     }
4988     if (mask & 2) { /* C */
4989         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
4990     }
4991     if (mask & 1) { /* V */
4992         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
4993     }
4994 }
4995
4996 /*
4997  * Evaluate into flags
4998  *  31 30 29                21        15   14        10      5  4      0
4999  * +--+--+--+-----------------+---------+----+---------+------+--+------+
5000  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
5001  * +--+--+--+-----------------+---------+----+---------+------+--+------+
5002  */
5003 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
5004 {
5005     int o3_mask = extract32(insn, 0, 5);
5006     int rn = extract32(insn, 5, 5);
5007     int o2 = extract32(insn, 15, 6);
5008     int sz = extract32(insn, 14, 1);
5009     int sf_op_s = extract32(insn, 29, 3);
5010     TCGv_i32 tmp;
5011     int shift;
5012
5013     if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
5014         !dc_isar_feature(aa64_condm_4, s)) {
5015         unallocated_encoding(s);
5016         return;
5017     }
5018     shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
5019
5020     tmp = tcg_temp_new_i32();
5021     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
5022     tcg_gen_shli_i32(cpu_NF, tmp, shift);
5023     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
5024     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
5025     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
5026 }
5027
5028 /* Conditional compare (immediate / register)
5029  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
5030  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5031  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
5032  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5033  *        [1]                             y                [0]       [0]
5034  */
5035 static void disas_cc(DisasContext *s, uint32_t insn)
5036 {
5037     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
5038     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
5039     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
5040     DisasCompare c;
5041
5042     if (!extract32(insn, 29, 1)) {
5043         unallocated_encoding(s);
5044         return;
5045     }
5046     if (insn & (1 << 10 | 1 << 4)) {
5047         unallocated_encoding(s);
5048         return;
5049     }
5050     sf = extract32(insn, 31, 1);
5051     op = extract32(insn, 30, 1);
5052     is_imm = extract32(insn, 11, 1);
5053     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
5054     cond = extract32(insn, 12, 4);
5055     rn = extract32(insn, 5, 5);
5056     nzcv = extract32(insn, 0, 4);
5057
5058     /* Set T0 = !COND.  */
5059     tcg_t0 = tcg_temp_new_i32();
5060     arm_test_cc(&c, cond);
5061     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
5062
5063     /* Load the arguments for the new comparison.  */
5064     if (is_imm) {
5065         tcg_y = tcg_temp_new_i64();
5066         tcg_gen_movi_i64(tcg_y, y);
5067     } else {
5068         tcg_y = cpu_reg(s, y);
5069     }
5070     tcg_rn = cpu_reg(s, rn);
5071
5072     /* Set the flags for the new comparison.  */
5073     tcg_tmp = tcg_temp_new_i64();
5074     if (op) {
5075         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5076     } else {
5077         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5078     }
5079
5080     /* If COND was false, force the flags to #nzcv.  Compute two masks
5081      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
5082      * For tcg hosts that support ANDC, we can make do with just T1.
5083      * In either case, allow the tcg optimizer to delete any unused mask.
5084      */
5085     tcg_t1 = tcg_temp_new_i32();
5086     tcg_t2 = tcg_temp_new_i32();
5087     tcg_gen_neg_i32(tcg_t1, tcg_t0);
5088     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
5089
5090     if (nzcv & 8) { /* N */
5091         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
5092     } else {
5093         if (TCG_TARGET_HAS_andc_i32) {
5094             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
5095         } else {
5096             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
5097         }
5098     }
5099     if (nzcv & 4) { /* Z */
5100         if (TCG_TARGET_HAS_andc_i32) {
5101             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
5102         } else {
5103             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
5104         }
5105     } else {
5106         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
5107     }
5108     if (nzcv & 2) { /* C */
5109         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
5110     } else {
5111         if (TCG_TARGET_HAS_andc_i32) {
5112             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
5113         } else {
5114             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
5115         }
5116     }
5117     if (nzcv & 1) { /* V */
5118         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
5119     } else {
5120         if (TCG_TARGET_HAS_andc_i32) {
5121             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
5122         } else {
5123             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
5124         }
5125     }
5126 }
5127
5128 /* Conditional select
5129  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
5130  * +----+----+---+-----------------+------+------+-----+------+------+
5131  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
5132  * +----+----+---+-----------------+------+------+-----+------+------+
5133  */
5134 static void disas_cond_select(DisasContext *s, uint32_t insn)
5135 {
5136     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
5137     TCGv_i64 tcg_rd, zero;
5138     DisasCompare64 c;
5139
5140     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
5141         /* S == 1 or op2<1> == 1 */
5142         unallocated_encoding(s);
5143         return;
5144     }
5145     sf = extract32(insn, 31, 1);
5146     else_inv = extract32(insn, 30, 1);
5147     rm = extract32(insn, 16, 5);
5148     cond = extract32(insn, 12, 4);
5149     else_inc = extract32(insn, 10, 1);
5150     rn = extract32(insn, 5, 5);
5151     rd = extract32(insn, 0, 5);
5152
5153     tcg_rd = cpu_reg(s, rd);
5154
5155     a64_test_cc(&c, cond);
5156     zero = tcg_constant_i64(0);
5157
5158     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
5159         /* CSET & CSETM.  */
5160         if (else_inv) {
5161             tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
5162                                    tcg_rd, c.value, zero);
5163         } else {
5164             tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
5165                                 tcg_rd, c.value, zero);
5166         }
5167     } else {
5168         TCGv_i64 t_true = cpu_reg(s, rn);
5169         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
5170         if (else_inv && else_inc) {
5171             tcg_gen_neg_i64(t_false, t_false);
5172         } else if (else_inv) {
5173             tcg_gen_not_i64(t_false, t_false);
5174         } else if (else_inc) {
5175             tcg_gen_addi_i64(t_false, t_false, 1);
5176         }
5177         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
5178     }
5179
5180     if (!sf) {
5181         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5182     }
5183 }
5184
5185 static void handle_clz(DisasContext *s, unsigned int sf,
5186                        unsigned int rn, unsigned int rd)
5187 {
5188     TCGv_i64 tcg_rd, tcg_rn;
5189     tcg_rd = cpu_reg(s, rd);
5190     tcg_rn = cpu_reg(s, rn);
5191
5192     if (sf) {
5193         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
5194     } else {
5195         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5196         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5197         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
5198         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5199     }
5200 }
5201
5202 static void handle_cls(DisasContext *s, unsigned int sf,
5203                        unsigned int rn, unsigned int rd)
5204 {
5205     TCGv_i64 tcg_rd, tcg_rn;
5206     tcg_rd = cpu_reg(s, rd);
5207     tcg_rn = cpu_reg(s, rn);
5208
5209     if (sf) {
5210         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
5211     } else {
5212         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5213         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5214         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
5215         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5216     }
5217 }
5218
5219 static void handle_rbit(DisasContext *s, unsigned int sf,
5220                         unsigned int rn, unsigned int rd)
5221 {
5222     TCGv_i64 tcg_rd, tcg_rn;
5223     tcg_rd = cpu_reg(s, rd);
5224     tcg_rn = cpu_reg(s, rn);
5225
5226     if (sf) {
5227         gen_helper_rbit64(tcg_rd, tcg_rn);
5228     } else {
5229         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5230         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5231         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
5232         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5233     }
5234 }
5235
5236 /* REV with sf==1, opcode==3 ("REV64") */
5237 static void handle_rev64(DisasContext *s, unsigned int sf,
5238                          unsigned int rn, unsigned int rd)
5239 {
5240     if (!sf) {
5241         unallocated_encoding(s);
5242         return;
5243     }
5244     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
5245 }
5246
5247 /* REV with sf==0, opcode==2
5248  * REV32 (sf==1, opcode==2)
5249  */
5250 static void handle_rev32(DisasContext *s, unsigned int sf,
5251                          unsigned int rn, unsigned int rd)
5252 {
5253     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5254     TCGv_i64 tcg_rn = cpu_reg(s, rn);
5255
5256     if (sf) {
5257         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
5258         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
5259     } else {
5260         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
5261     }
5262 }
5263
5264 /* REV16 (opcode==1) */
5265 static void handle_rev16(DisasContext *s, unsigned int sf,
5266                          unsigned int rn, unsigned int rd)
5267 {
5268     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5269     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5270     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5271     TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
5272
5273     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
5274     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
5275     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
5276     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
5277     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
5278 }
5279
5280 /* Data-processing (1 source)
5281  *   31  30  29  28             21 20     16 15    10 9    5 4    0
5282  * +----+---+---+-----------------+---------+--------+------+------+
5283  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
5284  * +----+---+---+-----------------+---------+--------+------+------+
5285  */
5286 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
5287 {
5288     unsigned int sf, opcode, opcode2, rn, rd;
5289     TCGv_i64 tcg_rd;
5290
5291     if (extract32(insn, 29, 1)) {
5292         unallocated_encoding(s);
5293         return;
5294     }
5295
5296     sf = extract32(insn, 31, 1);
5297     opcode = extract32(insn, 10, 6);
5298     opcode2 = extract32(insn, 16, 5);
5299     rn = extract32(insn, 5, 5);
5300     rd = extract32(insn, 0, 5);
5301
5302 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
5303
5304     switch (MAP(sf, opcode2, opcode)) {
5305     case MAP(0, 0x00, 0x00): /* RBIT */
5306     case MAP(1, 0x00, 0x00):
5307         handle_rbit(s, sf, rn, rd);
5308         break;
5309     case MAP(0, 0x00, 0x01): /* REV16 */
5310     case MAP(1, 0x00, 0x01):
5311         handle_rev16(s, sf, rn, rd);
5312         break;
5313     case MAP(0, 0x00, 0x02): /* REV/REV32 */
5314     case MAP(1, 0x00, 0x02):
5315         handle_rev32(s, sf, rn, rd);
5316         break;
5317     case MAP(1, 0x00, 0x03): /* REV64 */
5318         handle_rev64(s, sf, rn, rd);
5319         break;
5320     case MAP(0, 0x00, 0x04): /* CLZ */
5321     case MAP(1, 0x00, 0x04):
5322         handle_clz(s, sf, rn, rd);
5323         break;
5324     case MAP(0, 0x00, 0x05): /* CLS */
5325     case MAP(1, 0x00, 0x05):
5326         handle_cls(s, sf, rn, rd);
5327         break;
5328     case MAP(1, 0x01, 0x00): /* PACIA */
5329         if (s->pauth_active) {
5330             tcg_rd = cpu_reg(s, rd);
5331             gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5332         } else if (!dc_isar_feature(aa64_pauth, s)) {
5333             goto do_unallocated;
5334         }
5335         break;
5336     case MAP(1, 0x01, 0x01): /* PACIB */
5337         if (s->pauth_active) {
5338             tcg_rd = cpu_reg(s, rd);
5339             gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5340         } else if (!dc_isar_feature(aa64_pauth, s)) {
5341             goto do_unallocated;
5342         }
5343         break;
5344     case MAP(1, 0x01, 0x02): /* PACDA */
5345         if (s->pauth_active) {
5346             tcg_rd = cpu_reg(s, rd);
5347             gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5348         } else if (!dc_isar_feature(aa64_pauth, s)) {
5349             goto do_unallocated;
5350         }
5351         break;
5352     case MAP(1, 0x01, 0x03): /* PACDB */
5353         if (s->pauth_active) {
5354             tcg_rd = cpu_reg(s, rd);
5355             gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5356         } else if (!dc_isar_feature(aa64_pauth, s)) {
5357             goto do_unallocated;
5358         }
5359         break;
5360     case MAP(1, 0x01, 0x04): /* AUTIA */
5361         if (s->pauth_active) {
5362             tcg_rd = cpu_reg(s, rd);
5363             gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5364         } else if (!dc_isar_feature(aa64_pauth, s)) {
5365             goto do_unallocated;
5366         }
5367         break;
5368     case MAP(1, 0x01, 0x05): /* AUTIB */
5369         if (s->pauth_active) {
5370             tcg_rd = cpu_reg(s, rd);
5371             gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5372         } else if (!dc_isar_feature(aa64_pauth, s)) {
5373             goto do_unallocated;
5374         }
5375         break;
5376     case MAP(1, 0x01, 0x06): /* AUTDA */
5377         if (s->pauth_active) {
5378             tcg_rd = cpu_reg(s, rd);
5379             gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5380         } else if (!dc_isar_feature(aa64_pauth, s)) {
5381             goto do_unallocated;
5382         }
5383         break;
5384     case MAP(1, 0x01, 0x07): /* AUTDB */
5385         if (s->pauth_active) {
5386             tcg_rd = cpu_reg(s, rd);
5387             gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
5388         } else if (!dc_isar_feature(aa64_pauth, s)) {
5389             goto do_unallocated;
5390         }
5391         break;
5392     case MAP(1, 0x01, 0x08): /* PACIZA */
5393         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5394             goto do_unallocated;
5395         } else if (s->pauth_active) {
5396             tcg_rd = cpu_reg(s, rd);
5397             gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5398         }
5399         break;
5400     case MAP(1, 0x01, 0x09): /* PACIZB */
5401         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5402             goto do_unallocated;
5403         } else if (s->pauth_active) {
5404             tcg_rd = cpu_reg(s, rd);
5405             gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5406         }
5407         break;
5408     case MAP(1, 0x01, 0x0a): /* PACDZA */
5409         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5410             goto do_unallocated;
5411         } else if (s->pauth_active) {
5412             tcg_rd = cpu_reg(s, rd);
5413             gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5414         }
5415         break;
5416     case MAP(1, 0x01, 0x0b): /* PACDZB */
5417         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5418             goto do_unallocated;
5419         } else if (s->pauth_active) {
5420             tcg_rd = cpu_reg(s, rd);
5421             gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5422         }
5423         break;
5424     case MAP(1, 0x01, 0x0c): /* AUTIZA */
5425         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5426             goto do_unallocated;
5427         } else if (s->pauth_active) {
5428             tcg_rd = cpu_reg(s, rd);
5429             gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5430         }
5431         break;
5432     case MAP(1, 0x01, 0x0d): /* AUTIZB */
5433         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5434             goto do_unallocated;
5435         } else if (s->pauth_active) {
5436             tcg_rd = cpu_reg(s, rd);
5437             gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5438         }
5439         break;
5440     case MAP(1, 0x01, 0x0e): /* AUTDZA */
5441         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5442             goto do_unallocated;
5443         } else if (s->pauth_active) {
5444             tcg_rd = cpu_reg(s, rd);
5445             gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5446         }
5447         break;
5448     case MAP(1, 0x01, 0x0f): /* AUTDZB */
5449         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5450             goto do_unallocated;
5451         } else if (s->pauth_active) {
5452             tcg_rd = cpu_reg(s, rd);
5453             gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
5454         }
5455         break;
5456     case MAP(1, 0x01, 0x10): /* XPACI */
5457         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5458             goto do_unallocated;
5459         } else if (s->pauth_active) {
5460             tcg_rd = cpu_reg(s, rd);
5461             gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd);
5462         }
5463         break;
5464     case MAP(1, 0x01, 0x11): /* XPACD */
5465         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5466             goto do_unallocated;
5467         } else if (s->pauth_active) {
5468             tcg_rd = cpu_reg(s, rd);
5469             gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd);
5470         }
5471         break;
5472     default:
5473     do_unallocated:
5474         unallocated_encoding(s);
5475         break;
5476     }
5477
5478 #undef MAP
5479 }
5480
5481 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5482                        unsigned int rm, unsigned int rn, unsigned int rd)
5483 {
5484     TCGv_i64 tcg_n, tcg_m, tcg_rd;
5485     tcg_rd = cpu_reg(s, rd);
5486
5487     if (!sf && is_signed) {
5488         tcg_n = tcg_temp_new_i64();
5489         tcg_m = tcg_temp_new_i64();
5490         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5491         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5492     } else {
5493         tcg_n = read_cpu_reg(s, rn, sf);
5494         tcg_m = read_cpu_reg(s, rm, sf);
5495     }
5496
5497     if (is_signed) {
5498         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5499     } else {
5500         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5501     }
5502
5503     if (!sf) { /* zero extend final result */
5504         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5505     }
5506 }
5507
5508 /* LSLV, LSRV, ASRV, RORV */
5509 static void handle_shift_reg(DisasContext *s,
5510                              enum a64_shift_type shift_type, unsigned int sf,
5511                              unsigned int rm, unsigned int rn, unsigned int rd)
5512 {
5513     TCGv_i64 tcg_shift = tcg_temp_new_i64();
5514     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5515     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5516
5517     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5518     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5519 }
5520
5521 /* CRC32[BHWX], CRC32C[BHWX] */
5522 static void handle_crc32(DisasContext *s,
5523                          unsigned int sf, unsigned int sz, bool crc32c,
5524                          unsigned int rm, unsigned int rn, unsigned int rd)
5525 {
5526     TCGv_i64 tcg_acc, tcg_val;
5527     TCGv_i32 tcg_bytes;
5528
5529     if (!dc_isar_feature(aa64_crc32, s)
5530         || (sf == 1 && sz != 3)
5531         || (sf == 0 && sz == 3)) {
5532         unallocated_encoding(s);
5533         return;
5534     }
5535
5536     if (sz == 3) {
5537         tcg_val = cpu_reg(s, rm);
5538     } else {
5539         uint64_t mask;
5540         switch (sz) {
5541         case 0:
5542             mask = 0xFF;
5543             break;
5544         case 1:
5545             mask = 0xFFFF;
5546             break;
5547         case 2:
5548             mask = 0xFFFFFFFF;
5549             break;
5550         default:
5551             g_assert_not_reached();
5552         }
5553         tcg_val = tcg_temp_new_i64();
5554         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5555     }
5556
5557     tcg_acc = cpu_reg(s, rn);
5558     tcg_bytes = tcg_constant_i32(1 << sz);
5559
5560     if (crc32c) {
5561         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5562     } else {
5563         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5564     }
5565 }
5566
5567 /* Data-processing (2 source)
5568  *   31   30  29 28             21 20  16 15    10 9    5 4    0
5569  * +----+---+---+-----------------+------+--------+------+------+
5570  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
5571  * +----+---+---+-----------------+------+--------+------+------+
5572  */
5573 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5574 {
5575     unsigned int sf, rm, opcode, rn, rd, setflag;
5576     sf = extract32(insn, 31, 1);
5577     setflag = extract32(insn, 29, 1);
5578     rm = extract32(insn, 16, 5);
5579     opcode = extract32(insn, 10, 6);
5580     rn = extract32(insn, 5, 5);
5581     rd = extract32(insn, 0, 5);
5582
5583     if (setflag && opcode != 0) {
5584         unallocated_encoding(s);
5585         return;
5586     }
5587
5588     switch (opcode) {
5589     case 0: /* SUBP(S) */
5590         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5591             goto do_unallocated;
5592         } else {
5593             TCGv_i64 tcg_n, tcg_m, tcg_d;
5594
5595             tcg_n = read_cpu_reg_sp(s, rn, true);
5596             tcg_m = read_cpu_reg_sp(s, rm, true);
5597             tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
5598             tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
5599             tcg_d = cpu_reg(s, rd);
5600
5601             if (setflag) {
5602                 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
5603             } else {
5604                 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
5605             }
5606         }
5607         break;
5608     case 2: /* UDIV */
5609         handle_div(s, false, sf, rm, rn, rd);
5610         break;
5611     case 3: /* SDIV */
5612         handle_div(s, true, sf, rm, rn, rd);
5613         break;
5614     case 4: /* IRG */
5615         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5616             goto do_unallocated;
5617         }
5618         if (s->ata[0]) {
5619             gen_helper_irg(cpu_reg_sp(s, rd), tcg_env,
5620                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
5621         } else {
5622             gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
5623                                              cpu_reg_sp(s, rn));
5624         }
5625         break;
5626     case 5: /* GMI */
5627         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5628             goto do_unallocated;
5629         } else {
5630             TCGv_i64 t = tcg_temp_new_i64();
5631
5632             tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
5633             tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
5634             tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
5635         }
5636         break;
5637     case 8: /* LSLV */
5638         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5639         break;
5640     case 9: /* LSRV */
5641         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5642         break;
5643     case 10: /* ASRV */
5644         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5645         break;
5646     case 11: /* RORV */
5647         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5648         break;
5649     case 12: /* PACGA */
5650         if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5651             goto do_unallocated;
5652         }
5653         gen_helper_pacga(cpu_reg(s, rd), tcg_env,
5654                          cpu_reg(s, rn), cpu_reg_sp(s, rm));
5655         break;
5656     case 16:
5657     case 17:
5658     case 18:
5659     case 19:
5660     case 20:
5661     case 21:
5662     case 22:
5663     case 23: /* CRC32 */
5664     {
5665         int sz = extract32(opcode, 0, 2);
5666         bool crc32c = extract32(opcode, 2, 1);
5667         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5668         break;
5669     }
5670     default:
5671     do_unallocated:
5672         unallocated_encoding(s);
5673         break;
5674     }
5675 }
5676
5677 /*
5678  * Data processing - register
5679  *  31  30 29  28      25    21  20  16      10         0
5680  * +--+---+--+---+-------+-----+-------+-------+---------+
5681  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5682  * +--+---+--+---+-------+-----+-------+-------+---------+
5683  */
5684 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5685 {
5686     int op0 = extract32(insn, 30, 1);
5687     int op1 = extract32(insn, 28, 1);
5688     int op2 = extract32(insn, 21, 4);
5689     int op3 = extract32(insn, 10, 6);
5690
5691     if (!op1) {
5692         if (op2 & 8) {
5693             if (op2 & 1) {
5694                 /* Add/sub (extended register) */
5695                 disas_add_sub_ext_reg(s, insn);
5696             } else {
5697                 /* Add/sub (shifted register) */
5698                 disas_add_sub_reg(s, insn);
5699             }
5700         } else {
5701             /* Logical (shifted register) */
5702             disas_logic_reg(s, insn);
5703         }
5704         return;
5705     }
5706
5707     switch (op2) {
5708     case 0x0:
5709         switch (op3) {
5710         case 0x00: /* Add/subtract (with carry) */
5711             disas_adc_sbc(s, insn);
5712             break;
5713
5714         case 0x01: /* Rotate right into flags */
5715         case 0x21:
5716             disas_rotate_right_into_flags(s, insn);
5717             break;
5718
5719         case 0x02: /* Evaluate into flags */
5720         case 0x12:
5721         case 0x22:
5722         case 0x32:
5723             disas_evaluate_into_flags(s, insn);
5724             break;
5725
5726         default:
5727             goto do_unallocated;
5728         }
5729         break;
5730
5731     case 0x2: /* Conditional compare */
5732         disas_cc(s, insn); /* both imm and reg forms */
5733         break;
5734
5735     case 0x4: /* Conditional select */
5736         disas_cond_select(s, insn);
5737         break;
5738
5739     case 0x6: /* Data-processing */
5740         if (op0) {    /* (1 source) */
5741             disas_data_proc_1src(s, insn);
5742         } else {      /* (2 source) */
5743             disas_data_proc_2src(s, insn);
5744         }
5745         break;
5746     case 0x8 ... 0xf: /* (3 source) */
5747         disas_data_proc_3src(s, insn);
5748         break;
5749
5750     default:
5751     do_unallocated:
5752         unallocated_encoding(s);
5753         break;
5754     }
5755 }
5756
5757 static void handle_fp_compare(DisasContext *s, int size,
5758                               unsigned int rn, unsigned int rm,
5759                               bool cmp_with_zero, bool signal_all_nans)
5760 {
5761     TCGv_i64 tcg_flags = tcg_temp_new_i64();
5762     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
5763
5764     if (size == MO_64) {
5765         TCGv_i64 tcg_vn, tcg_vm;
5766
5767         tcg_vn = read_fp_dreg(s, rn);
5768         if (cmp_with_zero) {
5769             tcg_vm = tcg_constant_i64(0);
5770         } else {
5771             tcg_vm = read_fp_dreg(s, rm);
5772         }
5773         if (signal_all_nans) {
5774             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5775         } else {
5776             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5777         }
5778     } else {
5779         TCGv_i32 tcg_vn = tcg_temp_new_i32();
5780         TCGv_i32 tcg_vm = tcg_temp_new_i32();
5781
5782         read_vec_element_i32(s, tcg_vn, rn, 0, size);
5783         if (cmp_with_zero) {
5784             tcg_gen_movi_i32(tcg_vm, 0);
5785         } else {
5786             read_vec_element_i32(s, tcg_vm, rm, 0, size);
5787         }
5788
5789         switch (size) {
5790         case MO_32:
5791             if (signal_all_nans) {
5792                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5793             } else {
5794                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5795             }
5796             break;
5797         case MO_16:
5798             if (signal_all_nans) {
5799                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5800             } else {
5801                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5802             }
5803             break;
5804         default:
5805             g_assert_not_reached();
5806         }
5807     }
5808
5809     gen_set_nzcv(tcg_flags);
5810 }
5811
5812 /* Floating point compare
5813  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5814  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5815  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5816  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5817  */
5818 static void disas_fp_compare(DisasContext *s, uint32_t insn)
5819 {
5820     unsigned int mos, type, rm, op, rn, opc, op2r;
5821     int size;
5822
5823     mos = extract32(insn, 29, 3);
5824     type = extract32(insn, 22, 2);
5825     rm = extract32(insn, 16, 5);
5826     op = extract32(insn, 14, 2);
5827     rn = extract32(insn, 5, 5);
5828     opc = extract32(insn, 3, 2);
5829     op2r = extract32(insn, 0, 3);
5830
5831     if (mos || op || op2r) {
5832         unallocated_encoding(s);
5833         return;
5834     }
5835
5836     switch (type) {
5837     case 0:
5838         size = MO_32;
5839         break;
5840     case 1:
5841         size = MO_64;
5842         break;
5843     case 3:
5844         size = MO_16;
5845         if (dc_isar_feature(aa64_fp16, s)) {
5846             break;
5847         }
5848         /* fallthru */
5849     default:
5850         unallocated_encoding(s);
5851         return;
5852     }
5853
5854     if (!fp_access_check(s)) {
5855         return;
5856     }
5857
5858     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5859 }
5860
5861 /* Floating point conditional compare
5862  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
5863  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5864  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
5865  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5866  */
5867 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
5868 {
5869     unsigned int mos, type, rm, cond, rn, op, nzcv;
5870     TCGLabel *label_continue = NULL;
5871     int size;
5872
5873     mos = extract32(insn, 29, 3);
5874     type = extract32(insn, 22, 2);
5875     rm = extract32(insn, 16, 5);
5876     cond = extract32(insn, 12, 4);
5877     rn = extract32(insn, 5, 5);
5878     op = extract32(insn, 4, 1);
5879     nzcv = extract32(insn, 0, 4);
5880
5881     if (mos) {
5882         unallocated_encoding(s);
5883         return;
5884     }
5885
5886     switch (type) {
5887     case 0:
5888         size = MO_32;
5889         break;
5890     case 1:
5891         size = MO_64;
5892         break;
5893     case 3:
5894         size = MO_16;
5895         if (dc_isar_feature(aa64_fp16, s)) {
5896             break;
5897         }
5898         /* fallthru */
5899     default:
5900         unallocated_encoding(s);
5901         return;
5902     }
5903
5904     if (!fp_access_check(s)) {
5905         return;
5906     }
5907
5908     if (cond < 0x0e) { /* not always */
5909         TCGLabel *label_match = gen_new_label();
5910         label_continue = gen_new_label();
5911         arm_gen_test_cc(cond, label_match);
5912         /* nomatch: */
5913         gen_set_nzcv(tcg_constant_i64(nzcv << 28));
5914         tcg_gen_br(label_continue);
5915         gen_set_label(label_match);
5916     }
5917
5918     handle_fp_compare(s, size, rn, rm, false, op);
5919
5920     if (cond < 0x0e) {
5921         gen_set_label(label_continue);
5922     }
5923 }
5924
5925 /* Floating point conditional select
5926  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
5927  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5928  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
5929  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5930  */
5931 static void disas_fp_csel(DisasContext *s, uint32_t insn)
5932 {
5933     unsigned int mos, type, rm, cond, rn, rd;
5934     TCGv_i64 t_true, t_false;
5935     DisasCompare64 c;
5936     MemOp sz;
5937
5938     mos = extract32(insn, 29, 3);
5939     type = extract32(insn, 22, 2);
5940     rm = extract32(insn, 16, 5);
5941     cond = extract32(insn, 12, 4);
5942     rn = extract32(insn, 5, 5);
5943     rd = extract32(insn, 0, 5);
5944
5945     if (mos) {
5946         unallocated_encoding(s);
5947         return;
5948     }
5949
5950     switch (type) {
5951     case 0:
5952         sz = MO_32;
5953         break;
5954     case 1:
5955         sz = MO_64;
5956         break;
5957     case 3:
5958         sz = MO_16;
5959         if (dc_isar_feature(aa64_fp16, s)) {
5960             break;
5961         }
5962         /* fallthru */
5963     default:
5964         unallocated_encoding(s);
5965         return;
5966     }
5967
5968     if (!fp_access_check(s)) {
5969         return;
5970     }
5971
5972     /* Zero extend sreg & hreg inputs to 64 bits now.  */
5973     t_true = tcg_temp_new_i64();
5974     t_false = tcg_temp_new_i64();
5975     read_vec_element(s, t_true, rn, 0, sz);
5976     read_vec_element(s, t_false, rm, 0, sz);
5977
5978     a64_test_cc(&c, cond);
5979     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
5980                         t_true, t_false);
5981
5982     /* Note that sregs & hregs write back zeros to the high bits,
5983        and we've already done the zero-extension.  */
5984     write_fp_dreg(s, rd, t_true);
5985 }
5986
5987 /* Floating-point data-processing (1 source) - half precision */
5988 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
5989 {
5990     TCGv_ptr fpst = NULL;
5991     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
5992     TCGv_i32 tcg_res = tcg_temp_new_i32();
5993
5994     switch (opcode) {
5995     case 0x0: /* FMOV */
5996         tcg_gen_mov_i32(tcg_res, tcg_op);
5997         break;
5998     case 0x1: /* FABS */
5999         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
6000         break;
6001     case 0x2: /* FNEG */
6002         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
6003         break;
6004     case 0x3: /* FSQRT */
6005         fpst = fpstatus_ptr(FPST_FPCR_F16);
6006         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
6007         break;
6008     case 0x8: /* FRINTN */
6009     case 0x9: /* FRINTP */
6010     case 0xa: /* FRINTM */
6011     case 0xb: /* FRINTZ */
6012     case 0xc: /* FRINTA */
6013     {
6014         TCGv_i32 tcg_rmode;
6015
6016         fpst = fpstatus_ptr(FPST_FPCR_F16);
6017         tcg_rmode = gen_set_rmode(opcode & 7, fpst);
6018         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6019         gen_restore_rmode(tcg_rmode, fpst);
6020         break;
6021     }
6022     case 0xe: /* FRINTX */
6023         fpst = fpstatus_ptr(FPST_FPCR_F16);
6024         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
6025         break;
6026     case 0xf: /* FRINTI */
6027         fpst = fpstatus_ptr(FPST_FPCR_F16);
6028         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6029         break;
6030     default:
6031         g_assert_not_reached();
6032     }
6033
6034     write_fp_sreg(s, rd, tcg_res);
6035 }
6036
6037 /* Floating-point data-processing (1 source) - single precision */
6038 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
6039 {
6040     void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
6041     TCGv_i32 tcg_op, tcg_res;
6042     TCGv_ptr fpst;
6043     int rmode = -1;
6044
6045     tcg_op = read_fp_sreg(s, rn);
6046     tcg_res = tcg_temp_new_i32();
6047
6048     switch (opcode) {
6049     case 0x0: /* FMOV */
6050         tcg_gen_mov_i32(tcg_res, tcg_op);
6051         goto done;
6052     case 0x1: /* FABS */
6053         gen_helper_vfp_abss(tcg_res, tcg_op);
6054         goto done;
6055     case 0x2: /* FNEG */
6056         gen_helper_vfp_negs(tcg_res, tcg_op);
6057         goto done;
6058     case 0x3: /* FSQRT */
6059         gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
6060         goto done;
6061     case 0x6: /* BFCVT */
6062         gen_fpst = gen_helper_bfcvt;
6063         break;
6064     case 0x8: /* FRINTN */
6065     case 0x9: /* FRINTP */
6066     case 0xa: /* FRINTM */
6067     case 0xb: /* FRINTZ */
6068     case 0xc: /* FRINTA */
6069         rmode = opcode & 7;
6070         gen_fpst = gen_helper_rints;
6071         break;
6072     case 0xe: /* FRINTX */
6073         gen_fpst = gen_helper_rints_exact;
6074         break;
6075     case 0xf: /* FRINTI */
6076         gen_fpst = gen_helper_rints;
6077         break;
6078     case 0x10: /* FRINT32Z */
6079         rmode = FPROUNDING_ZERO;
6080         gen_fpst = gen_helper_frint32_s;
6081         break;
6082     case 0x11: /* FRINT32X */
6083         gen_fpst = gen_helper_frint32_s;
6084         break;
6085     case 0x12: /* FRINT64Z */
6086         rmode = FPROUNDING_ZERO;
6087         gen_fpst = gen_helper_frint64_s;
6088         break;
6089     case 0x13: /* FRINT64X */
6090         gen_fpst = gen_helper_frint64_s;
6091         break;
6092     default:
6093         g_assert_not_reached();
6094     }
6095
6096     fpst = fpstatus_ptr(FPST_FPCR);
6097     if (rmode >= 0) {
6098         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6099         gen_fpst(tcg_res, tcg_op, fpst);
6100         gen_restore_rmode(tcg_rmode, fpst);
6101     } else {
6102         gen_fpst(tcg_res, tcg_op, fpst);
6103     }
6104
6105  done:
6106     write_fp_sreg(s, rd, tcg_res);
6107 }
6108
6109 /* Floating-point data-processing (1 source) - double precision */
6110 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
6111 {
6112     void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
6113     TCGv_i64 tcg_op, tcg_res;
6114     TCGv_ptr fpst;
6115     int rmode = -1;
6116
6117     switch (opcode) {
6118     case 0x0: /* FMOV */
6119         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
6120         return;
6121     }
6122
6123     tcg_op = read_fp_dreg(s, rn);
6124     tcg_res = tcg_temp_new_i64();
6125
6126     switch (opcode) {
6127     case 0x1: /* FABS */
6128         gen_helper_vfp_absd(tcg_res, tcg_op);
6129         goto done;
6130     case 0x2: /* FNEG */
6131         gen_helper_vfp_negd(tcg_res, tcg_op);
6132         goto done;
6133     case 0x3: /* FSQRT */
6134         gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env);
6135         goto done;
6136     case 0x8: /* FRINTN */
6137     case 0x9: /* FRINTP */
6138     case 0xa: /* FRINTM */
6139     case 0xb: /* FRINTZ */
6140     case 0xc: /* FRINTA */
6141         rmode = opcode & 7;
6142         gen_fpst = gen_helper_rintd;
6143         break;
6144     case 0xe: /* FRINTX */
6145         gen_fpst = gen_helper_rintd_exact;
6146         break;
6147     case 0xf: /* FRINTI */
6148         gen_fpst = gen_helper_rintd;
6149         break;
6150     case 0x10: /* FRINT32Z */
6151         rmode = FPROUNDING_ZERO;
6152         gen_fpst = gen_helper_frint32_d;
6153         break;
6154     case 0x11: /* FRINT32X */
6155         gen_fpst = gen_helper_frint32_d;
6156         break;
6157     case 0x12: /* FRINT64Z */
6158         rmode = FPROUNDING_ZERO;
6159         gen_fpst = gen_helper_frint64_d;
6160         break;
6161     case 0x13: /* FRINT64X */
6162         gen_fpst = gen_helper_frint64_d;
6163         break;
6164     default:
6165         g_assert_not_reached();
6166     }
6167
6168     fpst = fpstatus_ptr(FPST_FPCR);
6169     if (rmode >= 0) {
6170         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6171         gen_fpst(tcg_res, tcg_op, fpst);
6172         gen_restore_rmode(tcg_rmode, fpst);
6173     } else {
6174         gen_fpst(tcg_res, tcg_op, fpst);
6175     }
6176
6177  done:
6178     write_fp_dreg(s, rd, tcg_res);
6179 }
6180
6181 static void handle_fp_fcvt(DisasContext *s, int opcode,
6182                            int rd, int rn, int dtype, int ntype)
6183 {
6184     switch (ntype) {
6185     case 0x0:
6186     {
6187         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6188         if (dtype == 1) {
6189             /* Single to double */
6190             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6191             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env);
6192             write_fp_dreg(s, rd, tcg_rd);
6193         } else {
6194             /* Single to half */
6195             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6196             TCGv_i32 ahp = get_ahp_flag();
6197             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6198
6199             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6200             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6201             write_fp_sreg(s, rd, tcg_rd);
6202         }
6203         break;
6204     }
6205     case 0x1:
6206     {
6207         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
6208         TCGv_i32 tcg_rd = tcg_temp_new_i32();
6209         if (dtype == 0) {
6210             /* Double to single */
6211             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env);
6212         } else {
6213             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6214             TCGv_i32 ahp = get_ahp_flag();
6215             /* Double to half */
6216             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6217             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6218         }
6219         write_fp_sreg(s, rd, tcg_rd);
6220         break;
6221     }
6222     case 0x3:
6223     {
6224         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6225         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
6226         TCGv_i32 tcg_ahp = get_ahp_flag();
6227         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
6228         if (dtype == 0) {
6229             /* Half to single */
6230             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6231             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6232             write_fp_sreg(s, rd, tcg_rd);
6233         } else {
6234             /* Half to double */
6235             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6236             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6237             write_fp_dreg(s, rd, tcg_rd);
6238         }
6239         break;
6240     }
6241     default:
6242         g_assert_not_reached();
6243     }
6244 }
6245
6246 /* Floating point data-processing (1 source)
6247  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
6248  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6249  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
6250  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6251  */
6252 static void disas_fp_1src(DisasContext *s, uint32_t insn)
6253 {
6254     int mos = extract32(insn, 29, 3);
6255     int type = extract32(insn, 22, 2);
6256     int opcode = extract32(insn, 15, 6);
6257     int rn = extract32(insn, 5, 5);
6258     int rd = extract32(insn, 0, 5);
6259
6260     if (mos) {
6261         goto do_unallocated;
6262     }
6263
6264     switch (opcode) {
6265     case 0x4: case 0x5: case 0x7:
6266     {
6267         /* FCVT between half, single and double precision */
6268         int dtype = extract32(opcode, 0, 2);
6269         if (type == 2 || dtype == type) {
6270             goto do_unallocated;
6271         }
6272         if (!fp_access_check(s)) {
6273             return;
6274         }
6275
6276         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
6277         break;
6278     }
6279
6280     case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
6281         if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
6282             goto do_unallocated;
6283         }
6284         /* fall through */
6285     case 0x0 ... 0x3:
6286     case 0x8 ... 0xc:
6287     case 0xe ... 0xf:
6288         /* 32-to-32 and 64-to-64 ops */
6289         switch (type) {
6290         case 0:
6291             if (!fp_access_check(s)) {
6292                 return;
6293             }
6294             handle_fp_1src_single(s, opcode, rd, rn);
6295             break;
6296         case 1:
6297             if (!fp_access_check(s)) {
6298                 return;
6299             }
6300             handle_fp_1src_double(s, opcode, rd, rn);
6301             break;
6302         case 3:
6303             if (!dc_isar_feature(aa64_fp16, s)) {
6304                 goto do_unallocated;
6305             }
6306
6307             if (!fp_access_check(s)) {
6308                 return;
6309             }
6310             handle_fp_1src_half(s, opcode, rd, rn);
6311             break;
6312         default:
6313             goto do_unallocated;
6314         }
6315         break;
6316
6317     case 0x6:
6318         switch (type) {
6319         case 1: /* BFCVT */
6320             if (!dc_isar_feature(aa64_bf16, s)) {
6321                 goto do_unallocated;
6322             }
6323             if (!fp_access_check(s)) {
6324                 return;
6325             }
6326             handle_fp_1src_single(s, opcode, rd, rn);
6327             break;
6328         default:
6329             goto do_unallocated;
6330         }
6331         break;
6332
6333     default:
6334     do_unallocated:
6335         unallocated_encoding(s);
6336         break;
6337     }
6338 }
6339
6340 /* Floating-point data-processing (2 source) - single precision */
6341 static void handle_fp_2src_single(DisasContext *s, int opcode,
6342                                   int rd, int rn, int rm)
6343 {
6344     TCGv_i32 tcg_op1;
6345     TCGv_i32 tcg_op2;
6346     TCGv_i32 tcg_res;
6347     TCGv_ptr fpst;
6348
6349     tcg_res = tcg_temp_new_i32();
6350     fpst = fpstatus_ptr(FPST_FPCR);
6351     tcg_op1 = read_fp_sreg(s, rn);
6352     tcg_op2 = read_fp_sreg(s, rm);
6353
6354     switch (opcode) {
6355     case 0x0: /* FMUL */
6356         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6357         break;
6358     case 0x1: /* FDIV */
6359         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6360         break;
6361     case 0x2: /* FADD */
6362         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6363         break;
6364     case 0x3: /* FSUB */
6365         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6366         break;
6367     case 0x4: /* FMAX */
6368         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6369         break;
6370     case 0x5: /* FMIN */
6371         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6372         break;
6373     case 0x6: /* FMAXNM */
6374         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6375         break;
6376     case 0x7: /* FMINNM */
6377         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6378         break;
6379     case 0x8: /* FNMUL */
6380         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6381         gen_helper_vfp_negs(tcg_res, tcg_res);
6382         break;
6383     }
6384
6385     write_fp_sreg(s, rd, tcg_res);
6386 }
6387
6388 /* Floating-point data-processing (2 source) - double precision */
6389 static void handle_fp_2src_double(DisasContext *s, int opcode,
6390                                   int rd, int rn, int rm)
6391 {
6392     TCGv_i64 tcg_op1;
6393     TCGv_i64 tcg_op2;
6394     TCGv_i64 tcg_res;
6395     TCGv_ptr fpst;
6396
6397     tcg_res = tcg_temp_new_i64();
6398     fpst = fpstatus_ptr(FPST_FPCR);
6399     tcg_op1 = read_fp_dreg(s, rn);
6400     tcg_op2 = read_fp_dreg(s, rm);
6401
6402     switch (opcode) {
6403     case 0x0: /* FMUL */
6404         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6405         break;
6406     case 0x1: /* FDIV */
6407         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6408         break;
6409     case 0x2: /* FADD */
6410         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6411         break;
6412     case 0x3: /* FSUB */
6413         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6414         break;
6415     case 0x4: /* FMAX */
6416         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6417         break;
6418     case 0x5: /* FMIN */
6419         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6420         break;
6421     case 0x6: /* FMAXNM */
6422         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6423         break;
6424     case 0x7: /* FMINNM */
6425         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6426         break;
6427     case 0x8: /* FNMUL */
6428         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6429         gen_helper_vfp_negd(tcg_res, tcg_res);
6430         break;
6431     }
6432
6433     write_fp_dreg(s, rd, tcg_res);
6434 }
6435
6436 /* Floating-point data-processing (2 source) - half precision */
6437 static void handle_fp_2src_half(DisasContext *s, int opcode,
6438                                 int rd, int rn, int rm)
6439 {
6440     TCGv_i32 tcg_op1;
6441     TCGv_i32 tcg_op2;
6442     TCGv_i32 tcg_res;
6443     TCGv_ptr fpst;
6444
6445     tcg_res = tcg_temp_new_i32();
6446     fpst = fpstatus_ptr(FPST_FPCR_F16);
6447     tcg_op1 = read_fp_hreg(s, rn);
6448     tcg_op2 = read_fp_hreg(s, rm);
6449
6450     switch (opcode) {
6451     case 0x0: /* FMUL */
6452         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6453         break;
6454     case 0x1: /* FDIV */
6455         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6456         break;
6457     case 0x2: /* FADD */
6458         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6459         break;
6460     case 0x3: /* FSUB */
6461         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6462         break;
6463     case 0x4: /* FMAX */
6464         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6465         break;
6466     case 0x5: /* FMIN */
6467         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6468         break;
6469     case 0x6: /* FMAXNM */
6470         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6471         break;
6472     case 0x7: /* FMINNM */
6473         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6474         break;
6475     case 0x8: /* FNMUL */
6476         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6477         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6478         break;
6479     default:
6480         g_assert_not_reached();
6481     }
6482
6483     write_fp_sreg(s, rd, tcg_res);
6484 }
6485
6486 /* Floating point data-processing (2 source)
6487  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6488  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6489  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6490  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6491  */
6492 static void disas_fp_2src(DisasContext *s, uint32_t insn)
6493 {
6494     int mos = extract32(insn, 29, 3);
6495     int type = extract32(insn, 22, 2);
6496     int rd = extract32(insn, 0, 5);
6497     int rn = extract32(insn, 5, 5);
6498     int rm = extract32(insn, 16, 5);
6499     int opcode = extract32(insn, 12, 4);
6500
6501     if (opcode > 8 || mos) {
6502         unallocated_encoding(s);
6503         return;
6504     }
6505
6506     switch (type) {
6507     case 0:
6508         if (!fp_access_check(s)) {
6509             return;
6510         }
6511         handle_fp_2src_single(s, opcode, rd, rn, rm);
6512         break;
6513     case 1:
6514         if (!fp_access_check(s)) {
6515             return;
6516         }
6517         handle_fp_2src_double(s, opcode, rd, rn, rm);
6518         break;
6519     case 3:
6520         if (!dc_isar_feature(aa64_fp16, s)) {
6521             unallocated_encoding(s);
6522             return;
6523         }
6524         if (!fp_access_check(s)) {
6525             return;
6526         }
6527         handle_fp_2src_half(s, opcode, rd, rn, rm);
6528         break;
6529     default:
6530         unallocated_encoding(s);
6531     }
6532 }
6533
6534 /* Floating-point data-processing (3 source) - single precision */
6535 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6536                                   int rd, int rn, int rm, int ra)
6537 {
6538     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6539     TCGv_i32 tcg_res = tcg_temp_new_i32();
6540     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6541
6542     tcg_op1 = read_fp_sreg(s, rn);
6543     tcg_op2 = read_fp_sreg(s, rm);
6544     tcg_op3 = read_fp_sreg(s, ra);
6545
6546     /* These are fused multiply-add, and must be done as one
6547      * floating point operation with no rounding between the
6548      * multiplication and addition steps.
6549      * NB that doing the negations here as separate steps is
6550      * correct : an input NaN should come out with its sign bit
6551      * flipped if it is a negated-input.
6552      */
6553     if (o1 == true) {
6554         gen_helper_vfp_negs(tcg_op3, tcg_op3);
6555     }
6556
6557     if (o0 != o1) {
6558         gen_helper_vfp_negs(tcg_op1, tcg_op1);
6559     }
6560
6561     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6562
6563     write_fp_sreg(s, rd, tcg_res);
6564 }
6565
6566 /* Floating-point data-processing (3 source) - double precision */
6567 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6568                                   int rd, int rn, int rm, int ra)
6569 {
6570     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6571     TCGv_i64 tcg_res = tcg_temp_new_i64();
6572     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6573
6574     tcg_op1 = read_fp_dreg(s, rn);
6575     tcg_op2 = read_fp_dreg(s, rm);
6576     tcg_op3 = read_fp_dreg(s, ra);
6577
6578     /* These are fused multiply-add, and must be done as one
6579      * floating point operation with no rounding between the
6580      * multiplication and addition steps.
6581      * NB that doing the negations here as separate steps is
6582      * correct : an input NaN should come out with its sign bit
6583      * flipped if it is a negated-input.
6584      */
6585     if (o1 == true) {
6586         gen_helper_vfp_negd(tcg_op3, tcg_op3);
6587     }
6588
6589     if (o0 != o1) {
6590         gen_helper_vfp_negd(tcg_op1, tcg_op1);
6591     }
6592
6593     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6594
6595     write_fp_dreg(s, rd, tcg_res);
6596 }
6597
6598 /* Floating-point data-processing (3 source) - half precision */
6599 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6600                                 int rd, int rn, int rm, int ra)
6601 {
6602     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6603     TCGv_i32 tcg_res = tcg_temp_new_i32();
6604     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
6605
6606     tcg_op1 = read_fp_hreg(s, rn);
6607     tcg_op2 = read_fp_hreg(s, rm);
6608     tcg_op3 = read_fp_hreg(s, ra);
6609
6610     /* These are fused multiply-add, and must be done as one
6611      * floating point operation with no rounding between the
6612      * multiplication and addition steps.
6613      * NB that doing the negations here as separate steps is
6614      * correct : an input NaN should come out with its sign bit
6615      * flipped if it is a negated-input.
6616      */
6617     if (o1 == true) {
6618         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6619     }
6620
6621     if (o0 != o1) {
6622         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6623     }
6624
6625     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6626
6627     write_fp_sreg(s, rd, tcg_res);
6628 }
6629
6630 /* Floating point data-processing (3 source)
6631  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6632  * +---+---+---+-----------+------+----+------+----+------+------+------+
6633  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6634  * +---+---+---+-----------+------+----+------+----+------+------+------+
6635  */
6636 static void disas_fp_3src(DisasContext *s, uint32_t insn)
6637 {
6638     int mos = extract32(insn, 29, 3);
6639     int type = extract32(insn, 22, 2);
6640     int rd = extract32(insn, 0, 5);
6641     int rn = extract32(insn, 5, 5);
6642     int ra = extract32(insn, 10, 5);
6643     int rm = extract32(insn, 16, 5);
6644     bool o0 = extract32(insn, 15, 1);
6645     bool o1 = extract32(insn, 21, 1);
6646
6647     if (mos) {
6648         unallocated_encoding(s);
6649         return;
6650     }
6651
6652     switch (type) {
6653     case 0:
6654         if (!fp_access_check(s)) {
6655             return;
6656         }
6657         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6658         break;
6659     case 1:
6660         if (!fp_access_check(s)) {
6661             return;
6662         }
6663         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6664         break;
6665     case 3:
6666         if (!dc_isar_feature(aa64_fp16, s)) {
6667             unallocated_encoding(s);
6668             return;
6669         }
6670         if (!fp_access_check(s)) {
6671             return;
6672         }
6673         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6674         break;
6675     default:
6676         unallocated_encoding(s);
6677     }
6678 }
6679
6680 /* Floating point immediate
6681  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6682  * +---+---+---+-----------+------+---+------------+-------+------+------+
6683  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6684  * +---+---+---+-----------+------+---+------------+-------+------+------+
6685  */
6686 static void disas_fp_imm(DisasContext *s, uint32_t insn)
6687 {
6688     int rd = extract32(insn, 0, 5);
6689     int imm5 = extract32(insn, 5, 5);
6690     int imm8 = extract32(insn, 13, 8);
6691     int type = extract32(insn, 22, 2);
6692     int mos = extract32(insn, 29, 3);
6693     uint64_t imm;
6694     MemOp sz;
6695
6696     if (mos || imm5) {
6697         unallocated_encoding(s);
6698         return;
6699     }
6700
6701     switch (type) {
6702     case 0:
6703         sz = MO_32;
6704         break;
6705     case 1:
6706         sz = MO_64;
6707         break;
6708     case 3:
6709         sz = MO_16;
6710         if (dc_isar_feature(aa64_fp16, s)) {
6711             break;
6712         }
6713         /* fallthru */
6714     default:
6715         unallocated_encoding(s);
6716         return;
6717     }
6718
6719     if (!fp_access_check(s)) {
6720         return;
6721     }
6722
6723     imm = vfp_expand_imm(sz, imm8);
6724     write_fp_dreg(s, rd, tcg_constant_i64(imm));
6725 }
6726
6727 /* Handle floating point <=> fixed point conversions. Note that we can
6728  * also deal with fp <=> integer conversions as a special case (scale == 64)
6729  * OPTME: consider handling that special case specially or at least skipping
6730  * the call to scalbn in the helpers for zero shifts.
6731  */
6732 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6733                            bool itof, int rmode, int scale, int sf, int type)
6734 {
6735     bool is_signed = !(opcode & 1);
6736     TCGv_ptr tcg_fpstatus;
6737     TCGv_i32 tcg_shift, tcg_single;
6738     TCGv_i64 tcg_double;
6739
6740     tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
6741
6742     tcg_shift = tcg_constant_i32(64 - scale);
6743
6744     if (itof) {
6745         TCGv_i64 tcg_int = cpu_reg(s, rn);
6746         if (!sf) {
6747             TCGv_i64 tcg_extend = tcg_temp_new_i64();
6748
6749             if (is_signed) {
6750                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6751             } else {
6752                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6753             }
6754
6755             tcg_int = tcg_extend;
6756         }
6757
6758         switch (type) {
6759         case 1: /* float64 */
6760             tcg_double = tcg_temp_new_i64();
6761             if (is_signed) {
6762                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6763                                      tcg_shift, tcg_fpstatus);
6764             } else {
6765                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6766                                      tcg_shift, tcg_fpstatus);
6767             }
6768             write_fp_dreg(s, rd, tcg_double);
6769             break;
6770
6771         case 0: /* float32 */
6772             tcg_single = tcg_temp_new_i32();
6773             if (is_signed) {
6774                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6775                                      tcg_shift, tcg_fpstatus);
6776             } else {
6777                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6778                                      tcg_shift, tcg_fpstatus);
6779             }
6780             write_fp_sreg(s, rd, tcg_single);
6781             break;
6782
6783         case 3: /* float16 */
6784             tcg_single = tcg_temp_new_i32();
6785             if (is_signed) {
6786                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6787                                      tcg_shift, tcg_fpstatus);
6788             } else {
6789                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6790                                      tcg_shift, tcg_fpstatus);
6791             }
6792             write_fp_sreg(s, rd, tcg_single);
6793             break;
6794
6795         default:
6796             g_assert_not_reached();
6797         }
6798     } else {
6799         TCGv_i64 tcg_int = cpu_reg(s, rd);
6800         TCGv_i32 tcg_rmode;
6801
6802         if (extract32(opcode, 2, 1)) {
6803             /* There are too many rounding modes to all fit into rmode,
6804              * so FCVTA[US] is a special case.
6805              */
6806             rmode = FPROUNDING_TIEAWAY;
6807         }
6808
6809         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
6810
6811         switch (type) {
6812         case 1: /* float64 */
6813             tcg_double = read_fp_dreg(s, rn);
6814             if (is_signed) {
6815                 if (!sf) {
6816                     gen_helper_vfp_tosld(tcg_int, tcg_double,
6817                                          tcg_shift, tcg_fpstatus);
6818                 } else {
6819                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
6820                                          tcg_shift, tcg_fpstatus);
6821                 }
6822             } else {
6823                 if (!sf) {
6824                     gen_helper_vfp_tould(tcg_int, tcg_double,
6825                                          tcg_shift, tcg_fpstatus);
6826                 } else {
6827                     gen_helper_vfp_touqd(tcg_int, tcg_double,
6828                                          tcg_shift, tcg_fpstatus);
6829                 }
6830             }
6831             if (!sf) {
6832                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
6833             }
6834             break;
6835
6836         case 0: /* float32 */
6837             tcg_single = read_fp_sreg(s, rn);
6838             if (sf) {
6839                 if (is_signed) {
6840                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
6841                                          tcg_shift, tcg_fpstatus);
6842                 } else {
6843                     gen_helper_vfp_touqs(tcg_int, tcg_single,
6844                                          tcg_shift, tcg_fpstatus);
6845                 }
6846             } else {
6847                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6848                 if (is_signed) {
6849                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
6850                                          tcg_shift, tcg_fpstatus);
6851                 } else {
6852                     gen_helper_vfp_touls(tcg_dest, tcg_single,
6853                                          tcg_shift, tcg_fpstatus);
6854                 }
6855                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6856             }
6857             break;
6858
6859         case 3: /* float16 */
6860             tcg_single = read_fp_sreg(s, rn);
6861             if (sf) {
6862                 if (is_signed) {
6863                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
6864                                          tcg_shift, tcg_fpstatus);
6865                 } else {
6866                     gen_helper_vfp_touqh(tcg_int, tcg_single,
6867                                          tcg_shift, tcg_fpstatus);
6868                 }
6869             } else {
6870                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6871                 if (is_signed) {
6872                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
6873                                          tcg_shift, tcg_fpstatus);
6874                 } else {
6875                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
6876                                          tcg_shift, tcg_fpstatus);
6877                 }
6878                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6879             }
6880             break;
6881
6882         default:
6883             g_assert_not_reached();
6884         }
6885
6886         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
6887     }
6888 }
6889
6890 /* Floating point <-> fixed point conversions
6891  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
6892  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6893  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
6894  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6895  */
6896 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
6897 {
6898     int rd = extract32(insn, 0, 5);
6899     int rn = extract32(insn, 5, 5);
6900     int scale = extract32(insn, 10, 6);
6901     int opcode = extract32(insn, 16, 3);
6902     int rmode = extract32(insn, 19, 2);
6903     int type = extract32(insn, 22, 2);
6904     bool sbit = extract32(insn, 29, 1);
6905     bool sf = extract32(insn, 31, 1);
6906     bool itof;
6907
6908     if (sbit || (!sf && scale < 32)) {
6909         unallocated_encoding(s);
6910         return;
6911     }
6912
6913     switch (type) {
6914     case 0: /* float32 */
6915     case 1: /* float64 */
6916         break;
6917     case 3: /* float16 */
6918         if (dc_isar_feature(aa64_fp16, s)) {
6919             break;
6920         }
6921         /* fallthru */
6922     default:
6923         unallocated_encoding(s);
6924         return;
6925     }
6926
6927     switch ((rmode << 3) | opcode) {
6928     case 0x2: /* SCVTF */
6929     case 0x3: /* UCVTF */
6930         itof = true;
6931         break;
6932     case 0x18: /* FCVTZS */
6933     case 0x19: /* FCVTZU */
6934         itof = false;
6935         break;
6936     default:
6937         unallocated_encoding(s);
6938         return;
6939     }
6940
6941     if (!fp_access_check(s)) {
6942         return;
6943     }
6944
6945     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
6946 }
6947
6948 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
6949 {
6950     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
6951      * without conversion.
6952      */
6953
6954     if (itof) {
6955         TCGv_i64 tcg_rn = cpu_reg(s, rn);
6956         TCGv_i64 tmp;
6957
6958         switch (type) {
6959         case 0:
6960             /* 32 bit */
6961             tmp = tcg_temp_new_i64();
6962             tcg_gen_ext32u_i64(tmp, tcg_rn);
6963             write_fp_dreg(s, rd, tmp);
6964             break;
6965         case 1:
6966             /* 64 bit */
6967             write_fp_dreg(s, rd, tcg_rn);
6968             break;
6969         case 2:
6970             /* 64 bit to top half. */
6971             tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd));
6972             clear_vec_high(s, true, rd);
6973             break;
6974         case 3:
6975             /* 16 bit */
6976             tmp = tcg_temp_new_i64();
6977             tcg_gen_ext16u_i64(tmp, tcg_rn);
6978             write_fp_dreg(s, rd, tmp);
6979             break;
6980         default:
6981             g_assert_not_reached();
6982         }
6983     } else {
6984         TCGv_i64 tcg_rd = cpu_reg(s, rd);
6985
6986         switch (type) {
6987         case 0:
6988             /* 32 bit */
6989             tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32));
6990             break;
6991         case 1:
6992             /* 64 bit */
6993             tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64));
6994             break;
6995         case 2:
6996             /* 64 bits from top half */
6997             tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn));
6998             break;
6999         case 3:
7000             /* 16 bit */
7001             tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16));
7002             break;
7003         default:
7004             g_assert_not_reached();
7005         }
7006     }
7007 }
7008
7009 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
7010 {
7011     TCGv_i64 t = read_fp_dreg(s, rn);
7012     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
7013
7014     gen_helper_fjcvtzs(t, t, fpstatus);
7015
7016     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
7017     tcg_gen_extrh_i64_i32(cpu_ZF, t);
7018     tcg_gen_movi_i32(cpu_CF, 0);
7019     tcg_gen_movi_i32(cpu_NF, 0);
7020     tcg_gen_movi_i32(cpu_VF, 0);
7021 }
7022
7023 /* Floating point <-> integer conversions
7024  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
7025  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
7026  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
7027  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
7028  */
7029 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
7030 {
7031     int rd = extract32(insn, 0, 5);
7032     int rn = extract32(insn, 5, 5);
7033     int opcode = extract32(insn, 16, 3);
7034     int rmode = extract32(insn, 19, 2);
7035     int type = extract32(insn, 22, 2);
7036     bool sbit = extract32(insn, 29, 1);
7037     bool sf = extract32(insn, 31, 1);
7038     bool itof = false;
7039
7040     if (sbit) {
7041         goto do_unallocated;
7042     }
7043
7044     switch (opcode) {
7045     case 2: /* SCVTF */
7046     case 3: /* UCVTF */
7047         itof = true;
7048         /* fallthru */
7049     case 4: /* FCVTAS */
7050     case 5: /* FCVTAU */
7051         if (rmode != 0) {
7052             goto do_unallocated;
7053         }
7054         /* fallthru */
7055     case 0: /* FCVT[NPMZ]S */
7056     case 1: /* FCVT[NPMZ]U */
7057         switch (type) {
7058         case 0: /* float32 */
7059         case 1: /* float64 */
7060             break;
7061         case 3: /* float16 */
7062             if (!dc_isar_feature(aa64_fp16, s)) {
7063                 goto do_unallocated;
7064             }
7065             break;
7066         default:
7067             goto do_unallocated;
7068         }
7069         if (!fp_access_check(s)) {
7070             return;
7071         }
7072         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
7073         break;
7074
7075     default:
7076         switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
7077         case 0b01100110: /* FMOV half <-> 32-bit int */
7078         case 0b01100111:
7079         case 0b11100110: /* FMOV half <-> 64-bit int */
7080         case 0b11100111:
7081             if (!dc_isar_feature(aa64_fp16, s)) {
7082                 goto do_unallocated;
7083             }
7084             /* fallthru */
7085         case 0b00000110: /* FMOV 32-bit */
7086         case 0b00000111:
7087         case 0b10100110: /* FMOV 64-bit */
7088         case 0b10100111:
7089         case 0b11001110: /* FMOV top half of 128-bit */
7090         case 0b11001111:
7091             if (!fp_access_check(s)) {
7092                 return;
7093             }
7094             itof = opcode & 1;
7095             handle_fmov(s, rd, rn, type, itof);
7096             break;
7097
7098         case 0b00111110: /* FJCVTZS */
7099             if (!dc_isar_feature(aa64_jscvt, s)) {
7100                 goto do_unallocated;
7101             } else if (fp_access_check(s)) {
7102                 handle_fjcvtzs(s, rd, rn);
7103             }
7104             break;
7105
7106         default:
7107         do_unallocated:
7108             unallocated_encoding(s);
7109             return;
7110         }
7111         break;
7112     }
7113 }
7114
7115 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
7116  *   31  30  29 28     25 24                          0
7117  * +---+---+---+---------+-----------------------------+
7118  * |   | 0 |   | 1 1 1 1 |                             |
7119  * +---+---+---+---------+-----------------------------+
7120  */
7121 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
7122 {
7123     if (extract32(insn, 24, 1)) {
7124         /* Floating point data-processing (3 source) */
7125         disas_fp_3src(s, insn);
7126     } else if (extract32(insn, 21, 1) == 0) {
7127         /* Floating point to fixed point conversions */
7128         disas_fp_fixed_conv(s, insn);
7129     } else {
7130         switch (extract32(insn, 10, 2)) {
7131         case 1:
7132             /* Floating point conditional compare */
7133             disas_fp_ccomp(s, insn);
7134             break;
7135         case 2:
7136             /* Floating point data-processing (2 source) */
7137             disas_fp_2src(s, insn);
7138             break;
7139         case 3:
7140             /* Floating point conditional select */
7141             disas_fp_csel(s, insn);
7142             break;
7143         case 0:
7144             switch (ctz32(extract32(insn, 12, 4))) {
7145             case 0: /* [15:12] == xxx1 */
7146                 /* Floating point immediate */
7147                 disas_fp_imm(s, insn);
7148                 break;
7149             case 1: /* [15:12] == xx10 */
7150                 /* Floating point compare */
7151                 disas_fp_compare(s, insn);
7152                 break;
7153             case 2: /* [15:12] == x100 */
7154                 /* Floating point data-processing (1 source) */
7155                 disas_fp_1src(s, insn);
7156                 break;
7157             case 3: /* [15:12] == 1000 */
7158                 unallocated_encoding(s);
7159                 break;
7160             default: /* [15:12] == 0000 */
7161                 /* Floating point <-> integer conversions */
7162                 disas_fp_int_conv(s, insn);
7163                 break;
7164             }
7165             break;
7166         }
7167     }
7168 }
7169
7170 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
7171                      int pos)
7172 {
7173     /* Extract 64 bits from the middle of two concatenated 64 bit
7174      * vector register slices left:right. The extracted bits start
7175      * at 'pos' bits into the right (least significant) side.
7176      * We return the result in tcg_right, and guarantee not to
7177      * trash tcg_left.
7178      */
7179     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7180     assert(pos > 0 && pos < 64);
7181
7182     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
7183     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
7184     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
7185 }
7186
7187 /* EXT
7188  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
7189  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7190  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
7191  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7192  */
7193 static void disas_simd_ext(DisasContext *s, uint32_t insn)
7194 {
7195     int is_q = extract32(insn, 30, 1);
7196     int op2 = extract32(insn, 22, 2);
7197     int imm4 = extract32(insn, 11, 4);
7198     int rm = extract32(insn, 16, 5);
7199     int rn = extract32(insn, 5, 5);
7200     int rd = extract32(insn, 0, 5);
7201     int pos = imm4 << 3;
7202     TCGv_i64 tcg_resl, tcg_resh;
7203
7204     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
7205         unallocated_encoding(s);
7206         return;
7207     }
7208
7209     if (!fp_access_check(s)) {
7210         return;
7211     }
7212
7213     tcg_resh = tcg_temp_new_i64();
7214     tcg_resl = tcg_temp_new_i64();
7215
7216     /* Vd gets bits starting at pos bits into Vm:Vn. This is
7217      * either extracting 128 bits from a 128:128 concatenation, or
7218      * extracting 64 bits from a 64:64 concatenation.
7219      */
7220     if (!is_q) {
7221         read_vec_element(s, tcg_resl, rn, 0, MO_64);
7222         if (pos != 0) {
7223             read_vec_element(s, tcg_resh, rm, 0, MO_64);
7224             do_ext64(s, tcg_resh, tcg_resl, pos);
7225         }
7226     } else {
7227         TCGv_i64 tcg_hh;
7228         typedef struct {
7229             int reg;
7230             int elt;
7231         } EltPosns;
7232         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
7233         EltPosns *elt = eltposns;
7234
7235         if (pos >= 64) {
7236             elt++;
7237             pos -= 64;
7238         }
7239
7240         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
7241         elt++;
7242         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
7243         elt++;
7244         if (pos != 0) {
7245             do_ext64(s, tcg_resh, tcg_resl, pos);
7246             tcg_hh = tcg_temp_new_i64();
7247             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
7248             do_ext64(s, tcg_hh, tcg_resh, pos);
7249         }
7250     }
7251
7252     write_vec_element(s, tcg_resl, rd, 0, MO_64);
7253     if (is_q) {
7254         write_vec_element(s, tcg_resh, rd, 1, MO_64);
7255     }
7256     clear_vec_high(s, is_q, rd);
7257 }
7258
7259 /* TBL/TBX
7260  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
7261  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7262  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
7263  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7264  */
7265 static void disas_simd_tb(DisasContext *s, uint32_t insn)
7266 {
7267     int op2 = extract32(insn, 22, 2);
7268     int is_q = extract32(insn, 30, 1);
7269     int rm = extract32(insn, 16, 5);
7270     int rn = extract32(insn, 5, 5);
7271     int rd = extract32(insn, 0, 5);
7272     int is_tbx = extract32(insn, 12, 1);
7273     int len = (extract32(insn, 13, 2) + 1) * 16;
7274
7275     if (op2 != 0) {
7276         unallocated_encoding(s);
7277         return;
7278     }
7279
7280     if (!fp_access_check(s)) {
7281         return;
7282     }
7283
7284     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
7285                        vec_full_reg_offset(s, rm), tcg_env,
7286                        is_q ? 16 : 8, vec_full_reg_size(s),
7287                        (len << 6) | (is_tbx << 5) | rn,
7288                        gen_helper_simd_tblx);
7289 }
7290
7291 /* ZIP/UZP/TRN
7292  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
7293  * +---+---+-------------+------+---+------+---+------------------+------+
7294  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
7295  * +---+---+-------------+------+---+------+---+------------------+------+
7296  */
7297 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
7298 {
7299     int rd = extract32(insn, 0, 5);
7300     int rn = extract32(insn, 5, 5);
7301     int rm = extract32(insn, 16, 5);
7302     int size = extract32(insn, 22, 2);
7303     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
7304      * bit 2 indicates 1 vs 2 variant of the insn.
7305      */
7306     int opcode = extract32(insn, 12, 2);
7307     bool part = extract32(insn, 14, 1);
7308     bool is_q = extract32(insn, 30, 1);
7309     int esize = 8 << size;
7310     int i;
7311     int datasize = is_q ? 128 : 64;
7312     int elements = datasize / esize;
7313     TCGv_i64 tcg_res[2], tcg_ele;
7314
7315     if (opcode == 0 || (size == 3 && !is_q)) {
7316         unallocated_encoding(s);
7317         return;
7318     }
7319
7320     if (!fp_access_check(s)) {
7321         return;
7322     }
7323
7324     tcg_res[0] = tcg_temp_new_i64();
7325     tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
7326     tcg_ele = tcg_temp_new_i64();
7327
7328     for (i = 0; i < elements; i++) {
7329         int o, w;
7330
7331         switch (opcode) {
7332         case 1: /* UZP1/2 */
7333         {
7334             int midpoint = elements / 2;
7335             if (i < midpoint) {
7336                 read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
7337             } else {
7338                 read_vec_element(s, tcg_ele, rm,
7339                                  2 * (i - midpoint) + part, size);
7340             }
7341             break;
7342         }
7343         case 2: /* TRN1/2 */
7344             if (i & 1) {
7345                 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
7346             } else {
7347                 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
7348             }
7349             break;
7350         case 3: /* ZIP1/2 */
7351         {
7352             int base = part * elements / 2;
7353             if (i & 1) {
7354                 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
7355             } else {
7356                 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
7357             }
7358             break;
7359         }
7360         default:
7361             g_assert_not_reached();
7362         }
7363
7364         w = (i * esize) / 64;
7365         o = (i * esize) % 64;
7366         if (o == 0) {
7367             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
7368         } else {
7369             tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
7370             tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
7371         }
7372     }
7373
7374     for (i = 0; i <= is_q; ++i) {
7375         write_vec_element(s, tcg_res[i], rd, i, MO_64);
7376     }
7377     clear_vec_high(s, is_q, rd);
7378 }
7379
7380 /*
7381  * do_reduction_op helper
7382  *
7383  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7384  * important for correct NaN propagation that we do these
7385  * operations in exactly the order specified by the pseudocode.
7386  *
7387  * This is a recursive function, TCG temps should be freed by the
7388  * calling function once it is done with the values.
7389  */
7390 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7391                                 int esize, int size, int vmap, TCGv_ptr fpst)
7392 {
7393     if (esize == size) {
7394         int element;
7395         MemOp msize = esize == 16 ? MO_16 : MO_32;
7396         TCGv_i32 tcg_elem;
7397
7398         /* We should have one register left here */
7399         assert(ctpop8(vmap) == 1);
7400         element = ctz32(vmap);
7401         assert(element < 8);
7402
7403         tcg_elem = tcg_temp_new_i32();
7404         read_vec_element_i32(s, tcg_elem, rn, element, msize);
7405         return tcg_elem;
7406     } else {
7407         int bits = size / 2;
7408         int shift = ctpop8(vmap) / 2;
7409         int vmap_lo = (vmap >> shift) & vmap;
7410         int vmap_hi = (vmap & ~vmap_lo);
7411         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7412
7413         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7414         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7415         tcg_res = tcg_temp_new_i32();
7416
7417         switch (fpopcode) {
7418         case 0x0c: /* fmaxnmv half-precision */
7419             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7420             break;
7421         case 0x0f: /* fmaxv half-precision */
7422             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7423             break;
7424         case 0x1c: /* fminnmv half-precision */
7425             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7426             break;
7427         case 0x1f: /* fminv half-precision */
7428             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7429             break;
7430         case 0x2c: /* fmaxnmv */
7431             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7432             break;
7433         case 0x2f: /* fmaxv */
7434             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7435             break;
7436         case 0x3c: /* fminnmv */
7437             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7438             break;
7439         case 0x3f: /* fminv */
7440             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7441             break;
7442         default:
7443             g_assert_not_reached();
7444         }
7445         return tcg_res;
7446     }
7447 }
7448
7449 /* AdvSIMD across lanes
7450  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7451  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7452  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7453  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7454  */
7455 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7456 {
7457     int rd = extract32(insn, 0, 5);
7458     int rn = extract32(insn, 5, 5);
7459     int size = extract32(insn, 22, 2);
7460     int opcode = extract32(insn, 12, 5);
7461     bool is_q = extract32(insn, 30, 1);
7462     bool is_u = extract32(insn, 29, 1);
7463     bool is_fp = false;
7464     bool is_min = false;
7465     int esize;
7466     int elements;
7467     int i;
7468     TCGv_i64 tcg_res, tcg_elt;
7469
7470     switch (opcode) {
7471     case 0x1b: /* ADDV */
7472         if (is_u) {
7473             unallocated_encoding(s);
7474             return;
7475         }
7476         /* fall through */
7477     case 0x3: /* SADDLV, UADDLV */
7478     case 0xa: /* SMAXV, UMAXV */
7479     case 0x1a: /* SMINV, UMINV */
7480         if (size == 3 || (size == 2 && !is_q)) {
7481             unallocated_encoding(s);
7482             return;
7483         }
7484         break;
7485     case 0xc: /* FMAXNMV, FMINNMV */
7486     case 0xf: /* FMAXV, FMINV */
7487         /* Bit 1 of size field encodes min vs max and the actual size
7488          * depends on the encoding of the U bit. If not set (and FP16
7489          * enabled) then we do half-precision float instead of single
7490          * precision.
7491          */
7492         is_min = extract32(size, 1, 1);
7493         is_fp = true;
7494         if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7495             size = 1;
7496         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7497             unallocated_encoding(s);
7498             return;
7499         } else {
7500             size = 2;
7501         }
7502         break;
7503     default:
7504         unallocated_encoding(s);
7505         return;
7506     }
7507
7508     if (!fp_access_check(s)) {
7509         return;
7510     }
7511
7512     esize = 8 << size;
7513     elements = (is_q ? 128 : 64) / esize;
7514
7515     tcg_res = tcg_temp_new_i64();
7516     tcg_elt = tcg_temp_new_i64();
7517
7518     /* These instructions operate across all lanes of a vector
7519      * to produce a single result. We can guarantee that a 64
7520      * bit intermediate is sufficient:
7521      *  + for [US]ADDLV the maximum element size is 32 bits, and
7522      *    the result type is 64 bits
7523      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7524      *    same as the element size, which is 32 bits at most
7525      * For the integer operations we can choose to work at 64
7526      * or 32 bits and truncate at the end; for simplicity
7527      * we use 64 bits always. The floating point
7528      * ops do require 32 bit intermediates, though.
7529      */
7530     if (!is_fp) {
7531         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7532
7533         for (i = 1; i < elements; i++) {
7534             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7535
7536             switch (opcode) {
7537             case 0x03: /* SADDLV / UADDLV */
7538             case 0x1b: /* ADDV */
7539                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7540                 break;
7541             case 0x0a: /* SMAXV / UMAXV */
7542                 if (is_u) {
7543                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7544                 } else {
7545                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7546                 }
7547                 break;
7548             case 0x1a: /* SMINV / UMINV */
7549                 if (is_u) {
7550                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7551                 } else {
7552                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7553                 }
7554                 break;
7555             default:
7556                 g_assert_not_reached();
7557             }
7558
7559         }
7560     } else {
7561         /* Floating point vector reduction ops which work across 32
7562          * bit (single) or 16 bit (half-precision) intermediates.
7563          * Note that correct NaN propagation requires that we do these
7564          * operations in exactly the order specified by the pseudocode.
7565          */
7566         TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7567         int fpopcode = opcode | is_min << 4 | is_u << 5;
7568         int vmap = (1 << elements) - 1;
7569         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7570                                              (is_q ? 128 : 64), vmap, fpst);
7571         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7572     }
7573
7574     /* Now truncate the result to the width required for the final output */
7575     if (opcode == 0x03) {
7576         /* SADDLV, UADDLV: result is 2*esize */
7577         size++;
7578     }
7579
7580     switch (size) {
7581     case 0:
7582         tcg_gen_ext8u_i64(tcg_res, tcg_res);
7583         break;
7584     case 1:
7585         tcg_gen_ext16u_i64(tcg_res, tcg_res);
7586         break;
7587     case 2:
7588         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7589         break;
7590     case 3:
7591         break;
7592     default:
7593         g_assert_not_reached();
7594     }
7595
7596     write_fp_dreg(s, rd, tcg_res);
7597 }
7598
7599 /* DUP (Element, Vector)
7600  *
7601  *  31  30   29              21 20    16 15        10  9    5 4    0
7602  * +---+---+-------------------+--------+-------------+------+------+
7603  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7604  * +---+---+-------------------+--------+-------------+------+------+
7605  *
7606  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7607  */
7608 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7609                              int imm5)
7610 {
7611     int size = ctz32(imm5);
7612     int index;
7613
7614     if (size > 3 || (size == 3 && !is_q)) {
7615         unallocated_encoding(s);
7616         return;
7617     }
7618
7619     if (!fp_access_check(s)) {
7620         return;
7621     }
7622
7623     index = imm5 >> (size + 1);
7624     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7625                          vec_reg_offset(s, rn, index, size),
7626                          is_q ? 16 : 8, vec_full_reg_size(s));
7627 }
7628
7629 /* DUP (element, scalar)
7630  *  31                   21 20    16 15        10  9    5 4    0
7631  * +-----------------------+--------+-------------+------+------+
7632  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7633  * +-----------------------+--------+-------------+------+------+
7634  */
7635 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7636                               int imm5)
7637 {
7638     int size = ctz32(imm5);
7639     int index;
7640     TCGv_i64 tmp;
7641
7642     if (size > 3) {
7643         unallocated_encoding(s);
7644         return;
7645     }
7646
7647     if (!fp_access_check(s)) {
7648         return;
7649     }
7650
7651     index = imm5 >> (size + 1);
7652
7653     /* This instruction just extracts the specified element and
7654      * zero-extends it into the bottom of the destination register.
7655      */
7656     tmp = tcg_temp_new_i64();
7657     read_vec_element(s, tmp, rn, index, size);
7658     write_fp_dreg(s, rd, tmp);
7659 }
7660
7661 /* DUP (General)
7662  *
7663  *  31  30   29              21 20    16 15        10  9    5 4    0
7664  * +---+---+-------------------+--------+-------------+------+------+
7665  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7666  * +---+---+-------------------+--------+-------------+------+------+
7667  *
7668  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7669  */
7670 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7671                              int imm5)
7672 {
7673     int size = ctz32(imm5);
7674     uint32_t dofs, oprsz, maxsz;
7675
7676     if (size > 3 || ((size == 3) && !is_q)) {
7677         unallocated_encoding(s);
7678         return;
7679     }
7680
7681     if (!fp_access_check(s)) {
7682         return;
7683     }
7684
7685     dofs = vec_full_reg_offset(s, rd);
7686     oprsz = is_q ? 16 : 8;
7687     maxsz = vec_full_reg_size(s);
7688
7689     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7690 }
7691
7692 /* INS (Element)
7693  *
7694  *  31                   21 20    16 15  14    11  10 9    5 4    0
7695  * +-----------------------+--------+------------+---+------+------+
7696  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7697  * +-----------------------+--------+------------+---+------+------+
7698  *
7699  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7700  * index: encoded in imm5<4:size+1>
7701  */
7702 static void handle_simd_inse(DisasContext *s, int rd, int rn,
7703                              int imm4, int imm5)
7704 {
7705     int size = ctz32(imm5);
7706     int src_index, dst_index;
7707     TCGv_i64 tmp;
7708
7709     if (size > 3) {
7710         unallocated_encoding(s);
7711         return;
7712     }
7713
7714     if (!fp_access_check(s)) {
7715         return;
7716     }
7717
7718     dst_index = extract32(imm5, 1+size, 5);
7719     src_index = extract32(imm4, size, 4);
7720
7721     tmp = tcg_temp_new_i64();
7722
7723     read_vec_element(s, tmp, rn, src_index, size);
7724     write_vec_element(s, tmp, rd, dst_index, size);
7725
7726     /* INS is considered a 128-bit write for SVE. */
7727     clear_vec_high(s, true, rd);
7728 }
7729
7730
7731 /* INS (General)
7732  *
7733  *  31                   21 20    16 15        10  9    5 4    0
7734  * +-----------------------+--------+-------------+------+------+
7735  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
7736  * +-----------------------+--------+-------------+------+------+
7737  *
7738  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7739  * index: encoded in imm5<4:size+1>
7740  */
7741 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7742 {
7743     int size = ctz32(imm5);
7744     int idx;
7745
7746     if (size > 3) {
7747         unallocated_encoding(s);
7748         return;
7749     }
7750
7751     if (!fp_access_check(s)) {
7752         return;
7753     }
7754
7755     idx = extract32(imm5, 1 + size, 4 - size);
7756     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7757
7758     /* INS is considered a 128-bit write for SVE. */
7759     clear_vec_high(s, true, rd);
7760 }
7761
7762 /*
7763  * UMOV (General)
7764  * SMOV (General)
7765  *
7766  *  31  30   29              21 20    16 15    12   10 9    5 4    0
7767  * +---+---+-------------------+--------+-------------+------+------+
7768  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
7769  * +---+---+-------------------+--------+-------------+------+------+
7770  *
7771  * U: unsigned when set
7772  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7773  */
7774 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7775                                   int rn, int rd, int imm5)
7776 {
7777     int size = ctz32(imm5);
7778     int element;
7779     TCGv_i64 tcg_rd;
7780
7781     /* Check for UnallocatedEncodings */
7782     if (is_signed) {
7783         if (size > 2 || (size == 2 && !is_q)) {
7784             unallocated_encoding(s);
7785             return;
7786         }
7787     } else {
7788         if (size > 3
7789             || (size < 3 && is_q)
7790             || (size == 3 && !is_q)) {
7791             unallocated_encoding(s);
7792             return;
7793         }
7794     }
7795
7796     if (!fp_access_check(s)) {
7797         return;
7798     }
7799
7800     element = extract32(imm5, 1+size, 4);
7801
7802     tcg_rd = cpu_reg(s, rd);
7803     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7804     if (is_signed && !is_q) {
7805         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7806     }
7807 }
7808
7809 /* AdvSIMD copy
7810  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
7811  * +---+---+----+-----------------+------+---+------+---+------+------+
7812  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7813  * +---+---+----+-----------------+------+---+------+---+------+------+
7814  */
7815 static void disas_simd_copy(DisasContext *s, uint32_t insn)
7816 {
7817     int rd = extract32(insn, 0, 5);
7818     int rn = extract32(insn, 5, 5);
7819     int imm4 = extract32(insn, 11, 4);
7820     int op = extract32(insn, 29, 1);
7821     int is_q = extract32(insn, 30, 1);
7822     int imm5 = extract32(insn, 16, 5);
7823
7824     if (op) {
7825         if (is_q) {
7826             /* INS (element) */
7827             handle_simd_inse(s, rd, rn, imm4, imm5);
7828         } else {
7829             unallocated_encoding(s);
7830         }
7831     } else {
7832         switch (imm4) {
7833         case 0:
7834             /* DUP (element - vector) */
7835             handle_simd_dupe(s, is_q, rd, rn, imm5);
7836             break;
7837         case 1:
7838             /* DUP (general) */
7839             handle_simd_dupg(s, is_q, rd, rn, imm5);
7840             break;
7841         case 3:
7842             if (is_q) {
7843                 /* INS (general) */
7844                 handle_simd_insg(s, rd, rn, imm5);
7845             } else {
7846                 unallocated_encoding(s);
7847             }
7848             break;
7849         case 5:
7850         case 7:
7851             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7852             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7853             break;
7854         default:
7855             unallocated_encoding(s);
7856             break;
7857         }
7858     }
7859 }
7860
7861 /* AdvSIMD modified immediate
7862  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
7863  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7864  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
7865  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7866  *
7867  * There are a number of operations that can be carried out here:
7868  *   MOVI - move (shifted) imm into register
7869  *   MVNI - move inverted (shifted) imm into register
7870  *   ORR  - bitwise OR of (shifted) imm with register
7871  *   BIC  - bitwise clear of (shifted) imm with register
7872  * With ARMv8.2 we also have:
7873  *   FMOV half-precision
7874  */
7875 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
7876 {
7877     int rd = extract32(insn, 0, 5);
7878     int cmode = extract32(insn, 12, 4);
7879     int o2 = extract32(insn, 11, 1);
7880     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
7881     bool is_neg = extract32(insn, 29, 1);
7882     bool is_q = extract32(insn, 30, 1);
7883     uint64_t imm = 0;
7884
7885     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
7886         /* Check for FMOV (vector, immediate) - half-precision */
7887         if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
7888             unallocated_encoding(s);
7889             return;
7890         }
7891     }
7892
7893     if (!fp_access_check(s)) {
7894         return;
7895     }
7896
7897     if (cmode == 15 && o2 && !is_neg) {
7898         /* FMOV (vector, immediate) - half-precision */
7899         imm = vfp_expand_imm(MO_16, abcdefgh);
7900         /* now duplicate across the lanes */
7901         imm = dup_const(MO_16, imm);
7902     } else {
7903         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
7904     }
7905
7906     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
7907         /* MOVI or MVNI, with MVNI negation handled above.  */
7908         tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
7909                              vec_full_reg_size(s), imm);
7910     } else {
7911         /* ORR or BIC, with BIC negation to AND handled above.  */
7912         if (is_neg) {
7913             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
7914         } else {
7915             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
7916         }
7917     }
7918 }
7919
7920 /* AdvSIMD scalar copy
7921  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
7922  * +-----+----+-----------------+------+---+------+---+------+------+
7923  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7924  * +-----+----+-----------------+------+---+------+---+------+------+
7925  */
7926 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7927 {
7928     int rd = extract32(insn, 0, 5);
7929     int rn = extract32(insn, 5, 5);
7930     int imm4 = extract32(insn, 11, 4);
7931     int imm5 = extract32(insn, 16, 5);
7932     int op = extract32(insn, 29, 1);
7933
7934     if (op != 0 || imm4 != 0) {
7935         unallocated_encoding(s);
7936         return;
7937     }
7938
7939     /* DUP (element, scalar) */
7940     handle_simd_dupes(s, rd, rn, imm5);
7941 }
7942
7943 /* AdvSIMD scalar pairwise
7944  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7945  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7946  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7947  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7948  */
7949 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
7950 {
7951     int u = extract32(insn, 29, 1);
7952     int size = extract32(insn, 22, 2);
7953     int opcode = extract32(insn, 12, 5);
7954     int rn = extract32(insn, 5, 5);
7955     int rd = extract32(insn, 0, 5);
7956     TCGv_ptr fpst;
7957
7958     /* For some ops (the FP ones), size[1] is part of the encoding.
7959      * For ADDP strictly it is not but size[1] is always 1 for valid
7960      * encodings.
7961      */
7962     opcode |= (extract32(size, 1, 1) << 5);
7963
7964     switch (opcode) {
7965     case 0x3b: /* ADDP */
7966         if (u || size != 3) {
7967             unallocated_encoding(s);
7968             return;
7969         }
7970         if (!fp_access_check(s)) {
7971             return;
7972         }
7973
7974         fpst = NULL;
7975         break;
7976     case 0xc: /* FMAXNMP */
7977     case 0xd: /* FADDP */
7978     case 0xf: /* FMAXP */
7979     case 0x2c: /* FMINNMP */
7980     case 0x2f: /* FMINP */
7981         /* FP op, size[0] is 32 or 64 bit*/
7982         if (!u) {
7983             if (!dc_isar_feature(aa64_fp16, s)) {
7984                 unallocated_encoding(s);
7985                 return;
7986             } else {
7987                 size = MO_16;
7988             }
7989         } else {
7990             size = extract32(size, 0, 1) ? MO_64 : MO_32;
7991         }
7992
7993         if (!fp_access_check(s)) {
7994             return;
7995         }
7996
7997         fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7998         break;
7999     default:
8000         unallocated_encoding(s);
8001         return;
8002     }
8003
8004     if (size == MO_64) {
8005         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8006         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8007         TCGv_i64 tcg_res = tcg_temp_new_i64();
8008
8009         read_vec_element(s, tcg_op1, rn, 0, MO_64);
8010         read_vec_element(s, tcg_op2, rn, 1, MO_64);
8011
8012         switch (opcode) {
8013         case 0x3b: /* ADDP */
8014             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
8015             break;
8016         case 0xc: /* FMAXNMP */
8017             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8018             break;
8019         case 0xd: /* FADDP */
8020             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8021             break;
8022         case 0xf: /* FMAXP */
8023             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8024             break;
8025         case 0x2c: /* FMINNMP */
8026             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8027             break;
8028         case 0x2f: /* FMINP */
8029             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8030             break;
8031         default:
8032             g_assert_not_reached();
8033         }
8034
8035         write_fp_dreg(s, rd, tcg_res);
8036     } else {
8037         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8038         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8039         TCGv_i32 tcg_res = tcg_temp_new_i32();
8040
8041         read_vec_element_i32(s, tcg_op1, rn, 0, size);
8042         read_vec_element_i32(s, tcg_op2, rn, 1, size);
8043
8044         if (size == MO_16) {
8045             switch (opcode) {
8046             case 0xc: /* FMAXNMP */
8047                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8048                 break;
8049             case 0xd: /* FADDP */
8050                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
8051                 break;
8052             case 0xf: /* FMAXP */
8053                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
8054                 break;
8055             case 0x2c: /* FMINNMP */
8056                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8057                 break;
8058             case 0x2f: /* FMINP */
8059                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
8060                 break;
8061             default:
8062                 g_assert_not_reached();
8063             }
8064         } else {
8065             switch (opcode) {
8066             case 0xc: /* FMAXNMP */
8067                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8068                 break;
8069             case 0xd: /* FADDP */
8070                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8071                 break;
8072             case 0xf: /* FMAXP */
8073                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8074                 break;
8075             case 0x2c: /* FMINNMP */
8076                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8077                 break;
8078             case 0x2f: /* FMINP */
8079                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8080                 break;
8081             default:
8082                 g_assert_not_reached();
8083             }
8084         }
8085
8086         write_fp_sreg(s, rd, tcg_res);
8087     }
8088 }
8089
8090 /*
8091  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
8092  *
8093  * This code is handles the common shifting code and is used by both
8094  * the vector and scalar code.
8095  */
8096 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
8097                                     TCGv_i64 tcg_rnd, bool accumulate,
8098                                     bool is_u, int size, int shift)
8099 {
8100     bool extended_result = false;
8101     bool round = tcg_rnd != NULL;
8102     int ext_lshift = 0;
8103     TCGv_i64 tcg_src_hi;
8104
8105     if (round && size == 3) {
8106         extended_result = true;
8107         ext_lshift = 64 - shift;
8108         tcg_src_hi = tcg_temp_new_i64();
8109     } else if (shift == 64) {
8110         if (!accumulate && is_u) {
8111             /* result is zero */
8112             tcg_gen_movi_i64(tcg_res, 0);
8113             return;
8114         }
8115     }
8116
8117     /* Deal with the rounding step */
8118     if (round) {
8119         if (extended_result) {
8120             TCGv_i64 tcg_zero = tcg_constant_i64(0);
8121             if (!is_u) {
8122                 /* take care of sign extending tcg_res */
8123                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
8124                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8125                                  tcg_src, tcg_src_hi,
8126                                  tcg_rnd, tcg_zero);
8127             } else {
8128                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8129                                  tcg_src, tcg_zero,
8130                                  tcg_rnd, tcg_zero);
8131             }
8132         } else {
8133             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
8134         }
8135     }
8136
8137     /* Now do the shift right */
8138     if (round && extended_result) {
8139         /* extended case, >64 bit precision required */
8140         if (ext_lshift == 0) {
8141             /* special case, only high bits matter */
8142             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
8143         } else {
8144             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8145             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
8146             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
8147         }
8148     } else {
8149         if (is_u) {
8150             if (shift == 64) {
8151                 /* essentially shifting in 64 zeros */
8152                 tcg_gen_movi_i64(tcg_src, 0);
8153             } else {
8154                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8155             }
8156         } else {
8157             if (shift == 64) {
8158                 /* effectively extending the sign-bit */
8159                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
8160             } else {
8161                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
8162             }
8163         }
8164     }
8165
8166     if (accumulate) {
8167         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
8168     } else {
8169         tcg_gen_mov_i64(tcg_res, tcg_src);
8170     }
8171 }
8172
8173 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
8174 static void handle_scalar_simd_shri(DisasContext *s,
8175                                     bool is_u, int immh, int immb,
8176                                     int opcode, int rn, int rd)
8177 {
8178     const int size = 3;
8179     int immhb = immh << 3 | immb;
8180     int shift = 2 * (8 << size) - immhb;
8181     bool accumulate = false;
8182     bool round = false;
8183     bool insert = false;
8184     TCGv_i64 tcg_rn;
8185     TCGv_i64 tcg_rd;
8186     TCGv_i64 tcg_round;
8187
8188     if (!extract32(immh, 3, 1)) {
8189         unallocated_encoding(s);
8190         return;
8191     }
8192
8193     if (!fp_access_check(s)) {
8194         return;
8195     }
8196
8197     switch (opcode) {
8198     case 0x02: /* SSRA / USRA (accumulate) */
8199         accumulate = true;
8200         break;
8201     case 0x04: /* SRSHR / URSHR (rounding) */
8202         round = true;
8203         break;
8204     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8205         accumulate = round = true;
8206         break;
8207     case 0x08: /* SRI */
8208         insert = true;
8209         break;
8210     }
8211
8212     if (round) {
8213         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8214     } else {
8215         tcg_round = NULL;
8216     }
8217
8218     tcg_rn = read_fp_dreg(s, rn);
8219     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8220
8221     if (insert) {
8222         /* shift count same as element size is valid but does nothing;
8223          * special case to avoid potential shift by 64.
8224          */
8225         int esize = 8 << size;
8226         if (shift != esize) {
8227             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
8228             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
8229         }
8230     } else {
8231         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8232                                 accumulate, is_u, size, shift);
8233     }
8234
8235     write_fp_dreg(s, rd, tcg_rd);
8236 }
8237
8238 /* SHL/SLI - Scalar shift left */
8239 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
8240                                     int immh, int immb, int opcode,
8241                                     int rn, int rd)
8242 {
8243     int size = 32 - clz32(immh) - 1;
8244     int immhb = immh << 3 | immb;
8245     int shift = immhb - (8 << size);
8246     TCGv_i64 tcg_rn;
8247     TCGv_i64 tcg_rd;
8248
8249     if (!extract32(immh, 3, 1)) {
8250         unallocated_encoding(s);
8251         return;
8252     }
8253
8254     if (!fp_access_check(s)) {
8255         return;
8256     }
8257
8258     tcg_rn = read_fp_dreg(s, rn);
8259     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8260
8261     if (insert) {
8262         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8263     } else {
8264         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8265     }
8266
8267     write_fp_dreg(s, rd, tcg_rd);
8268 }
8269
8270 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8271  * (signed/unsigned) narrowing */
8272 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8273                                    bool is_u_shift, bool is_u_narrow,
8274                                    int immh, int immb, int opcode,
8275                                    int rn, int rd)
8276 {
8277     int immhb = immh << 3 | immb;
8278     int size = 32 - clz32(immh) - 1;
8279     int esize = 8 << size;
8280     int shift = (2 * esize) - immhb;
8281     int elements = is_scalar ? 1 : (64 / esize);
8282     bool round = extract32(opcode, 0, 1);
8283     MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8284     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8285     TCGv_i32 tcg_rd_narrowed;
8286     TCGv_i64 tcg_final;
8287
8288     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8289         { gen_helper_neon_narrow_sat_s8,
8290           gen_helper_neon_unarrow_sat8 },
8291         { gen_helper_neon_narrow_sat_s16,
8292           gen_helper_neon_unarrow_sat16 },
8293         { gen_helper_neon_narrow_sat_s32,
8294           gen_helper_neon_unarrow_sat32 },
8295         { NULL, NULL },
8296     };
8297     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8298         gen_helper_neon_narrow_sat_u8,
8299         gen_helper_neon_narrow_sat_u16,
8300         gen_helper_neon_narrow_sat_u32,
8301         NULL
8302     };
8303     NeonGenNarrowEnvFn *narrowfn;
8304
8305     int i;
8306
8307     assert(size < 4);
8308
8309     if (extract32(immh, 3, 1)) {
8310         unallocated_encoding(s);
8311         return;
8312     }
8313
8314     if (!fp_access_check(s)) {
8315         return;
8316     }
8317
8318     if (is_u_shift) {
8319         narrowfn = unsigned_narrow_fns[size];
8320     } else {
8321         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8322     }
8323
8324     tcg_rn = tcg_temp_new_i64();
8325     tcg_rd = tcg_temp_new_i64();
8326     tcg_rd_narrowed = tcg_temp_new_i32();
8327     tcg_final = tcg_temp_new_i64();
8328
8329     if (round) {
8330         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8331     } else {
8332         tcg_round = NULL;
8333     }
8334
8335     for (i = 0; i < elements; i++) {
8336         read_vec_element(s, tcg_rn, rn, i, ldop);
8337         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8338                                 false, is_u_shift, size+1, shift);
8339         narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd);
8340         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8341         if (i == 0) {
8342             tcg_gen_mov_i64(tcg_final, tcg_rd);
8343         } else {
8344             tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8345         }
8346     }
8347
8348     if (!is_q) {
8349         write_vec_element(s, tcg_final, rd, 0, MO_64);
8350     } else {
8351         write_vec_element(s, tcg_final, rd, 1, MO_64);
8352     }
8353     clear_vec_high(s, is_q, rd);
8354 }
8355
8356 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8357 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8358                              bool src_unsigned, bool dst_unsigned,
8359                              int immh, int immb, int rn, int rd)
8360 {
8361     int immhb = immh << 3 | immb;
8362     int size = 32 - clz32(immh) - 1;
8363     int shift = immhb - (8 << size);
8364     int pass;
8365
8366     assert(immh != 0);
8367     assert(!(scalar && is_q));
8368
8369     if (!scalar) {
8370         if (!is_q && extract32(immh, 3, 1)) {
8371             unallocated_encoding(s);
8372             return;
8373         }
8374
8375         /* Since we use the variable-shift helpers we must
8376          * replicate the shift count into each element of
8377          * the tcg_shift value.
8378          */
8379         switch (size) {
8380         case 0:
8381             shift |= shift << 8;
8382             /* fall through */
8383         case 1:
8384             shift |= shift << 16;
8385             break;
8386         case 2:
8387         case 3:
8388             break;
8389         default:
8390             g_assert_not_reached();
8391         }
8392     }
8393
8394     if (!fp_access_check(s)) {
8395         return;
8396     }
8397
8398     if (size == 3) {
8399         TCGv_i64 tcg_shift = tcg_constant_i64(shift);
8400         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8401             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8402             { NULL, gen_helper_neon_qshl_u64 },
8403         };
8404         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8405         int maxpass = is_q ? 2 : 1;
8406
8407         for (pass = 0; pass < maxpass; pass++) {
8408             TCGv_i64 tcg_op = tcg_temp_new_i64();
8409
8410             read_vec_element(s, tcg_op, rn, pass, MO_64);
8411             genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
8412             write_vec_element(s, tcg_op, rd, pass, MO_64);
8413         }
8414         clear_vec_high(s, is_q, rd);
8415     } else {
8416         TCGv_i32 tcg_shift = tcg_constant_i32(shift);
8417         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8418             {
8419                 { gen_helper_neon_qshl_s8,
8420                   gen_helper_neon_qshl_s16,
8421                   gen_helper_neon_qshl_s32 },
8422                 { gen_helper_neon_qshlu_s8,
8423                   gen_helper_neon_qshlu_s16,
8424                   gen_helper_neon_qshlu_s32 }
8425             }, {
8426                 { NULL, NULL, NULL },
8427                 { gen_helper_neon_qshl_u8,
8428                   gen_helper_neon_qshl_u16,
8429                   gen_helper_neon_qshl_u32 }
8430             }
8431         };
8432         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8433         MemOp memop = scalar ? size : MO_32;
8434         int maxpass = scalar ? 1 : is_q ? 4 : 2;
8435
8436         for (pass = 0; pass < maxpass; pass++) {
8437             TCGv_i32 tcg_op = tcg_temp_new_i32();
8438
8439             read_vec_element_i32(s, tcg_op, rn, pass, memop);
8440             genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
8441             if (scalar) {
8442                 switch (size) {
8443                 case 0:
8444                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
8445                     break;
8446                 case 1:
8447                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
8448                     break;
8449                 case 2:
8450                     break;
8451                 default:
8452                     g_assert_not_reached();
8453                 }
8454                 write_fp_sreg(s, rd, tcg_op);
8455             } else {
8456                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8457             }
8458         }
8459
8460         if (!scalar) {
8461             clear_vec_high(s, is_q, rd);
8462         }
8463     }
8464 }
8465
8466 /* Common vector code for handling integer to FP conversion */
8467 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8468                                    int elements, int is_signed,
8469                                    int fracbits, int size)
8470 {
8471     TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8472     TCGv_i32 tcg_shift = NULL;
8473
8474     MemOp mop = size | (is_signed ? MO_SIGN : 0);
8475     int pass;
8476
8477     if (fracbits || size == MO_64) {
8478         tcg_shift = tcg_constant_i32(fracbits);
8479     }
8480
8481     if (size == MO_64) {
8482         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8483         TCGv_i64 tcg_double = tcg_temp_new_i64();
8484
8485         for (pass = 0; pass < elements; pass++) {
8486             read_vec_element(s, tcg_int64, rn, pass, mop);
8487
8488             if (is_signed) {
8489                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8490                                      tcg_shift, tcg_fpst);
8491             } else {
8492                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8493                                      tcg_shift, tcg_fpst);
8494             }
8495             if (elements == 1) {
8496                 write_fp_dreg(s, rd, tcg_double);
8497             } else {
8498                 write_vec_element(s, tcg_double, rd, pass, MO_64);
8499             }
8500         }
8501     } else {
8502         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8503         TCGv_i32 tcg_float = tcg_temp_new_i32();
8504
8505         for (pass = 0; pass < elements; pass++) {
8506             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8507
8508             switch (size) {
8509             case MO_32:
8510                 if (fracbits) {
8511                     if (is_signed) {
8512                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
8513                                              tcg_shift, tcg_fpst);
8514                     } else {
8515                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
8516                                              tcg_shift, tcg_fpst);
8517                     }
8518                 } else {
8519                     if (is_signed) {
8520                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8521                     } else {
8522                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8523                     }
8524                 }
8525                 break;
8526             case MO_16:
8527                 if (fracbits) {
8528                     if (is_signed) {
8529                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8530                                              tcg_shift, tcg_fpst);
8531                     } else {
8532                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8533                                              tcg_shift, tcg_fpst);
8534                     }
8535                 } else {
8536                     if (is_signed) {
8537                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8538                     } else {
8539                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8540                     }
8541                 }
8542                 break;
8543             default:
8544                 g_assert_not_reached();
8545             }
8546
8547             if (elements == 1) {
8548                 write_fp_sreg(s, rd, tcg_float);
8549             } else {
8550                 write_vec_element_i32(s, tcg_float, rd, pass, size);
8551             }
8552         }
8553     }
8554
8555     clear_vec_high(s, elements << size == 16, rd);
8556 }
8557
8558 /* UCVTF/SCVTF - Integer to FP conversion */
8559 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8560                                          bool is_q, bool is_u,
8561                                          int immh, int immb, int opcode,
8562                                          int rn, int rd)
8563 {
8564     int size, elements, fracbits;
8565     int immhb = immh << 3 | immb;
8566
8567     if (immh & 8) {
8568         size = MO_64;
8569         if (!is_scalar && !is_q) {
8570             unallocated_encoding(s);
8571             return;
8572         }
8573     } else if (immh & 4) {
8574         size = MO_32;
8575     } else if (immh & 2) {
8576         size = MO_16;
8577         if (!dc_isar_feature(aa64_fp16, s)) {
8578             unallocated_encoding(s);
8579             return;
8580         }
8581     } else {
8582         /* immh == 0 would be a failure of the decode logic */
8583         g_assert(immh == 1);
8584         unallocated_encoding(s);
8585         return;
8586     }
8587
8588     if (is_scalar) {
8589         elements = 1;
8590     } else {
8591         elements = (8 << is_q) >> size;
8592     }
8593     fracbits = (16 << size) - immhb;
8594
8595     if (!fp_access_check(s)) {
8596         return;
8597     }
8598
8599     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8600 }
8601
8602 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
8603 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8604                                          bool is_q, bool is_u,
8605                                          int immh, int immb, int rn, int rd)
8606 {
8607     int immhb = immh << 3 | immb;
8608     int pass, size, fracbits;
8609     TCGv_ptr tcg_fpstatus;
8610     TCGv_i32 tcg_rmode, tcg_shift;
8611
8612     if (immh & 0x8) {
8613         size = MO_64;
8614         if (!is_scalar && !is_q) {
8615             unallocated_encoding(s);
8616             return;
8617         }
8618     } else if (immh & 0x4) {
8619         size = MO_32;
8620     } else if (immh & 0x2) {
8621         size = MO_16;
8622         if (!dc_isar_feature(aa64_fp16, s)) {
8623             unallocated_encoding(s);
8624             return;
8625         }
8626     } else {
8627         /* Should have split out AdvSIMD modified immediate earlier.  */
8628         assert(immh == 1);
8629         unallocated_encoding(s);
8630         return;
8631     }
8632
8633     if (!fp_access_check(s)) {
8634         return;
8635     }
8636
8637     assert(!(is_scalar && is_q));
8638
8639     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8640     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
8641     fracbits = (16 << size) - immhb;
8642     tcg_shift = tcg_constant_i32(fracbits);
8643
8644     if (size == MO_64) {
8645         int maxpass = is_scalar ? 1 : 2;
8646
8647         for (pass = 0; pass < maxpass; pass++) {
8648             TCGv_i64 tcg_op = tcg_temp_new_i64();
8649
8650             read_vec_element(s, tcg_op, rn, pass, MO_64);
8651             if (is_u) {
8652                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8653             } else {
8654                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8655             }
8656             write_vec_element(s, tcg_op, rd, pass, MO_64);
8657         }
8658         clear_vec_high(s, is_q, rd);
8659     } else {
8660         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8661         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8662
8663         switch (size) {
8664         case MO_16:
8665             if (is_u) {
8666                 fn = gen_helper_vfp_touhh;
8667             } else {
8668                 fn = gen_helper_vfp_toshh;
8669             }
8670             break;
8671         case MO_32:
8672             if (is_u) {
8673                 fn = gen_helper_vfp_touls;
8674             } else {
8675                 fn = gen_helper_vfp_tosls;
8676             }
8677             break;
8678         default:
8679             g_assert_not_reached();
8680         }
8681
8682         for (pass = 0; pass < maxpass; pass++) {
8683             TCGv_i32 tcg_op = tcg_temp_new_i32();
8684
8685             read_vec_element_i32(s, tcg_op, rn, pass, size);
8686             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8687             if (is_scalar) {
8688                 write_fp_sreg(s, rd, tcg_op);
8689             } else {
8690                 write_vec_element_i32(s, tcg_op, rd, pass, size);
8691             }
8692         }
8693         if (!is_scalar) {
8694             clear_vec_high(s, is_q, rd);
8695         }
8696     }
8697
8698     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8699 }
8700
8701 /* AdvSIMD scalar shift by immediate
8702  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
8703  * +-----+---+-------------+------+------+--------+---+------+------+
8704  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8705  * +-----+---+-------------+------+------+--------+---+------+------+
8706  *
8707  * This is the scalar version so it works on a fixed sized registers
8708  */
8709 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8710 {
8711     int rd = extract32(insn, 0, 5);
8712     int rn = extract32(insn, 5, 5);
8713     int opcode = extract32(insn, 11, 5);
8714     int immb = extract32(insn, 16, 3);
8715     int immh = extract32(insn, 19, 4);
8716     bool is_u = extract32(insn, 29, 1);
8717
8718     if (immh == 0) {
8719         unallocated_encoding(s);
8720         return;
8721     }
8722
8723     switch (opcode) {
8724     case 0x08: /* SRI */
8725         if (!is_u) {
8726             unallocated_encoding(s);
8727             return;
8728         }
8729         /* fall through */
8730     case 0x00: /* SSHR / USHR */
8731     case 0x02: /* SSRA / USRA */
8732     case 0x04: /* SRSHR / URSHR */
8733     case 0x06: /* SRSRA / URSRA */
8734         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8735         break;
8736     case 0x0a: /* SHL / SLI */
8737         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8738         break;
8739     case 0x1c: /* SCVTF, UCVTF */
8740         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8741                                      opcode, rn, rd);
8742         break;
8743     case 0x10: /* SQSHRUN, SQSHRUN2 */
8744     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8745         if (!is_u) {
8746             unallocated_encoding(s);
8747             return;
8748         }
8749         handle_vec_simd_sqshrn(s, true, false, false, true,
8750                                immh, immb, opcode, rn, rd);
8751         break;
8752     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
8753     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8754         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8755                                immh, immb, opcode, rn, rd);
8756         break;
8757     case 0xc: /* SQSHLU */
8758         if (!is_u) {
8759             unallocated_encoding(s);
8760             return;
8761         }
8762         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8763         break;
8764     case 0xe: /* SQSHL, UQSHL */
8765         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8766         break;
8767     case 0x1f: /* FCVTZS, FCVTZU */
8768         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8769         break;
8770     default:
8771         unallocated_encoding(s);
8772         break;
8773     }
8774 }
8775
8776 /* AdvSIMD scalar three different
8777  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8778  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8779  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8780  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8781  */
8782 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8783 {
8784     bool is_u = extract32(insn, 29, 1);
8785     int size = extract32(insn, 22, 2);
8786     int opcode = extract32(insn, 12, 4);
8787     int rm = extract32(insn, 16, 5);
8788     int rn = extract32(insn, 5, 5);
8789     int rd = extract32(insn, 0, 5);
8790
8791     if (is_u) {
8792         unallocated_encoding(s);
8793         return;
8794     }
8795
8796     switch (opcode) {
8797     case 0x9: /* SQDMLAL, SQDMLAL2 */
8798     case 0xb: /* SQDMLSL, SQDMLSL2 */
8799     case 0xd: /* SQDMULL, SQDMULL2 */
8800         if (size == 0 || size == 3) {
8801             unallocated_encoding(s);
8802             return;
8803         }
8804         break;
8805     default:
8806         unallocated_encoding(s);
8807         return;
8808     }
8809
8810     if (!fp_access_check(s)) {
8811         return;
8812     }
8813
8814     if (size == 2) {
8815         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8816         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8817         TCGv_i64 tcg_res = tcg_temp_new_i64();
8818
8819         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8820         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8821
8822         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
8823         gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env, tcg_res, tcg_res);
8824
8825         switch (opcode) {
8826         case 0xd: /* SQDMULL, SQDMULL2 */
8827             break;
8828         case 0xb: /* SQDMLSL, SQDMLSL2 */
8829             tcg_gen_neg_i64(tcg_res, tcg_res);
8830             /* fall through */
8831         case 0x9: /* SQDMLAL, SQDMLAL2 */
8832             read_vec_element(s, tcg_op1, rd, 0, MO_64);
8833             gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env,
8834                                               tcg_res, tcg_op1);
8835             break;
8836         default:
8837             g_assert_not_reached();
8838         }
8839
8840         write_fp_dreg(s, rd, tcg_res);
8841     } else {
8842         TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8843         TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8844         TCGv_i64 tcg_res = tcg_temp_new_i64();
8845
8846         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8847         gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env, tcg_res, tcg_res);
8848
8849         switch (opcode) {
8850         case 0xd: /* SQDMULL, SQDMULL2 */
8851             break;
8852         case 0xb: /* SQDMLSL, SQDMLSL2 */
8853             gen_helper_neon_negl_u32(tcg_res, tcg_res);
8854             /* fall through */
8855         case 0x9: /* SQDMLAL, SQDMLAL2 */
8856         {
8857             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8858             read_vec_element(s, tcg_op3, rd, 0, MO_32);
8859             gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env,
8860                                               tcg_res, tcg_op3);
8861             break;
8862         }
8863         default:
8864             g_assert_not_reached();
8865         }
8866
8867         tcg_gen_ext32u_i64(tcg_res, tcg_res);
8868         write_fp_dreg(s, rd, tcg_res);
8869     }
8870 }
8871
8872 static void handle_3same_64(DisasContext *s, int opcode, bool u,
8873                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8874 {
8875     /* Handle 64x64->64 opcodes which are shared between the scalar
8876      * and vector 3-same groups. We cover every opcode where size == 3
8877      * is valid in either the three-reg-same (integer, not pairwise)
8878      * or scalar-three-reg-same groups.
8879      */
8880     TCGCond cond;
8881
8882     switch (opcode) {
8883     case 0x1: /* SQADD */
8884         if (u) {
8885             gen_helper_neon_qadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8886         } else {
8887             gen_helper_neon_qadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8888         }
8889         break;
8890     case 0x5: /* SQSUB */
8891         if (u) {
8892             gen_helper_neon_qsub_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8893         } else {
8894             gen_helper_neon_qsub_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8895         }
8896         break;
8897     case 0x6: /* CMGT, CMHI */
8898         cond = u ? TCG_COND_GTU : TCG_COND_GT;
8899     do_cmop:
8900         /* 64 bit integer comparison, result = test ? -1 : 0. */
8901         tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8902         break;
8903     case 0x7: /* CMGE, CMHS */
8904         cond = u ? TCG_COND_GEU : TCG_COND_GE;
8905         goto do_cmop;
8906     case 0x11: /* CMTST, CMEQ */
8907         if (u) {
8908             cond = TCG_COND_EQ;
8909             goto do_cmop;
8910         }
8911         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8912         break;
8913     case 0x8: /* SSHL, USHL */
8914         if (u) {
8915             gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
8916         } else {
8917             gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
8918         }
8919         break;
8920     case 0x9: /* SQSHL, UQSHL */
8921         if (u) {
8922             gen_helper_neon_qshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8923         } else {
8924             gen_helper_neon_qshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8925         }
8926         break;
8927     case 0xa: /* SRSHL, URSHL */
8928         if (u) {
8929             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8930         } else {
8931             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8932         }
8933         break;
8934     case 0xb: /* SQRSHL, UQRSHL */
8935         if (u) {
8936             gen_helper_neon_qrshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8937         } else {
8938             gen_helper_neon_qrshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
8939         }
8940         break;
8941     case 0x10: /* ADD, SUB */
8942         if (u) {
8943             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8944         } else {
8945             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8946         }
8947         break;
8948     default:
8949         g_assert_not_reached();
8950     }
8951 }
8952
8953 /* Handle the 3-same-operands float operations; shared by the scalar
8954  * and vector encodings. The caller must filter out any encodings
8955  * not allocated for the encoding it is dealing with.
8956  */
8957 static void handle_3same_float(DisasContext *s, int size, int elements,
8958                                int fpopcode, int rd, int rn, int rm)
8959 {
8960     int pass;
8961     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8962
8963     for (pass = 0; pass < elements; pass++) {
8964         if (size) {
8965             /* Double */
8966             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8967             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8968             TCGv_i64 tcg_res = tcg_temp_new_i64();
8969
8970             read_vec_element(s, tcg_op1, rn, pass, MO_64);
8971             read_vec_element(s, tcg_op2, rm, pass, MO_64);
8972
8973             switch (fpopcode) {
8974             case 0x39: /* FMLS */
8975                 /* As usual for ARM, separate negation for fused multiply-add */
8976                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
8977                 /* fall through */
8978             case 0x19: /* FMLA */
8979                 read_vec_element(s, tcg_res, rd, pass, MO_64);
8980                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
8981                                        tcg_res, fpst);
8982                 break;
8983             case 0x18: /* FMAXNM */
8984                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8985                 break;
8986             case 0x1a: /* FADD */
8987                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8988                 break;
8989             case 0x1b: /* FMULX */
8990                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
8991                 break;
8992             case 0x1c: /* FCMEQ */
8993                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8994                 break;
8995             case 0x1e: /* FMAX */
8996                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8997                 break;
8998             case 0x1f: /* FRECPS */
8999                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9000                 break;
9001             case 0x38: /* FMINNM */
9002                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
9003                 break;
9004             case 0x3a: /* FSUB */
9005                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
9006                 break;
9007             case 0x3e: /* FMIN */
9008                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
9009                 break;
9010             case 0x3f: /* FRSQRTS */
9011                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9012                 break;
9013             case 0x5b: /* FMUL */
9014                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
9015                 break;
9016             case 0x5c: /* FCMGE */
9017                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9018                 break;
9019             case 0x5d: /* FACGE */
9020                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9021                 break;
9022             case 0x5f: /* FDIV */
9023                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
9024                 break;
9025             case 0x7a: /* FABD */
9026                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
9027                 gen_helper_vfp_absd(tcg_res, tcg_res);
9028                 break;
9029             case 0x7c: /* FCMGT */
9030                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9031                 break;
9032             case 0x7d: /* FACGT */
9033                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9034                 break;
9035             default:
9036                 g_assert_not_reached();
9037             }
9038
9039             write_vec_element(s, tcg_res, rd, pass, MO_64);
9040         } else {
9041             /* Single */
9042             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9043             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9044             TCGv_i32 tcg_res = tcg_temp_new_i32();
9045
9046             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9047             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9048
9049             switch (fpopcode) {
9050             case 0x39: /* FMLS */
9051                 /* As usual for ARM, separate negation for fused multiply-add */
9052                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
9053                 /* fall through */
9054             case 0x19: /* FMLA */
9055                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9056                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
9057                                        tcg_res, fpst);
9058                 break;
9059             case 0x1a: /* FADD */
9060                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
9061                 break;
9062             case 0x1b: /* FMULX */
9063                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
9064                 break;
9065             case 0x1c: /* FCMEQ */
9066                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9067                 break;
9068             case 0x1e: /* FMAX */
9069                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
9070                 break;
9071             case 0x1f: /* FRECPS */
9072                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9073                 break;
9074             case 0x18: /* FMAXNM */
9075                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
9076                 break;
9077             case 0x38: /* FMINNM */
9078                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
9079                 break;
9080             case 0x3a: /* FSUB */
9081                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9082                 break;
9083             case 0x3e: /* FMIN */
9084                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
9085                 break;
9086             case 0x3f: /* FRSQRTS */
9087                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9088                 break;
9089             case 0x5b: /* FMUL */
9090                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
9091                 break;
9092             case 0x5c: /* FCMGE */
9093                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9094                 break;
9095             case 0x5d: /* FACGE */
9096                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9097                 break;
9098             case 0x5f: /* FDIV */
9099                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
9100                 break;
9101             case 0x7a: /* FABD */
9102                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9103                 gen_helper_vfp_abss(tcg_res, tcg_res);
9104                 break;
9105             case 0x7c: /* FCMGT */
9106                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9107                 break;
9108             case 0x7d: /* FACGT */
9109                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9110                 break;
9111             default:
9112                 g_assert_not_reached();
9113             }
9114
9115             if (elements == 1) {
9116                 /* scalar single so clear high part */
9117                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9118
9119                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
9120                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
9121             } else {
9122                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9123             }
9124         }
9125     }
9126
9127     clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
9128 }
9129
9130 /* AdvSIMD scalar three same
9131  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
9132  * +-----+---+-----------+------+---+------+--------+---+------+------+
9133  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9134  * +-----+---+-----------+------+---+------+--------+---+------+------+
9135  */
9136 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
9137 {
9138     int rd = extract32(insn, 0, 5);
9139     int rn = extract32(insn, 5, 5);
9140     int opcode = extract32(insn, 11, 5);
9141     int rm = extract32(insn, 16, 5);
9142     int size = extract32(insn, 22, 2);
9143     bool u = extract32(insn, 29, 1);
9144     TCGv_i64 tcg_rd;
9145
9146     if (opcode >= 0x18) {
9147         /* Floating point: U, size[1] and opcode indicate operation */
9148         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
9149         switch (fpopcode) {
9150         case 0x1b: /* FMULX */
9151         case 0x1f: /* FRECPS */
9152         case 0x3f: /* FRSQRTS */
9153         case 0x5d: /* FACGE */
9154         case 0x7d: /* FACGT */
9155         case 0x1c: /* FCMEQ */
9156         case 0x5c: /* FCMGE */
9157         case 0x7c: /* FCMGT */
9158         case 0x7a: /* FABD */
9159             break;
9160         default:
9161             unallocated_encoding(s);
9162             return;
9163         }
9164
9165         if (!fp_access_check(s)) {
9166             return;
9167         }
9168
9169         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
9170         return;
9171     }
9172
9173     switch (opcode) {
9174     case 0x1: /* SQADD, UQADD */
9175     case 0x5: /* SQSUB, UQSUB */
9176     case 0x9: /* SQSHL, UQSHL */
9177     case 0xb: /* SQRSHL, UQRSHL */
9178         break;
9179     case 0x8: /* SSHL, USHL */
9180     case 0xa: /* SRSHL, URSHL */
9181     case 0x6: /* CMGT, CMHI */
9182     case 0x7: /* CMGE, CMHS */
9183     case 0x11: /* CMTST, CMEQ */
9184     case 0x10: /* ADD, SUB (vector) */
9185         if (size != 3) {
9186             unallocated_encoding(s);
9187             return;
9188         }
9189         break;
9190     case 0x16: /* SQDMULH, SQRDMULH (vector) */
9191         if (size != 1 && size != 2) {
9192             unallocated_encoding(s);
9193             return;
9194         }
9195         break;
9196     default:
9197         unallocated_encoding(s);
9198         return;
9199     }
9200
9201     if (!fp_access_check(s)) {
9202         return;
9203     }
9204
9205     tcg_rd = tcg_temp_new_i64();
9206
9207     if (size == 3) {
9208         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9209         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
9210
9211         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
9212     } else {
9213         /* Do a single operation on the lowest element in the vector.
9214          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
9215          * no side effects for all these operations.
9216          * OPTME: special-purpose helpers would avoid doing some
9217          * unnecessary work in the helper for the 8 and 16 bit cases.
9218          */
9219         NeonGenTwoOpEnvFn *genenvfn;
9220         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9221         TCGv_i32 tcg_rm = tcg_temp_new_i32();
9222         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9223
9224         read_vec_element_i32(s, tcg_rn, rn, 0, size);
9225         read_vec_element_i32(s, tcg_rm, rm, 0, size);
9226
9227         switch (opcode) {
9228         case 0x1: /* SQADD, UQADD */
9229         {
9230             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9231                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9232                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9233                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9234             };
9235             genenvfn = fns[size][u];
9236             break;
9237         }
9238         case 0x5: /* SQSUB, UQSUB */
9239         {
9240             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9241                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9242                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9243                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9244             };
9245             genenvfn = fns[size][u];
9246             break;
9247         }
9248         case 0x9: /* SQSHL, UQSHL */
9249         {
9250             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9251                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9252                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9253                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9254             };
9255             genenvfn = fns[size][u];
9256             break;
9257         }
9258         case 0xb: /* SQRSHL, UQRSHL */
9259         {
9260             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9261                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9262                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9263                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9264             };
9265             genenvfn = fns[size][u];
9266             break;
9267         }
9268         case 0x16: /* SQDMULH, SQRDMULH */
9269         {
9270             static NeonGenTwoOpEnvFn * const fns[2][2] = {
9271                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9272                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9273             };
9274             assert(size == 1 || size == 2);
9275             genenvfn = fns[size - 1][u];
9276             break;
9277         }
9278         default:
9279             g_assert_not_reached();
9280         }
9281
9282         genenvfn(tcg_rd32, tcg_env, tcg_rn, tcg_rm);
9283         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9284     }
9285
9286     write_fp_dreg(s, rd, tcg_rd);
9287 }
9288
9289 /* AdvSIMD scalar three same FP16
9290  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9291  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9292  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9293  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9294  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9295  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9296  */
9297 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9298                                                   uint32_t insn)
9299 {
9300     int rd = extract32(insn, 0, 5);
9301     int rn = extract32(insn, 5, 5);
9302     int opcode = extract32(insn, 11, 3);
9303     int rm = extract32(insn, 16, 5);
9304     bool u = extract32(insn, 29, 1);
9305     bool a = extract32(insn, 23, 1);
9306     int fpopcode = opcode | (a << 3) |  (u << 4);
9307     TCGv_ptr fpst;
9308     TCGv_i32 tcg_op1;
9309     TCGv_i32 tcg_op2;
9310     TCGv_i32 tcg_res;
9311
9312     switch (fpopcode) {
9313     case 0x03: /* FMULX */
9314     case 0x04: /* FCMEQ (reg) */
9315     case 0x07: /* FRECPS */
9316     case 0x0f: /* FRSQRTS */
9317     case 0x14: /* FCMGE (reg) */
9318     case 0x15: /* FACGE */
9319     case 0x1a: /* FABD */
9320     case 0x1c: /* FCMGT (reg) */
9321     case 0x1d: /* FACGT */
9322         break;
9323     default:
9324         unallocated_encoding(s);
9325         return;
9326     }
9327
9328     if (!dc_isar_feature(aa64_fp16, s)) {
9329         unallocated_encoding(s);
9330     }
9331
9332     if (!fp_access_check(s)) {
9333         return;
9334     }
9335
9336     fpst = fpstatus_ptr(FPST_FPCR_F16);
9337
9338     tcg_op1 = read_fp_hreg(s, rn);
9339     tcg_op2 = read_fp_hreg(s, rm);
9340     tcg_res = tcg_temp_new_i32();
9341
9342     switch (fpopcode) {
9343     case 0x03: /* FMULX */
9344         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9345         break;
9346     case 0x04: /* FCMEQ (reg) */
9347         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9348         break;
9349     case 0x07: /* FRECPS */
9350         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9351         break;
9352     case 0x0f: /* FRSQRTS */
9353         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9354         break;
9355     case 0x14: /* FCMGE (reg) */
9356         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9357         break;
9358     case 0x15: /* FACGE */
9359         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9360         break;
9361     case 0x1a: /* FABD */
9362         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9363         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9364         break;
9365     case 0x1c: /* FCMGT (reg) */
9366         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9367         break;
9368     case 0x1d: /* FACGT */
9369         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9370         break;
9371     default:
9372         g_assert_not_reached();
9373     }
9374
9375     write_fp_sreg(s, rd, tcg_res);
9376 }
9377
9378 /* AdvSIMD scalar three same extra
9379  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9380  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9381  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9382  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9383  */
9384 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9385                                                    uint32_t insn)
9386 {
9387     int rd = extract32(insn, 0, 5);
9388     int rn = extract32(insn, 5, 5);
9389     int opcode = extract32(insn, 11, 4);
9390     int rm = extract32(insn, 16, 5);
9391     int size = extract32(insn, 22, 2);
9392     bool u = extract32(insn, 29, 1);
9393     TCGv_i32 ele1, ele2, ele3;
9394     TCGv_i64 res;
9395     bool feature;
9396
9397     switch (u * 16 + opcode) {
9398     case 0x10: /* SQRDMLAH (vector) */
9399     case 0x11: /* SQRDMLSH (vector) */
9400         if (size != 1 && size != 2) {
9401             unallocated_encoding(s);
9402             return;
9403         }
9404         feature = dc_isar_feature(aa64_rdm, s);
9405         break;
9406     default:
9407         unallocated_encoding(s);
9408         return;
9409     }
9410     if (!feature) {
9411         unallocated_encoding(s);
9412         return;
9413     }
9414     if (!fp_access_check(s)) {
9415         return;
9416     }
9417
9418     /* Do a single operation on the lowest element in the vector.
9419      * We use the standard Neon helpers and rely on 0 OP 0 == 0
9420      * with no side effects for all these operations.
9421      * OPTME: special-purpose helpers would avoid doing some
9422      * unnecessary work in the helper for the 16 bit cases.
9423      */
9424     ele1 = tcg_temp_new_i32();
9425     ele2 = tcg_temp_new_i32();
9426     ele3 = tcg_temp_new_i32();
9427
9428     read_vec_element_i32(s, ele1, rn, 0, size);
9429     read_vec_element_i32(s, ele2, rm, 0, size);
9430     read_vec_element_i32(s, ele3, rd, 0, size);
9431
9432     switch (opcode) {
9433     case 0x0: /* SQRDMLAH */
9434         if (size == 1) {
9435             gen_helper_neon_qrdmlah_s16(ele3, tcg_env, ele1, ele2, ele3);
9436         } else {
9437             gen_helper_neon_qrdmlah_s32(ele3, tcg_env, ele1, ele2, ele3);
9438         }
9439         break;
9440     case 0x1: /* SQRDMLSH */
9441         if (size == 1) {
9442             gen_helper_neon_qrdmlsh_s16(ele3, tcg_env, ele1, ele2, ele3);
9443         } else {
9444             gen_helper_neon_qrdmlsh_s32(ele3, tcg_env, ele1, ele2, ele3);
9445         }
9446         break;
9447     default:
9448         g_assert_not_reached();
9449     }
9450
9451     res = tcg_temp_new_i64();
9452     tcg_gen_extu_i32_i64(res, ele3);
9453     write_fp_dreg(s, rd, res);
9454 }
9455
9456 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9457                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9458                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9459 {
9460     /* Handle 64->64 opcodes which are shared between the scalar and
9461      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9462      * is valid in either group and also the double-precision fp ops.
9463      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
9464      * requires them.
9465      */
9466     TCGCond cond;
9467
9468     switch (opcode) {
9469     case 0x4: /* CLS, CLZ */
9470         if (u) {
9471             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9472         } else {
9473             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9474         }
9475         break;
9476     case 0x5: /* NOT */
9477         /* This opcode is shared with CNT and RBIT but we have earlier
9478          * enforced that size == 3 if and only if this is the NOT insn.
9479          */
9480         tcg_gen_not_i64(tcg_rd, tcg_rn);
9481         break;
9482     case 0x7: /* SQABS, SQNEG */
9483         if (u) {
9484             gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn);
9485         } else {
9486             gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn);
9487         }
9488         break;
9489     case 0xa: /* CMLT */
9490         cond = TCG_COND_LT;
9491     do_cmop:
9492         /* 64 bit integer comparison against zero, result is test ? -1 : 0. */
9493         tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0));
9494         break;
9495     case 0x8: /* CMGT, CMGE */
9496         cond = u ? TCG_COND_GE : TCG_COND_GT;
9497         goto do_cmop;
9498     case 0x9: /* CMEQ, CMLE */
9499         cond = u ? TCG_COND_LE : TCG_COND_EQ;
9500         goto do_cmop;
9501     case 0xb: /* ABS, NEG */
9502         if (u) {
9503             tcg_gen_neg_i64(tcg_rd, tcg_rn);
9504         } else {
9505             tcg_gen_abs_i64(tcg_rd, tcg_rn);
9506         }
9507         break;
9508     case 0x2f: /* FABS */
9509         gen_helper_vfp_absd(tcg_rd, tcg_rn);
9510         break;
9511     case 0x6f: /* FNEG */
9512         gen_helper_vfp_negd(tcg_rd, tcg_rn);
9513         break;
9514     case 0x7f: /* FSQRT */
9515         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env);
9516         break;
9517     case 0x1a: /* FCVTNS */
9518     case 0x1b: /* FCVTMS */
9519     case 0x1c: /* FCVTAS */
9520     case 0x3a: /* FCVTPS */
9521     case 0x3b: /* FCVTZS */
9522         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9523         break;
9524     case 0x5a: /* FCVTNU */
9525     case 0x5b: /* FCVTMU */
9526     case 0x5c: /* FCVTAU */
9527     case 0x7a: /* FCVTPU */
9528     case 0x7b: /* FCVTZU */
9529         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9530         break;
9531     case 0x18: /* FRINTN */
9532     case 0x19: /* FRINTM */
9533     case 0x38: /* FRINTP */
9534     case 0x39: /* FRINTZ */
9535     case 0x58: /* FRINTA */
9536     case 0x79: /* FRINTI */
9537         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9538         break;
9539     case 0x59: /* FRINTX */
9540         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9541         break;
9542     case 0x1e: /* FRINT32Z */
9543     case 0x5e: /* FRINT32X */
9544         gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9545         break;
9546     case 0x1f: /* FRINT64Z */
9547     case 0x5f: /* FRINT64X */
9548         gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9549         break;
9550     default:
9551         g_assert_not_reached();
9552     }
9553 }
9554
9555 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9556                                    bool is_scalar, bool is_u, bool is_q,
9557                                    int size, int rn, int rd)
9558 {
9559     bool is_double = (size == MO_64);
9560     TCGv_ptr fpst;
9561
9562     if (!fp_access_check(s)) {
9563         return;
9564     }
9565
9566     fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9567
9568     if (is_double) {
9569         TCGv_i64 tcg_op = tcg_temp_new_i64();
9570         TCGv_i64 tcg_zero = tcg_constant_i64(0);
9571         TCGv_i64 tcg_res = tcg_temp_new_i64();
9572         NeonGenTwoDoubleOpFn *genfn;
9573         bool swap = false;
9574         int pass;
9575
9576         switch (opcode) {
9577         case 0x2e: /* FCMLT (zero) */
9578             swap = true;
9579             /* fallthrough */
9580         case 0x2c: /* FCMGT (zero) */
9581             genfn = gen_helper_neon_cgt_f64;
9582             break;
9583         case 0x2d: /* FCMEQ (zero) */
9584             genfn = gen_helper_neon_ceq_f64;
9585             break;
9586         case 0x6d: /* FCMLE (zero) */
9587             swap = true;
9588             /* fall through */
9589         case 0x6c: /* FCMGE (zero) */
9590             genfn = gen_helper_neon_cge_f64;
9591             break;
9592         default:
9593             g_assert_not_reached();
9594         }
9595
9596         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9597             read_vec_element(s, tcg_op, rn, pass, MO_64);
9598             if (swap) {
9599                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9600             } else {
9601                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9602             }
9603             write_vec_element(s, tcg_res, rd, pass, MO_64);
9604         }
9605
9606         clear_vec_high(s, !is_scalar, rd);
9607     } else {
9608         TCGv_i32 tcg_op = tcg_temp_new_i32();
9609         TCGv_i32 tcg_zero = tcg_constant_i32(0);
9610         TCGv_i32 tcg_res = tcg_temp_new_i32();
9611         NeonGenTwoSingleOpFn *genfn;
9612         bool swap = false;
9613         int pass, maxpasses;
9614
9615         if (size == MO_16) {
9616             switch (opcode) {
9617             case 0x2e: /* FCMLT (zero) */
9618                 swap = true;
9619                 /* fall through */
9620             case 0x2c: /* FCMGT (zero) */
9621                 genfn = gen_helper_advsimd_cgt_f16;
9622                 break;
9623             case 0x2d: /* FCMEQ (zero) */
9624                 genfn = gen_helper_advsimd_ceq_f16;
9625                 break;
9626             case 0x6d: /* FCMLE (zero) */
9627                 swap = true;
9628                 /* fall through */
9629             case 0x6c: /* FCMGE (zero) */
9630                 genfn = gen_helper_advsimd_cge_f16;
9631                 break;
9632             default:
9633                 g_assert_not_reached();
9634             }
9635         } else {
9636             switch (opcode) {
9637             case 0x2e: /* FCMLT (zero) */
9638                 swap = true;
9639                 /* fall through */
9640             case 0x2c: /* FCMGT (zero) */
9641                 genfn = gen_helper_neon_cgt_f32;
9642                 break;
9643             case 0x2d: /* FCMEQ (zero) */
9644                 genfn = gen_helper_neon_ceq_f32;
9645                 break;
9646             case 0x6d: /* FCMLE (zero) */
9647                 swap = true;
9648                 /* fall through */
9649             case 0x6c: /* FCMGE (zero) */
9650                 genfn = gen_helper_neon_cge_f32;
9651                 break;
9652             default:
9653                 g_assert_not_reached();
9654             }
9655         }
9656
9657         if (is_scalar) {
9658             maxpasses = 1;
9659         } else {
9660             int vector_size = 8 << is_q;
9661             maxpasses = vector_size >> size;
9662         }
9663
9664         for (pass = 0; pass < maxpasses; pass++) {
9665             read_vec_element_i32(s, tcg_op, rn, pass, size);
9666             if (swap) {
9667                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9668             } else {
9669                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9670             }
9671             if (is_scalar) {
9672                 write_fp_sreg(s, rd, tcg_res);
9673             } else {
9674                 write_vec_element_i32(s, tcg_res, rd, pass, size);
9675             }
9676         }
9677
9678         if (!is_scalar) {
9679             clear_vec_high(s, is_q, rd);
9680         }
9681     }
9682 }
9683
9684 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9685                                     bool is_scalar, bool is_u, bool is_q,
9686                                     int size, int rn, int rd)
9687 {
9688     bool is_double = (size == 3);
9689     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9690
9691     if (is_double) {
9692         TCGv_i64 tcg_op = tcg_temp_new_i64();
9693         TCGv_i64 tcg_res = tcg_temp_new_i64();
9694         int pass;
9695
9696         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9697             read_vec_element(s, tcg_op, rn, pass, MO_64);
9698             switch (opcode) {
9699             case 0x3d: /* FRECPE */
9700                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9701                 break;
9702             case 0x3f: /* FRECPX */
9703                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9704                 break;
9705             case 0x7d: /* FRSQRTE */
9706                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9707                 break;
9708             default:
9709                 g_assert_not_reached();
9710             }
9711             write_vec_element(s, tcg_res, rd, pass, MO_64);
9712         }
9713         clear_vec_high(s, !is_scalar, rd);
9714     } else {
9715         TCGv_i32 tcg_op = tcg_temp_new_i32();
9716         TCGv_i32 tcg_res = tcg_temp_new_i32();
9717         int pass, maxpasses;
9718
9719         if (is_scalar) {
9720             maxpasses = 1;
9721         } else {
9722             maxpasses = is_q ? 4 : 2;
9723         }
9724
9725         for (pass = 0; pass < maxpasses; pass++) {
9726             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9727
9728             switch (opcode) {
9729             case 0x3c: /* URECPE */
9730                 gen_helper_recpe_u32(tcg_res, tcg_op);
9731                 break;
9732             case 0x3d: /* FRECPE */
9733                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9734                 break;
9735             case 0x3f: /* FRECPX */
9736                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9737                 break;
9738             case 0x7d: /* FRSQRTE */
9739                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9740                 break;
9741             default:
9742                 g_assert_not_reached();
9743             }
9744
9745             if (is_scalar) {
9746                 write_fp_sreg(s, rd, tcg_res);
9747             } else {
9748                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9749             }
9750         }
9751         if (!is_scalar) {
9752             clear_vec_high(s, is_q, rd);
9753         }
9754     }
9755 }
9756
9757 static void handle_2misc_narrow(DisasContext *s, bool scalar,
9758                                 int opcode, bool u, bool is_q,
9759                                 int size, int rn, int rd)
9760 {
9761     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9762      * in the source becomes a size element in the destination).
9763      */
9764     int pass;
9765     TCGv_i32 tcg_res[2];
9766     int destelt = is_q ? 2 : 0;
9767     int passes = scalar ? 1 : 2;
9768
9769     if (scalar) {
9770         tcg_res[1] = tcg_constant_i32(0);
9771     }
9772
9773     for (pass = 0; pass < passes; pass++) {
9774         TCGv_i64 tcg_op = tcg_temp_new_i64();
9775         NeonGenNarrowFn *genfn = NULL;
9776         NeonGenNarrowEnvFn *genenvfn = NULL;
9777
9778         if (scalar) {
9779             read_vec_element(s, tcg_op, rn, pass, size + 1);
9780         } else {
9781             read_vec_element(s, tcg_op, rn, pass, MO_64);
9782         }
9783         tcg_res[pass] = tcg_temp_new_i32();
9784
9785         switch (opcode) {
9786         case 0x12: /* XTN, SQXTUN */
9787         {
9788             static NeonGenNarrowFn * const xtnfns[3] = {
9789                 gen_helper_neon_narrow_u8,
9790                 gen_helper_neon_narrow_u16,
9791                 tcg_gen_extrl_i64_i32,
9792             };
9793             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9794                 gen_helper_neon_unarrow_sat8,
9795                 gen_helper_neon_unarrow_sat16,
9796                 gen_helper_neon_unarrow_sat32,
9797             };
9798             if (u) {
9799                 genenvfn = sqxtunfns[size];
9800             } else {
9801                 genfn = xtnfns[size];
9802             }
9803             break;
9804         }
9805         case 0x14: /* SQXTN, UQXTN */
9806         {
9807             static NeonGenNarrowEnvFn * const fns[3][2] = {
9808                 { gen_helper_neon_narrow_sat_s8,
9809                   gen_helper_neon_narrow_sat_u8 },
9810                 { gen_helper_neon_narrow_sat_s16,
9811                   gen_helper_neon_narrow_sat_u16 },
9812                 { gen_helper_neon_narrow_sat_s32,
9813                   gen_helper_neon_narrow_sat_u32 },
9814             };
9815             genenvfn = fns[size][u];
9816             break;
9817         }
9818         case 0x16: /* FCVTN, FCVTN2 */
9819             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9820             if (size == 2) {
9821                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, tcg_env);
9822             } else {
9823                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9824                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9825                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9826                 TCGv_i32 ahp = get_ahp_flag();
9827
9828                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9829                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9830                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9831                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9832             }
9833             break;
9834         case 0x36: /* BFCVTN, BFCVTN2 */
9835             {
9836                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9837                 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
9838             }
9839             break;
9840         case 0x56:  /* FCVTXN, FCVTXN2 */
9841             /* 64 bit to 32 bit float conversion
9842              * with von Neumann rounding (round to odd)
9843              */
9844             assert(size == 2);
9845             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, tcg_env);
9846             break;
9847         default:
9848             g_assert_not_reached();
9849         }
9850
9851         if (genfn) {
9852             genfn(tcg_res[pass], tcg_op);
9853         } else if (genenvfn) {
9854             genenvfn(tcg_res[pass], tcg_env, tcg_op);
9855         }
9856     }
9857
9858     for (pass = 0; pass < 2; pass++) {
9859         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9860     }
9861     clear_vec_high(s, is_q, rd);
9862 }
9863
9864 /* Remaining saturating accumulating ops */
9865 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9866                                 bool is_q, int size, int rn, int rd)
9867 {
9868     bool is_double = (size == 3);
9869
9870     if (is_double) {
9871         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9872         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9873         int pass;
9874
9875         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9876             read_vec_element(s, tcg_rn, rn, pass, MO_64);
9877             read_vec_element(s, tcg_rd, rd, pass, MO_64);
9878
9879             if (is_u) { /* USQADD */
9880                 gen_helper_neon_uqadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9881             } else { /* SUQADD */
9882                 gen_helper_neon_sqadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9883             }
9884             write_vec_element(s, tcg_rd, rd, pass, MO_64);
9885         }
9886         clear_vec_high(s, !is_scalar, rd);
9887     } else {
9888         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9889         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9890         int pass, maxpasses;
9891
9892         if (is_scalar) {
9893             maxpasses = 1;
9894         } else {
9895             maxpasses = is_q ? 4 : 2;
9896         }
9897
9898         for (pass = 0; pass < maxpasses; pass++) {
9899             if (is_scalar) {
9900                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9901                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9902             } else {
9903                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9904                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9905             }
9906
9907             if (is_u) { /* USQADD */
9908                 switch (size) {
9909                 case 0:
9910                     gen_helper_neon_uqadd_s8(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9911                     break;
9912                 case 1:
9913                     gen_helper_neon_uqadd_s16(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9914                     break;
9915                 case 2:
9916                     gen_helper_neon_uqadd_s32(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9917                     break;
9918                 default:
9919                     g_assert_not_reached();
9920                 }
9921             } else { /* SUQADD */
9922                 switch (size) {
9923                 case 0:
9924                     gen_helper_neon_sqadd_u8(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9925                     break;
9926                 case 1:
9927                     gen_helper_neon_sqadd_u16(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9928                     break;
9929                 case 2:
9930                     gen_helper_neon_sqadd_u32(tcg_rd, tcg_env, tcg_rn, tcg_rd);
9931                     break;
9932                 default:
9933                     g_assert_not_reached();
9934                 }
9935             }
9936
9937             if (is_scalar) {
9938                 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
9939             }
9940             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9941         }
9942         clear_vec_high(s, is_q, rd);
9943     }
9944 }
9945
/* AdvSIMD scalar two reg misc
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 *
 * Decode one instruction of this group.  The first switch rejects
 * unallocated encodings and hands several operations straight to their
 * dedicated handlers (which then return).  Whatever breaks out of that
 * switch is emitted by the common code at the bottom: either through
 * handle_2misc_64() for 64-bit operands, or inline for the 32-bit and
 * narrower cases (SQABS/SQNEG and the float-to-integer conversions).
 */
static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 12, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    bool is_fcvt = false;
    /* rmode is only assigned, and only read, when is_fcvt is set. */
    int rmode;
    TCGv_i32 tcg_rmode;
    TCGv_ptr tcg_fpstatus;

    switch (opcode) {
    case 0x3: /* USQADD / SUQADD */
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_satacc(s, true, u, false, size, rn, rd);
        return;
    case 0x7: /* SQABS / SQNEG */
        /* No size restriction is applied here. */
        break;
    case 0xa: /* CMLT */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x8: /* CMGT, CMGE */
    case 0x9: /* CMEQ, CMLE */
    case 0xb: /* ABS, NEG */
        /* Only the 64-bit element size is accepted for these. */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x12: /* SQXTUN */
        if (!u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x14: /* SQXTN, UQXTN */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
        return;
    case 0xc ... 0xf:
    case 0x16 ... 0x1d:
    case 0x1f:
        /* Floating point: U, size[1] and opcode indicate operation;
         * size[0] indicates single or double precision.
         */
        /* Fold size[1] into bit 5 and U into bit 6 of the opcode. */
        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
        /* From here on size is a MemOp-style value: 2 (single), 3 (double). */
        size = extract32(size, 0, 1) ? 3 : 2;
        switch (opcode) {
        case 0x2c: /* FCMGT (zero) */
        case 0x2d: /* FCMEQ (zero) */
        case 0x2e: /* FCMLT (zero) */
        case 0x6c: /* FCMGE (zero) */
        case 0x6d: /* FCMLE (zero) */
            /*
             * No fp_access_check() on this path in this function.
             * NOTE(review): presumably the handler does it - confirm.
             */
            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1d: /* SCVTF */
        case 0x5d: /* UCVTF */
        {
            bool is_signed = (opcode == 0x1d);
            if (!fp_access_check(s)) {
                return;
            }
            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
            return;
        }
        case 0x3d: /* FRECPE */
        case 0x3f: /* FRECPX */
        case 0x7d: /* FRSQRTE */
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
            is_fcvt = true;
            /* Rounding mode: bit 0 from opcode[5], bit 1 from opcode[0]. */
            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
            break;
        case 0x1c: /* FCVTAS */
        case 0x5c: /* FCVTAU */
            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
            is_fcvt = true;
            rmode = FPROUNDING_TIEAWAY;
            break;
        case 0x56: /* FCVTXN, FCVTXN2 */
            /* Only the double-precision source (size == 3) is allocated. */
            if (size == 2) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
            return;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    /* Common emission path for everything that broke out of the switch. */
    if (!fp_access_check(s)) {
        return;
    }

    if (is_fcvt) {
        /* Install the rounding mode chosen above; restored at the end. */
        tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
        tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
    } else {
        tcg_fpstatus = NULL;
        tcg_rmode = NULL;
    }

    if (size == 3) {
        /* 64-bit operand: the shared 64-bit handler does the operation. */
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rd = tcg_temp_new_i64();

        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
        write_fp_dreg(s, rd, tcg_rd);
    } else {
        /* Operand of 32 bits or fewer, handled in 32-bit temporaries. */
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rd = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);

        switch (opcode) {
        case 0x7: /* SQABS, SQNEG */
        {
            NeonGenOneOpEnvFn *genfn;
            /* Indexed by [size][u]: u == 0 is SQABS, u == 1 is SQNEG. */
            static NeonGenOneOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
            };
            genfn = fns[size][u];
            genfn(tcg_rd, tcg_env, tcg_rn);
            break;
        }
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x1c: /* FCVTAS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
            /* Signed conversion; the constant 0 is the shift argument. */
            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
                                 tcg_fpstatus);
            break;
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x5c: /* FCVTAU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
            /* Unsigned conversion; the constant 0 is the shift argument. */
            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
                                 tcg_fpstatus);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_sreg(s, rd, tcg_rd);
    }

    if (is_fcvt) {
        gen_restore_rmode(tcg_rmode, tcg_fpstatus);
    }
}
10138
10139 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
10140 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
10141                                  int immh, int immb, int opcode, int rn, int rd)
10142 {
10143     int size = 32 - clz32(immh) - 1;
10144     int immhb = immh << 3 | immb;
10145     int shift = 2 * (8 << size) - immhb;
10146     GVecGen2iFn *gvec_fn;
10147
10148     if (extract32(immh, 3, 1) && !is_q) {
10149         unallocated_encoding(s);
10150         return;
10151     }
10152     tcg_debug_assert(size <= 3);
10153
10154     if (!fp_access_check(s)) {
10155         return;
10156     }
10157
10158     switch (opcode) {
10159     case 0x02: /* SSRA / USRA (accumulate) */
10160         gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
10161         break;
10162
10163     case 0x08: /* SRI */
10164         gvec_fn = gen_gvec_sri;
10165         break;
10166
10167     case 0x00: /* SSHR / USHR */
10168         if (is_u) {
10169             if (shift == 8 << size) {
10170                 /* Shift count the same size as element size produces zero.  */
10171                 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
10172                                      is_q ? 16 : 8, vec_full_reg_size(s), 0);
10173                 return;
10174             }
10175             gvec_fn = tcg_gen_gvec_shri;
10176         } else {
10177             /* Shift count the same size as element size produces all sign.  */
10178             if (shift == 8 << size) {
10179                 shift -= 1;
10180             }
10181             gvec_fn = tcg_gen_gvec_sari;
10182         }
10183         break;
10184
10185     case 0x04: /* SRSHR / URSHR (rounding) */
10186         gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
10187         break;
10188
10189     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10190         gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
10191         break;
10192
10193     default:
10194         g_assert_not_reached();
10195     }
10196
10197     gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
10198 }
10199
10200 /* SHL/SLI - Vector shift left */
10201 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10202                                  int immh, int immb, int opcode, int rn, int rd)
10203 {
10204     int size = 32 - clz32(immh) - 1;
10205     int immhb = immh << 3 | immb;
10206     int shift = immhb - (8 << size);
10207
10208     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10209     assert(size >= 0 && size <= 3);
10210
10211     if (extract32(immh, 3, 1) && !is_q) {
10212         unallocated_encoding(s);
10213         return;
10214     }
10215
10216     if (!fp_access_check(s)) {
10217         return;
10218     }
10219
10220     if (insert) {
10221         gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10222     } else {
10223         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10224     }
10225 }
10226
10227 /* USHLL/SHLL - Vector shift left with widening */
10228 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10229                                  int immh, int immb, int opcode, int rn, int rd)
10230 {
10231     int size = 32 - clz32(immh) - 1;
10232     int immhb = immh << 3 | immb;
10233     int shift = immhb - (8 << size);
10234     int dsize = 64;
10235     int esize = 8 << size;
10236     int elements = dsize/esize;
10237     TCGv_i64 tcg_rn = tcg_temp_new_i64();
10238     TCGv_i64 tcg_rd = tcg_temp_new_i64();
10239     int i;
10240
10241     if (size >= 3) {
10242         unallocated_encoding(s);
10243         return;
10244     }
10245
10246     if (!fp_access_check(s)) {
10247         return;
10248     }
10249
10250     /* For the LL variants the store is larger than the load,
10251      * so if rd == rn we would overwrite parts of our input.
10252      * So load everything right now and use shifts in the main loop.
10253      */
10254     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10255
10256     for (i = 0; i < elements; i++) {
10257         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
10258         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10259         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10260         write_vec_element(s, tcg_rd, rd, i, size + 1);
10261     }
10262 }
10263
10264 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10265 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10266                                  int immh, int immb, int opcode, int rn, int rd)
10267 {
10268     int immhb = immh << 3 | immb;
10269     int size = 32 - clz32(immh) - 1;
10270     int dsize = 64;
10271     int esize = 8 << size;
10272     int elements = dsize/esize;
10273     int shift = (2 * esize) - immhb;
10274     bool round = extract32(opcode, 0, 1);
10275     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10276     TCGv_i64 tcg_round;
10277     int i;
10278
10279     if (extract32(immh, 3, 1)) {
10280         unallocated_encoding(s);
10281         return;
10282     }
10283
10284     if (!fp_access_check(s)) {
10285         return;
10286     }
10287
10288     tcg_rn = tcg_temp_new_i64();
10289     tcg_rd = tcg_temp_new_i64();
10290     tcg_final = tcg_temp_new_i64();
10291     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10292
10293     if (round) {
10294         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
10295     } else {
10296         tcg_round = NULL;
10297     }
10298
10299     for (i = 0; i < elements; i++) {
10300         read_vec_element(s, tcg_rn, rn, i, size+1);
10301         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10302                                 false, true, size+1, shift);
10303
10304         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10305     }
10306
10307     if (!is_q) {
10308         write_vec_element(s, tcg_final, rd, 0, MO_64);
10309     } else {
10310         write_vec_element(s, tcg_final, rd, 1, MO_64);
10311     }
10312
10313     clear_vec_high(s, is_q, rd);
10314 }
10315
10316
10317 /* AdvSIMD shift by immediate
10318  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10319  * +---+---+---+-------------+------+------+--------+---+------+------+
10320  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10321  * +---+---+---+-------------+------+------+--------+---+------+------+
10322  */
10323 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10324 {
10325     int rd = extract32(insn, 0, 5);
10326     int rn = extract32(insn, 5, 5);
10327     int opcode = extract32(insn, 11, 5);
10328     int immb = extract32(insn, 16, 3);
10329     int immh = extract32(insn, 19, 4);
10330     bool is_u = extract32(insn, 29, 1);
10331     bool is_q = extract32(insn, 30, 1);
10332
10333     /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10334     assert(immh != 0);
10335
10336     switch (opcode) {
10337     case 0x08: /* SRI */
10338         if (!is_u) {
10339             unallocated_encoding(s);
10340             return;
10341         }
10342         /* fall through */
10343     case 0x00: /* SSHR / USHR */
10344     case 0x02: /* SSRA / USRA (accumulate) */
10345     case 0x04: /* SRSHR / URSHR (rounding) */
10346     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10347         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10348         break;
10349     case 0x0a: /* SHL / SLI */
10350         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10351         break;
10352     case 0x10: /* SHRN */
10353     case 0x11: /* RSHRN / SQRSHRUN */
10354         if (is_u) {
10355             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10356                                    opcode, rn, rd);
10357         } else {
10358             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10359         }
10360         break;
10361     case 0x12: /* SQSHRN / UQSHRN */
10362     case 0x13: /* SQRSHRN / UQRSHRN */
10363         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10364                                opcode, rn, rd);
10365         break;
10366     case 0x14: /* SSHLL / USHLL */
10367         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10368         break;
10369     case 0x1c: /* SCVTF / UCVTF */
10370         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10371                                      opcode, rn, rd);
10372         break;
10373     case 0xc: /* SQSHLU */
10374         if (!is_u) {
10375             unallocated_encoding(s);
10376             return;
10377         }
10378         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10379         break;
10380     case 0xe: /* SQSHL, UQSHL */
10381         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10382         break;
10383     case 0x1f: /* FCVTZS/ FCVTZU */
10384         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10385         return;
10386     default:
10387         unallocated_encoding(s);
10388         return;
10389     }
10390 }
10391
10392 /* Generate code to do a "long" addition or subtraction, ie one done in
10393  * TCGv_i64 on vector lanes twice the width specified by size.
10394  */
10395 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10396                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10397 {
10398     static NeonGenTwo64OpFn * const fns[3][2] = {
10399         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10400         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10401         { tcg_gen_add_i64, tcg_gen_sub_i64 },
10402     };
10403     NeonGenTwo64OpFn *genfn;
10404     assert(size < 3);
10405
10406     genfn = fns[size][is_sub];
10407     genfn(tcg_res, tcg_op1, tcg_op2);
10408 }
10409
10410 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10411                                 int opcode, int rd, int rn, int rm)
10412 {
10413     /* 3-reg-different widening insns: 64 x 64 -> 128 */
10414     TCGv_i64 tcg_res[2];
10415     int pass, accop;
10416
10417     tcg_res[0] = tcg_temp_new_i64();
10418     tcg_res[1] = tcg_temp_new_i64();
10419
10420     /* Does this op do an adding accumulate, a subtracting accumulate,
10421      * or no accumulate at all?
10422      */
10423     switch (opcode) {
10424     case 5:
10425     case 8:
10426     case 9:
10427         accop = 1;
10428         break;
10429     case 10:
10430     case 11:
10431         accop = -1;
10432         break;
10433     default:
10434         accop = 0;
10435         break;
10436     }
10437
10438     if (accop != 0) {
10439         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10440         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10441     }
10442
10443     /* size == 2 means two 32x32->64 operations; this is worth special
10444      * casing because we can generally handle it inline.
10445      */
10446     if (size == 2) {
10447         for (pass = 0; pass < 2; pass++) {
10448             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10449             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10450             TCGv_i64 tcg_passres;
10451             MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10452
10453             int elt = pass + is_q * 2;
10454
10455             read_vec_element(s, tcg_op1, rn, elt, memop);
10456             read_vec_element(s, tcg_op2, rm, elt, memop);
10457
10458             if (accop == 0) {
10459                 tcg_passres = tcg_res[pass];
10460             } else {
10461                 tcg_passres = tcg_temp_new_i64();
10462             }
10463
10464             switch (opcode) {
10465             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10466                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10467                 break;
10468             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10469                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10470                 break;
10471             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10472             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10473             {
10474                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10475                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10476
10477                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10478                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
10479                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10480                                     tcg_passres,
10481                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10482                 break;
10483             }
10484             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10485             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10486             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10487                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10488                 break;
10489             case 9: /* SQDMLAL, SQDMLAL2 */
10490             case 11: /* SQDMLSL, SQDMLSL2 */
10491             case 13: /* SQDMULL, SQDMULL2 */
10492                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10493                 gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env,
10494                                                   tcg_passres, tcg_passres);
10495                 break;
10496             default:
10497                 g_assert_not_reached();
10498             }
10499
10500             if (opcode == 9 || opcode == 11) {
10501                 /* saturating accumulate ops */
10502                 if (accop < 0) {
10503                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10504                 }
10505                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env,
10506                                                   tcg_res[pass], tcg_passres);
10507             } else if (accop > 0) {
10508                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10509             } else if (accop < 0) {
10510                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10511             }
10512         }
10513     } else {
10514         /* size 0 or 1, generally helper functions */
10515         for (pass = 0; pass < 2; pass++) {
10516             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10517             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10518             TCGv_i64 tcg_passres;
10519             int elt = pass + is_q * 2;
10520
10521             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10522             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10523
10524             if (accop == 0) {
10525                 tcg_passres = tcg_res[pass];
10526             } else {
10527                 tcg_passres = tcg_temp_new_i64();
10528             }
10529
10530             switch (opcode) {
10531             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10532             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10533             {
10534                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10535                 static NeonGenWidenFn * const widenfns[2][2] = {
10536                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10537                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10538                 };
10539                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10540
10541                 widenfn(tcg_op2_64, tcg_op2);
10542                 widenfn(tcg_passres, tcg_op1);
10543                 gen_neon_addl(size, (opcode == 2), tcg_passres,
10544                               tcg_passres, tcg_op2_64);
10545                 break;
10546             }
10547             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10548             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10549                 if (size == 0) {
10550                     if (is_u) {
10551                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10552                     } else {
10553                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10554                     }
10555                 } else {
10556                     if (is_u) {
10557                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10558                     } else {
10559                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10560                     }
10561                 }
10562                 break;
10563             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10564             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10565             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10566                 if (size == 0) {
10567                     if (is_u) {
10568                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10569                     } else {
10570                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10571                     }
10572                 } else {
10573                     if (is_u) {
10574                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10575                     } else {
10576                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10577                     }
10578                 }
10579                 break;
10580             case 9: /* SQDMLAL, SQDMLAL2 */
10581             case 11: /* SQDMLSL, SQDMLSL2 */
10582             case 13: /* SQDMULL, SQDMULL2 */
10583                 assert(size == 1);
10584                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10585                 gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env,
10586                                                   tcg_passres, tcg_passres);
10587                 break;
10588             default:
10589                 g_assert_not_reached();
10590             }
10591
10592             if (accop != 0) {
10593                 if (opcode == 9 || opcode == 11) {
10594                     /* saturating accumulate ops */
10595                     if (accop < 0) {
10596                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10597                     }
10598                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env,
10599                                                       tcg_res[pass],
10600                                                       tcg_passres);
10601                 } else {
10602                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
10603                                   tcg_res[pass], tcg_passres);
10604                 }
10605             }
10606         }
10607     }
10608
10609     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10610     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10611 }
10612
10613 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10614                             int opcode, int rd, int rn, int rm)
10615 {
10616     TCGv_i64 tcg_res[2];
10617     int part = is_q ? 2 : 0;
10618     int pass;
10619
10620     for (pass = 0; pass < 2; pass++) {
10621         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10622         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10623         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10624         static NeonGenWidenFn * const widenfns[3][2] = {
10625             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10626             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10627             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10628         };
10629         NeonGenWidenFn *widenfn = widenfns[size][is_u];
10630
10631         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10632         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10633         widenfn(tcg_op2_wide, tcg_op2);
10634         tcg_res[pass] = tcg_temp_new_i64();
10635         gen_neon_addl(size, (opcode == 3),
10636                       tcg_res[pass], tcg_op1, tcg_op2_wide);
10637     }
10638
10639     for (pass = 0; pass < 2; pass++) {
10640         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10641     }
10642 }
10643
10644 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10645 {
10646     tcg_gen_addi_i64(in, in, 1U << 31);
10647     tcg_gen_extrh_i64_i32(res, in);
10648 }
10649
10650 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10651                                  int opcode, int rd, int rn, int rm)
10652 {
10653     TCGv_i32 tcg_res[2];
10654     int part = is_q ? 2 : 0;
10655     int pass;
10656
10657     for (pass = 0; pass < 2; pass++) {
10658         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10659         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10660         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10661         static NeonGenNarrowFn * const narrowfns[3][2] = {
10662             { gen_helper_neon_narrow_high_u8,
10663               gen_helper_neon_narrow_round_high_u8 },
10664             { gen_helper_neon_narrow_high_u16,
10665               gen_helper_neon_narrow_round_high_u16 },
10666             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10667         };
10668         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10669
10670         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10671         read_vec_element(s, tcg_op2, rm, pass, MO_64);
10672
10673         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10674
10675         tcg_res[pass] = tcg_temp_new_i32();
10676         gennarrow(tcg_res[pass], tcg_wideres);
10677     }
10678
10679     for (pass = 0; pass < 2; pass++) {
10680         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10681     }
10682     clear_vec_high(s, is_q, rd);
10683 }
10684
10685 /* AdvSIMD three different
10686  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10687  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10688  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10689  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10690  */
10691 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10692 {
10693     /* Instructions in this group fall into three basic classes
10694      * (in each case with the operation working on each element in
10695      * the input vectors):
10696      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10697      *     128 bit input)
10698      * (2) wide 64 x 128 -> 128
10699      * (3) narrowing 128 x 128 -> 64
10700      * Here we do initial decode, catch unallocated cases and
10701      * dispatch to separate functions for each class.
10702      */
10703     int is_q = extract32(insn, 30, 1);
10704     int is_u = extract32(insn, 29, 1);
10705     int size = extract32(insn, 22, 2);
10706     int opcode = extract32(insn, 12, 4);
10707     int rm = extract32(insn, 16, 5);
10708     int rn = extract32(insn, 5, 5);
10709     int rd = extract32(insn, 0, 5);
10710
10711     switch (opcode) {
10712     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10713     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10714         /* 64 x 128 -> 128 */
10715         if (size == 3) {
10716             unallocated_encoding(s);
10717             return;
10718         }
10719         if (!fp_access_check(s)) {
10720             return;
10721         }
10722         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10723         break;
10724     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10725     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10726         /* 128 x 128 -> 64 */
10727         if (size == 3) {
10728             unallocated_encoding(s);
10729             return;
10730         }
10731         if (!fp_access_check(s)) {
10732             return;
10733         }
10734         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10735         break;
10736     case 14: /* PMULL, PMULL2 */
10737         if (is_u) {
10738             unallocated_encoding(s);
10739             return;
10740         }
10741         switch (size) {
10742         case 0: /* PMULL.P8 */
10743             if (!fp_access_check(s)) {
10744                 return;
10745             }
10746             /* The Q field specifies lo/hi half input for this insn.  */
10747             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10748                              gen_helper_neon_pmull_h);
10749             break;
10750
10751         case 3: /* PMULL.P64 */
10752             if (!dc_isar_feature(aa64_pmull, s)) {
10753                 unallocated_encoding(s);
10754                 return;
10755             }
10756             if (!fp_access_check(s)) {
10757                 return;
10758             }
10759             /* The Q field specifies lo/hi half input for this insn.  */
10760             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10761                              gen_helper_gvec_pmull_q);
10762             break;
10763
10764         default:
10765             unallocated_encoding(s);
10766             break;
10767         }
10768         return;
10769     case 9: /* SQDMLAL, SQDMLAL2 */
10770     case 11: /* SQDMLSL, SQDMLSL2 */
10771     case 13: /* SQDMULL, SQDMULL2 */
10772         if (is_u || size == 0) {
10773             unallocated_encoding(s);
10774             return;
10775         }
10776         /* fall through */
10777     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10778     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10779     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10780     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10781     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10782     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10783     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10784         /* 64 x 64 -> 128 */
10785         if (size == 3) {
10786             unallocated_encoding(s);
10787             return;
10788         }
10789         if (!fp_access_check(s)) {
10790             return;
10791         }
10792
10793         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10794         break;
10795     default:
10796         /* opcode 15 not allocated */
10797         unallocated_encoding(s);
10798         break;
10799     }
10800 }
10801
10802 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10803 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10804 {
10805     int rd = extract32(insn, 0, 5);
10806     int rn = extract32(insn, 5, 5);
10807     int rm = extract32(insn, 16, 5);
10808     int size = extract32(insn, 22, 2);
10809     bool is_u = extract32(insn, 29, 1);
10810     bool is_q = extract32(insn, 30, 1);
10811
10812     if (!fp_access_check(s)) {
10813         return;
10814     }
10815
10816     switch (size + 4 * is_u) {
10817     case 0: /* AND */
10818         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10819         return;
10820     case 1: /* BIC */
10821         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10822         return;
10823     case 2: /* ORR */
10824         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10825         return;
10826     case 3: /* ORN */
10827         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10828         return;
10829     case 4: /* EOR */
10830         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10831         return;
10832
10833     case 5: /* BSL bitwise select */
10834         gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10835         return;
10836     case 6: /* BIT, bitwise insert if true */
10837         gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10838         return;
10839     case 7: /* BIF, bitwise insert if false */
10840         gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10841         return;
10842
10843     default:
10844         g_assert_not_reached();
10845     }
10846 }
10847
10848 /* Pairwise op subgroup of C3.6.16.
10849  *
10850  * This is called directly or via the handle_3same_float for float pairwise
10851  * operations where the opcode and size are calculated differently.
10852  */
10853 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10854                                    int size, int rn, int rm, int rd)
10855 {
10856     TCGv_ptr fpst;
10857     int pass;
10858
10859     /* Floating point operations need fpst */
10860     if (opcode >= 0x58) {
10861         fpst = fpstatus_ptr(FPST_FPCR);
10862     } else {
10863         fpst = NULL;
10864     }
10865
10866     if (!fp_access_check(s)) {
10867         return;
10868     }
10869
10870     /* These operations work on the concatenated rm:rn, with each pair of
10871      * adjacent elements being operated on to produce an element in the result.
10872      */
10873     if (size == 3) {
10874         TCGv_i64 tcg_res[2];
10875
10876         for (pass = 0; pass < 2; pass++) {
10877             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10878             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10879             int passreg = (pass == 0) ? rn : rm;
10880
10881             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10882             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10883             tcg_res[pass] = tcg_temp_new_i64();
10884
10885             switch (opcode) {
10886             case 0x17: /* ADDP */
10887                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10888                 break;
10889             case 0x58: /* FMAXNMP */
10890                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10891                 break;
10892             case 0x5a: /* FADDP */
10893                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10894                 break;
10895             case 0x5e: /* FMAXP */
10896                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10897                 break;
10898             case 0x78: /* FMINNMP */
10899                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10900                 break;
10901             case 0x7e: /* FMINP */
10902                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10903                 break;
10904             default:
10905                 g_assert_not_reached();
10906             }
10907         }
10908
10909         for (pass = 0; pass < 2; pass++) {
10910             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10911         }
10912     } else {
10913         int maxpass = is_q ? 4 : 2;
10914         TCGv_i32 tcg_res[4];
10915
10916         for (pass = 0; pass < maxpass; pass++) {
10917             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10918             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10919             NeonGenTwoOpFn *genfn = NULL;
10920             int passreg = pass < (maxpass / 2) ? rn : rm;
10921             int passelt = (is_q && (pass & 1)) ? 2 : 0;
10922
10923             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10924             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10925             tcg_res[pass] = tcg_temp_new_i32();
10926
10927             switch (opcode) {
10928             case 0x17: /* ADDP */
10929             {
10930                 static NeonGenTwoOpFn * const fns[3] = {
10931                     gen_helper_neon_padd_u8,
10932                     gen_helper_neon_padd_u16,
10933                     tcg_gen_add_i32,
10934                 };
10935                 genfn = fns[size];
10936                 break;
10937             }
10938             case 0x14: /* SMAXP, UMAXP */
10939             {
10940                 static NeonGenTwoOpFn * const fns[3][2] = {
10941                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10942                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10943                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10944                 };
10945                 genfn = fns[size][u];
10946                 break;
10947             }
10948             case 0x15: /* SMINP, UMINP */
10949             {
10950                 static NeonGenTwoOpFn * const fns[3][2] = {
10951                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10952                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10953                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10954                 };
10955                 genfn = fns[size][u];
10956                 break;
10957             }
10958             /* The FP operations are all on single floats (32 bit) */
10959             case 0x58: /* FMAXNMP */
10960                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10961                 break;
10962             case 0x5a: /* FADDP */
10963                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10964                 break;
10965             case 0x5e: /* FMAXP */
10966                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10967                 break;
10968             case 0x78: /* FMINNMP */
10969                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10970                 break;
10971             case 0x7e: /* FMINP */
10972                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10973                 break;
10974             default:
10975                 g_assert_not_reached();
10976             }
10977
10978             /* FP ops called directly, otherwise call now */
10979             if (genfn) {
10980                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
10981             }
10982         }
10983
10984         for (pass = 0; pass < maxpass; pass++) {
10985             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10986         }
10987         clear_vec_high(s, is_q, rd);
10988     }
10989 }
10990
10991 /* Floating point op subgroup of C3.6.16. */
10992 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
10993 {
10994     /* For floating point ops, the U, size[1] and opcode bits
10995      * together indicate the operation. size[0] indicates single
10996      * or double.
10997      */
10998     int fpopcode = extract32(insn, 11, 5)
10999         | (extract32(insn, 23, 1) << 5)
11000         | (extract32(insn, 29, 1) << 6);
11001     int is_q = extract32(insn, 30, 1);
11002     int size = extract32(insn, 22, 1);
11003     int rm = extract32(insn, 16, 5);
11004     int rn = extract32(insn, 5, 5);
11005     int rd = extract32(insn, 0, 5);
11006
11007     int datasize = is_q ? 128 : 64;
11008     int esize = 32 << size;
11009     int elements = datasize / esize;
11010
11011     if (size == 1 && !is_q) {
11012         unallocated_encoding(s);
11013         return;
11014     }
11015
11016     switch (fpopcode) {
11017     case 0x58: /* FMAXNMP */
11018     case 0x5a: /* FADDP */
11019     case 0x5e: /* FMAXP */
11020     case 0x78: /* FMINNMP */
11021     case 0x7e: /* FMINP */
11022         if (size && !is_q) {
11023             unallocated_encoding(s);
11024             return;
11025         }
11026         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
11027                                rn, rm, rd);
11028         return;
11029     case 0x1b: /* FMULX */
11030     case 0x1f: /* FRECPS */
11031     case 0x3f: /* FRSQRTS */
11032     case 0x5d: /* FACGE */
11033     case 0x7d: /* FACGT */
11034     case 0x19: /* FMLA */
11035     case 0x39: /* FMLS */
11036     case 0x18: /* FMAXNM */
11037     case 0x1a: /* FADD */
11038     case 0x1c: /* FCMEQ */
11039     case 0x1e: /* FMAX */
11040     case 0x38: /* FMINNM */
11041     case 0x3a: /* FSUB */
11042     case 0x3e: /* FMIN */
11043     case 0x5b: /* FMUL */
11044     case 0x5c: /* FCMGE */
11045     case 0x5f: /* FDIV */
11046     case 0x7a: /* FABD */
11047     case 0x7c: /* FCMGT */
11048         if (!fp_access_check(s)) {
11049             return;
11050         }
11051         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
11052         return;
11053
11054     case 0x1d: /* FMLAL  */
11055     case 0x3d: /* FMLSL  */
11056     case 0x59: /* FMLAL2 */
11057     case 0x79: /* FMLSL2 */
11058         if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
11059             unallocated_encoding(s);
11060             return;
11061         }
11062         if (fp_access_check(s)) {
11063             int is_s = extract32(insn, 23, 1);
11064             int is_2 = extract32(insn, 29, 1);
11065             int data = (is_2 << 1) | is_s;
11066             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
11067                                vec_full_reg_offset(s, rn),
11068                                vec_full_reg_offset(s, rm), tcg_env,
11069                                is_q ? 16 : 8, vec_full_reg_size(s),
11070                                data, gen_helper_gvec_fmlal_a64);
11071         }
11072         return;
11073
11074     default:
11075         unallocated_encoding(s);
11076         return;
11077     }
11078 }
11079
11080 /* Integer op subgroup of C3.6.16. */
11081 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
11082 {
11083     int is_q = extract32(insn, 30, 1);
11084     int u = extract32(insn, 29, 1);
11085     int size = extract32(insn, 22, 2);
11086     int opcode = extract32(insn, 11, 5);
11087     int rm = extract32(insn, 16, 5);
11088     int rn = extract32(insn, 5, 5);
11089     int rd = extract32(insn, 0, 5);
11090     int pass;
11091     TCGCond cond;
11092
11093     switch (opcode) {
11094     case 0x13: /* MUL, PMUL */
11095         if (u && size != 0) {
11096             unallocated_encoding(s);
11097             return;
11098         }
11099         /* fall through */
11100     case 0x0: /* SHADD, UHADD */
11101     case 0x2: /* SRHADD, URHADD */
11102     case 0x4: /* SHSUB, UHSUB */
11103     case 0xc: /* SMAX, UMAX */
11104     case 0xd: /* SMIN, UMIN */
11105     case 0xe: /* SABD, UABD */
11106     case 0xf: /* SABA, UABA */
11107     case 0x12: /* MLA, MLS */
11108         if (size == 3) {
11109             unallocated_encoding(s);
11110             return;
11111         }
11112         break;
11113     case 0x16: /* SQDMULH, SQRDMULH */
11114         if (size == 0 || size == 3) {
11115             unallocated_encoding(s);
11116             return;
11117         }
11118         break;
11119     default:
11120         if (size == 3 && !is_q) {
11121             unallocated_encoding(s);
11122             return;
11123         }
11124         break;
11125     }
11126
11127     if (!fp_access_check(s)) {
11128         return;
11129     }
11130
11131     switch (opcode) {
11132     case 0x01: /* SQADD, UQADD */
11133         if (u) {
11134             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
11135         } else {
11136             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
11137         }
11138         return;
11139     case 0x05: /* SQSUB, UQSUB */
11140         if (u) {
11141             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
11142         } else {
11143             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
11144         }
11145         return;
11146     case 0x08: /* SSHL, USHL */
11147         if (u) {
11148             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
11149         } else {
11150             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
11151         }
11152         return;
11153     case 0x0c: /* SMAX, UMAX */
11154         if (u) {
11155             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11156         } else {
11157             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11158         }
11159         return;
11160     case 0x0d: /* SMIN, UMIN */
11161         if (u) {
11162             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11163         } else {
11164             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11165         }
11166         return;
11167     case 0xe: /* SABD, UABD */
11168         if (u) {
11169             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
11170         } else {
11171             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
11172         }
11173         return;
11174     case 0xf: /* SABA, UABA */
11175         if (u) {
11176             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
11177         } else {
11178             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
11179         }
11180         return;
11181     case 0x10: /* ADD, SUB */
11182         if (u) {
11183             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11184         } else {
11185             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11186         }
11187         return;
11188     case 0x13: /* MUL, PMUL */
11189         if (!u) { /* MUL */
11190             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11191         } else {  /* PMUL */
11192             gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
11193         }
11194         return;
11195     case 0x12: /* MLA, MLS */
11196         if (u) {
11197             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
11198         } else {
11199             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
11200         }
11201         return;
11202     case 0x16: /* SQDMULH, SQRDMULH */
11203         {
11204             static gen_helper_gvec_3_ptr * const fns[2][2] = {
11205                 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
11206                 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
11207             };
11208             gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
11209         }
11210         return;
11211     case 0x11:
11212         if (!u) { /* CMTST */
11213             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
11214             return;
11215         }
11216         /* else CMEQ */
11217         cond = TCG_COND_EQ;
11218         goto do_gvec_cmp;
11219     case 0x06: /* CMGT, CMHI */
11220         cond = u ? TCG_COND_GTU : TCG_COND_GT;
11221         goto do_gvec_cmp;
11222     case 0x07: /* CMGE, CMHS */
11223         cond = u ? TCG_COND_GEU : TCG_COND_GE;
11224     do_gvec_cmp:
11225         tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11226                          vec_full_reg_offset(s, rn),
11227                          vec_full_reg_offset(s, rm),
11228                          is_q ? 16 : 8, vec_full_reg_size(s));
11229         return;
11230     }
11231
11232     if (size == 3) {
11233         assert(is_q);
11234         for (pass = 0; pass < 2; pass++) {
11235             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11236             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11237             TCGv_i64 tcg_res = tcg_temp_new_i64();
11238
11239             read_vec_element(s, tcg_op1, rn, pass, MO_64);
11240             read_vec_element(s, tcg_op2, rm, pass, MO_64);
11241
11242             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11243
11244             write_vec_element(s, tcg_res, rd, pass, MO_64);
11245         }
11246     } else {
11247         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11248             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11249             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11250             TCGv_i32 tcg_res = tcg_temp_new_i32();
11251             NeonGenTwoOpFn *genfn = NULL;
11252             NeonGenTwoOpEnvFn *genenvfn = NULL;
11253
11254             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11255             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11256
11257             switch (opcode) {
11258             case 0x0: /* SHADD, UHADD */
11259             {
11260                 static NeonGenTwoOpFn * const fns[3][2] = {
11261                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11262                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11263                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11264                 };
11265                 genfn = fns[size][u];
11266                 break;
11267             }
11268             case 0x2: /* SRHADD, URHADD */
11269             {
11270                 static NeonGenTwoOpFn * const fns[3][2] = {
11271                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11272                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11273                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11274                 };
11275                 genfn = fns[size][u];
11276                 break;
11277             }
11278             case 0x4: /* SHSUB, UHSUB */
11279             {
11280                 static NeonGenTwoOpFn * const fns[3][2] = {
11281                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11282                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11283                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11284                 };
11285                 genfn = fns[size][u];
11286                 break;
11287             }
11288             case 0x9: /* SQSHL, UQSHL */
11289             {
11290                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11291                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11292                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11293                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11294                 };
11295                 genenvfn = fns[size][u];
11296                 break;
11297             }
11298             case 0xa: /* SRSHL, URSHL */
11299             {
11300                 static NeonGenTwoOpFn * const fns[3][2] = {
11301                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11302                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11303                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11304                 };
11305                 genfn = fns[size][u];
11306                 break;
11307             }
11308             case 0xb: /* SQRSHL, UQRSHL */
11309             {
11310                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11311                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11312                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11313                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11314                 };
11315                 genenvfn = fns[size][u];
11316                 break;
11317             }
11318             default:
11319                 g_assert_not_reached();
11320             }
11321
11322             if (genenvfn) {
11323                 genenvfn(tcg_res, tcg_env, tcg_op1, tcg_op2);
11324             } else {
11325                 genfn(tcg_res, tcg_op1, tcg_op2);
11326             }
11327
11328             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11329         }
11330     }
11331     clear_vec_high(s, is_q, rd);
11332 }
11333
11334 /* AdvSIMD three same
11335  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11336  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11337  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11338  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11339  */
11340 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11341 {
11342     int opcode = extract32(insn, 11, 5);
11343
11344     switch (opcode) {
11345     case 0x3: /* logic ops */
11346         disas_simd_3same_logic(s, insn);
11347         break;
11348     case 0x17: /* ADDP */
11349     case 0x14: /* SMAXP, UMAXP */
11350     case 0x15: /* SMINP, UMINP */
11351     {
11352         /* Pairwise operations */
11353         int is_q = extract32(insn, 30, 1);
11354         int u = extract32(insn, 29, 1);
11355         int size = extract32(insn, 22, 2);
11356         int rm = extract32(insn, 16, 5);
11357         int rn = extract32(insn, 5, 5);
11358         int rd = extract32(insn, 0, 5);
11359         if (opcode == 0x17) {
11360             if (u || (size == 3 && !is_q)) {
11361                 unallocated_encoding(s);
11362                 return;
11363             }
11364         } else {
11365             if (size == 3) {
11366                 unallocated_encoding(s);
11367                 return;
11368             }
11369         }
11370         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11371         break;
11372     }
11373     case 0x18 ... 0x31:
11374         /* floating point ops, sz[1] and U are part of opcode */
11375         disas_simd_3same_float(s, insn);
11376         break;
11377     default:
11378         disas_simd_3same_int(s, insn);
11379         break;
11380     }
11381 }
11382
11383 /*
11384  * Advanced SIMD three same (ARMv8.2 FP16 variants)
11385  *
11386  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11387  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11388  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11389  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11390  *
11391  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11392  * (register), FACGE, FABD, FCMGT (register) and FACGT.
11393  *
11394  */
11395 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11396 {
11397     int opcode = extract32(insn, 11, 3);
11398     int u = extract32(insn, 29, 1);
11399     int a = extract32(insn, 23, 1);
11400     int is_q = extract32(insn, 30, 1);
11401     int rm = extract32(insn, 16, 5);
11402     int rn = extract32(insn, 5, 5);
11403     int rd = extract32(insn, 0, 5);
11404     /*
11405      * For these floating point ops, the U, a and opcode bits
11406      * together indicate the operation.
11407      */
11408     int fpopcode = opcode | (a << 3) | (u << 4);
11409     int datasize = is_q ? 128 : 64;
11410     int elements = datasize / 16;
11411     bool pairwise;
11412     TCGv_ptr fpst;
11413     int pass;
11414
11415     switch (fpopcode) {
11416     case 0x0: /* FMAXNM */
11417     case 0x1: /* FMLA */
11418     case 0x2: /* FADD */
11419     case 0x3: /* FMULX */
11420     case 0x4: /* FCMEQ */
11421     case 0x6: /* FMAX */
11422     case 0x7: /* FRECPS */
11423     case 0x8: /* FMINNM */
11424     case 0x9: /* FMLS */
11425     case 0xa: /* FSUB */
11426     case 0xe: /* FMIN */
11427     case 0xf: /* FRSQRTS */
11428     case 0x13: /* FMUL */
11429     case 0x14: /* FCMGE */
11430     case 0x15: /* FACGE */
11431     case 0x17: /* FDIV */
11432     case 0x1a: /* FABD */
11433     case 0x1c: /* FCMGT */
11434     case 0x1d: /* FACGT */
11435         pairwise = false;
11436         break;
11437     case 0x10: /* FMAXNMP */
11438     case 0x12: /* FADDP */
11439     case 0x16: /* FMAXP */
11440     case 0x18: /* FMINNMP */
11441     case 0x1e: /* FMINP */
11442         pairwise = true;
11443         break;
11444     default:
11445         unallocated_encoding(s);
11446         return;
11447     }
11448
11449     if (!dc_isar_feature(aa64_fp16, s)) {
11450         unallocated_encoding(s);
11451         return;
11452     }
11453
11454     if (!fp_access_check(s)) {
11455         return;
11456     }
11457
11458     fpst = fpstatus_ptr(FPST_FPCR_F16);
11459
11460     if (pairwise) {
11461         int maxpass = is_q ? 8 : 4;
11462         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11463         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11464         TCGv_i32 tcg_res[8];
11465
11466         for (pass = 0; pass < maxpass; pass++) {
11467             int passreg = pass < (maxpass / 2) ? rn : rm;
11468             int passelt = (pass << 1) & (maxpass - 1);
11469
11470             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11471             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11472             tcg_res[pass] = tcg_temp_new_i32();
11473
11474             switch (fpopcode) {
11475             case 0x10: /* FMAXNMP */
11476                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11477                                            fpst);
11478                 break;
11479             case 0x12: /* FADDP */
11480                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11481                 break;
11482             case 0x16: /* FMAXP */
11483                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11484                 break;
11485             case 0x18: /* FMINNMP */
11486                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11487                                            fpst);
11488                 break;
11489             case 0x1e: /* FMINP */
11490                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11491                 break;
11492             default:
11493                 g_assert_not_reached();
11494             }
11495         }
11496
11497         for (pass = 0; pass < maxpass; pass++) {
11498             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11499         }
11500     } else {
11501         for (pass = 0; pass < elements; pass++) {
11502             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11503             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11504             TCGv_i32 tcg_res = tcg_temp_new_i32();
11505
11506             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11507             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11508
11509             switch (fpopcode) {
11510             case 0x0: /* FMAXNM */
11511                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11512                 break;
11513             case 0x1: /* FMLA */
11514                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11515                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11516                                            fpst);
11517                 break;
11518             case 0x2: /* FADD */
11519                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11520                 break;
11521             case 0x3: /* FMULX */
11522                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11523                 break;
11524             case 0x4: /* FCMEQ */
11525                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11526                 break;
11527             case 0x6: /* FMAX */
11528                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11529                 break;
11530             case 0x7: /* FRECPS */
11531                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11532                 break;
11533             case 0x8: /* FMINNM */
11534                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11535                 break;
11536             case 0x9: /* FMLS */
11537                 /* As usual for ARM, separate negation for fused multiply-add */
11538                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11539                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11540                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11541                                            fpst);
11542                 break;
11543             case 0xa: /* FSUB */
11544                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11545                 break;
11546             case 0xe: /* FMIN */
11547                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11548                 break;
11549             case 0xf: /* FRSQRTS */
11550                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11551                 break;
11552             case 0x13: /* FMUL */
11553                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11554                 break;
11555             case 0x14: /* FCMGE */
11556                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11557                 break;
11558             case 0x15: /* FACGE */
11559                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11560                 break;
11561             case 0x17: /* FDIV */
11562                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11563                 break;
11564             case 0x1a: /* FABD */
11565                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11566                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11567                 break;
11568             case 0x1c: /* FCMGT */
11569                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11570                 break;
11571             case 0x1d: /* FACGT */
11572                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11573                 break;
11574             default:
11575                 g_assert_not_reached();
11576             }
11577
11578             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11579         }
11580     }
11581
11582     clear_vec_high(s, is_q, rd);
11583 }
11584
11585 /* AdvSIMD three same extra: check the encoding, then emit the operation.
11586  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11587  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11588  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11589  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11590  * Rejected encodings and absent features end in unallocated_encoding(). */
11591 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11592 {
11593     int rd = extract32(insn, 0, 5);
11594     int rn = extract32(insn, 5, 5);
11595     int opcode = extract32(insn, 11, 4);
11596     int rm = extract32(insn, 16, 5);
11597     int size = extract32(insn, 22, 2);
11598     bool u = extract32(insn, 29, 1);
11599     bool is_q = extract32(insn, 30, 1);
11600     bool feature; /* is the ISA feature this insn needs present? */
11601     int rot; /* FCMLA/FCADD rotation selector, from opcode bits */
11602
11603     switch (u * 16 + opcode) { /* U becomes bit 4 of the case value */
11604     case 0x10: /* SQRDMLAH (vector) */
11605     case 0x11: /* SQRDMLSH (vector) */
11606         if (size != 1 && size != 2) {
11607             unallocated_encoding(s);
11608             return;
11609         }
11610         feature = dc_isar_feature(aa64_rdm, s);
11611         break;
11612     case 0x02: /* SDOT (vector) */
11613     case 0x12: /* UDOT (vector) */
11614         if (size != MO_32) {
11615             unallocated_encoding(s);
11616             return;
11617         }
11618         feature = dc_isar_feature(aa64_dp, s);
11619         break;
11620     case 0x03: /* USDOT */
11621         if (size != MO_32) {
11622             unallocated_encoding(s);
11623             return;
11624         }
11625         feature = dc_isar_feature(aa64_i8mm, s);
11626         break;
11627     case 0x04: /* SMMLA */
11628     case 0x14: /* UMMLA */
11629     case 0x05: /* USMMLA */
11630         if (!is_q || size != MO_32) { /* Q=0 is not accepted */
11631             unallocated_encoding(s);
11632             return;
11633         }
11634         feature = dc_isar_feature(aa64_i8mm, s);
11635         break;
11636     case 0x18: /* FCMLA, #0 */
11637     case 0x19: /* FCMLA, #90 */
11638     case 0x1a: /* FCMLA, #180 */
11639     case 0x1b: /* FCMLA, #270 */
11640     case 0x1c: /* FCADD, #90 */
11641     case 0x1e: /* FCADD, #270 */
11642         if (size == 0
11643             || (size == 1 && !dc_isar_feature(aa64_fp16, s))
11644             || (size == 3 && !is_q)) { /* size 3 needs Q=1 */
11645             unallocated_encoding(s);
11646             return;
11647         }
11648         feature = dc_isar_feature(aa64_fcma, s);
11649         break;
11650     case 0x1d: /* BFMMLA */
11651         if (size != MO_16 || !is_q) {
11652             unallocated_encoding(s);
11653             return;
11654         }
11655         feature = dc_isar_feature(aa64_bf16, s);
11656         break;
11657     case 0x1f: /* BFDOT or BFMLAL{B,T}, told apart by size */
11658         switch (size) {
11659         case 1: /* BFDOT */
11660         case 3: /* BFMLAL{B,T} */
11661             feature = dc_isar_feature(aa64_bf16, s);
11662             break;
11663         default:
11664             unallocated_encoding(s);
11665             return;
11666         }
11667         break;
11668     default:
11669         unallocated_encoding(s);
11670         return;
11671     }
11672     if (!feature) {
11673         unallocated_encoding(s);
11674         return;
11675     }
11676     if (!fp_access_check(s)) { /* nothing further is emitted here on failure */
11677         return;
11678     }
11679
11680     switch (opcode) { /* invalid U/opcode/size combinations were rejected above */
11681     case 0x0: /* SQRDMLAH (vector) */
11682         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
11683         return;
11684
11685     case 0x1: /* SQRDMLSH (vector) */
11686         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
11687         return;
11688
11689     case 0x2: /* SDOT / UDOT */
11690         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
11691                          u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11692         return;
11693
11694     case 0x3: /* USDOT */
11695         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
11696         return;
11697
11698     case 0x04: /* SMMLA, UMMLA */
11699         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, /* Q=1 was required above */
11700                          u ? gen_helper_gvec_ummla_b
11701                          : gen_helper_gvec_smmla_b);
11702         return;
11703     case 0x05: /* USMMLA */
11704         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
11705         return;
11706
11707     case 0x8: /* FCMLA, #0 */
11708     case 0x9: /* FCMLA, #90 */
11709     case 0xa: /* FCMLA, #180 */
11710     case 0xb: /* FCMLA, #270 */
11711         rot = extract32(opcode, 0, 2); /* 0..3 for #0, #90, #180, #270 */
11712         switch (size) {
11713         case 1:
11714             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
11715                               gen_helper_gvec_fcmlah);
11716             break;
11717         case 2:
11718             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11719                               gen_helper_gvec_fcmlas);
11720             break;
11721         case 3:
11722             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11723                               gen_helper_gvec_fcmlad);
11724             break;
11725         default:
11726             g_assert_not_reached();
11727         }
11728         return;
11729
11730     case 0xc: /* FCADD, #90 */
11731     case 0xe: /* FCADD, #270 */
11732         rot = extract32(opcode, 1, 1); /* 0 for #90 (0xc), 1 for #270 (0xe) */
11733         switch (size) {
11734         case 1: /* the (size == 1) argument below is true only in this case */
11735             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11736                               gen_helper_gvec_fcaddh);
11737             break;
11738         case 2:
11739             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11740                               gen_helper_gvec_fcadds);
11741             break;
11742         case 3:
11743             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11744                               gen_helper_gvec_fcaddd);
11745             break;
11746         default:
11747             g_assert_not_reached();
11748         }
11749         return;
11750
11751     case 0xd: /* BFMMLA */
11752         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
11753         return;
11754     case 0xf:
11755         switch (size) {
11756         case 1: /* BFDOT */
11757             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
11758             break;
11759         case 3: /* BFMLAL{B,T}; is_q sits where FCMLA passes rot (B/T select? TODO confirm) */
11760             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
11761                               gen_helper_gvec_bfmlal);
11762             break;
11763         default:
11764             g_assert_not_reached();
11765         }
11766         return;
11767
11768     default:
11769         g_assert_not_reached();
11770     }
11771 }
11772
11773 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11774                                   int size, int rn, int rd)
11775 {
11776     /*
11777      * FCVTL, the only widening 2-reg-misc op: each source element becomes
11778      * a floating-point element twice as wide.  With is_q set, reading
11779      * starts at element 2 (32-bit) or 4 (16-bit) of rn.  opcode is unused.
11780      */
11781     int i;
11782
11783     if (size != 3) {
11784         /* 16 -> 32 bit fp conversion, four elements */
11785         const int first = is_q ? 4 : 0;
11786         TCGv_i32 out[4];
11787         TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
11788         TCGv_i32 ahp_flag = get_ahp_flag();
11789
11790         for (i = 0; i < 4; i++) {
11791             /* One temp serves as both the loaded input and the result. */
11792             out[i] = tcg_temp_new_i32();
11793             read_vec_element_i32(s, out[i], rn, first + i, MO_16);
11794             gen_helper_vfp_fcvt_f16_to_f32(out[i], out[i], status, ahp_flag);
11795         }
11796         for (i = 0; i < 4; i++) {
11797             write_vec_element_i32(s, out[i], rd, i, MO_32);
11798         }
11799     } else {
11800         /* 32 -> 64 bit fp conversion, two elements */
11801         const int first = is_q ? 2 : 0;
11802         TCGv_i64 out[2];
11803
11804         for (i = 0; i < 2; i++) {
11805             TCGv_i32 in = tcg_temp_new_i32();
11806
11807             out[i] = tcg_temp_new_i64();
11808             read_vec_element_i32(s, in, rn, first + i, MO_32);
11809             gen_helper_vfp_fcvtds(out[i], in, tcg_env);
11810         }
11811         for (i = 0; i < 2; i++) {
11812             write_vec_element(s, out[i], rd, i, MO_64);
11813         }
11814     }
11815 }
11816
11817 static void handle_rev(DisasContext *s, int opcode, bool u,
11818                        bool is_q, int size, int rn, int rd)
11819 { /* Reverse the order of (8 << size)-bit elements of rn within groups. */
11820     int op = (opcode << 1) | u; /* opcode and U merged into one selector */
11821     int opsz = op + size;
11822     int grp_size = 3 - opsz; /* log2 of the number of elements per group */
11823     int dsize = is_q ? 128 : 64; /* number of vector bits processed */
11824     int i;
11825
11826     if (opsz >= 3) { /* grp_size <= 0: no group of two or more elements */
11827         unallocated_encoding(s);
11828         return;
11829     }
11830
11831     if (!fp_access_check(s)) {
11832         return;
11833     }
11834
11835     if (size == 0) {
11836         /* Special case bytes, use bswap op on each group of elements */
11837         int groups = dsize / (8 << grp_size); /* each group is 8 << grp_size bits */
11838
11839         for (i = 0; i < groups; i++) {
11840             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11841
11842             read_vec_element(s, tcg_tmp, rn, i, grp_size); /* grp_size as MemOp */
11843             switch (grp_size) {
11844             case MO_16:
11845                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11846                 break;
11847             case MO_32:
11848                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11849                 break;
11850             case MO_64:
11851                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11852                 break;
11853             default:
11854                 g_assert_not_reached();
11855             }
11856             write_vec_element(s, tcg_tmp, rd, i, grp_size);
11857         }
11858         clear_vec_high(s, is_q, rd);
11859     } else {
11860         int revmask = (1 << grp_size) - 1; /* index ^ revmask mirrors it in its group */
11861         int esize = 8 << size; /* element width in bits */
11862         int elements = dsize / esize;
11863         TCGv_i64 tcg_rn = tcg_temp_new_i64();
11864         TCGv_i64 tcg_rd[2]; /* the two 64-bit halves of the result */
11865
11866         for (i = 0; i < 2; i++) {
11867             tcg_rd[i] = tcg_temp_new_i64();
11868             tcg_gen_movi_i64(tcg_rd[i], 0); /* stays zero unless deposited into */
11869         }
11870
11871         for (i = 0; i < elements; i++) {
11872             int e_rev = (i & 0xf) ^ revmask; /* destination element index */
11873             int w = (e_rev * esize) / 64; /* which 64-bit half it lands in */
11874             int o = (e_rev * esize) % 64; /* bit offset inside that half */
11875
11876             read_vec_element(s, tcg_rn, rn, i, size);
11877             tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
11878         }
11879
11880         for (i = 0; i < 2; i++) {
11881             write_vec_element(s, tcg_rd[i], rd, i, MO_64);
11882         }
11883         clear_vec_high(s, true, rd); /* true even if !is_q: both halves written above */
11884     }
11885 }
11886
11887 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11888                                   bool is_q, int size, int rn, int rd)
11889 {
11890     /*
11891      * Pairwise long add from 2-reg-misc: SADDLP, UADDLP, SADALP, UADALP.
11892      * Adjacent input elements of rn are summed into one element of twice
11893      * the width; opcode 0x6 also adds in the previous contents of rd.
11894      *
11895      *  u      - unsigned variant: zero- rather than sign-extending inputs
11896      *  is_q   - process two 64-bit chunks of rn instead of one
11897      *  size   - log2 of the input element size in bytes (0, 1 or 2)
11898      *  rn, rd - source / destination vector register numbers
11899      *
11900      * Nothing is written to rd until every result has been generated.
11901      */
11902     static NeonGenOne64OpFn * const addlp[2][2] = {
11903         { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11904         { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
11905     };
11906     const bool accumulate = opcode == 0x6;
11907     const int npasses = is_q ? 2 : 1;
11908     TCGv_i64 sum[2];
11909     int i;
11910
11911     for (i = 0; i < npasses; i++) {
11912         if (size == 2) {
11913             /* 32 + 32 -> 64: extend while loading, then plain adds */
11914             MemOp ext = u ? size : size + MO_SIGN;
11915             TCGv_i64 lo = tcg_temp_new_i64();
11916             TCGv_i64 hi = tcg_temp_new_i64();
11917
11918             sum[i] = tcg_temp_new_i64();
11919             read_vec_element(s, lo, rn, 2 * i, ext);
11920             read_vec_element(s, hi, rn, 2 * i + 1, ext);
11921             tcg_gen_add_i64(sum[i], lo, hi);
11922
11923             if (accumulate) {
11924                 read_vec_element(s, lo, rd, i, MO_64);
11925                 tcg_gen_add_i64(sum[i], sum[i], lo);
11926             }
11927         } else {
11928             /* 8- or 16-bit inputs: helpers take a whole 64-bit chunk */
11929             TCGv_i64 chunk = tcg_temp_new_i64();
11930
11931             sum[i] = tcg_temp_new_i64();
11932             read_vec_element(s, chunk, rn, i, MO_64);
11933             addlp[size][u](sum[i], chunk);
11934
11935             if (accumulate) {
11936                 read_vec_element(s, chunk, rd, i, MO_64);
11937                 if (size != 0) {
11938                     gen_helper_neon_addl_u32(sum[i], sum[i], chunk);
11939                 } else {
11940                     gen_helper_neon_addl_u16(sum[i], sum[i], chunk);
11941                 }
11942             }
11943         }
11944     }
11945
11946     /* Without Q only sum[0] was produced; element 1 of rd gets zero. */
11947     if (!is_q) {
11948         sum[1] = tcg_constant_i64(0);
11949     }
11950     write_vec_element(s, sum[0], rd, 0, MO_64);
11951     write_vec_element(s, sum[1], rd, 1, MO_64);
11952 }
11953
11954 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11955 {
11956     /* SHLL, SHLL2: widen each 32-bit input chunk, then shift it left */
11957     static NeonGenWidenFn * const widen_by_size[3] = {
11958         gen_helper_neon_widen_u8,
11959         gen_helper_neon_widen_u16,
11960         tcg_gen_extu_i32_i64,
11961     };
11962     NeonGenWidenFn *widen = widen_by_size[size];
11963     const int first = is_q ? 2 : 0; /* is_q: start at 32-bit element 2 */
11964     const int shift = 8 << size;    /* input element width in bits */
11965     TCGv_i64 wide[2];
11966     int i;
11967
11968     for (i = 0; i < 2; i++) {
11969         TCGv_i32 narrow = tcg_temp_new_i32();
11970
11971         read_vec_element_i32(s, narrow, rn, first + i, MO_32);
11972         wide[i] = tcg_temp_new_i64();
11973         widen(wide[i], narrow);
11974         tcg_gen_shli_i64(wide[i], wide[i], shift);
11975     }
11976     /* Both results exist before rd is written. */
11977     write_vec_element(s, wide[0], rd, 0, MO_64);
11978     write_vec_element(s, wide[1], rd, 1, MO_64);
11979 }
11980
11981 /* AdvSIMD two reg misc
11982  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11983  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11984  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11985  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11986  */
11987 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11988 {
11989     int size = extract32(insn, 22, 2);
11990     int opcode = extract32(insn, 12, 5);
11991     bool u = extract32(insn, 29, 1);
11992     bool is_q = extract32(insn, 30, 1);
11993     int rn = extract32(insn, 5, 5);
11994     int rd = extract32(insn, 0, 5);
11995     bool need_fpstatus = false;
11996     int rmode = -1;
11997     TCGv_i32 tcg_rmode;
11998     TCGv_ptr tcg_fpstatus;
11999
12000     switch (opcode) {
12001     case 0x0: /* REV64, REV32 */
12002     case 0x1: /* REV16 */
12003         handle_rev(s, opcode, u, is_q, size, rn, rd);
12004         return;
12005     case 0x5: /* CNT, NOT, RBIT */
12006         if (u && size == 0) {
12007             /* NOT */
12008             break;
12009         } else if (u && size == 1) {
12010             /* RBIT */
12011             break;
12012         } else if (!u && size == 0) {
12013             /* CNT */
12014             break;
12015         }
12016         unallocated_encoding(s);
12017         return;
12018     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
12019     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
12020         if (size == 3) {
12021             unallocated_encoding(s);
12022             return;
12023         }
12024         if (!fp_access_check(s)) {
12025             return;
12026         }
12027
12028         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
12029         return;
12030     case 0x4: /* CLS, CLZ */
12031         if (size == 3) {
12032             unallocated_encoding(s);
12033             return;
12034         }
12035         break;
12036     case 0x2: /* SADDLP, UADDLP */
12037     case 0x6: /* SADALP, UADALP */
12038         if (size == 3) {
12039             unallocated_encoding(s);
12040             return;
12041         }
12042         if (!fp_access_check(s)) {
12043             return;
12044         }
12045         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
12046         return;
12047     case 0x13: /* SHLL, SHLL2 */
12048         if (u == 0 || size == 3) {
12049             unallocated_encoding(s);
12050             return;
12051         }
12052         if (!fp_access_check(s)) {
12053             return;
12054         }
12055         handle_shll(s, is_q, size, rn, rd);
12056         return;
12057     case 0xa: /* CMLT */
12058         if (u == 1) {
12059             unallocated_encoding(s);
12060             return;
12061         }
12062         /* fall through */
12063     case 0x8: /* CMGT, CMGE */
12064     case 0x9: /* CMEQ, CMLE */
12065     case 0xb: /* ABS, NEG */
12066         if (size == 3 && !is_q) {
12067             unallocated_encoding(s);
12068             return;
12069         }
12070         break;
12071     case 0x3: /* SUQADD, USQADD */
12072         if (size == 3 && !is_q) {
12073             unallocated_encoding(s);
12074             return;
12075         }
12076         if (!fp_access_check(s)) {
12077             return;
12078         }
12079         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12080         return;
12081     case 0x7: /* SQABS, SQNEG */
12082         if (size == 3 && !is_q) {
12083             unallocated_encoding(s);
12084             return;
12085         }
12086         break;
12087     case 0xc ... 0xf:
12088     case 0x16 ... 0x1f:
12089     {
12090         /* Floating point: U, size[1] and opcode indicate operation;
12091          * size[0] indicates single or double precision.
12092          */
12093         int is_double = extract32(size, 0, 1);
12094         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12095         size = is_double ? 3 : 2;
12096         switch (opcode) {
12097         case 0x2f: /* FABS */
12098         case 0x6f: /* FNEG */
12099             if (size == 3 && !is_q) {
12100                 unallocated_encoding(s);
12101                 return;
12102             }
12103             break;
12104         case 0x1d: /* SCVTF */
12105         case 0x5d: /* UCVTF */
12106         {
12107             bool is_signed = (opcode == 0x1d) ? true : false;
12108             int elements = is_double ? 2 : is_q ? 4 : 2;
12109             if (is_double && !is_q) {
12110                 unallocated_encoding(s);
12111                 return;
12112             }
12113             if (!fp_access_check(s)) {
12114                 return;
12115             }
12116             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12117             return;
12118         }
12119         case 0x2c: /* FCMGT (zero) */
12120         case 0x2d: /* FCMEQ (zero) */
12121         case 0x2e: /* FCMLT (zero) */
12122         case 0x6c: /* FCMGE (zero) */
12123         case 0x6d: /* FCMLE (zero) */
12124             if (size == 3 && !is_q) {
12125                 unallocated_encoding(s);
12126                 return;
12127             }
12128             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12129             return;
12130         case 0x7f: /* FSQRT */
12131             if (size == 3 && !is_q) {
12132                 unallocated_encoding(s);
12133                 return;
12134             }
12135             break;
12136         case 0x1a: /* FCVTNS */
12137         case 0x1b: /* FCVTMS */
12138         case 0x3a: /* FCVTPS */
12139         case 0x3b: /* FCVTZS */
12140         case 0x5a: /* FCVTNU */
12141         case 0x5b: /* FCVTMU */
12142         case 0x7a: /* FCVTPU */
12143         case 0x7b: /* FCVTZU */
12144             need_fpstatus = true;
12145             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12146             if (size == 3 && !is_q) {
12147                 unallocated_encoding(s);
12148                 return;
12149             }
12150             break;
12151         case 0x5c: /* FCVTAU */
12152         case 0x1c: /* FCVTAS */
12153             need_fpstatus = true;
12154             rmode = FPROUNDING_TIEAWAY;
12155             if (size == 3 && !is_q) {
12156                 unallocated_encoding(s);
12157                 return;
12158             }
12159             break;
12160         case 0x3c: /* URECPE */
12161             if (size == 3) {
12162                 unallocated_encoding(s);
12163                 return;
12164             }
12165             /* fall through */
12166         case 0x3d: /* FRECPE */
12167         case 0x7d: /* FRSQRTE */
12168             if (size == 3 && !is_q) {
12169                 unallocated_encoding(s);
12170                 return;
12171             }
12172             if (!fp_access_check(s)) {
12173                 return;
12174             }
12175             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12176             return;
12177         case 0x56: /* FCVTXN, FCVTXN2 */
12178             if (size == 2) {
12179                 unallocated_encoding(s);
12180                 return;
12181             }
12182             /* fall through */
12183         case 0x16: /* FCVTN, FCVTN2 */
12184             /* handle_2misc_narrow does a 2*size -> size operation, but these
12185              * instructions encode the source size rather than dest size.
12186              */
12187             if (!fp_access_check(s)) {
12188                 return;
12189             }
12190             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12191             return;
12192         case 0x36: /* BFCVTN, BFCVTN2 */
12193             if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
12194                 unallocated_encoding(s);
12195                 return;
12196             }
12197             if (!fp_access_check(s)) {
12198                 return;
12199             }
12200             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12201             return;
12202         case 0x17: /* FCVTL, FCVTL2 */
12203             if (!fp_access_check(s)) {
12204                 return;
12205             }
12206             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12207             return;
12208         case 0x18: /* FRINTN */
12209         case 0x19: /* FRINTM */
12210         case 0x38: /* FRINTP */
12211         case 0x39: /* FRINTZ */
12212             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12213             /* fall through */
12214         case 0x59: /* FRINTX */
12215         case 0x79: /* FRINTI */
12216             need_fpstatus = true;
12217             if (size == 3 && !is_q) {
12218                 unallocated_encoding(s);
12219                 return;
12220             }
12221             break;
12222         case 0x58: /* FRINTA */
12223             rmode = FPROUNDING_TIEAWAY;
12224             need_fpstatus = true;
12225             if (size == 3 && !is_q) {
12226                 unallocated_encoding(s);
12227                 return;
12228             }
12229             break;
12230         case 0x7c: /* URSQRTE */
12231             if (size == 3) {
12232                 unallocated_encoding(s);
12233                 return;
12234             }
12235             break;
12236         case 0x1e: /* FRINT32Z */
12237         case 0x1f: /* FRINT64Z */
12238             rmode = FPROUNDING_ZERO;
12239             /* fall through */
12240         case 0x5e: /* FRINT32X */
12241         case 0x5f: /* FRINT64X */
12242             need_fpstatus = true;
12243             if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12244                 unallocated_encoding(s);
12245                 return;
12246             }
12247             break;
12248         default:
12249             unallocated_encoding(s);
12250             return;
12251         }
12252         break;
12253     }
12254     default:
12255         unallocated_encoding(s);
12256         return;
12257     }
12258
12259     if (!fp_access_check(s)) {
12260         return;
12261     }
12262
12263     if (need_fpstatus || rmode >= 0) {
12264         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
12265     } else {
12266         tcg_fpstatus = NULL;
12267     }
12268     if (rmode >= 0) {
12269         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12270     } else {
12271         tcg_rmode = NULL;
12272     }
12273
12274     switch (opcode) {
12275     case 0x5:
12276         if (u && size == 0) { /* NOT */
12277             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12278             return;
12279         }
12280         break;
12281     case 0x8: /* CMGT, CMGE */
12282         if (u) {
12283             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
12284         } else {
12285             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
12286         }
12287         return;
12288     case 0x9: /* CMEQ, CMLE */
12289         if (u) {
12290             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
12291         } else {
12292             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
12293         }
12294         return;
12295     case 0xa: /* CMLT */
12296         gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
12297         return;
12298     case 0xb:
12299         if (u) { /* ABS, NEG */
12300             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12301         } else {
12302             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12303         }
12304         return;
12305     }
12306
12307     if (size == 3) {
12308         /* All 64-bit element operations can be shared with scalar 2misc */
12309         int pass;
12310
12311         /* Coverity claims (size == 3 && !is_q) has been eliminated
12312          * from all paths leading to here.
12313          */
12314         tcg_debug_assert(is_q);
12315         for (pass = 0; pass < 2; pass++) {
12316             TCGv_i64 tcg_op = tcg_temp_new_i64();
12317             TCGv_i64 tcg_res = tcg_temp_new_i64();
12318
12319             read_vec_element(s, tcg_op, rn, pass, MO_64);
12320
12321             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12322                             tcg_rmode, tcg_fpstatus);
12323
12324             write_vec_element(s, tcg_res, rd, pass, MO_64);
12325         }
12326     } else {
12327         int pass;
12328
12329         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12330             TCGv_i32 tcg_op = tcg_temp_new_i32();
12331             TCGv_i32 tcg_res = tcg_temp_new_i32();
12332
12333             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12334
12335             if (size == 2) {
12336                 /* Special cases for 32 bit elements */
12337                 switch (opcode) {
12338                 case 0x4: /* CLS */
12339                     if (u) {
12340                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12341                     } else {
12342                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
12343                     }
12344                     break;
12345                 case 0x7: /* SQABS, SQNEG */
12346                     if (u) {
12347                         gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op);
12348                     } else {
12349                         gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op);
12350                     }
12351                     break;
12352                 case 0x2f: /* FABS */
12353                     gen_helper_vfp_abss(tcg_res, tcg_op);
12354                     break;
12355                 case 0x6f: /* FNEG */
12356                     gen_helper_vfp_negs(tcg_res, tcg_op);
12357                     break;
12358                 case 0x7f: /* FSQRT */
12359                     gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
12360                     break;
12361                 case 0x1a: /* FCVTNS */
12362                 case 0x1b: /* FCVTMS */
12363                 case 0x1c: /* FCVTAS */
12364                 case 0x3a: /* FCVTPS */
12365                 case 0x3b: /* FCVTZS */
12366                     gen_helper_vfp_tosls(tcg_res, tcg_op,
12367                                          tcg_constant_i32(0), tcg_fpstatus);
12368                     break;
12369                 case 0x5a: /* FCVTNU */
12370                 case 0x5b: /* FCVTMU */
12371                 case 0x5c: /* FCVTAU */
12372                 case 0x7a: /* FCVTPU */
12373                 case 0x7b: /* FCVTZU */
12374                     gen_helper_vfp_touls(tcg_res, tcg_op,
12375                                          tcg_constant_i32(0), tcg_fpstatus);
12376                     break;
12377                 case 0x18: /* FRINTN */
12378                 case 0x19: /* FRINTM */
12379                 case 0x38: /* FRINTP */
12380                 case 0x39: /* FRINTZ */
12381                 case 0x58: /* FRINTA */
12382                 case 0x79: /* FRINTI */
12383                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12384                     break;
12385                 case 0x59: /* FRINTX */
12386                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12387                     break;
12388                 case 0x7c: /* URSQRTE */
12389                     gen_helper_rsqrte_u32(tcg_res, tcg_op);
12390                     break;
12391                 case 0x1e: /* FRINT32Z */
12392                 case 0x5e: /* FRINT32X */
12393                     gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12394                     break;
12395                 case 0x1f: /* FRINT64Z */
12396                 case 0x5f: /* FRINT64X */
12397                     gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12398                     break;
12399                 default:
12400                     g_assert_not_reached();
12401                 }
12402             } else {
12403                 /* Use helpers for 8 and 16 bit elements */
12404                 switch (opcode) {
12405                 case 0x5: /* CNT, RBIT */
12406                     /* For these two insns size is part of the opcode specifier
12407                      * (handled earlier); they always operate on byte elements.
12408                      */
12409                     if (u) {
12410                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12411                     } else {
12412                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12413                     }
12414                     break;
12415                 case 0x7: /* SQABS, SQNEG */
12416                 {
12417                     NeonGenOneOpEnvFn *genfn;
12418                     static NeonGenOneOpEnvFn * const fns[2][2] = {
12419                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12420                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12421                     };
12422                     genfn = fns[size][u];
12423                     genfn(tcg_res, tcg_env, tcg_op);
12424                     break;
12425                 }
12426                 case 0x4: /* CLS, CLZ */
12427                     if (u) {
12428                         if (size == 0) {
12429                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
12430                         } else {
12431                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
12432                         }
12433                     } else {
12434                         if (size == 0) {
12435                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
12436                         } else {
12437                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
12438                         }
12439                     }
12440                     break;
12441                 default:
12442                     g_assert_not_reached();
12443                 }
12444             }
12445
12446             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12447         }
12448     }
12449     clear_vec_high(s, is_q, rd);
12450
12451     if (tcg_rmode) {
12452         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12453     }
12454 }
12455
12456 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12457  *
12458  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12459  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12460  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12461  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12462  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12463  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12464  *
12465  * This actually covers two groups where scalar access is governed by
12466  * bit 28. A bunch of the instructions (float to integral) only exist
12467  * in the vector form and are un-allocated for the scalar decode. Also
12468  * in the scalar decode Q is always 1.
12469  */
12470 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12471 {
12472     int fpop, opcode, a, u;
12473     int rn, rd;
12474     bool is_q;
12475     bool is_scalar;
12476     bool only_in_vector = false;
12477
12478     int pass;
12479     TCGv_i32 tcg_rmode = NULL;
12480     TCGv_ptr tcg_fpstatus = NULL;
12481     bool need_fpst = true;
12482     int rmode = -1;
12483
12484     if (!dc_isar_feature(aa64_fp16, s)) {
12485         unallocated_encoding(s);
12486         return;
12487     }
12488
12489     rd = extract32(insn, 0, 5);
12490     rn = extract32(insn, 5, 5);
12491
12492     a = extract32(insn, 23, 1);
12493     u = extract32(insn, 29, 1);
12494     is_scalar = extract32(insn, 28, 1);
12495     is_q = extract32(insn, 30, 1);
12496
12497     opcode = extract32(insn, 12, 5);
12498     fpop = deposit32(opcode, 5, 1, a);
12499     fpop = deposit32(fpop, 6, 1, u);
12500
12501     switch (fpop) {
12502     case 0x1d: /* SCVTF */
12503     case 0x5d: /* UCVTF */
12504     {
12505         int elements;
12506
12507         if (is_scalar) {
12508             elements = 1;
12509         } else {
12510             elements = (is_q ? 8 : 4);
12511         }
12512
12513         if (!fp_access_check(s)) {
12514             return;
12515         }
12516         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12517         return;
12518     }
12519     break;
12520     case 0x2c: /* FCMGT (zero) */
12521     case 0x2d: /* FCMEQ (zero) */
12522     case 0x2e: /* FCMLT (zero) */
12523     case 0x6c: /* FCMGE (zero) */
12524     case 0x6d: /* FCMLE (zero) */
12525         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12526         return;
12527     case 0x3d: /* FRECPE */
12528     case 0x3f: /* FRECPX */
12529         break;
12530     case 0x18: /* FRINTN */
12531         only_in_vector = true;
12532         rmode = FPROUNDING_TIEEVEN;
12533         break;
12534     case 0x19: /* FRINTM */
12535         only_in_vector = true;
12536         rmode = FPROUNDING_NEGINF;
12537         break;
12538     case 0x38: /* FRINTP */
12539         only_in_vector = true;
12540         rmode = FPROUNDING_POSINF;
12541         break;
12542     case 0x39: /* FRINTZ */
12543         only_in_vector = true;
12544         rmode = FPROUNDING_ZERO;
12545         break;
12546     case 0x58: /* FRINTA */
12547         only_in_vector = true;
12548         rmode = FPROUNDING_TIEAWAY;
12549         break;
12550     case 0x59: /* FRINTX */
12551     case 0x79: /* FRINTI */
12552         only_in_vector = true;
12553         /* current rounding mode */
12554         break;
12555     case 0x1a: /* FCVTNS */
12556         rmode = FPROUNDING_TIEEVEN;
12557         break;
12558     case 0x1b: /* FCVTMS */
12559         rmode = FPROUNDING_NEGINF;
12560         break;
12561     case 0x1c: /* FCVTAS */
12562         rmode = FPROUNDING_TIEAWAY;
12563         break;
12564     case 0x3a: /* FCVTPS */
12565         rmode = FPROUNDING_POSINF;
12566         break;
12567     case 0x3b: /* FCVTZS */
12568         rmode = FPROUNDING_ZERO;
12569         break;
12570     case 0x5a: /* FCVTNU */
12571         rmode = FPROUNDING_TIEEVEN;
12572         break;
12573     case 0x5b: /* FCVTMU */
12574         rmode = FPROUNDING_NEGINF;
12575         break;
12576     case 0x5c: /* FCVTAU */
12577         rmode = FPROUNDING_TIEAWAY;
12578         break;
12579     case 0x7a: /* FCVTPU */
12580         rmode = FPROUNDING_POSINF;
12581         break;
12582     case 0x7b: /* FCVTZU */
12583         rmode = FPROUNDING_ZERO;
12584         break;
12585     case 0x2f: /* FABS */
12586     case 0x6f: /* FNEG */
12587         need_fpst = false;
12588         break;
12589     case 0x7d: /* FRSQRTE */
12590     case 0x7f: /* FSQRT (vector) */
12591         break;
12592     default:
12593         unallocated_encoding(s);
12594         return;
12595     }
12596
12597
12598     /* Check additional constraints for the scalar encoding */
12599     if (is_scalar) {
12600         if (!is_q) {
12601             unallocated_encoding(s);
12602             return;
12603         }
12604         /* FRINTxx is only in the vector form */
12605         if (only_in_vector) {
12606             unallocated_encoding(s);
12607             return;
12608         }
12609     }
12610
12611     if (!fp_access_check(s)) {
12612         return;
12613     }
12614
12615     if (rmode >= 0 || need_fpst) {
12616         tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
12617     }
12618
12619     if (rmode >= 0) {
12620         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12621     }
12622
12623     if (is_scalar) {
12624         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12625         TCGv_i32 tcg_res = tcg_temp_new_i32();
12626
12627         switch (fpop) {
12628         case 0x1a: /* FCVTNS */
12629         case 0x1b: /* FCVTMS */
12630         case 0x1c: /* FCVTAS */
12631         case 0x3a: /* FCVTPS */
12632         case 0x3b: /* FCVTZS */
12633             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12634             break;
12635         case 0x3d: /* FRECPE */
12636             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12637             break;
12638         case 0x3f: /* FRECPX */
12639             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12640             break;
12641         case 0x5a: /* FCVTNU */
12642         case 0x5b: /* FCVTMU */
12643         case 0x5c: /* FCVTAU */
12644         case 0x7a: /* FCVTPU */
12645         case 0x7b: /* FCVTZU */
12646             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12647             break;
12648         case 0x6f: /* FNEG */
12649             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12650             break;
12651         case 0x7d: /* FRSQRTE */
12652             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12653             break;
12654         default:
12655             g_assert_not_reached();
12656         }
12657
12658         /* limit any sign extension going on */
12659         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12660         write_fp_sreg(s, rd, tcg_res);
12661     } else {
12662         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12663             TCGv_i32 tcg_op = tcg_temp_new_i32();
12664             TCGv_i32 tcg_res = tcg_temp_new_i32();
12665
12666             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12667
12668             switch (fpop) {
12669             case 0x1a: /* FCVTNS */
12670             case 0x1b: /* FCVTMS */
12671             case 0x1c: /* FCVTAS */
12672             case 0x3a: /* FCVTPS */
12673             case 0x3b: /* FCVTZS */
12674                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12675                 break;
12676             case 0x3d: /* FRECPE */
12677                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12678                 break;
12679             case 0x5a: /* FCVTNU */
12680             case 0x5b: /* FCVTMU */
12681             case 0x5c: /* FCVTAU */
12682             case 0x7a: /* FCVTPU */
12683             case 0x7b: /* FCVTZU */
12684                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12685                 break;
12686             case 0x18: /* FRINTN */
12687             case 0x19: /* FRINTM */
12688             case 0x38: /* FRINTP */
12689             case 0x39: /* FRINTZ */
12690             case 0x58: /* FRINTA */
12691             case 0x79: /* FRINTI */
12692                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12693                 break;
12694             case 0x59: /* FRINTX */
12695                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12696                 break;
12697             case 0x2f: /* FABS */
12698                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12699                 break;
12700             case 0x6f: /* FNEG */
12701                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12702                 break;
12703             case 0x7d: /* FRSQRTE */
12704                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12705                 break;
12706             case 0x7f: /* FSQRT */
12707                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12708                 break;
12709             default:
12710                 g_assert_not_reached();
12711             }
12712
12713             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12714         }
12715
12716         clear_vec_high(s, is_q, rd);
12717     }
12718
12719     if (tcg_rmode) {
12720         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12721     }
12722 }
12723
12724 /* AdvSIMD scalar x indexed element
12725  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12726  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12727  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12728  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12729  * AdvSIMD vector x indexed element
12730  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12731  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12732  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12733  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12734  */
12735 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12736 {
12737     /* This encoding has two kinds of instruction:
12738      *  normal, where we perform elt x idxelt => elt for each
12739      *     element in the vector
12740      *  long, where we perform elt x idxelt and generate a result of
12741      *     double the width of the input element
12742      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12743      */
12744     bool is_scalar = extract32(insn, 28, 1);
12745     bool is_q = extract32(insn, 30, 1);
12746     bool u = extract32(insn, 29, 1);
12747     int size = extract32(insn, 22, 2);
12748     int l = extract32(insn, 21, 1);
12749     int m = extract32(insn, 20, 1);
12750     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12751     int rm = extract32(insn, 16, 4);
12752     int opcode = extract32(insn, 12, 4);
12753     int h = extract32(insn, 11, 1);
12754     int rn = extract32(insn, 5, 5);
12755     int rd = extract32(insn, 0, 5);
12756     bool is_long = false;
12757     int is_fp = 0;
12758     bool is_fp16 = false;
12759     int index;
12760     TCGv_ptr fpst;
12761
12762     switch (16 * u + opcode) {
12763     case 0x08: /* MUL */
12764     case 0x10: /* MLA */
12765     case 0x14: /* MLS */
12766         if (is_scalar) {
12767             unallocated_encoding(s);
12768             return;
12769         }
12770         break;
12771     case 0x02: /* SMLAL, SMLAL2 */
12772     case 0x12: /* UMLAL, UMLAL2 */
12773     case 0x06: /* SMLSL, SMLSL2 */
12774     case 0x16: /* UMLSL, UMLSL2 */
12775     case 0x0a: /* SMULL, SMULL2 */
12776     case 0x1a: /* UMULL, UMULL2 */
12777         if (is_scalar) {
12778             unallocated_encoding(s);
12779             return;
12780         }
12781         is_long = true;
12782         break;
12783     case 0x03: /* SQDMLAL, SQDMLAL2 */
12784     case 0x07: /* SQDMLSL, SQDMLSL2 */
12785     case 0x0b: /* SQDMULL, SQDMULL2 */
12786         is_long = true;
12787         break;
12788     case 0x0c: /* SQDMULH */
12789     case 0x0d: /* SQRDMULH */
12790         break;
12791     case 0x01: /* FMLA */
12792     case 0x05: /* FMLS */
12793     case 0x09: /* FMUL */
12794     case 0x19: /* FMULX */
12795         is_fp = 1;
12796         break;
12797     case 0x1d: /* SQRDMLAH */
12798     case 0x1f: /* SQRDMLSH */
12799         if (!dc_isar_feature(aa64_rdm, s)) {
12800             unallocated_encoding(s);
12801             return;
12802         }
12803         break;
12804     case 0x0e: /* SDOT */
12805     case 0x1e: /* UDOT */
12806         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12807             unallocated_encoding(s);
12808             return;
12809         }
12810         break;
12811     case 0x0f:
12812         switch (size) {
12813         case 0: /* SUDOT */
12814         case 2: /* USDOT */
12815             if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
12816                 unallocated_encoding(s);
12817                 return;
12818             }
12819             size = MO_32;
12820             break;
12821         case 1: /* BFDOT */
12822             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12823                 unallocated_encoding(s);
12824                 return;
12825             }
12826             size = MO_32;
12827             break;
12828         case 3: /* BFMLAL{B,T} */
12829             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12830                 unallocated_encoding(s);
12831                 return;
12832             }
12833             /* can't set is_fp without other incorrect size checks */
12834             size = MO_16;
12835             break;
12836         default:
12837             unallocated_encoding(s);
12838             return;
12839         }
12840         break;
12841     case 0x11: /* FCMLA #0 */
12842     case 0x13: /* FCMLA #90 */
12843     case 0x15: /* FCMLA #180 */
12844     case 0x17: /* FCMLA #270 */
12845         if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12846             unallocated_encoding(s);
12847             return;
12848         }
12849         is_fp = 2;
12850         break;
12851     case 0x00: /* FMLAL */
12852     case 0x04: /* FMLSL */
12853     case 0x18: /* FMLAL2 */
12854     case 0x1c: /* FMLSL2 */
12855         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12856             unallocated_encoding(s);
12857             return;
12858         }
12859         size = MO_16;
12860         /* is_fp, but we pass tcg_env not fp_status.  */
12861         break;
12862     default:
12863         unallocated_encoding(s);
12864         return;
12865     }
12866
12867     switch (is_fp) {
12868     case 1: /* normal fp */
12869         /* convert insn encoded size to MemOp size */
12870         switch (size) {
12871         case 0: /* half-precision */
12872             size = MO_16;
12873             is_fp16 = true;
12874             break;
12875         case MO_32: /* single precision */
12876         case MO_64: /* double precision */
12877             break;
12878         default:
12879             unallocated_encoding(s);
12880             return;
12881         }
12882         break;
12883
12884     case 2: /* complex fp */
12885         /* Each indexable element is a complex pair.  */
12886         size += 1;
12887         switch (size) {
12888         case MO_32:
12889             if (h && !is_q) {
12890                 unallocated_encoding(s);
12891                 return;
12892             }
12893             is_fp16 = true;
12894             break;
12895         case MO_64:
12896             break;
12897         default:
12898             unallocated_encoding(s);
12899             return;
12900         }
12901         break;
12902
12903     default: /* integer */
12904         switch (size) {
12905         case MO_8:
12906         case MO_64:
12907             unallocated_encoding(s);
12908             return;
12909         }
12910         break;
12911     }
12912     if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12913         unallocated_encoding(s);
12914         return;
12915     }
12916
12917     /* Given MemOp size, adjust register and indexing.  */
12918     switch (size) {
12919     case MO_16:
12920         index = h << 2 | l << 1 | m;
12921         break;
12922     case MO_32:
12923         index = h << 1 | l;
12924         rm |= m << 4;
12925         break;
12926     case MO_64:
12927         if (l || !is_q) {
12928             unallocated_encoding(s);
12929             return;
12930         }
12931         index = h;
12932         rm |= m << 4;
12933         break;
12934     default:
12935         g_assert_not_reached();
12936     }
12937
12938     if (!fp_access_check(s)) {
12939         return;
12940     }
12941
12942     if (is_fp) {
12943         fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
12944     } else {
12945         fpst = NULL;
12946     }
12947
12948     switch (16 * u + opcode) {
12949     case 0x0e: /* SDOT */
12950     case 0x1e: /* UDOT */
12951         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12952                          u ? gen_helper_gvec_udot_idx_b
12953                          : gen_helper_gvec_sdot_idx_b);
12954         return;
12955     case 0x0f:
12956         switch (extract32(insn, 22, 2)) {
12957         case 0: /* SUDOT */
12958             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12959                              gen_helper_gvec_sudot_idx_b);
12960             return;
12961         case 1: /* BFDOT */
12962             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12963                              gen_helper_gvec_bfdot_idx);
12964             return;
12965         case 2: /* USDOT */
12966             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12967                              gen_helper_gvec_usdot_idx_b);
12968             return;
12969         case 3: /* BFMLAL{B,T} */
12970             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
12971                               gen_helper_gvec_bfmlal_idx);
12972             return;
12973         }
12974         g_assert_not_reached();
12975     case 0x11: /* FCMLA #0 */
12976     case 0x13: /* FCMLA #90 */
12977     case 0x15: /* FCMLA #180 */
12978     case 0x17: /* FCMLA #270 */
12979         {
12980             int rot = extract32(insn, 13, 2);
12981             int data = (index << 2) | rot;
12982             tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
12983                                vec_full_reg_offset(s, rn),
12984                                vec_full_reg_offset(s, rm),
12985                                vec_full_reg_offset(s, rd), fpst,
12986                                is_q ? 16 : 8, vec_full_reg_size(s), data,
12987                                size == MO_64
12988                                ? gen_helper_gvec_fcmlas_idx
12989                                : gen_helper_gvec_fcmlah_idx);
12990         }
12991         return;
12992
12993     case 0x00: /* FMLAL */
12994     case 0x04: /* FMLSL */
12995     case 0x18: /* FMLAL2 */
12996     case 0x1c: /* FMLSL2 */
12997         {
12998             int is_s = extract32(opcode, 2, 1);
12999             int is_2 = u;
13000             int data = (index << 2) | (is_2 << 1) | is_s;
13001             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
13002                                vec_full_reg_offset(s, rn),
13003                                vec_full_reg_offset(s, rm), tcg_env,
13004                                is_q ? 16 : 8, vec_full_reg_size(s),
13005                                data, gen_helper_gvec_fmlal_idx_a64);
13006         }
13007         return;
13008
13009     case 0x08: /* MUL */
13010         if (!is_long && !is_scalar) {
13011             static gen_helper_gvec_3 * const fns[3] = {
13012                 gen_helper_gvec_mul_idx_h,
13013                 gen_helper_gvec_mul_idx_s,
13014                 gen_helper_gvec_mul_idx_d,
13015             };
13016             tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
13017                                vec_full_reg_offset(s, rn),
13018                                vec_full_reg_offset(s, rm),
13019                                is_q ? 16 : 8, vec_full_reg_size(s),
13020                                index, fns[size - 1]);
13021             return;
13022         }
13023         break;
13024
13025     case 0x10: /* MLA */
13026         if (!is_long && !is_scalar) {
13027             static gen_helper_gvec_4 * const fns[3] = {
13028                 gen_helper_gvec_mla_idx_h,
13029                 gen_helper_gvec_mla_idx_s,
13030                 gen_helper_gvec_mla_idx_d,
13031             };
13032             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13033                                vec_full_reg_offset(s, rn),
13034                                vec_full_reg_offset(s, rm),
13035                                vec_full_reg_offset(s, rd),
13036                                is_q ? 16 : 8, vec_full_reg_size(s),
13037                                index, fns[size - 1]);
13038             return;
13039         }
13040         break;
13041
13042     case 0x14: /* MLS */
13043         if (!is_long && !is_scalar) {
13044             static gen_helper_gvec_4 * const fns[3] = {
13045                 gen_helper_gvec_mls_idx_h,
13046                 gen_helper_gvec_mls_idx_s,
13047                 gen_helper_gvec_mls_idx_d,
13048             };
13049             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13050                                vec_full_reg_offset(s, rn),
13051                                vec_full_reg_offset(s, rm),
13052                                vec_full_reg_offset(s, rd),
13053                                is_q ? 16 : 8, vec_full_reg_size(s),
13054                                index, fns[size - 1]);
13055             return;
13056         }
13057         break;
13058     }
13059
13060     if (size == 3) {
13061         TCGv_i64 tcg_idx = tcg_temp_new_i64();
13062         int pass;
13063
13064         assert(is_fp && is_q && !is_long);
13065
13066         read_vec_element(s, tcg_idx, rm, index, MO_64);
13067
13068         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13069             TCGv_i64 tcg_op = tcg_temp_new_i64();
13070             TCGv_i64 tcg_res = tcg_temp_new_i64();
13071
13072             read_vec_element(s, tcg_op, rn, pass, MO_64);
13073
13074             switch (16 * u + opcode) {
13075             case 0x05: /* FMLS */
13076                 /* As usual for ARM, separate negation for fused multiply-add */
13077                 gen_helper_vfp_negd(tcg_op, tcg_op);
13078                 /* fall through */
13079             case 0x01: /* FMLA */
13080                 read_vec_element(s, tcg_res, rd, pass, MO_64);
13081                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
13082                 break;
13083             case 0x09: /* FMUL */
13084                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
13085                 break;
13086             case 0x19: /* FMULX */
13087                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
13088                 break;
13089             default:
13090                 g_assert_not_reached();
13091             }
13092
13093             write_vec_element(s, tcg_res, rd, pass, MO_64);
13094         }
13095
13096         clear_vec_high(s, !is_scalar, rd);
13097     } else if (!is_long) {
13098         /* 32 bit floating point, or 16 or 32 bit integer.
13099          * For the 16 bit scalar case we use the usual Neon helpers and
13100          * rely on the fact that 0 op 0 == 0 with no side effects.
13101          */
13102         TCGv_i32 tcg_idx = tcg_temp_new_i32();
13103         int pass, maxpasses;
13104
13105         if (is_scalar) {
13106             maxpasses = 1;
13107         } else {
13108             maxpasses = is_q ? 4 : 2;
13109         }
13110
13111         read_vec_element_i32(s, tcg_idx, rm, index, size);
13112
13113         if (size == 1 && !is_scalar) {
13114             /* The simplest way to handle the 16x16 indexed ops is to duplicate
13115              * the index into both halves of the 32 bit tcg_idx and then use
13116              * the usual Neon helpers.
13117              */
13118             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13119         }
13120
13121         for (pass = 0; pass < maxpasses; pass++) {
13122             TCGv_i32 tcg_op = tcg_temp_new_i32();
13123             TCGv_i32 tcg_res = tcg_temp_new_i32();
13124
13125             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13126
13127             switch (16 * u + opcode) {
13128             case 0x08: /* MUL */
13129             case 0x10: /* MLA */
13130             case 0x14: /* MLS */
13131             {
13132                 static NeonGenTwoOpFn * const fns[2][2] = {
13133                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13134                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
13135                 };
13136                 NeonGenTwoOpFn *genfn;
13137                 bool is_sub = opcode == 0x4;
13138
13139                 if (size == 1) {
13140                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13141                 } else {
13142                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13143                 }
13144                 if (opcode == 0x8) {
13145                     break;
13146                 }
13147                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13148                 genfn = fns[size - 1][is_sub];
13149                 genfn(tcg_res, tcg_op, tcg_res);
13150                 break;
13151             }
13152             case 0x05: /* FMLS */
13153             case 0x01: /* FMLA */
13154                 read_vec_element_i32(s, tcg_res, rd, pass,
13155                                      is_scalar ? size : MO_32);
13156                 switch (size) {
13157                 case 1:
13158                     if (opcode == 0x5) {
13159                         /* As usual for ARM, separate negation for fused
13160                          * multiply-add */
13161                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13162                     }
13163                     if (is_scalar) {
13164                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13165                                                    tcg_res, fpst);
13166                     } else {
13167                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13168                                                     tcg_res, fpst);
13169                     }
13170                     break;
13171                 case 2:
13172                     if (opcode == 0x5) {
13173                         /* As usual for ARM, separate negation for
13174                          * fused multiply-add */
13175                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13176                     }
13177                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13178                                            tcg_res, fpst);
13179                     break;
13180                 default:
13181                     g_assert_not_reached();
13182                 }
13183                 break;
13184             case 0x09: /* FMUL */
13185                 switch (size) {
13186                 case 1:
13187                     if (is_scalar) {
13188                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
13189                                                 tcg_idx, fpst);
13190                     } else {
13191                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13192                                                  tcg_idx, fpst);
13193                     }
13194                     break;
13195                 case 2:
13196                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13197                     break;
13198                 default:
13199                     g_assert_not_reached();
13200                 }
13201                 break;
13202             case 0x19: /* FMULX */
13203                 switch (size) {
13204                 case 1:
13205                     if (is_scalar) {
13206                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13207                                                  tcg_idx, fpst);
13208                     } else {
13209                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13210                                                   tcg_idx, fpst);
13211                     }
13212                     break;
13213                 case 2:
13214                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13215                     break;
13216                 default:
13217                     g_assert_not_reached();
13218                 }
13219                 break;
13220             case 0x0c: /* SQDMULH */
13221                 if (size == 1) {
13222                     gen_helper_neon_qdmulh_s16(tcg_res, tcg_env,
13223                                                tcg_op, tcg_idx);
13224                 } else {
13225                     gen_helper_neon_qdmulh_s32(tcg_res, tcg_env,
13226                                                tcg_op, tcg_idx);
13227                 }
13228                 break;
13229             case 0x0d: /* SQRDMULH */
13230                 if (size == 1) {
13231                     gen_helper_neon_qrdmulh_s16(tcg_res, tcg_env,
13232                                                 tcg_op, tcg_idx);
13233                 } else {
13234                     gen_helper_neon_qrdmulh_s32(tcg_res, tcg_env,
13235                                                 tcg_op, tcg_idx);
13236                 }
13237                 break;
13238             case 0x1d: /* SQRDMLAH */
13239                 read_vec_element_i32(s, tcg_res, rd, pass,
13240                                      is_scalar ? size : MO_32);
13241                 if (size == 1) {
13242                     gen_helper_neon_qrdmlah_s16(tcg_res, tcg_env,
13243                                                 tcg_op, tcg_idx, tcg_res);
13244                 } else {
13245                     gen_helper_neon_qrdmlah_s32(tcg_res, tcg_env,
13246                                                 tcg_op, tcg_idx, tcg_res);
13247                 }
13248                 break;
13249             case 0x1f: /* SQRDMLSH */
13250                 read_vec_element_i32(s, tcg_res, rd, pass,
13251                                      is_scalar ? size : MO_32);
13252                 if (size == 1) {
13253                     gen_helper_neon_qrdmlsh_s16(tcg_res, tcg_env,
13254                                                 tcg_op, tcg_idx, tcg_res);
13255                 } else {
13256                     gen_helper_neon_qrdmlsh_s32(tcg_res, tcg_env,
13257                                                 tcg_op, tcg_idx, tcg_res);
13258                 }
13259                 break;
13260             default:
13261                 g_assert_not_reached();
13262             }
13263
13264             if (is_scalar) {
13265                 write_fp_sreg(s, rd, tcg_res);
13266             } else {
13267                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13268             }
13269         }
13270
13271         clear_vec_high(s, is_q, rd);
13272     } else {
13273         /* long ops: 16x16->32 or 32x32->64 */
13274         TCGv_i64 tcg_res[2];
13275         int pass;
13276         bool satop = extract32(opcode, 0, 1);
13277         MemOp memop = MO_32;
13278
13279         if (satop || !u) {
13280             memop |= MO_SIGN;
13281         }
13282
13283         if (size == 2) {
13284             TCGv_i64 tcg_idx = tcg_temp_new_i64();
13285
13286             read_vec_element(s, tcg_idx, rm, index, memop);
13287
13288             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13289                 TCGv_i64 tcg_op = tcg_temp_new_i64();
13290                 TCGv_i64 tcg_passres;
13291                 int passelt;
13292
13293                 if (is_scalar) {
13294                     passelt = 0;
13295                 } else {
13296                     passelt = pass + (is_q * 2);
13297                 }
13298
13299                 read_vec_element(s, tcg_op, rn, passelt, memop);
13300
13301                 tcg_res[pass] = tcg_temp_new_i64();
13302
13303                 if (opcode == 0xa || opcode == 0xb) {
13304                     /* Non-accumulating ops */
13305                     tcg_passres = tcg_res[pass];
13306                 } else {
13307                     tcg_passres = tcg_temp_new_i64();
13308                 }
13309
13310                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13311
13312                 if (satop) {
13313                     /* saturating, doubling */
13314                     gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env,
13315                                                       tcg_passres, tcg_passres);
13316                 }
13317
13318                 if (opcode == 0xa || opcode == 0xb) {
13319                     continue;
13320                 }
13321
13322                 /* Accumulating op: handle accumulate step */
13323                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13324
13325                 switch (opcode) {
13326                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13327                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13328                     break;
13329                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13330                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13331                     break;
13332                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13333                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
13334                     /* fall through */
13335                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13336                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env,
13337                                                       tcg_res[pass],
13338                                                       tcg_passres);
13339                     break;
13340                 default:
13341                     g_assert_not_reached();
13342                 }
13343             }
13344
13345             clear_vec_high(s, !is_scalar, rd);
13346         } else {
13347             TCGv_i32 tcg_idx = tcg_temp_new_i32();
13348
13349             assert(size == 1);
13350             read_vec_element_i32(s, tcg_idx, rm, index, size);
13351
13352             if (!is_scalar) {
13353                 /* The simplest way to handle the 16x16 indexed ops is to
13354                  * duplicate the index into both halves of the 32 bit tcg_idx
13355                  * and then use the usual Neon helpers.
13356                  */
13357                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13358             }
13359
13360             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13361                 TCGv_i32 tcg_op = tcg_temp_new_i32();
13362                 TCGv_i64 tcg_passres;
13363
13364                 if (is_scalar) {
13365                     read_vec_element_i32(s, tcg_op, rn, pass, size);
13366                 } else {
13367                     read_vec_element_i32(s, tcg_op, rn,
13368                                          pass + (is_q * 2), MO_32);
13369                 }
13370
13371                 tcg_res[pass] = tcg_temp_new_i64();
13372
13373                 if (opcode == 0xa || opcode == 0xb) {
13374                     /* Non-accumulating ops */
13375                     tcg_passres = tcg_res[pass];
13376                 } else {
13377                     tcg_passres = tcg_temp_new_i64();
13378                 }
13379
13380                 if (memop & MO_SIGN) {
13381                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13382                 } else {
13383                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13384                 }
13385                 if (satop) {
13386                     gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env,
13387                                                       tcg_passres, tcg_passres);
13388                 }
13389
13390                 if (opcode == 0xa || opcode == 0xb) {
13391                     continue;
13392                 }
13393
13394                 /* Accumulating op: handle accumulate step */
13395                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13396
13397                 switch (opcode) {
13398                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13399                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13400                                              tcg_passres);
13401                     break;
13402                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13403                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13404                                              tcg_passres);
13405                     break;
13406                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13407                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13408                     /* fall through */
13409                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13410                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env,
13411                                                       tcg_res[pass],
13412                                                       tcg_passres);
13413                     break;
13414                 default:
13415                     g_assert_not_reached();
13416                 }
13417             }
13418
13419             if (is_scalar) {
13420                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13421             }
13422         }
13423
13424         if (is_scalar) {
13425             tcg_res[1] = tcg_constant_i64(0);
13426         }
13427
13428         for (pass = 0; pass < 2; pass++) {
13429             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13430         }
13431     }
13432 }
13433
13434 /* Crypto AES
13435  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13436  * +-----------------+------+-----------+--------+-----+------+------+
13437  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13438  * +-----------------+------+-----------+--------+-----+------+------+
13439  */
13440 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13441 {
13442     int size = extract32(insn, 22, 2);
13443     int opcode = extract32(insn, 12, 5);
13444     int rn = extract32(insn, 5, 5);
13445     int rd = extract32(insn, 0, 5);
13446     gen_helper_gvec_2 *genfn2 = NULL;
13447     gen_helper_gvec_3 *genfn3 = NULL;
13448
13449     if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13450         unallocated_encoding(s);
13451         return;
13452     }
13453
13454     switch (opcode) {
13455     case 0x4: /* AESE */
13456         genfn3 = gen_helper_crypto_aese;
13457         break;
13458     case 0x6: /* AESMC */
13459         genfn2 = gen_helper_crypto_aesmc;
13460         break;
13461     case 0x5: /* AESD */
13462         genfn3 = gen_helper_crypto_aesd;
13463         break;
13464     case 0x7: /* AESIMC */
13465         genfn2 = gen_helper_crypto_aesimc;
13466         break;
13467     default:
13468         unallocated_encoding(s);
13469         return;
13470     }
13471
13472     if (!fp_access_check(s)) {
13473         return;
13474     }
13475     if (genfn2) {
13476         gen_gvec_op2_ool(s, true, rd, rn, 0, genfn2);
13477     } else {
13478         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, genfn3);
13479     }
13480 }
13481
13482 /* Crypto three-reg SHA
13483  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13484  * +-----------------+------+---+------+---+--------+-----+------+------+
13485  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13486  * +-----------------+------+---+------+---+--------+-----+------+------+
13487  */
13488 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13489 {
13490     int size = extract32(insn, 22, 2);
13491     int opcode = extract32(insn, 12, 3);
13492     int rm = extract32(insn, 16, 5);
13493     int rn = extract32(insn, 5, 5);
13494     int rd = extract32(insn, 0, 5);
13495     gen_helper_gvec_3 *genfn;
13496     bool feature;
13497
13498     if (size != 0) {
13499         unallocated_encoding(s);
13500         return;
13501     }
13502
13503     switch (opcode) {
13504     case 0: /* SHA1C */
13505         genfn = gen_helper_crypto_sha1c;
13506         feature = dc_isar_feature(aa64_sha1, s);
13507         break;
13508     case 1: /* SHA1P */
13509         genfn = gen_helper_crypto_sha1p;
13510         feature = dc_isar_feature(aa64_sha1, s);
13511         break;
13512     case 2: /* SHA1M */
13513         genfn = gen_helper_crypto_sha1m;
13514         feature = dc_isar_feature(aa64_sha1, s);
13515         break;
13516     case 3: /* SHA1SU0 */
13517         genfn = gen_helper_crypto_sha1su0;
13518         feature = dc_isar_feature(aa64_sha1, s);
13519         break;
13520     case 4: /* SHA256H */
13521         genfn = gen_helper_crypto_sha256h;
13522         feature = dc_isar_feature(aa64_sha256, s);
13523         break;
13524     case 5: /* SHA256H2 */
13525         genfn = gen_helper_crypto_sha256h2;
13526         feature = dc_isar_feature(aa64_sha256, s);
13527         break;
13528     case 6: /* SHA256SU1 */
13529         genfn = gen_helper_crypto_sha256su1;
13530         feature = dc_isar_feature(aa64_sha256, s);
13531         break;
13532     default:
13533         unallocated_encoding(s);
13534         return;
13535     }
13536
13537     if (!feature) {
13538         unallocated_encoding(s);
13539         return;
13540     }
13541
13542     if (!fp_access_check(s)) {
13543         return;
13544     }
13545     gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
13546 }
13547
13548 /* Crypto two-reg SHA
13549  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13550  * +-----------------+------+-----------+--------+-----+------+------+
13551  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13552  * +-----------------+------+-----------+--------+-----+------+------+
13553  */
13554 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13555 {
13556     int size = extract32(insn, 22, 2);
13557     int opcode = extract32(insn, 12, 5);
13558     int rn = extract32(insn, 5, 5);
13559     int rd = extract32(insn, 0, 5);
13560     gen_helper_gvec_2 *genfn;
13561     bool feature;
13562
13563     if (size != 0) {
13564         unallocated_encoding(s);
13565         return;
13566     }
13567
13568     switch (opcode) {
13569     case 0: /* SHA1H */
13570         feature = dc_isar_feature(aa64_sha1, s);
13571         genfn = gen_helper_crypto_sha1h;
13572         break;
13573     case 1: /* SHA1SU1 */
13574         feature = dc_isar_feature(aa64_sha1, s);
13575         genfn = gen_helper_crypto_sha1su1;
13576         break;
13577     case 2: /* SHA256SU0 */
13578         feature = dc_isar_feature(aa64_sha256, s);
13579         genfn = gen_helper_crypto_sha256su0;
13580         break;
13581     default:
13582         unallocated_encoding(s);
13583         return;
13584     }
13585
13586     if (!feature) {
13587         unallocated_encoding(s);
13588         return;
13589     }
13590
13591     if (!fp_access_check(s)) {
13592         return;
13593     }
13594     gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
13595 }
13596
13597 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
13598 {
13599     tcg_gen_rotli_i64(d, m, 1);
13600     tcg_gen_xor_i64(d, d, n);
13601 }
13602
13603 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
13604 {
13605     tcg_gen_rotli_vec(vece, d, m, 1);
13606     tcg_gen_xor_vec(vece, d, d, n);
13607 }
13608
13609 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
13610                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
13611 {
13612     static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
13613     static const GVecGen3 op = {
13614         .fni8 = gen_rax1_i64,
13615         .fniv = gen_rax1_vec,
13616         .opt_opc = vecop_list,
13617         .fno = gen_helper_crypto_rax1,
13618         .vece = MO_64,
13619     };
13620     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
13621 }
13622
13623 /* Crypto three-reg SHA512
13624  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13625  * +-----------------------+------+---+---+-----+--------+------+------+
13626  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13627  * +-----------------------+------+---+---+-----+--------+------+------+
13628  */
13629 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13630 {
13631     int opcode = extract32(insn, 10, 2);
13632     int o =  extract32(insn, 14, 1);
13633     int rm = extract32(insn, 16, 5);
13634     int rn = extract32(insn, 5, 5);
13635     int rd = extract32(insn, 0, 5);
13636     bool feature;
13637     gen_helper_gvec_3 *oolfn = NULL;
13638     GVecGen3Fn *gvecfn = NULL;
13639
13640     if (o == 0) {
13641         switch (opcode) {
13642         case 0: /* SHA512H */
13643             feature = dc_isar_feature(aa64_sha512, s);
13644             oolfn = gen_helper_crypto_sha512h;
13645             break;
13646         case 1: /* SHA512H2 */
13647             feature = dc_isar_feature(aa64_sha512, s);
13648             oolfn = gen_helper_crypto_sha512h2;
13649             break;
13650         case 2: /* SHA512SU1 */
13651             feature = dc_isar_feature(aa64_sha512, s);
13652             oolfn = gen_helper_crypto_sha512su1;
13653             break;
13654         case 3: /* RAX1 */
13655             feature = dc_isar_feature(aa64_sha3, s);
13656             gvecfn = gen_gvec_rax1;
13657             break;
13658         default:
13659             g_assert_not_reached();
13660         }
13661     } else {
13662         switch (opcode) {
13663         case 0: /* SM3PARTW1 */
13664             feature = dc_isar_feature(aa64_sm3, s);
13665             oolfn = gen_helper_crypto_sm3partw1;
13666             break;
13667         case 1: /* SM3PARTW2 */
13668             feature = dc_isar_feature(aa64_sm3, s);
13669             oolfn = gen_helper_crypto_sm3partw2;
13670             break;
13671         case 2: /* SM4EKEY */
13672             feature = dc_isar_feature(aa64_sm4, s);
13673             oolfn = gen_helper_crypto_sm4ekey;
13674             break;
13675         default:
13676             unallocated_encoding(s);
13677             return;
13678         }
13679     }
13680
13681     if (!feature) {
13682         unallocated_encoding(s);
13683         return;
13684     }
13685
13686     if (!fp_access_check(s)) {
13687         return;
13688     }
13689
13690     if (oolfn) {
13691         gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
13692     } else {
13693         gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
13694     }
13695 }
13696
13697 /* Crypto two-reg SHA512
13698  *  31                                     12  11  10  9    5 4    0
13699  * +-----------------------------------------+--------+------+------+
13700  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13701  * +-----------------------------------------+--------+------+------+
13702  */
13703 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13704 {
13705     int opcode = extract32(insn, 10, 2);
13706     int rn = extract32(insn, 5, 5);
13707     int rd = extract32(insn, 0, 5);
13708     bool feature;
13709
13710     switch (opcode) {
13711     case 0: /* SHA512SU0 */
13712         feature = dc_isar_feature(aa64_sha512, s);
13713         break;
13714     case 1: /* SM4E */
13715         feature = dc_isar_feature(aa64_sm4, s);
13716         break;
13717     default:
13718         unallocated_encoding(s);
13719         return;
13720     }
13721
13722     if (!feature) {
13723         unallocated_encoding(s);
13724         return;
13725     }
13726
13727     if (!fp_access_check(s)) {
13728         return;
13729     }
13730
13731     switch (opcode) {
13732     case 0: /* SHA512SU0 */
13733         gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
13734         break;
13735     case 1: /* SM4E */
13736         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
13737         break;
13738     default:
13739         g_assert_not_reached();
13740     }
13741 }
13742
13743 /* Crypto four-register
13744  *  31               23 22 21 20  16 15  14  10 9    5 4    0
13745  * +-------------------+-----+------+---+------+------+------+
13746  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13747  * +-------------------+-----+------+---+------+------+------+
13748  */
13749 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13750 {
13751     int op0 = extract32(insn, 21, 2);
13752     int rm = extract32(insn, 16, 5);
13753     int ra = extract32(insn, 10, 5);
13754     int rn = extract32(insn, 5, 5);
13755     int rd = extract32(insn, 0, 5);
13756     bool feature;
13757
13758     switch (op0) {
13759     case 0: /* EOR3 */
13760     case 1: /* BCAX */
13761         feature = dc_isar_feature(aa64_sha3, s);
13762         break;
13763     case 2: /* SM3SS1 */
13764         feature = dc_isar_feature(aa64_sm3, s);
13765         break;
13766     default:
13767         unallocated_encoding(s);
13768         return;
13769     }
13770
13771     if (!feature) {
13772         unallocated_encoding(s);
13773         return;
13774     }
13775
13776     if (!fp_access_check(s)) {
13777         return;
13778     }
13779
13780     if (op0 < 2) {
13781         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13782         int pass;
13783
13784         tcg_op1 = tcg_temp_new_i64();
13785         tcg_op2 = tcg_temp_new_i64();
13786         tcg_op3 = tcg_temp_new_i64();
13787         tcg_res[0] = tcg_temp_new_i64();
13788         tcg_res[1] = tcg_temp_new_i64();
13789
13790         for (pass = 0; pass < 2; pass++) {
13791             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13792             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13793             read_vec_element(s, tcg_op3, ra, pass, MO_64);
13794
13795             if (op0 == 0) {
13796                 /* EOR3 */
13797                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13798             } else {
13799                 /* BCAX */
13800                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13801             }
13802             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13803         }
13804         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13805         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13806     } else {
13807         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13808
13809         tcg_op1 = tcg_temp_new_i32();
13810         tcg_op2 = tcg_temp_new_i32();
13811         tcg_op3 = tcg_temp_new_i32();
13812         tcg_res = tcg_temp_new_i32();
13813         tcg_zero = tcg_constant_i32(0);
13814
13815         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13816         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13817         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13818
13819         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13820         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13821         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13822         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13823
13824         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13825         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13826         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13827         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13828     }
13829 }
13830
13831 /* Crypto XAR
13832  *  31                   21 20  16 15    10 9    5 4    0
13833  * +-----------------------+------+--------+------+------+
13834  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13835  * +-----------------------+------+--------+------+------+
13836  */
13837 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13838 {
13839     int rm = extract32(insn, 16, 5);
13840     int imm6 = extract32(insn, 10, 6);
13841     int rn = extract32(insn, 5, 5);
13842     int rd = extract32(insn, 0, 5);
13843
13844     if (!dc_isar_feature(aa64_sha3, s)) {
13845         unallocated_encoding(s);
13846         return;
13847     }
13848
13849     if (!fp_access_check(s)) {
13850         return;
13851     }
13852
13853     gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
13854                  vec_full_reg_offset(s, rn),
13855                  vec_full_reg_offset(s, rm), imm6, 16,
13856                  vec_full_reg_size(s));
13857 }
13858
13859 /* Crypto three-reg imm2
13860  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13861  * +-----------------------+------+-----+------+--------+------+------+
13862  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13863  * +-----------------------+------+-----+------+--------+------+------+
13864  */
13865 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13866 {
13867     static gen_helper_gvec_3 * const fns[4] = {
13868         gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
13869         gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
13870     };
13871     int opcode = extract32(insn, 10, 2);
13872     int imm2 = extract32(insn, 12, 2);
13873     int rm = extract32(insn, 16, 5);
13874     int rn = extract32(insn, 5, 5);
13875     int rd = extract32(insn, 0, 5);
13876
13877     if (!dc_isar_feature(aa64_sm3, s)) {
13878         unallocated_encoding(s);
13879         return;
13880     }
13881
13882     if (!fp_access_check(s)) {
13883         return;
13884     }
13885
13886     gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
13887 }
13888
13889 /* C3.6 Data processing - SIMD, inc Crypto
13890  *
13891  * As the decode gets a little complex we are using a table based
13892  * approach for this part of the decode.
13893  */
13894 static const AArch64DecodeTable data_proc_simd[] = {
13895     /* pattern  ,  mask     ,  fn                        */
13896     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
13897     { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
13898     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
13899     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
13900     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
13901     { 0x0e000400, 0x9fe08400, disas_simd_copy },
13902     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
13903     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
13904     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
13905     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
13906     { 0x0e000000, 0xbf208c00, disas_simd_tb },
13907     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
13908     { 0x2e000000, 0xbf208400, disas_simd_ext },
13909     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
13910     { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
13911     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
13912     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
13913     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
13914     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
13915     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
13916     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
13917     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
13918     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
13919     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
13920     { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
13921     { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
13922     { 0xce000000, 0xff808000, disas_crypto_four_reg },
13923     { 0xce800000, 0xffe00000, disas_crypto_xar },
13924     { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
13925     { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
13926     { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
13927     { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
13928     { 0x00000000, 0x00000000, NULL }
13929 };
13930
13931 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13932 {
13933     /* Note that this is called with all non-FP cases from
13934      * table C3-6 so it must UNDEF for entries not specifically
13935      * allocated to instructions in that table.
13936      */
13937     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13938     if (fn) {
13939         fn(s, insn);
13940     } else {
13941         unallocated_encoding(s);
13942     }
13943 }
13944
13945 /* C3.6 Data processing - SIMD and floating point */
13946 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13947 {
13948     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13949         disas_data_proc_fp(s, insn);
13950     } else {
13951         /* SIMD, including crypto */
13952         disas_data_proc_simd(s, insn);
13953     }
13954 }
13955
13956 static bool trans_OK(DisasContext *s, arg_OK *a)
13957 {
13958     return true;
13959 }
13960
13961 static bool trans_FAIL(DisasContext *s, arg_OK *a)
13962 {
13963     s->is_nonstreaming = true;
13964     return true;
13965 }
13966
13967 /**
13968  * is_guarded_page:
13969  * @env: The cpu environment
13970  * @s: The DisasContext
13971  *
13972  * Return true if the page is guarded.
13973  */
13974 static bool is_guarded_page(CPUARMState *env, DisasContext *s)
13975 {
13976     uint64_t addr = s->base.pc_first;
13977 #ifdef CONFIG_USER_ONLY
13978     return page_get_flags(addr) & PAGE_BTI;
13979 #else
13980     CPUTLBEntryFull *full;
13981     void *host;
13982     int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
13983     int flags;
13984
13985     /*
13986      * We test this immediately after reading an insn, which means
13987      * that the TLB entry must be present and valid, and thus this
13988      * access will never raise an exception.
13989      */
13990     flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
13991                               false, &host, &full, 0);
13992     assert(!(flags & TLB_INVALID_MASK));
13993
13994     return full->extra.arm.guarded;
13995 #endif
13996 }
13997
/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * Only a small set of insns may sit at a branch target on a
 * guarded page:
 *   - branch target identifiers (BTI, in the HINT space),
 *   - PACIASP and PACIBSP (also in the HINT space),
 *   - BRK,
 *   - HLT.
 * Any other insn at the destination causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    const bool in_hint_space = (insn & 0xfffff01fu) == 0xd503201fu;

    if (!in_hint_space) {
        uint32_t excp_op = insn & 0xffe0001fu;

        /* BRK and HLT: give priority to the breakpoint exception.  */
        return excp_op == 0xd4200000u || excp_op == 0xd4400000u;
    }

    switch (extract32(insn, 5, 7)) {
    case 0b011001: /* PACIASP */
    case 0b011011: /* PACIBSP */
        /*
         * With SCTLR_ELx.BT set, PACI*SP reject btype == 3;
         * with it clear they accept every btype.
         */
        return !(bt && btype == 3);
    case 0b100010: /* BTI c */
        /* Accepts everything except btype == 3 */
        return btype != 3;
    case 0b100100: /* BTI j */
        /* Accepts everything except btype == 2 */
        return btype != 2;
    case 0b100110: /* BTI jc */
        /* Accepts every btype.  */
        return true;
    case 0b100000: /* BTI: not compatible with any btype.  */
    default:       /* Any other hint is not a valid target.  */
        return false;
    }
}
14049
14050 /* C3.1 A64 instruction index by encoding */
14051 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
14052 {
14053     switch (extract32(insn, 25, 4)) {
14054     case 0x5:
14055     case 0xd:      /* Data processing - register */
14056         disas_data_proc_reg(s, insn);
14057         break;
14058     case 0x7:
14059     case 0xf:      /* Data processing - SIMD and floating point */
14060         disas_data_proc_simd_fp(s, insn);
14061         break;
14062     default:
14063         unallocated_encoding(s);
14064         break;
14065     }
14066 }
14067
14068 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
14069                                           CPUState *cpu)
14070 {
14071     DisasContext *dc = container_of(dcbase, DisasContext, base);
14072     CPUARMState *env = cpu_env(cpu);
14073     ARMCPU *arm_cpu = env_archcpu(env);
14074     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
14075     int bound, core_mmu_idx;
14076
14077     dc->isar = &arm_cpu->isar;
14078     dc->condjmp = 0;
14079     dc->pc_save = dc->base.pc_first;
14080     dc->aarch64 = true;
14081     dc->thumb = false;
14082     dc->sctlr_b = 0;
14083     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
14084     dc->condexec_mask = 0;
14085     dc->condexec_cond = 0;
14086     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
14087     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
14088     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
14089     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
14090     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
14091     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
14092 #if !defined(CONFIG_USER_ONLY)
14093     dc->user = (dc->current_el == 0);
14094 #endif
14095     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
14096     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
14097     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
14098     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
14099     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
14100     dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
14101     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
14102     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
14103     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
14104     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
14105     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
14106     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
14107     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
14108     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
14109     dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
14110     dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
14111     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
14112     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
14113     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
14114     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
14115     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
14116     dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
14117     dc->nv = EX_TBFLAG_A64(tb_flags, NV);
14118     dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
14119     dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
14120     dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
14121     dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
14122     dc->vec_len = 0;
14123     dc->vec_stride = 0;
14124     dc->cp_regs = arm_cpu->cp_regs;
14125     dc->features = env->features;
14126     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
14127     dc->gm_blocksize = arm_cpu->gm_blocksize;
14128
14129 #ifdef CONFIG_USER_ONLY
14130     /* In sve_probe_page, we assume TBI is enabled. */
14131     tcg_debug_assert(dc->tbid & 1);
14132 #endif
14133
14134     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
14135
14136     /* Single step state. The code-generation logic here is:
14137      *  SS_ACTIVE == 0:
14138      *   generate code with no special handling for single-stepping (except
14139      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
14140      *   this happens anyway because those changes are all system register or
14141      *   PSTATE writes).
14142      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
14143      *   emit code for one insn
14144      *   emit code to clear PSTATE.SS
14145      *   emit code to generate software step exception for completed step
14146      *   end TB (as usual for having generated an exception)
14147      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
14148      *   emit code to generate a software step exception
14149      *   end the TB
14150      */
14151     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
14152     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
14153     dc->is_ldex = false;
14154
14155     /* Bound the number of insns to execute to those left on the page.  */
14156     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
14157
14158     /* If architectural single step active, limit to 1.  */
14159     if (dc->ss_active) {
14160         bound = 1;
14161     }
14162     dc->base.max_insns = MIN(dc->base.max_insns, bound);
14163 }
14164
/*
 * tb_start callback of aarch64_translator_ops.  Intentionally empty:
 * no code is emitted and no state is changed here.
 */
static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}
14168
14169 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14170 {
14171     DisasContext *dc = container_of(dcbase, DisasContext, base);
14172     target_ulong pc_arg = dc->base.pc_next;
14173
14174     if (tb_cflags(dcbase->tb) & CF_PCREL) {
14175         pc_arg &= ~TARGET_PAGE_MASK;
14176     }
14177     tcg_gen_insn_start(pc_arg, 0, 0);
14178     dc->insn_start = tcg_last_op();
14179 }
14180
14181 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
14182 {
14183     DisasContext *s = container_of(dcbase, DisasContext, base);
14184     CPUARMState *env = cpu_env(cpu);
14185     uint64_t pc = s->base.pc_next;
14186     uint32_t insn;
14187
14188     /* Singlestep exceptions have the highest priority. */
14189     if (s->ss_active && !s->pstate_ss) {
14190         /* Singlestep state is Active-pending.
14191          * If we're in this state at the start of a TB then either
14192          *  a) we just took an exception to an EL which is being debugged
14193          *     and this is the first insn in the exception handler
14194          *  b) debug exceptions were masked and we just unmasked them
14195          *     without changing EL (eg by clearing PSTATE.D)
14196          * In either case we're going to take a swstep exception in the
14197          * "did not step an insn" case, and so the syndrome ISV and EX
14198          * bits should be zero.
14199          */
14200         assert(s->base.num_insns == 1);
14201         gen_swstep_exception(s, 0, 0);
14202         s->base.is_jmp = DISAS_NORETURN;
14203         s->base.pc_next = pc + 4;
14204         return;
14205     }
14206
14207     if (pc & 3) {
14208         /*
14209          * PC alignment fault.  This has priority over the instruction abort
14210          * that we would receive from a translation fault via arm_ldl_code.
14211          * This should only be possible after an indirect branch, at the
14212          * start of the TB.
14213          */
14214         assert(s->base.num_insns == 1);
14215         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
14216         s->base.is_jmp = DISAS_NORETURN;
14217         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
14218         return;
14219     }
14220
14221     s->pc_curr = pc;
14222     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
14223     s->insn = insn;
14224     s->base.pc_next = pc + 4;
14225
14226     s->fp_access_checked = false;
14227     s->sve_access_checked = false;
14228
14229     if (s->pstate_il) {
14230         /*
14231          * Illegal execution state. This has priority over BTI
14232          * exceptions, but comes after instruction abort exceptions.
14233          */
14234         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
14235         return;
14236     }
14237
14238     if (dc_isar_feature(aa64_bti, s)) {
14239         if (s->base.num_insns == 1) {
14240             /*
14241              * At the first insn of the TB, compute s->guarded_page.
14242              * We delayed computing this until successfully reading
14243              * the first insn of the TB, above.  This (mostly) ensures
14244              * that the softmmu tlb entry has been populated, and the
14245              * page table GP bit is available.
14246              *
14247              * Note that we need to compute this even if btype == 0,
14248              * because this value is used for BR instructions later
14249              * where ENV is not available.
14250              */
14251             s->guarded_page = is_guarded_page(env, s);
14252
14253             /* First insn can have btype set to non-zero.  */
14254             tcg_debug_assert(s->btype >= 0);
14255
14256             /*
14257              * Note that the Branch Target Exception has fairly high
14258              * priority -- below debugging exceptions but above most
14259              * everything else.  This allows us to handle this now
14260              * instead of waiting until the insn is otherwise decoded.
14261              */
14262             if (s->btype != 0
14263                 && s->guarded_page
14264                 && !btype_destination_ok(insn, s->bt, s->btype)) {
14265                 gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
14266                 return;
14267             }
14268         } else {
14269             /* Not the first insn: btype must be 0.  */
14270             tcg_debug_assert(s->btype == 0);
14271         }
14272     }
14273
14274     s->is_nonstreaming = false;
14275     if (s->sme_trap_nonstreaming) {
14276         disas_sme_fa64(s, insn);
14277     }
14278
14279     if (!disas_a64(s, insn) &&
14280         !disas_sme(s, insn) &&
14281         !disas_sve(s, insn)) {
14282         disas_a64_legacy(s, insn);
14283     }
14284
14285     /*
14286      * After execution of most insns, btype is reset to 0.
14287      * Note that we set btype == -1 when the insn sets btype.
14288      */
14289     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
14290         reset_btype(s);
14291     }
14292 }
14293
14294 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
14295 {
14296     DisasContext *dc = container_of(dcbase, DisasContext, base);
14297
14298     if (unlikely(dc->ss_active)) {
14299         /* Note that this means single stepping WFI doesn't halt the CPU.
14300          * For conditional branch insns this is harmless unreachable code as
14301          * gen_goto_tb() has already handled emitting the debug exception
14302          * (and thus a tb-jump is not possible when singlestepping).
14303          */
14304         switch (dc->base.is_jmp) {
14305         default:
14306             gen_a64_update_pc(dc, 4);
14307             /* fall through */
14308         case DISAS_EXIT:
14309         case DISAS_JUMP:
14310             gen_step_complete_exception(dc);
14311             break;
14312         case DISAS_NORETURN:
14313             break;
14314         }
14315     } else {
14316         switch (dc->base.is_jmp) {
14317         case DISAS_NEXT:
14318         case DISAS_TOO_MANY:
14319             gen_goto_tb(dc, 1, 4);
14320             break;
14321         default:
14322         case DISAS_UPDATE_EXIT:
14323             gen_a64_update_pc(dc, 4);
14324             /* fall through */
14325         case DISAS_EXIT:
14326             tcg_gen_exit_tb(NULL, 0);
14327             break;
14328         case DISAS_UPDATE_NOCHAIN:
14329             gen_a64_update_pc(dc, 4);
14330             /* fall through */
14331         case DISAS_JUMP:
14332             tcg_gen_lookup_and_goto_ptr();
14333             break;
14334         case DISAS_NORETURN:
14335         case DISAS_SWI:
14336             break;
14337         case DISAS_WFE:
14338             gen_a64_update_pc(dc, 4);
14339             gen_helper_wfe(tcg_env);
14340             break;
14341         case DISAS_YIELD:
14342             gen_a64_update_pc(dc, 4);
14343             gen_helper_yield(tcg_env);
14344             break;
14345         case DISAS_WFI:
14346             /*
14347              * This is a special case because we don't want to just halt
14348              * the CPU if trying to debug across a WFI.
14349              */
14350             gen_a64_update_pc(dc, 4);
14351             gen_helper_wfi(tcg_env, tcg_constant_i32(4));
14352             /*
14353              * The helper doesn't necessarily throw an exception, but we
14354              * must go back to the main loop to check for interrupts anyway.
14355              */
14356             tcg_gen_exit_tb(NULL, 0);
14357             break;
14358         }
14359     }
14360 }
14361
14362 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14363                                  CPUState *cpu, FILE *logfile)
14364 {
14365     DisasContext *dc = container_of(dcbase, DisasContext, base);
14366
14367     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
14368     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
14369 }
14370
/*
 * AArch64 callback table: binds each TranslatorOps hook to the
 * corresponding aarch64_tr_* function defined in this file.
 */
const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
    .disas_log          = aarch64_tr_disas_log,
};