target/arm/tcg/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20
  21 #include "translate.h"
  22 #include "translate-a64.h"
  23 #include "qemu/log.h"
  24 #include "disas/disas.h"
  25 #include "arm_ldst.h"
  26 #include "semihosting/semihost.h"
  27 #include "cpregs.h"
  28
/*
 * TCG globals for the 32 general-purpose register slots and the program
 * counter.  They are created in a64_translate_init() and bound to
 * CPUARMState.xregs[] and CPUARMState.pc respectively.
 */
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/*
 * Load/store exclusive handling: TCG global bound to
 * CPUARMState.exclusive_high (also created in a64_translate_init()).
 */
static TCGv_i64 cpu_exclusive_high;
  34
  35 static const char *regnames[] = {
  36     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  37     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  38     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  39     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  40 };
  41
  42 enum a64_shift_type {
  43     A64_SHIFT_TYPE_LSL = 0,
  44     A64_SHIFT_TYPE_LSR = 1,
  45     A64_SHIFT_TYPE_ASR = 2,
  46     A64_SHIFT_TYPE_ROR = 3
  47 };
  48
  49 /*
  50  * Helpers for extracting complex instruction fields
  51  */
  52
  53 /*
  54  * For load/store with an unsigned 12 bit immediate scaled by the element
  55  * size. The input has the immediate field in bits [14:3] and the element
  56  * size in [2:0].
  57  */
  58 static int uimm_scaled(DisasContext *s, int x)
  59 {
  60     unsigned imm = x >> 3;
  61     unsigned scale = extract32(x, 0, 3);
  62     return imm << scale;
  63 }
  64
  65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
  66 static int scale_by_log2_tag_granule(DisasContext *s, int x)
  67 {
  68     return x << LOG2_TAG_GRANULE;
  69 }
  70
  71 /*
  72  * Include the generated decoders.
  73  */
  74
  75 #include "decode-sme-fa64.c.inc"
  76 #include "decode-a64.c.inc"
  77
  78 /* Table based decoder typedefs - used when the relevant bits for decode
  79  * are too awkwardly scattered across the instruction (eg SIMD).
  80  */
/* Signature of a table-driven decode handler: context plus raw insn word. */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/*
 * One entry of a table-based decoder.
 * NOTE(review): the lookup code is not in view; presumably an insn
 * matches an entry when (insn & mask) == pattern -- confirm.
 */
typedef struct AArch64DecodeTable {
    uint32_t pattern;           /* bit pattern to compare against */
    uint32_t mask;              /* which insn bits take part in the match */
    AArch64DecodeFn *disas_fn;  /* handler for this entry */
} AArch64DecodeTable;
  88
  89 /* initialize TCG globals.  */
  90 void a64_translate_init(void)
  91 {
  92     int i;
  93
  94     cpu_pc = tcg_global_mem_new_i64(cpu_env,
  95                                     offsetof(CPUARMState, pc),
  96                                     "pc");
  97     for (i = 0; i < 32; i++) {
  98         cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
  99                                           offsetof(CPUARMState, xregs[i]),
 100                                           regnames[i]);
 101     }
 102
 103     cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
 104         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 105 }
 106
 107 /*
 108  * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
 109  */
 110 static int get_a64_user_mem_index(DisasContext *s)
 111 {
 112     /*
 113      * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
 114      * which is the usual mmu_idx for this cpu state.
 115      */
 116     ARMMMUIdx useridx = s->mmu_idx;
 117
 118     if (s->unpriv) {
 119         /*
 120          * We have pre-computed the condition for AccType_UNPRIV.
 121          * Therefore we should never get here with a mmu_idx for
 122          * which we do not know the corresponding user mmu_idx.
 123          */
 124         switch (useridx) {
 125         case ARMMMUIdx_E10_1:
 126         case ARMMMUIdx_E10_1_PAN:
 127             useridx = ARMMMUIdx_E10_0;
 128             break;
 129         case ARMMMUIdx_E20_2:
 130         case ARMMMUIdx_E20_2_PAN:
 131             useridx = ARMMMUIdx_E20_0;
 132             break;
 133         default:
 134             g_assert_not_reached();
 135         }
 136     }
 137     return arm_to_core_mmu_idx(useridx);
 138 }
 139
 140 static void set_btype_raw(int val)
 141 {
 142     tcg_gen_st_i32(tcg_constant_i32(val), cpu_env,
 143                    offsetof(CPUARMState, btype));
 144 }
 145
/*
 * Emit code setting BTYPE to the non-zero value @val (1..3); use
 * reset_btype() to clear it.
 */
static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    /*
     * Record a non-zero value in the translation-time copy so that a
     * later reset_btype() still emits the clearing store.
     */
    s->btype = -1;
}
 153
 154 static void reset_btype(DisasContext *s)
 155 {
 156     if (s->btype != 0) {
 157         set_btype_raw(0);
 158         s->btype = 0;
 159     }
 160 }
 161
 162 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
 163 {
 164     assert(s->pc_save != -1);
 165     if (tb_cflags(s->base.tb) & CF_PCREL) {
 166         tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
 167     } else {
 168         tcg_gen_movi_i64(dest, s->pc_curr + diff);
 169     }
 170 }
 171
/*
 * Emit code setting cpu_pc to the current insn address plus @diff and
 * remember the value written in s->pc_save.
 */
void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    /* Must come first: gen_pc_plus_diff() reads the old s->pc_save. */
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}
 177
 178 /*
 179  * Handle Top Byte Ignore (TBI) bits.
 180  *
 181  * If address tagging is enabled via the TCR TBI bits:
 182  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 183  *    then the address is zero-extended, clearing bits [63:56]
 184  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 185  *    and TBI1 controls addresses with bit 55 == 1.
 186  *    If the appropriate TBI bit is set for the address then
 187  *    the address is sign-extended from bit 55 into bits [63:56]
 188  *
 189  * Here We have concatenated TBI{1,0} into tbi.
 190  */
 191 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
 192                                 TCGv_i64 src, int tbi)
 193 {
 194     if (tbi == 0) {
 195         /* Load unmodified address */
 196         tcg_gen_mov_i64(dst, src);
 197     } else if (!regime_has_2_ranges(s->mmu_idx)) {
 198         /* Force tag byte to all zero */
 199         tcg_gen_extract_i64(dst, src, 0, 56);
 200     } else {
 201         /* Sign-extend from bit 55.  */
 202         tcg_gen_sextract_i64(dst, src, 0, 56);
 203
 204         switch (tbi) {
 205         case 1:
 206             /* tbi0 but !tbi1: only use the extension if positive */
 207             tcg_gen_and_i64(dst, dst, src);
 208             break;
 209         case 2:
 210             /* !tbi0 but tbi1: only use the extension if negative */
 211             tcg_gen_or_i64(dst, dst, src);
 212             break;
 213         case 3:
 214             /* tbi0 and tbi1: always use the extension */
 215             break;
 216         default:
 217             g_assert_not_reached();
 218         }
 219     }
 220 }
 221
/* Emit code loading cpu_pc from the run-time value @src. */
static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    /* The pc is no longer known at translation time. */
    s->pc_save = -1;
}
 231
 232 /*
 233  * Handle MTE and/or TBI.
 234  *
 235  * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 236  * for the tag to be present in the FAR_ELx register.  But for user-only
 237  * mode we do not have a TLB with which to implement this, so we must
 238  * remove the top byte now.
 239  *
 240  * Always return a fresh temporary that we can increment independently
 241  * of the write-back address.
 242  */
 243
 244 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
 245 {
 246     TCGv_i64 clean = tcg_temp_new_i64();
 247 #ifdef CONFIG_USER_ONLY
 248     gen_top_byte_ignore(s, clean, addr, s->tbid);
 249 #else
 250     tcg_gen_mov_i64(clean, addr);
 251 #endif
 252     return clean;
 253 }
 254
 255 /* Insert a zero tag into src, with the result at dst. */
 256 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
 257 {
 258     tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
 259 }
 260
 261 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
 262                              MMUAccessType acc, int log2_size)
 263 {
 264     gen_helper_probe_access(cpu_env, ptr,
 265                             tcg_constant_i32(acc),
 266                             tcg_constant_i32(get_mem_index(s)),
 267                             tcg_constant_i32(1 << log2_size));
 268 }
 269
 270 /*
 271  * For MTE, check a single logical or atomic access.  This probes a single
 272  * address, the exact one specified.  The size and alignment of the access
 273  * is not relevant to MTE, per se, but watchpoints do require the size,
 274  * and we want to recognize those before making any other changes to state.
 275  */
 276 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
 277                                       bool is_write, bool tag_checked,
 278                                       MemOp memop, bool is_unpriv,
 279                                       int core_idx)
 280 {
 281     if (tag_checked && s->mte_active[is_unpriv]) {
 282         TCGv_i64 ret;
 283         int desc = 0;
 284
 285         desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
 286         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
 287         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
 288         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
 289         desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop));
 290         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
 291
 292         ret = tcg_temp_new_i64();
 293         gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);
 294
 295         return ret;
 296     }
 297     return clean_data_tbi(s, addr);
 298 }
 299
 300 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
 301                         bool tag_checked, MemOp memop)
 302 {
 303     return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
 304                                  false, get_mem_index(s));
 305 }
 306
 307 /*
 308  * For MTE, check multiple logical sequential accesses.
 309  */
 310 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
 311                         bool tag_checked, int total_size, MemOp single_mop)
 312 {
 313     if (tag_checked && s->mte_active[0]) {
 314         TCGv_i64 ret;
 315         int desc = 0;
 316
 317         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
 318         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
 319         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
 320         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
 321         desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop));
 322         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
 323
 324         ret = tcg_temp_new_i64();
 325         gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);
 326
 327         return ret;
 328     }
 329     return clean_data_tbi(s, addr);
 330 }
 331
 332 /*
 333  * Generate the special alignment check that applies to AccType_ATOMIC
 334  * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
 335  * naturally aligned, but it must not cross a 16-byte boundary.
 336  * See AArch64.CheckAlignment().
 337  */
 338 static void check_lse2_align(DisasContext *s, int rn, int imm,
 339                              bool is_write, MemOp mop)
 340 {
 341     TCGv_i32 tmp;
 342     TCGv_i64 addr;
 343     TCGLabel *over_label;
 344     MMUAccessType type;
 345     int mmu_idx;
 346
 347     tmp = tcg_temp_new_i32();
 348     tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
 349     tcg_gen_addi_i32(tmp, tmp, imm & 15);
 350     tcg_gen_andi_i32(tmp, tmp, 15);
 351     tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
 352
 353     over_label = gen_new_label();
 354     tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
 355
 356     addr = tcg_temp_new_i64();
 357     tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
 358
 359     type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD,
 360     mmu_idx = get_mem_index(s);
 361     gen_helper_unaligned_access(cpu_env, addr, tcg_constant_i32(type),
 362                                 tcg_constant_i32(mmu_idx));
 363
 364     gen_set_label(over_label);
 365
 366 }
 367
 368 /* Handle the alignment check for AccType_ATOMIC instructions. */
 369 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
 370 {
 371     MemOp size = mop & MO_SIZE;
 372
 373     if (size == MO_8) {
 374         return mop;
 375     }
 376
 377     /*
 378      * If size == MO_128, this is a LDXP, and the operation is single-copy
 379      * atomic for each doubleword, not the entire quadword; it still must
 380      * be quadword aligned.
 381      */
 382     if (size == MO_128) {
 383         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
 384                                    MO_ATOM_IFALIGN_PAIR);
 385     }
 386     if (dc_isar_feature(aa64_lse2, s)) {
 387         check_lse2_align(s, rn, 0, true, mop);
 388     } else {
 389         mop |= MO_ALIGN;
 390     }
 391     return finalize_memop(s, mop);
 392 }
 393
 394 /* Handle the alignment check for AccType_ORDERED instructions. */
 395 static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
 396                                  bool is_write, MemOp mop)
 397 {
 398     MemOp size = mop & MO_SIZE;
 399
 400     if (size == MO_8) {
 401         return mop;
 402     }
 403     if (size == MO_128) {
 404         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
 405                                    MO_ATOM_IFALIGN_PAIR);
 406     }
 407     if (!dc_isar_feature(aa64_lse2, s)) {
 408         mop |= MO_ALIGN;
 409     } else if (!s->naa) {
 410         check_lse2_align(s, rn, imm, is_write, mop);
 411     }
 412     return finalize_memop(s, mop);
 413 }
 414
/*
 * A condition expressed as a TCG comparison on a 64-bit value; filled
 * in by a64_test_cc().
 */
typedef struct DisasCompare64 {
    TCGCond cond;       /* comparison to apply to @value */
    TCGv_i64 value;     /* 64-bit value being tested */
} DisasCompare64;
 419
 420 static void a64_test_cc(DisasCompare64 *c64, int cc)
 421 {
 422     DisasCompare c32;
 423
 424     arm_test_cc(&c32, cc);
 425
 426     /*
 427      * Sign-extend the 32-bit value so that the GE/LT comparisons work
 428      * properly.  The NE/EQ comparisons are also fine with this choice.
 429       */
 430     c64->cond = c32.cond;
 431     c64->value = tcg_temp_new_i64();
 432     tcg_gen_ext_i32_i64(c64->value, c32.value);
 433 }
 434
 435 static void gen_rebuild_hflags(DisasContext *s)
 436 {
 437     gen_helper_rebuild_hflags_a64(cpu_env, tcg_constant_i32(s->current_el));
 438 }
 439
 440 static void gen_exception_internal(int excp)
 441 {
 442     assert(excp_is_internal(excp));
 443     gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
 444 }
 445
/*
 * Raise the internal exception @excp from the current insn: the pc is
 * first set to the address of this insn, and translation of the block
 * ends here.
 */
static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}
 452
 453 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
 454 {
 455     gen_a64_update_pc(s, 0);
 456     gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome));
 457     s->base.is_jmp = DISAS_NORETURN;
 458 }
 459
/*
 * Emit the software-step exception taken after an insn has been
 * single-stepped, and end translation of the block.
 */
static void gen_step_complete_exception(DisasContext *s)
{
    /*
     * We just completed step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}
 475
 476 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
 477 {
 478     if (s->ss_active) {
 479         return false;
 480     }
 481     return translator_use_goto_tb(&s->base, dest);
 482 }
 483
 484 static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
 485 {
 486     if (use_goto_tb(s, s->pc_curr + diff)) {
 487         /*
 488          * For pcrel, the pc must always be up-to-date on entry to
 489          * the linked TB, so that it can use simple additions for all
 490          * further adjustments.  For !pcrel, the linked TB is compiled
 491          * to know its full virtual address, so we can delay the
 492          * update to pc to the unlinked path.  A long chain of links
 493          * can thus avoid many updates to the PC.
 494          */
 495         if (tb_cflags(s->base.tb) & CF_PCREL) {
 496             gen_a64_update_pc(s, diff);
 497             tcg_gen_goto_tb(n);
 498         } else {
 499             tcg_gen_goto_tb(n);
 500             gen_a64_update_pc(s, diff);
 501         }
 502         tcg_gen_exit_tb(s->base.tb, n);
 503         s->base.is_jmp = DISAS_NORETURN;
 504     } else {
 505         gen_a64_update_pc(s, diff);
 506         if (s->ss_active) {
 507             gen_step_complete_exception(s);
 508         } else {
 509             tcg_gen_lookup_and_goto_ptr();
 510             s->base.is_jmp = DISAS_NORETURN;
 511         }
 512     }
 513 }
 514
 515 /*
 516  * Register access functions
 517  *
 * These functions are used for directly accessing a register when
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 522  *
 523  * B1.2.1 Register mappings
 524  *
 525  * In instruction register encoding 31 can refer to ZR (zero register) or
 526  * the SP (stack pointer) depending on context. In QEMU's case we map SP
 527  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 528  * This is the point of the _sp forms.
 529  */
 530 TCGv_i64 cpu_reg(DisasContext *s, int reg)
 531 {
 532     if (reg == 31) {
 533         TCGv_i64 t = tcg_temp_new_i64();
 534         tcg_gen_movi_i64(t, 0);
 535         return t;
 536     } else {
 537         return cpu_X[reg];
 538     }
 539 }
 540
/*
 * Register access for when 31 == SP: every register number, including
 * 31, maps directly to its TCG global in cpu_X[].
 */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
 546
 547 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 548  * representing the register contents. This TCGv is an auto-freed
 549  * temporary so it need not be explicitly freed, and may be modified.
 550  */
 551 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 552 {
 553     TCGv_i64 v = tcg_temp_new_i64();
 554     if (reg != 31) {
 555         if (sf) {
 556             tcg_gen_mov_i64(v, cpu_X[reg]);
 557         } else {
 558             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 559         }
 560     } else {
 561         tcg_gen_movi_i64(v, 0);
 562     }
 563     return v;
 564 }
 565
 566 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 567 {
 568     TCGv_i64 v = tcg_temp_new_i64();
 569     if (sf) {
 570         tcg_gen_mov_i64(v, cpu_X[reg]);
 571     } else {
 572         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 573     }
 574     return v;
 575 }
 576
 577 /* Return the offset into CPUARMState of a slice (from
 578  * the least significant end) of FP register Qn (ie
 579  * Dn, Sn, Hn or Bn).
 580  * (Note that this is not the same mapping as for A32; see cpu.h)
 581  */
 582 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
 583 {
 584     return vec_reg_offset(s, regno, 0, size);
 585 }
 586
 587 /* Offset of the high half of the 128 bit vector Qn */
 588 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 589 {
 590     return vec_reg_offset(s, regno, 1, MO_64);
 591 }
 592
/*
 * Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * The read functions return a newly allocated temporary.
 * NOTE(review): this comment used to say that, unlike for the GP
 * register accessors, the returned values must be manually freed;
 * nothing in this file frees a temporary explicitly, so that appears
 * to be stale -- confirm.
 */
 599 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 600 {
 601     TCGv_i64 v = tcg_temp_new_i64();
 602
 603     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 604     return v;
 605 }
 606
 607 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 608 {
 609     TCGv_i32 v = tcg_temp_new_i32();
 610
 611     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 612     return v;
 613 }
 614
 615 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
 616 {
 617     TCGv_i32 v = tcg_temp_new_i32();
 618
 619     tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
 620     return v;
 621 }
 622
 623 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 624  * If SVE is not enabled, then there are only 128 bits in the vector.
 625  */
 626 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
 627 {
 628     unsigned ofs = fp_reg_offset(s, rd, MO_64);
 629     unsigned vsz = vec_full_reg_size(s);
 630
 631     /* Nop move, with side effect of clearing the tail. */
 632     tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
 633 }
 634
 635 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 636 {
 637     unsigned ofs = fp_reg_offset(s, reg, MO_64);
 638
 639     tcg_gen_st_i64(v, cpu_env, ofs);
 640     clear_vec_high(s, false, reg);
 641 }
 642
 643 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 644 {
 645     TCGv_i64 tmp = tcg_temp_new_i64();
 646
 647     tcg_gen_extu_i32_i64(tmp, v);
 648     write_fp_dreg(s, reg, tmp);
 649 }
 650
 651 /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
 652 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
 653                          GVecGen2Fn *gvec_fn, int vece)
 654 {
 655     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 656             is_q ? 16 : 8, vec_full_reg_size(s));
 657 }
 658
 659 /* Expand a 2-operand + immediate AdvSIMD vector operation using
 660  * an expander function.
 661  */
 662 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
 663                           int64_t imm, GVecGen2iFn *gvec_fn, int vece)
 664 {
 665     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 666             imm, is_q ? 16 : 8, vec_full_reg_size(s));
 667 }
 668
 669 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
 670 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
 671                          GVecGen3Fn *gvec_fn, int vece)
 672 {
 673     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 674             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
 675 }
 676
 677 /* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
 678 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
 679                          int rx, GVecGen4Fn *gvec_fn, int vece)
 680 {
 681     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 682             vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
 683             is_q ? 16 : 8, vec_full_reg_size(s));
 684 }
 685
 686 /* Expand a 2-operand operation using an out-of-line helper.  */
 687 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
 688                              int rn, int data, gen_helper_gvec_2 *fn)
 689 {
 690     tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 691                        vec_full_reg_offset(s, rn),
 692                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 693 }
 694
 695 /* Expand a 3-operand operation using an out-of-line helper.  */
 696 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
 697                              int rn, int rm, int data, gen_helper_gvec_3 *fn)
 698 {
 699     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 700                        vec_full_reg_offset(s, rn),
 701                        vec_full_reg_offset(s, rm),
 702                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 703 }
 704
 705 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
 706  * an out-of-line helper.
 707  */
 708 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
 709                               int rm, bool is_fp16, int data,
 710                               gen_helper_gvec_3_ptr *fn)
 711 {
 712     TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
 713     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
 714                        vec_full_reg_offset(s, rn),
 715                        vec_full_reg_offset(s, rm), fpst,
 716                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 717 }
 718
 719 /* Expand a 3-operand + qc + operation using an out-of-line helper.  */
 720 static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
 721                             int rm, gen_helper_gvec_3_ptr *fn)
 722 {
 723     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
 724
 725     tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
 726     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
 727                        vec_full_reg_offset(s, rn),
 728                        vec_full_reg_offset(s, rm), qc_ptr,
 729                        is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
 730 }
 731
 732 /* Expand a 4-operand operation using an out-of-line helper.  */
 733 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
 734                              int rm, int ra, int data, gen_helper_gvec_4 *fn)
 735 {
 736     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 737                        vec_full_reg_offset(s, rn),
 738                        vec_full_reg_offset(s, rm),
 739                        vec_full_reg_offset(s, ra),
 740                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 741 }
 742
 743 /*
 744  * Expand a 4-operand + fpstatus pointer + simd data value operation using
 745  * an out-of-line helper.
 746  */
 747 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
 748                               int rm, int ra, bool is_fp16, int data,
 749                               gen_helper_gvec_4_ptr *fn)
 750 {
 751     TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
 752     tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
 753                        vec_full_reg_offset(s, rn),
 754                        vec_full_reg_offset(s, rm),
 755                        vec_full_reg_offset(s, ra), fpst,
 756                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 757 }
 758
/*
 * Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    /* Split the result: low half to cpu_ZF, high half to cpu_NF. */
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    /* Fold the high half in, so cpu_ZF is zero only if all 64 bits are. */
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
 767
 768 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 769 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 770 {
 771     if (sf) {
 772         gen_set_NZ64(result);
 773     } else {
 774         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 775         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 776     }
 777     tcg_gen_movi_i32(cpu_CF, 0);
 778     tcg_gen_movi_i32(cpu_VF, 0);
 779 }
 780
 781 /* dest = T0 + T1; compute C, N, V and Z flags */
 782 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 783 {
 784     TCGv_i64 result, flag, tmp;
 785     result = tcg_temp_new_i64();
 786     flag = tcg_temp_new_i64();
 787     tmp = tcg_temp_new_i64();
 788
 789     tcg_gen_movi_i64(tmp, 0);
 790     tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 791
 792     tcg_gen_extrl_i64_i32(cpu_CF, flag);
 793
 794     gen_set_NZ64(result);
 795
 796     tcg_gen_xor_i64(flag, result, t0);
 797     tcg_gen_xor_i64(tmp, t0, t1);
 798     tcg_gen_andc_i64(flag, flag, tmp);
 799     tcg_gen_extrh_i64_i32(cpu_VF, flag);
 800
 801     tcg_gen_mov_i64(dest, result);
 802 }
 803
 804 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 805 {
 806     TCGv_i32 t0_32 = tcg_temp_new_i32();
 807     TCGv_i32 t1_32 = tcg_temp_new_i32();
 808     TCGv_i32 tmp = tcg_temp_new_i32();
 809
 810     tcg_gen_movi_i32(tmp, 0);
 811     tcg_gen_extrl_i64_i32(t0_32, t0);
 812     tcg_gen_extrl_i64_i32(t1_32, t1);
 813     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 814     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 815     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 816     tcg_gen_xor_i32(tmp, t0_32, t1_32);
 817     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 818     tcg_gen_extu_i32_i64(dest, cpu_NF);
 819 }
 820
 821 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 822 {
 823     if (sf) {
 824         gen_add64_CC(dest, t0, t1);
 825     } else {
 826         gen_add32_CC(dest, t0, t1);
 827     }
 828 }
 829
 830 /* dest = T0 - T1; compute C, N, V and Z flags */
 831 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 832 {
 833     /* 64 bit arithmetic */
 834     TCGv_i64 result, flag, tmp;
 835
 836     result = tcg_temp_new_i64();
 837     flag = tcg_temp_new_i64();
 838     tcg_gen_sub_i64(result, t0, t1);
 839
 840     gen_set_NZ64(result);
 841
 842     tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 843     tcg_gen_extrl_i64_i32(cpu_CF, flag);
 844
 845     tcg_gen_xor_i64(flag, result, t0);
 846     tmp = tcg_temp_new_i64();
 847     tcg_gen_xor_i64(tmp, t0, t1);
 848     tcg_gen_and_i64(flag, flag, tmp);
 849     tcg_gen_extrh_i64_i32(cpu_VF, flag);
 850     tcg_gen_mov_i64(dest, result);
 851 }
 852
 853 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 854 {
 855     /* 32 bit arithmetic */
 856     TCGv_i32 t0_32 = tcg_temp_new_i32();
 857     TCGv_i32 t1_32 = tcg_temp_new_i32();
 858     TCGv_i32 tmp;
 859
 860     tcg_gen_extrl_i64_i32(t0_32, t0);
 861     tcg_gen_extrl_i64_i32(t1_32, t1);
 862     tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 863     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 864     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 865     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 866     tmp = tcg_temp_new_i32();
 867     tcg_gen_xor_i32(tmp, t0_32, t1_32);
 868     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 869     tcg_gen_extu_i32_i64(dest, cpu_NF);
 870 }
 871
 872 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 873 {
 874     if (sf) {
 875         gen_sub64_CC(dest, t0, t1);
 876     } else {
 877         gen_sub32_CC(dest, t0, t1);
 878     }
 879 }
 880
 881 /* dest = T0 + T1 + CF; do not compute flags. */
 882 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 883 {
 884     TCGv_i64 flag = tcg_temp_new_i64();
 885     tcg_gen_extu_i32_i64(flag, cpu_CF);
 886     tcg_gen_add_i64(dest, t0, t1);
 887     tcg_gen_add_i64(dest, dest, flag);
 888
 889     if (!sf) {
 890         tcg_gen_ext32u_i64(dest, dest);
 891     }
 892 }
 893
 894 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 895 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 896 {
 897     if (sf) {
 898         TCGv_i64 result = tcg_temp_new_i64();
 899         TCGv_i64 cf_64 = tcg_temp_new_i64();
 900         TCGv_i64 vf_64 = tcg_temp_new_i64();
 901         TCGv_i64 tmp = tcg_temp_new_i64();
 902         TCGv_i64 zero = tcg_constant_i64(0);
 903
 904         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 905         tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
 906         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
 907         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 908         gen_set_NZ64(result);
 909
 910         tcg_gen_xor_i64(vf_64, result, t0);
 911         tcg_gen_xor_i64(tmp, t0, t1);
 912         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 913         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 914
 915         tcg_gen_mov_i64(dest, result);
 916     } else {
 917         TCGv_i32 t0_32 = tcg_temp_new_i32();
 918         TCGv_i32 t1_32 = tcg_temp_new_i32();
 919         TCGv_i32 tmp = tcg_temp_new_i32();
 920         TCGv_i32 zero = tcg_constant_i32(0);
 921
 922         tcg_gen_extrl_i64_i32(t0_32, t0);
 923         tcg_gen_extrl_i64_i32(t1_32, t1);
 924         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
 925         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);
 926
 927         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 928         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 929         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 930         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 931         tcg_gen_extu_i32_i64(dest, cpu_NF);
 932     }
 933 }
 934
 935 /*
 936  * Load/Store generators
 937  */
 938
 939 /*
 940  * Store from GPR register to memory.
 941  */
 942 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 943                              TCGv_i64 tcg_addr, MemOp memop, int memidx,
 944                              bool iss_valid,
 945                              unsigned int iss_srt,
 946                              bool iss_sf, bool iss_ar)
 947 {
 948     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
 949
 950     if (iss_valid) {
 951         uint32_t syn;
 952
 953         syn = syn_data_abort_with_iss(0,
 954                                       (memop & MO_SIZE),
 955                                       false,
 956                                       iss_srt,
 957                                       iss_sf,
 958                                       iss_ar,
 959                                       0, 0, 0, 0, 0, false);
 960         disas_set_insn_syndrome(s, syn);
 961     }
 962 }
 963
 964 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 965                       TCGv_i64 tcg_addr, MemOp memop,
 966                       bool iss_valid,
 967                       unsigned int iss_srt,
 968                       bool iss_sf, bool iss_ar)
 969 {
 970     do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
 971                      iss_valid, iss_srt, iss_sf, iss_ar);
 972 }
 973
 974 /*
 975  * Load from memory to GPR register
 976  */
 977 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 978                              MemOp memop, bool extend, int memidx,
 979                              bool iss_valid, unsigned int iss_srt,
 980                              bool iss_sf, bool iss_ar)
 981 {
 982     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 983
 984     if (extend && (memop & MO_SIGN)) {
 985         g_assert((memop & MO_SIZE) <= MO_32);
 986         tcg_gen_ext32u_i64(dest, dest);
 987     }
 988
 989     if (iss_valid) {
 990         uint32_t syn;
 991
 992         syn = syn_data_abort_with_iss(0,
 993                                       (memop & MO_SIZE),
 994                                       (memop & MO_SIGN) != 0,
 995                                       iss_srt,
 996                                       iss_sf,
 997                                       iss_ar,
 998                                       0, 0, 0, 0, 0, false);
 999         disas_set_insn_syndrome(s, syn);
1000     }
1001 }
1002
1003 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1004                       MemOp memop, bool extend,
1005                       bool iss_valid, unsigned int iss_srt,
1006                       bool iss_sf, bool iss_ar)
1007 {
1008     do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
1009                      iss_valid, iss_srt, iss_sf, iss_ar);
1010 }
1011
1012 /*
1013  * Store from FP register to memory
1014  */
1015 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
1016 {
1017     /* This writes the bottom N bits of a 128 bit wide vector to memory */
1018     TCGv_i64 tmplo = tcg_temp_new_i64();
1019
1020     tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));
1021
1022     if ((mop & MO_SIZE) < MO_128) {
1023         tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1024     } else {
1025         TCGv_i64 tmphi = tcg_temp_new_i64();
1026         TCGv_i128 t16 = tcg_temp_new_i128();
1027
1028         tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));
1029         tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
1030
1031         tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
1032     }
1033 }
1034
1035 /*
1036  * Load from memory to FP register
1037  */
1038 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
1039 {
1040     /* This always zero-extends and writes to a full 128 bit wide vector */
1041     TCGv_i64 tmplo = tcg_temp_new_i64();
1042     TCGv_i64 tmphi = NULL;
1043
1044     if ((mop & MO_SIZE) < MO_128) {
1045         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1046     } else {
1047         TCGv_i128 t16 = tcg_temp_new_i128();
1048
1049         tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
1050
1051         tmphi = tcg_temp_new_i64();
1052         tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
1053     }
1054
1055     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
1056
1057     if (tmphi) {
1058         tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
1059     }
1060     clear_vec_high(s, tmphi != NULL, destidx);
1061 }
1062
1063 /*
1064  * Vector load/store helpers.
1065  *
1066  * The principal difference between this and a FP load is that we don't
1067  * zero extend as we are filling a partial chunk of the vector register.
1068  * These functions don't support 128 bit loads/stores, which would be
1069  * normal load/store operations.
1070  *
1071  * The _i32 versions are useful when operating on 32 bit quantities
1072  * (eg for floating point single or using Neon helper functions).
1073  */
1074
1075 /* Get value of an element within a vector register */
1076 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1077                              int element, MemOp memop)
1078 {
1079     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1080     switch ((unsigned)memop) {
1081     case MO_8:
1082         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
1083         break;
1084     case MO_16:
1085         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
1086         break;
1087     case MO_32:
1088         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
1089         break;
1090     case MO_8|MO_SIGN:
1091         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
1092         break;
1093     case MO_16|MO_SIGN:
1094         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
1095         break;
1096     case MO_32|MO_SIGN:
1097         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
1098         break;
1099     case MO_64:
1100     case MO_64|MO_SIGN:
1101         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
1102         break;
1103     default:
1104         g_assert_not_reached();
1105     }
1106 }
1107
1108 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1109                                  int element, MemOp memop)
1110 {
1111     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1112     switch (memop) {
1113     case MO_8:
1114         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1115         break;
1116     case MO_16:
1117         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1118         break;
1119     case MO_8|MO_SIGN:
1120         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1121         break;
1122     case MO_16|MO_SIGN:
1123         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1124         break;
1125     case MO_32:
1126     case MO_32|MO_SIGN:
1127         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1128         break;
1129     default:
1130         g_assert_not_reached();
1131     }
1132 }
1133
1134 /* Set value of an element within a vector register */
1135 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1136                               int element, MemOp memop)
1137 {
1138     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1139     switch (memop) {
1140     case MO_8:
1141         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1142         break;
1143     case MO_16:
1144         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1145         break;
1146     case MO_32:
1147         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1148         break;
1149     case MO_64:
1150         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1151         break;
1152     default:
1153         g_assert_not_reached();
1154     }
1155 }
1156
1157 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1158                                   int destidx, int element, MemOp memop)
1159 {
1160     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1161     switch (memop) {
1162     case MO_8:
1163         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1164         break;
1165     case MO_16:
1166         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1167         break;
1168     case MO_32:
1169         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1170         break;
1171     default:
1172         g_assert_not_reached();
1173     }
1174 }
1175
1176 /* Store from vector register to memory */
1177 static void do_vec_st(DisasContext *s, int srcidx, int element,
1178                       TCGv_i64 tcg_addr, MemOp mop)
1179 {
1180     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1181
1182     read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1183     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1184 }
1185
1186 /* Load from memory to vector register */
1187 static void do_vec_ld(DisasContext *s, int destidx, int element,
1188                       TCGv_i64 tcg_addr, MemOp mop)
1189 {
1190     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1191
1192     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1193     write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1194 }
1195
1196 /* Check that FP/Neon access is enabled. If it is, return
1197  * true. If not, emit code to generate an appropriate exception,
1198  * and return false; the caller should not emit any code for
1199  * the instruction. Note that this check must happen after all
1200  * unallocated-encoding checks (otherwise the syndrome information
1201  * for the resulting exception will be incorrect).
1202  */
1203 static bool fp_access_check_only(DisasContext *s)
1204 {
1205     if (s->fp_excp_el) {
1206         assert(!s->fp_access_checked);
1207         s->fp_access_checked = true;
1208
1209         gen_exception_insn_el(s, 0, EXCP_UDEF,
1210                               syn_fp_access_trap(1, 0xe, false, 0),
1211                               s->fp_excp_el);
1212         return false;
1213     }
1214     s->fp_access_checked = true;
1215     return true;
1216 }
1217
1218 static bool fp_access_check(DisasContext *s)
1219 {
1220     if (!fp_access_check_only(s)) {
1221         return false;
1222     }
1223     if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1224         gen_exception_insn(s, 0, EXCP_UDEF,
1225                            syn_smetrap(SME_ET_Streaming, false));
1226         return false;
1227     }
1228     return true;
1229 }
1230
1231 /*
1232  * Check that SVE access is enabled.  If it is, return true.
1233  * If not, emit code to generate an appropriate exception and return false.
1234  * This function corresponds to CheckSVEEnabled().
1235  */
1236 bool sve_access_check(DisasContext *s)
1237 {
1238     if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
1239         assert(dc_isar_feature(aa64_sme, s));
1240         if (!sme_sm_enabled_check(s)) {
1241             goto fail_exit;
1242         }
1243     } else if (s->sve_excp_el) {
1244         gen_exception_insn_el(s, 0, EXCP_UDEF,
1245                               syn_sve_access_trap(), s->sve_excp_el);
1246         goto fail_exit;
1247     }
1248     s->sve_access_checked = true;
1249     return fp_access_check(s);
1250
1251  fail_exit:
1252     /* Assert that we only raise one exception per instruction. */
1253     assert(!s->sve_access_checked);
1254     s->sve_access_checked = true;
1255     return false;
1256 }
1257
1258 /*
1259  * Check that SME access is enabled, raise an exception if not.
1260  * Note that this function corresponds to CheckSMEAccess and is
1261  * only used directly for cpregs.
1262  */
1263 static bool sme_access_check(DisasContext *s)
1264 {
1265     if (s->sme_excp_el) {
1266         gen_exception_insn_el(s, 0, EXCP_UDEF,
1267                               syn_smetrap(SME_ET_AccessTrap, false),
1268                               s->sme_excp_el);
1269         return false;
1270     }
1271     return true;
1272 }
1273
1274 /* This function corresponds to CheckSMEEnabled. */
1275 bool sme_enabled_check(DisasContext *s)
1276 {
1277     /*
1278      * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1279      * to be zero when fp_excp_el has priority.  This is because we need
1280      * sme_excp_el by itself for cpregs access checks.
1281      */
1282     if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
1283         s->fp_access_checked = true;
1284         return sme_access_check(s);
1285     }
1286     return fp_access_check_only(s);
1287 }
1288
1289 /* Common subroutine for CheckSMEAnd*Enabled. */
1290 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1291 {
1292     if (!sme_enabled_check(s)) {
1293         return false;
1294     }
1295     if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1296         gen_exception_insn(s, 0, EXCP_UDEF,
1297                            syn_smetrap(SME_ET_NotStreaming, false));
1298         return false;
1299     }
1300     if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1301         gen_exception_insn(s, 0, EXCP_UDEF,
1302                            syn_smetrap(SME_ET_InactiveZA, false));
1303         return false;
1304     }
1305     return true;
1306 }
1307
1308 /*
1309  * This utility function is for doing register extension with an
1310  * optional shift. You will likely want to pass a temporary for the
1311  * destination register. See DecodeRegExtend() in the ARM ARM.
1312  */
1313 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1314                               int option, unsigned int shift)
1315 {
1316     int extsize = extract32(option, 0, 2);
1317     bool is_signed = extract32(option, 2, 1);
1318
1319     if (is_signed) {
1320         switch (extsize) {
1321         case 0:
1322             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1323             break;
1324         case 1:
1325             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1326             break;
1327         case 2:
1328             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1329             break;
1330         case 3:
1331             tcg_gen_mov_i64(tcg_out, tcg_in);
1332             break;
1333         }
1334     } else {
1335         switch (extsize) {
1336         case 0:
1337             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1338             break;
1339         case 1:
1340             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1341             break;
1342         case 2:
1343             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1344             break;
1345         case 3:
1346             tcg_gen_mov_i64(tcg_out, tcg_in);
1347             break;
1348         }
1349     }
1350
1351     if (shift) {
1352         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1353     }
1354 }
1355
1356 static inline void gen_check_sp_alignment(DisasContext *s)
1357 {
1358     /* The AArch64 architecture mandates that (if enabled via PSTATE
1359      * or SCTLR bits) there is a check that SP is 16-aligned on every
1360      * SP-relative load or store (with an exception generated if it is not).
1361      * In line with general QEMU practice regarding misaligned accesses,
1362      * we omit these checks for the sake of guest program performance.
1363      * This function is provided as a hook so we can more easily add these
1364      * checks in future (possibly as a "favour catching guest program bugs
1365      * over speed" user selectable option).
1366      */
1367 }
1368
1369 /*
1370  * This provides a simple table based table lookup decoder. It is
1371  * intended to be used when the relevant bits for decode are too
1372  * awkwardly placed and switch/if based logic would be confusing and
1373  * deeply nested. Since it's a linear search through the table, tables
1374  * should be kept small.
1375  *
1376  * It returns the first handler where insn & mask == pattern, or
1377  * NULL if there is no match.
1378  * The table is terminated by an empty mask (i.e. 0)
1379  */
1380 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1381                                                uint32_t insn)
1382 {
1383     const AArch64DecodeTable *tptr = table;
1384
1385     while (tptr->mask) {
1386         if ((insn & tptr->mask) == tptr->pattern) {
1387             return tptr->disas_fn;
1388         }
1389         tptr++;
1390     }
1391     return NULL;
1392 }
1393
1394 /*
1395  * The instruction disassembly implemented here matches
1396  * the instruction encoding classifications in chapter C4
1397  * of the ARM Architecture Reference Manual (DDI0487B_a);
1398  * classification names and decode diagrams here should generally
1399  * match up with those in the manual.
1400  */
1401
1402 static bool trans_B(DisasContext *s, arg_i *a)
1403 {
1404     reset_btype(s);
1405     gen_goto_tb(s, 0, a->imm);
1406     return true;
1407 }
1408
1409 static bool trans_BL(DisasContext *s, arg_i *a)
1410 {
1411     gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
1412     reset_btype(s);
1413     gen_goto_tb(s, 0, a->imm);
1414     return true;
1415 }
1416
1417
1418 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1419 {
1420     DisasLabel match;
1421     TCGv_i64 tcg_cmp;
1422
1423     tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1424     reset_btype(s);
1425
1426     match = gen_disas_label(s);
1427     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1428                         tcg_cmp, 0, match.label);
1429     gen_goto_tb(s, 0, 4);
1430     set_disas_label(s, match);
1431     gen_goto_tb(s, 1, a->imm);
1432     return true;
1433 }
1434
1435 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1436 {
1437     DisasLabel match;
1438     TCGv_i64 tcg_cmp;
1439
1440     tcg_cmp = tcg_temp_new_i64();
1441     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1442
1443     reset_btype(s);
1444
1445     match = gen_disas_label(s);
1446     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1447                         tcg_cmp, 0, match.label);
1448     gen_goto_tb(s, 0, 4);
1449     set_disas_label(s, match);
1450     gen_goto_tb(s, 1, a->imm);
1451     return true;
1452 }
1453
1454 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1455 {
1456     reset_btype(s);
1457     if (a->cond < 0x0e) {
1458         /* genuinely conditional branches */
1459         DisasLabel match = gen_disas_label(s);
1460         arm_gen_test_cc(a->cond, match.label);
1461         gen_goto_tb(s, 0, 4);
1462         set_disas_label(s, match);
1463         gen_goto_tb(s, 1, a->imm);
1464     } else {
1465         /* 0xe and 0xf are both "always" conditions */
1466         gen_goto_tb(s, 0, a->imm);
1467     }
1468     return true;
1469 }
1470
1471 static void set_btype_for_br(DisasContext *s, int rn)
1472 {
1473     if (dc_isar_feature(aa64_bti, s)) {
1474         /* BR to {x16,x17} or !guard -> 1, else 3.  */
1475         set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
1476     }
1477 }
1478
1479 static void set_btype_for_blr(DisasContext *s)
1480 {
1481     if (dc_isar_feature(aa64_bti, s)) {
1482         /* BLR sets BTYPE to 2, regardless of source guarded page.  */
1483         set_btype(s, 2);
1484     }
1485 }
1486
1487 static bool trans_BR(DisasContext *s, arg_r *a)
1488 {
1489     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1490     set_btype_for_br(s, a->rn);
1491     s->base.is_jmp = DISAS_JUMP;
1492     return true;
1493 }
1494
1495 static bool trans_BLR(DisasContext *s, arg_r *a)
1496 {
1497     TCGv_i64 dst = cpu_reg(s, a->rn);
1498     TCGv_i64 lr = cpu_reg(s, 30);
1499     if (dst == lr) {
1500         TCGv_i64 tmp = tcg_temp_new_i64();
1501         tcg_gen_mov_i64(tmp, dst);
1502         dst = tmp;
1503     }
1504     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1505     gen_a64_set_pc(s, dst);
1506     set_btype_for_blr(s);
1507     s->base.is_jmp = DISAS_JUMP;
1508     return true;
1509 }
1510
1511 static bool trans_RET(DisasContext *s, arg_r *a)
1512 {
1513     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1514     s->base.is_jmp = DISAS_JUMP;
1515     return true;
1516 }
1517
1518 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1519                                    TCGv_i64 modifier, bool use_key_a)
1520 {
1521     TCGv_i64 truedst;
1522     /*
1523      * Return the branch target for a BRAA/RETA/etc, which is either
1524      * just the destination dst, or that value with the pauth check
1525      * done and the code removed from the high bits.
1526      */
1527     if (!s->pauth_active) {
1528         return dst;
1529     }
1530
1531     truedst = tcg_temp_new_i64();
1532     if (use_key_a) {
1533         gen_helper_autia_combined(truedst, cpu_env, dst, modifier);
1534     } else {
1535         gen_helper_autib_combined(truedst, cpu_env, dst, modifier);
1536     }
1537     return truedst;
1538 }
1539
1540 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1541 {
1542     TCGv_i64 dst;
1543
1544     if (!dc_isar_feature(aa64_pauth, s)) {
1545         return false;
1546     }
1547
1548     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1549     gen_a64_set_pc(s, dst);
1550     set_btype_for_br(s, a->rn);
1551     s->base.is_jmp = DISAS_JUMP;
1552     return true;
1553 }
1554
1555 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1556 {
1557     TCGv_i64 dst, lr;
1558
1559     if (!dc_isar_feature(aa64_pauth, s)) {
1560         return false;
1561     }
1562
1563     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1564     lr = cpu_reg(s, 30);
1565     if (dst == lr) {
1566         TCGv_i64 tmp = tcg_temp_new_i64();
1567         tcg_gen_mov_i64(tmp, dst);
1568         dst = tmp;
1569     }
1570     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1571     gen_a64_set_pc(s, dst);
1572     set_btype_for_blr(s);
1573     s->base.is_jmp = DISAS_JUMP;
1574     return true;
1575 }
1576
1577 static bool trans_RETA(DisasContext *s, arg_reta *a)
1578 {
1579     TCGv_i64 dst;
1580
1581     dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1582     gen_a64_set_pc(s, dst);
1583     s->base.is_jmp = DISAS_JUMP;
1584     return true;
1585 }
1586
1587 static bool trans_BRA(DisasContext *s, arg_bra *a)
1588 {
1589     TCGv_i64 dst;
1590
1591     if (!dc_isar_feature(aa64_pauth, s)) {
1592         return false;
1593     }
1594     dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m);
1595     gen_a64_set_pc(s, dst);
1596     set_btype_for_br(s, a->rn);
1597     s->base.is_jmp = DISAS_JUMP;
1598     return true;
1599 }
1600
1601 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1602 {
1603     TCGv_i64 dst, lr;
1604
1605     if (!dc_isar_feature(aa64_pauth, s)) {
1606         return false;
1607     }
1608     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1609     lr = cpu_reg(s, 30);
1610     if (dst == lr) {
1611         TCGv_i64 tmp = tcg_temp_new_i64();
1612         tcg_gen_mov_i64(tmp, dst);
1613         dst = tmp;
1614     }
1615     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1616     gen_a64_set_pc(s, dst);
1617     set_btype_for_blr(s);
1618     s->base.is_jmp = DISAS_JUMP;
1619     return true;
1620 }
1621
1622 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1623 {
1624     TCGv_i64 dst;
1625
1626     if (s->current_el == 0) {
1627         return false;
1628     }
1629     if (s->fgt_eret) {
1630         gen_exception_insn_el(s, 0, EXCP_UDEF, 0, 2);
1631         return true;
1632     }
1633     dst = tcg_temp_new_i64();
1634     tcg_gen_ld_i64(dst, cpu_env,
1635                    offsetof(CPUARMState, elr_el[s->current_el]));
1636
1637     translator_io_start(&s->base);
1638
1639     gen_helper_exception_return(cpu_env, dst);
1640     /* Must exit loop to check un-masked IRQs */
1641     s->base.is_jmp = DISAS_EXIT;
1642     return true;
1643 }
1644
1645 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1646 {
1647     TCGv_i64 dst;
1648
1649     if (!dc_isar_feature(aa64_pauth, s)) {
1650         return false;
1651     }
1652     if (s->current_el == 0) {
1653         return false;
1654     }
1655     /* The FGT trap takes precedence over an auth trap. */
1656     if (s->fgt_eret) {
1657         gen_exception_insn_el(s, 0, EXCP_UDEF, a->m ? 3 : 2, 2);
1658         return true;
1659     }
1660     dst = tcg_temp_new_i64();
1661     tcg_gen_ld_i64(dst, cpu_env,
1662                    offsetof(CPUARMState, elr_el[s->current_el]));
1663
1664     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1665
1666     translator_io_start(&s->base);
1667
1668     gen_helper_exception_return(cpu_env, dst);
1669     /* Must exit loop to check un-masked IRQs */
1670     s->base.is_jmp = DISAS_EXIT;
1671     return true;
1672 }
1673
1674 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1675 {
1676     return true;
1677 }
1678
1679 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1680 {
1681     /*
1682      * When running in MTTCG we don't generate jumps to the yield and
1683      * WFE helpers as it won't affect the scheduling of other vCPUs.
1684      * If we wanted to more completely model WFE/SEV so we don't busy
1685      * spin unnecessarily we would need to do something more involved.
1686      */
1687     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1688         s->base.is_jmp = DISAS_YIELD;
1689     }
1690     return true;
1691 }
1692
1693 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1694 {
1695     s->base.is_jmp = DISAS_WFI;
1696     return true;
1697 }
1698
1699 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1700 {
1701     /*
1702      * When running in MTTCG we don't generate jumps to the yield and
1703      * WFE helpers as it won't affect the scheduling of other vCPUs.
1704      * If we wanted to more completely model WFE/SEV so we don't busy
1705      * spin unnecessarily we would need to do something more involved.
1706      */
1707     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1708         s->base.is_jmp = DISAS_WFE;
1709     }
1710     return true;
1711 }
1712
1713 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1714 {
1715     if (s->pauth_active) {
1716         gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
1717     }
1718     return true;
1719 }
1720
1721 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
1722 {
1723     if (s->pauth_active) {
1724         gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1725     }
1726     return true;
1727 }
1728
1729 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
1730 {
1731     if (s->pauth_active) {
1732         gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1733     }
1734     return true;
1735 }
1736
1737 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
1738 {
1739     if (s->pauth_active) {
1740         gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1741     }
1742     return true;
1743 }
1744
1745 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
1746 {
1747     if (s->pauth_active) {
1748         gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1749     }
1750     return true;
1751 }
1752
1753 static bool trans_ESB(DisasContext *s, arg_ESB *a)
1754 {
1755     /* Without RAS, we must implement this as NOP. */
1756     if (dc_isar_feature(aa64_ras, s)) {
1757         /*
1758          * QEMU does not have a source of physical SErrors,
1759          * so we are only concerned with virtual SErrors.
1760          * The pseudocode in the ARM for this case is
1761          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1762          *      AArch64.vESBOperation();
1763          * Most of the condition can be evaluated at translation time.
1764          * Test for EL2 present, and defer test for SEL2 to runtime.
1765          */
1766         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1767             gen_helper_vesb(cpu_env);
1768         }
1769     }
1770     return true;
1771 }
1772
1773 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
1774 {
1775     if (s->pauth_active) {
1776         gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0));
1777     }
1778     return true;
1779 }
1780
1781 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
1782 {
1783     if (s->pauth_active) {
1784         gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1785     }
1786     return true;
1787 }
1788
1789 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
1790 {
1791     if (s->pauth_active) {
1792         gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0));
1793     }
1794     return true;
1795 }
1796
1797 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
1798 {
1799     if (s->pauth_active) {
1800         gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1801     }
1802     return true;
1803 }
1804
1805 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
1806 {
1807     if (s->pauth_active) {
1808         gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0));
1809     }
1810     return true;
1811 }
1812
1813 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
1814 {
1815     if (s->pauth_active) {
1816         gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1817     }
1818     return true;
1819 }
1820
1821 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
1822 {
1823     if (s->pauth_active) {
1824         gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0));
1825     }
1826     return true;
1827 }
1828
1829 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
1830 {
1831     if (s->pauth_active) {
1832         gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1833     }
1834     return true;
1835 }
1836
1837 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
1838 {
1839     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1840     return true;
1841 }
1842
1843 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
1844 {
1845     /* We handle DSB and DMB the same way */
1846     TCGBar bar;
1847
1848     switch (a->types) {
1849     case 1: /* MBReqTypes_Reads */
1850         bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1851         break;
1852     case 2: /* MBReqTypes_Writes */
1853         bar = TCG_BAR_SC | TCG_MO_ST_ST;
1854         break;
1855     default: /* MBReqTypes_All */
1856         bar = TCG_BAR_SC | TCG_MO_ALL;
1857         break;
1858     }
1859     tcg_gen_mb(bar);
1860     return true;
1861 }
1862
1863 static bool trans_ISB(DisasContext *s, arg_ISB *a)
1864 {
1865     /*
1866      * We need to break the TB after this insn to execute
1867      * self-modifying code correctly and also to take
1868      * any pending interrupts immediately.
1869      */
1870     reset_btype(s);
1871     gen_goto_tb(s, 0, 4);
1872     return true;
1873 }
1874
1875 static bool trans_SB(DisasContext *s, arg_SB *a)
1876 {
1877     if (!dc_isar_feature(aa64_sb, s)) {
1878         return false;
1879     }
1880     /*
1881      * TODO: There is no speculation barrier opcode for TCG;
1882      * MB and end the TB instead.
1883      */
1884     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1885     gen_goto_tb(s, 0, 4);
1886     return true;
1887 }
1888
1889 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
1890 {
1891     if (!dc_isar_feature(aa64_condm_4, s)) {
1892         return false;
1893     }
1894     tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1895     return true;
1896 }
1897
1898 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
1899 {
1900     TCGv_i32 z;
1901
1902     if (!dc_isar_feature(aa64_condm_5, s)) {
1903         return false;
1904     }
1905
1906     z = tcg_temp_new_i32();
1907
1908     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1909
1910     /*
1911      * (!C & !Z) << 31
1912      * (!(C | Z)) << 31
1913      * ~((C | Z) << 31)
1914      * ~-(C | Z)
1915      * (C | Z) - 1
1916      */
1917     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1918     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1919
1920     /* !(Z & C) */
1921     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1922     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1923
1924     /* (!C & Z) << 31 -> -(Z & ~C) */
1925     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1926     tcg_gen_neg_i32(cpu_VF, cpu_VF);
1927
1928     /* C | Z */
1929     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1930
1931     return true;
1932 }
1933
1934 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
1935 {
1936     if (!dc_isar_feature(aa64_condm_5, s)) {
1937         return false;
1938     }
1939
1940     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
1941     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
1942
1943     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1944     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1945
1946     tcg_gen_movi_i32(cpu_NF, 0);
1947     tcg_gen_movi_i32(cpu_VF, 0);
1948
1949     return true;
1950 }
1951
1952 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
1953 {
1954     if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
1955         return false;
1956     }
1957     if (a->imm & 1) {
1958         set_pstate_bits(PSTATE_UAO);
1959     } else {
1960         clear_pstate_bits(PSTATE_UAO);
1961     }
1962     gen_rebuild_hflags(s);
1963     s->base.is_jmp = DISAS_TOO_MANY;
1964     return true;
1965 }
1966
1967 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
1968 {
1969     if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
1970         return false;
1971     }
1972     if (a->imm & 1) {
1973         set_pstate_bits(PSTATE_PAN);
1974     } else {
1975         clear_pstate_bits(PSTATE_PAN);
1976     }
1977     gen_rebuild_hflags(s);
1978     s->base.is_jmp = DISAS_TOO_MANY;
1979     return true;
1980 }
1981
1982 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
1983 {
1984     if (s->current_el == 0) {
1985         return false;
1986     }
1987     gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(a->imm & PSTATE_SP));
1988     s->base.is_jmp = DISAS_TOO_MANY;
1989     return true;
1990 }
1991
1992 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
1993 {
1994     if (!dc_isar_feature(aa64_ssbs, s)) {
1995         return false;
1996     }
1997     if (a->imm & 1) {
1998         set_pstate_bits(PSTATE_SSBS);
1999     } else {
2000         clear_pstate_bits(PSTATE_SSBS);
2001     }
2002     /* Don't need to rebuild hflags since SSBS is a nop */
2003     s->base.is_jmp = DISAS_TOO_MANY;
2004     return true;
2005 }
2006
2007 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2008 {
2009     if (!dc_isar_feature(aa64_dit, s)) {
2010         return false;
2011     }
2012     if (a->imm & 1) {
2013         set_pstate_bits(PSTATE_DIT);
2014     } else {
2015         clear_pstate_bits(PSTATE_DIT);
2016     }
2017     /* There's no need to rebuild hflags because DIT is a nop */
2018     s->base.is_jmp = DISAS_TOO_MANY;
2019     return true;
2020 }
2021
2022 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2023 {
2024     if (dc_isar_feature(aa64_mte, s)) {
2025         /* Full MTE is enabled -- set the TCO bit as directed. */
2026         if (a->imm & 1) {
2027             set_pstate_bits(PSTATE_TCO);
2028         } else {
2029             clear_pstate_bits(PSTATE_TCO);
2030         }
2031         gen_rebuild_hflags(s);
2032         /* Many factors, including TCO, go into MTE_ACTIVE. */
2033         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2034         return true;
2035     } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2036         /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
2037         return true;
2038     } else {
2039         /* Insn not present */
2040         return false;
2041     }
2042 }
2043
2044 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2045 {
2046     gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(a->imm));
2047     s->base.is_jmp = DISAS_TOO_MANY;
2048     return true;
2049 }
2050
2051 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2052 {
2053     gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(a->imm));
2054     /* Exit the cpu loop to re-evaluate pending IRQs. */
2055     s->base.is_jmp = DISAS_UPDATE_EXIT;
2056     return true;
2057 }
2058
2059 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2060 {
2061     if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2062         return false;
2063     }
2064     if (sme_access_check(s)) {
2065         int old = s->pstate_sm | (s->pstate_za << 1);
2066         int new = a->imm * 3;
2067
2068         if ((old ^ new) & a->mask) {
2069             /* At least one bit changes. */
2070             gen_helper_set_svcr(cpu_env, tcg_constant_i32(new),
2071                                 tcg_constant_i32(a->mask));
2072             s->base.is_jmp = DISAS_TOO_MANY;
2073         }
2074     }
2075     return true;
2076 }
2077
2078 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2079 {
2080     TCGv_i32 tmp = tcg_temp_new_i32();
2081     TCGv_i32 nzcv = tcg_temp_new_i32();
2082
2083     /* build bit 31, N */
2084     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2085     /* build bit 30, Z */
2086     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2087     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2088     /* build bit 29, C */
2089     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2090     /* build bit 28, V */
2091     tcg_gen_shri_i32(tmp, cpu_VF, 31);
2092     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2093     /* generate result */
2094     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2095 }
2096
2097 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2098 {
2099     TCGv_i32 nzcv = tcg_temp_new_i32();
2100
2101     /* take NZCV from R[t] */
2102     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2103
2104     /* bit 31, N */
2105     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2106     /* bit 30, Z */
2107     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2108     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2109     /* bit 29, C */
2110     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2111     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2112     /* bit 28, V */
2113     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2114     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2115 }
2116
2117 static void gen_sysreg_undef(DisasContext *s, bool isread,
2118                              uint8_t op0, uint8_t op1, uint8_t op2,
2119                              uint8_t crn, uint8_t crm, uint8_t rt)
2120 {
2121     /*
2122      * Generate code to emit an UNDEF with correct syndrome
2123      * information for a failed system register access.
2124      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2125      * but if FEAT_IDST is implemented then read accesses to registers
2126      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2127      * syndrome.
2128      */
2129     uint32_t syndrome;
2130
2131     if (isread && dc_isar_feature(aa64_ids, s) &&
2132         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2133         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2134     } else {
2135         syndrome = syn_uncategorized();
2136     }
2137     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2138 }
2139
/* MRS - move from system register
 * MSR (register) - move to system register
 * SYS
 * SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 *
 * Generate code for one system register access.
 *
 * @s:       translation context
 * @isread:  true for a read into Xt, false for a write from Xt
 * @op0, @op1, @op2, @crn, @crm: register encoding fields, combined
 *           into the key used to look up the ARMCPRegInfo
 * @rt:      index of the general purpose register transferred
 *
 * Steps, in order:
 *  1. for crn 11 or 15 at EL1, call the TIDCP helper;
 *  2. UNDEF if the register is unknown or fails cp_access_ok();
 *  3. emit a runtime access check when the register has an accessfn
 *     or fine-grained traps apply, else just sync the PC if the
 *     register is flagged ARM_CP_RAISES_EXC;
 *  4. handle the ARM_CP_SPECIAL_MASK types inline and return;
 *  5. apply the FPU/SVE/SME access checks;
 *  6. perform the read or write as a constant, a helper call, or a
 *     direct load/store at ri->fieldoffset;
 *  7. request a TB exit after an I/O access that asked for one, or
 *     after any write not flagged ARM_CP_SUPPRESS_TB_END.
 */
static void handle_sys(DisasContext *s, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                      crn, crm, op0, op1, op2);
    const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
    bool need_exit_tb = false;
    /* Stays NULL unless the runtime access check below produces it. */
    TCGv_ptr tcg_ri = NULL;
    TCGv_i64 tcg_rt;
    uint32_t syndrome;

    if (crn == 11 || crn == 15) {
        /*
         * Check for TIDCP trap, which must take precedence over
         * the UNDEF for "no such register" etc.
         */
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        switch (s->current_el) {
        case 1:
            gen_helper_tidcp_el1(cpu_env, tcg_constant_i32(syndrome));
            break;
        }
    }

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
        return;
    }

    if (ri->accessfn || (ri->fgt && s->fgt_active)) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        gen_a64_update_pc(s, 0);
        /* The helper's result is kept in tcg_ri and reused further down. */
        tcg_ri = tcg_temp_new_ptr();
        gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
                                       tcg_constant_i32(key),
                                       tcg_constant_i32(syndrome),
                                       tcg_constant_i32(isread));
    } else if (ri->type & ARM_CP_RAISES_EXC) {
        /*
         * The readfn or writefn might raise an exception;
         * synchronize the CPU state in case it does.
         */
        gen_a64_update_pc(s, 0);
    }

    /* Handle special cases first */
    switch (ri->type & ARM_CP_SPECIAL_MASK) {
    case 0:
        /* Not a special register: continue with the generic path below. */
        break;
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        if (s->mte_active[0]) {
            int desc = 0;

            desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
            desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
            desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);

            /* With MTE active the address comes from the check helper. */
            tcg_rt = tcg_temp_new_i64();
            gen_helper_mte_check_zva(tcg_rt, cpu_env,
                                     tcg_constant_i32(desc), cpu_reg(s, rt));
        } else {
            tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
        }
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    case ARM_CP_DC_GVA:
        {
            TCGv_i64 clean_addr, tag;

            /*
             * DC_GVA, like DC_ZVA, requires that we supply the original
             * pointer for an invalid page.  Probe that address first.
             */
            tcg_rt = cpu_reg(s, rt);
            clean_addr = clean_data_tbi(s, tcg_rt);
            gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);

            if (s->ata) {
                /* Extract the tag from the register to match STZGM.  */
                tag = tcg_temp_new_i64();
                tcg_gen_shri_i64(tag, tcg_rt, 56);
                gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
            }
        }
        return;
    case ARM_CP_DC_GZVA:
        {
            TCGv_i64 clean_addr, tag;

            /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
            tcg_rt = cpu_reg(s, rt);
            clean_addr = clean_data_tbi(s, tcg_rt);
            gen_helper_dc_zva(cpu_env, clean_addr);

            if (s->ata) {
                /* Extract the tag from the register to match STZGM.  */
                tag = tcg_temp_new_i64();
                tcg_gen_shri_i64(tag, tcg_rt, 56);
                gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
            }
        }
        return;
    default:
        g_assert_not_reached();
    }
    /* Stop here if the FPU/SVE/SME access check for this register fails. */
    if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
        return;
    } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
        return;
    } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
        return;
    }

    if (ri->type & ARM_CP_IO) {
        /* I/O operations must end the TB here (whether read or write) */
        need_exit_tb = translator_io_start(&s->base);
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            /* Constant registers read as their reset value. */
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            /* Look the register up now unless the access check already did. */
            if (!tcg_ri) {
                tcg_ri = gen_lookup_cp_reg(key);
            }
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tcg_ri);
        } else {
            /* No readfn: load directly from the field at ri->fieldoffset. */
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            /* Look the register up now unless the access check already did. */
            if (!tcg_ri) {
                tcg_ri = gen_lookup_cp_reg(key);
            }
            gen_helper_set_cp_reg64(cpu_env, tcg_ri, tcg_rt);
        } else {
            /* No writefn: store directly to the field at ri->fieldoffset. */
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /*
         * A write to any coprocessor register that ends a TB
         * must rebuild the hflags for the next TB.
         */
        gen_rebuild_hflags(s);
        /*
         * We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        need_exit_tb = true;
    }
    if (need_exit_tb) {
        s->base.is_jmp = DISAS_UPDATE_EXIT;
    }
}
2343
2344 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2345 {
2346     handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2347     return true;
2348 }
2349
2350 static bool trans_SVC(DisasContext *s, arg_i *a)
2351 {
2352     /*
2353      * For SVC, HVC and SMC we advance the single-step state
2354      * machine before taking the exception. This is architecturally
2355      * mandated, to ensure that single-stepping a system call
2356      * instruction works properly.
2357      */
2358     uint32_t syndrome = syn_aa64_svc(a->imm);
2359     if (s->fgt_svc) {
2360         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2361         return true;
2362     }
2363     gen_ss_advance(s);
2364     gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2365     return true;
2366 }
2367
2368 static bool trans_HVC(DisasContext *s, arg_i *a)
2369 {
2370     if (s->current_el == 0) {
2371         unallocated_encoding(s);
2372         return true;
2373     }
2374     /*
2375      * The pre HVC helper handles cases when HVC gets trapped
2376      * as an undefined insn by runtime configuration.
2377      */
2378     gen_a64_update_pc(s, 0);
2379     gen_helper_pre_hvc(cpu_env);
2380     /* Architecture requires ss advance before we do the actual work */
2381     gen_ss_advance(s);
2382     gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), 2);
2383     return true;
2384 }
2385
2386 static bool trans_SMC(DisasContext *s, arg_i *a)
2387 {
2388     if (s->current_el == 0) {
2389         unallocated_encoding(s);
2390         return true;
2391     }
2392     gen_a64_update_pc(s, 0);
2393     gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2394     /* Architecture requires ss advance before we do the actual work */
2395     gen_ss_advance(s);
2396     gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2397     return true;
2398 }
2399
2400 static bool trans_BRK(DisasContext *s, arg_i *a)
2401 {
2402     gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2403     return true;
2404 }
2405
2406 static bool trans_HLT(DisasContext *s, arg_i *a)
2407 {
2408     /*
2409      * HLT. This has two purposes.
2410      * Architecturally, it is an external halting debug instruction.
2411      * Since QEMU doesn't implement external debug, we treat this as
2412      * it is required for halting debug disabled: it will UNDEF.
2413      * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2414      */
2415     if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2416         gen_exception_internal_insn(s, EXCP_SEMIHOST);
2417     } else {
2418         unallocated_encoding(s);
2419     }
2420     return true;
2421 }
2422
2423 /*
2424  * Load/Store exclusive instructions are implemented by remembering
2425  * the value/address loaded, and seeing if these are the same
2426  * when the store is performed. This is not actually the architecturally
2427  * mandated semantics, but it works for typical guest code sequences
2428  * and avoids having to monitor regular stores.
2429  *
2430  * The store exclusive uses the atomic cmpxchg primitives to avoid
2431  * races in multi-threaded linux-user and when MTTCG softmmu is
2432  * enabled.
2433  */
2434 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2435                                int size, bool is_pair)
2436 {
2437     int idx = get_mem_index(s);
2438     TCGv_i64 dirty_addr, clean_addr;
2439     MemOp memop = check_atomic_align(s, rn, size + is_pair);
2440
2441     s->is_ldex = true;
2442     dirty_addr = cpu_reg_sp(s, rn);
2443     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2444
2445     g_assert(size <= 3);
2446     if (is_pair) {
2447         g_assert(size >= 2);
2448         if (size == 2) {
2449             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2450             if (s->be_data == MO_LE) {
2451                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2452                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2453             } else {
2454                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2455                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2456             }
2457         } else {
2458             TCGv_i128 t16 = tcg_temp_new_i128();
2459
2460             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2461
2462             if (s->be_data == MO_LE) {
2463                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2464                                       cpu_exclusive_high, t16);
2465             } else {
2466                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2467                                       cpu_exclusive_val, t16);
2468             }
2469             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2470             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2471         }
2472     } else {
2473         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2474         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2475     }
2476     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2477 }
2478
/*
 * Generate a store-exclusive.
 *
 * @rd:       register that receives the status (0 on success, 1 on failure)
 * @rt, @rt2: registers holding the data (@rt2 is used only when @is_pair)
 * @rn:       base register; 31 selects SP
 * @size:     log2 of the element size
 * @is_pair:  non-zero for the pair forms
 *
 * The address is first compared with cpu_exclusive_addr; a mismatch
 * branches straight to the failure path.  Otherwise an atomic cmpxchg
 * against the value recorded by the load-exclusive performs the store,
 * and the status is derived from whether the old memory contents
 * matched.  On every path cpu_exclusive_addr ends up set to -1.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                int rn, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    TCGLabel *fail_label = gen_new_label();
    TCGLabel *done_label = gen_new_label();
    TCGv_i64 tmp, clean_addr;
    MemOp memop;

    /*
     * FIXME: We are out of spec here.  We have recorded only the address
     * from load_exclusive, not the entire range, and we assume that the
     * size of the access on both sides match.  The architecture allows the
     * store to be smaller than the load, so long as the stored bytes are
     * within the range recorded by the load.
     */

    /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
    tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);

    /*
     * The write, and any associated faults, only happen if the virtual
     * and physical addresses pass the exclusive monitor check.  These
     * faults are exceedingly unlikely, because normally the guest uses
     * the exact same address register for the load_exclusive, and we
     * would have recognized these faults there.
     *
     * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
     * unaligned 4-byte write within the range of an aligned 8-byte load.
     * With LSE2, the store would need to cross a 16-byte boundary when the
     * load did not, which would mean the store is outside the range
     * recorded for the monitor, which would have failed a corrected monitor
     * check above.  For now, we assume no size change and retain the
     * MO_ALIGN to let tcg know what we checked in the load_exclusive.
     *
     * It is possible to trigger an MTE fault, by performing the load with
     * a virtual address with a valid tag and performing the store with the
     * same virtual address and a different invalid tag.
     */
    memop = size + is_pair;
    if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
        memop |= MO_ALIGN;
    }
    memop = finalize_memop(s, memop);
    /* Result unused: the cmpxchg below addresses via cpu_exclusive_addr. */
    gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);

    /* tmp ends up holding the status: 0 if the cmpxchg matched, else 1. */
    tmp = tcg_temp_new_i64();
    if (is_pair) {
        if (size == 2) {
            /* Pair of 32-bit registers: one 64-bit cmpxchg. */
            if (s->be_data == MO_LE) {
                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
            } else {
                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
            }
            tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
                                       cpu_exclusive_val, tmp,
                                       get_mem_index(s), memop);
            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
        } else {
            /* Pair of 64-bit registers: one 128-bit cmpxchg. */
            TCGv_i128 t16 = tcg_temp_new_i128();
            TCGv_i128 c16 = tcg_temp_new_i128();
            TCGv_i64 a, b;

            if (s->be_data == MO_LE) {
                tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
                tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
                                        cpu_exclusive_high);
            } else {
                tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
                tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
                                        cpu_exclusive_val);
            }

            tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
                                        get_mem_index(s), memop);

            /* Split the old memory value back into two 64-bit halves. */
            a = tcg_temp_new_i64();
            b = tcg_temp_new_i64();
            if (s->be_data == MO_LE) {
                tcg_gen_extr_i128_i64(a, b, t16);
            } else {
                tcg_gen_extr_i128_i64(b, a, t16);
            }

            /* Any bit differing from the recorded value means failure. */
            tcg_gen_xor_i64(a, a, cpu_exclusive_val);
            tcg_gen_xor_i64(b, b, cpu_exclusive_high);
            tcg_gen_or_i64(tmp, a, b);

            tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
        }
    } else {
        tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
                                   cpu_reg(s, rt), get_mem_index(s), memop);
        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
    }
    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
    tcg_gen_br(done_label);

    /* Address mismatch: report failure without touching memory. */
    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    /* Both paths overwrite the recorded address with -1. */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
2594
2595 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2596                                  int rn, int size)
2597 {
2598     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2599     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2600     int memidx = get_mem_index(s);
2601     TCGv_i64 clean_addr;
2602     MemOp memop;
2603
2604     if (rn == 31) {
2605         gen_check_sp_alignment(s);
2606     }
2607     memop = check_atomic_align(s, rn, size);
2608     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2609     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
2610                                memidx, memop);
2611 }
2612
2613 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2614                                       int rn, int size)
2615 {
2616     TCGv_i64 s1 = cpu_reg(s, rs);
2617     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2618     TCGv_i64 t1 = cpu_reg(s, rt);
2619     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2620     TCGv_i64 clean_addr;
2621     int memidx = get_mem_index(s);
2622     MemOp memop;
2623
2624     if (rn == 31) {
2625         gen_check_sp_alignment(s);
2626     }
2627
2628     /* This is a single atomic access, despite the "pair". */
2629     memop = check_atomic_align(s, rn, size + 1);
2630     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2631
2632     if (size == 2) {
2633         TCGv_i64 cmp = tcg_temp_new_i64();
2634         TCGv_i64 val = tcg_temp_new_i64();
2635
2636         if (s->be_data == MO_LE) {
2637             tcg_gen_concat32_i64(val, t1, t2);
2638             tcg_gen_concat32_i64(cmp, s1, s2);
2639         } else {
2640             tcg_gen_concat32_i64(val, t2, t1);
2641             tcg_gen_concat32_i64(cmp, s2, s1);
2642         }
2643
2644         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
2645
2646         if (s->be_data == MO_LE) {
2647             tcg_gen_extr32_i64(s1, s2, cmp);
2648         } else {
2649             tcg_gen_extr32_i64(s2, s1, cmp);
2650         }
2651     } else {
2652         TCGv_i128 cmp = tcg_temp_new_i128();
2653         TCGv_i128 val = tcg_temp_new_i128();
2654
2655         if (s->be_data == MO_LE) {
2656             tcg_gen_concat_i64_i128(val, t1, t2);
2657             tcg_gen_concat_i64_i128(cmp, s1, s2);
2658         } else {
2659             tcg_gen_concat_i64_i128(val, t2, t1);
2660             tcg_gen_concat_i64_i128(cmp, s2, s1);
2661         }
2662
2663         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
2664
2665         if (s->be_data == MO_LE) {
2666             tcg_gen_extr_i128_i64(s1, s2, cmp);
2667         } else {
2668             tcg_gen_extr_i128_i64(s2, s1, cmp);
2669         }
2670     }
2671 }
2672
2673 /*
2674  * Compute the ISS.SF bit for syndrome information if an exception
2675  * is taken on a load or store. This indicates whether the instruction
2676  * is accessing a 32-bit or 64-bit register. This logic is derived
2677  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2678  */
2679 static bool ldst_iss_sf(int size, bool sign, bool ext)
2680 {
2681
2682     if (sign) {
2683         /*
2684          * Signed loads are 64 bit results if we are not going to
2685          * do a zero-extend from 32 to 64 after the load.
2686          * (For a store, sign and ext are always false.)
2687          */
2688         return !ext;
2689     } else {
2690         /* Unsigned loads/stores work at the specified size */
2691         return size == MO_64;
2692     }
2693 }
2694
2695 static bool trans_STXR(DisasContext *s, arg_stxr *a)
2696 {
2697     if (a->rn == 31) {
2698         gen_check_sp_alignment(s);
2699     }
2700     if (a->lasr) {
2701         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2702     }
2703     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
2704     return true;
2705 }
2706
2707 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
2708 {
2709     if (a->rn == 31) {
2710         gen_check_sp_alignment(s);
2711     }
2712     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
2713     if (a->lasr) {
2714         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2715     }
2716     return true;
2717 }
2718
2719 static bool trans_STLR(DisasContext *s, arg_stlr *a)
2720 {
2721     TCGv_i64 clean_addr;
2722     MemOp memop;
2723     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2724
2725     /*
2726      * StoreLORelease is the same as Store-Release for QEMU, but
2727      * needs the feature-test.
2728      */
2729     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2730         return false;
2731     }
2732     /* Generate ISS for non-exclusive accesses including LASR.  */
2733     if (a->rn == 31) {
2734         gen_check_sp_alignment(s);
2735     }
2736     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2737     memop = check_ordered_align(s, a->rn, 0, true, a->sz);
2738     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
2739                                 true, a->rn != 31, memop);
2740     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
2741               iss_sf, a->lasr);
2742     return true;
2743 }
2744
2745 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
2746 {
2747     TCGv_i64 clean_addr;
2748     MemOp memop;
2749     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2750
2751     /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2752     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2753         return false;
2754     }
2755     /* Generate ISS for non-exclusive accesses including LASR.  */
2756     if (a->rn == 31) {
2757         gen_check_sp_alignment(s);
2758     }
2759     memop = check_ordered_align(s, a->rn, 0, false, a->sz);
2760     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
2761                                 false, a->rn != 31, memop);
2762     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
2763               a->rt, iss_sf, a->lasr);
2764     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2765     return true;
2766 }
2767
2768 static bool trans_STXP(DisasContext *s, arg_stxr *a)
2769 {
2770     if (a->rn == 31) {
2771         gen_check_sp_alignment(s);
2772     }
2773     if (a->lasr) {
2774         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2775     }
2776     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
2777     return true;
2778 }
2779
2780 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
2781 {
2782     if (a->rn == 31) {
2783         gen_check_sp_alignment(s);
2784     }
2785     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
2786     if (a->lasr) {
2787         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2788     }
2789     return true;
2790 }
2791
2792 static bool trans_CASP(DisasContext *s, arg_CASP *a)
2793 {
2794     if (!dc_isar_feature(aa64_atomics, s)) {
2795         return false;
2796     }
2797     if (((a->rt | a->rs) & 1) != 0) {
2798         return false;
2799     }
2800
2801     gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
2802     return true;
2803 }
2804
2805 static bool trans_CAS(DisasContext *s, arg_CAS *a)
2806 {
2807     if (!dc_isar_feature(aa64_atomics, s)) {
2808         return false;
2809     }
2810     gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
2811     return true;
2812 }
2813
2814 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
2815 {
2816     bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
2817     TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
2818     TCGv_i64 clean_addr = tcg_temp_new_i64();
2819     MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
2820
2821     gen_pc_plus_diff(s, clean_addr, a->imm);
2822     do_gpr_ld(s, tcg_rt, clean_addr, memop,
2823               false, true, a->rt, iss_sf, false);
2824     return true;
2825 }
2826
2827 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
2828 {
2829     /* Load register (literal), vector version */
2830     TCGv_i64 clean_addr;
2831     MemOp memop;
2832
2833     if (!fp_access_check(s)) {
2834         return true;
2835     }
2836     memop = finalize_memop_asimd(s, a->sz);
2837     clean_addr = tcg_temp_new_i64();
2838     gen_pc_plus_diff(s, clean_addr, a->imm);
2839     do_fp_ld(s, a->rt, clean_addr, memop);
2840     return true;
2841 }
2842
2843 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
2844                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
2845                                  uint64_t offset, bool is_store, MemOp mop)
2846 {
2847     if (a->rn == 31) {
2848         gen_check_sp_alignment(s);
2849     }
2850
2851     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
2852     if (!a->p) {
2853         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
2854     }
2855
2856     *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
2857                                  (a->w || a->rn != 31), 2 << a->sz, mop);
2858 }
2859
2860 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
2861                                   TCGv_i64 dirty_addr, uint64_t offset)
2862 {
2863     if (a->w) {
2864         if (a->p) {
2865             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2866         }
2867         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
2868     }
2869 }
2870
2871 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
2872 {
2873     uint64_t offset = a->imm << a->sz;
2874     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
2875     MemOp mop = finalize_memop(s, a->sz);
2876
2877     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
2878     tcg_rt = cpu_reg(s, a->rt);
2879     tcg_rt2 = cpu_reg(s, a->rt2);
2880     /*
2881      * We built mop above for the single logical access -- rebuild it
2882      * now for the paired operation.
2883      *
2884      * With LSE2, non-sign-extending pairs are treated atomically if
2885      * aligned, and if unaligned one of the pair will be completely
2886      * within a 16-byte block and that element will be atomic.
2887      * Otherwise each element is separately atomic.
2888      * In all cases, issue one operation with the correct atomicity.
2889      */
2890     mop = a->sz + 1;
2891     if (s->align_mem) {
2892         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
2893     }
2894     mop = finalize_memop_pair(s, mop);
2895     if (a->sz == 2) {
2896         TCGv_i64 tmp = tcg_temp_new_i64();
2897
2898         if (s->be_data == MO_LE) {
2899             tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
2900         } else {
2901             tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
2902         }
2903         tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
2904     } else {
2905         TCGv_i128 tmp = tcg_temp_new_i128();
2906
2907         if (s->be_data == MO_LE) {
2908             tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
2909         } else {
2910             tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
2911         }
2912         tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
2913     }
2914     op_addr_ldstpair_post(s, a, dirty_addr, offset);
2915     return true;
2916 }
2917
2918 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
2919 {
2920     uint64_t offset = a->imm << a->sz;
2921     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
2922     MemOp mop = finalize_memop(s, a->sz);
2923
2924     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
2925     tcg_rt = cpu_reg(s, a->rt);
2926     tcg_rt2 = cpu_reg(s, a->rt2);
2927
2928     /*
2929      * We built mop above for the single logical access -- rebuild it
2930      * now for the paired operation.
2931      *
2932      * With LSE2, non-sign-extending pairs are treated atomically if
2933      * aligned, and if unaligned one of the pair will be completely
2934      * within a 16-byte block and that element will be atomic.
2935      * Otherwise each element is separately atomic.
2936      * In all cases, issue one operation with the correct atomicity.
2937      *
2938      * This treats sign-extending loads like zero-extending loads,
2939      * since that reuses the most code below.
2940      */
2941     mop = a->sz + 1;
2942     if (s->align_mem) {
2943         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
2944     }
2945     mop = finalize_memop_pair(s, mop);
2946     if (a->sz == 2) {
2947         int o2 = s->be_data == MO_LE ? 32 : 0;
2948         int o1 = o2 ^ 32;
2949
2950         tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
2951         if (a->sign) {
2952             tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
2953             tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
2954         } else {
2955             tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
2956             tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
2957         }
2958     } else {
2959         TCGv_i128 tmp = tcg_temp_new_i128();
2960
2961         tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
2962         if (s->be_data == MO_LE) {
2963             tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
2964         } else {
2965             tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
2966         }
2967     }
2968     op_addr_ldstpair_post(s, a, dirty_addr, offset);
2969     return true;
2970 }
2971
2972 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
2973 {
2974     uint64_t offset = a->imm << a->sz;
2975     TCGv_i64 clean_addr, dirty_addr;
2976     MemOp mop;
2977
2978     if (!fp_access_check(s)) {
2979         return true;
2980     }
2981
2982     /* LSE2 does not merge FP pairs; leave these as separate operations. */
2983     mop = finalize_memop_asimd(s, a->sz);
2984     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
2985     do_fp_st(s, a->rt, clean_addr, mop);
2986     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
2987     do_fp_st(s, a->rt2, clean_addr, mop);
2988     op_addr_ldstpair_post(s, a, dirty_addr, offset);
2989     return true;
2990 }
2991
2992 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
2993 {
2994     uint64_t offset = a->imm << a->sz;
2995     TCGv_i64 clean_addr, dirty_addr;
2996     MemOp mop;
2997
2998     if (!fp_access_check(s)) {
2999         return true;
3000     }
3001
3002     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3003     mop = finalize_memop_asimd(s, a->sz);
3004     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3005     do_fp_ld(s, a->rt, clean_addr, mop);
3006     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3007     do_fp_ld(s, a->rt2, clean_addr, mop);
3008     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3009     return true;
3010 }
3011
3012 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3013 {
3014     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3015     uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3016     MemOp mop;
3017     TCGv_i128 tmp;
3018
3019     /* STGP only comes in one size. */
3020     tcg_debug_assert(a->sz == MO_64);
3021
3022     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3023         return false;
3024     }
3025
3026     if (a->rn == 31) {
3027         gen_check_sp_alignment(s);
3028     }
3029
3030     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3031     if (!a->p) {
3032         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3033     }
3034
3035     clean_addr = clean_data_tbi(s, dirty_addr);
3036     tcg_rt = cpu_reg(s, a->rt);
3037     tcg_rt2 = cpu_reg(s, a->rt2);
3038
3039     /*
3040      * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3041      * and one tag operation.  We implement it as one single aligned 16-byte
3042      * memory operation for convenience.  Note that the alignment ensures
3043      * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3044      */
3045     mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3046
3047     tmp = tcg_temp_new_i128();
3048     if (s->be_data == MO_LE) {
3049         tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3050     } else {
3051         tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3052     }
3053     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3054
3055     /* Perform the tag store, if tag access enabled. */
3056     if (s->ata) {
3057         if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3058             gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
3059         } else {
3060             gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
3061         }
3062     }
3063
3064     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3065     return true;
3066 }
3067
3068 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3069                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3070                                  uint64_t offset, bool is_store, MemOp mop)
3071 {
3072     int memidx;
3073
3074     if (a->rn == 31) {
3075         gen_check_sp_alignment(s);
3076     }
3077
3078     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3079     if (!a->p) {
3080         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3081     }
3082     memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
3083     *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3084                                         a->w || a->rn != 31,
3085                                         mop, a->unpriv, memidx);
3086 }
3087
3088 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3089                                   TCGv_i64 dirty_addr, uint64_t offset)
3090 {
3091     if (a->w) {
3092         if (a->p) {
3093             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3094         }
3095         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3096     }
3097 }
3098
3099 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3100 {
3101     bool iss_sf, iss_valid = !a->w;
3102     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3103     int memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
3104     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3105
3106     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3107
3108     tcg_rt = cpu_reg(s, a->rt);
3109     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3110
3111     do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3112                      iss_valid, a->rt, iss_sf, false);
3113     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3114     return true;
3115 }
3116
3117 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3118 {
3119     bool iss_sf, iss_valid = !a->w;
3120     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3121     int memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
3122     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3123
3124     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3125
3126     tcg_rt = cpu_reg(s, a->rt);
3127     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3128
3129     do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3130                      a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3131     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3132     return true;
3133 }
3134
3135 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3136 {
3137     TCGv_i64 clean_addr, dirty_addr;
3138     MemOp mop;
3139
3140     if (!fp_access_check(s)) {
3141         return true;
3142     }
3143     mop = finalize_memop_asimd(s, a->sz);
3144     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3145     do_fp_st(s, a->rt, clean_addr, mop);
3146     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3147     return true;
3148 }
3149
3150 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3151 {
3152     TCGv_i64 clean_addr, dirty_addr;
3153     MemOp mop;
3154
3155     if (!fp_access_check(s)) {
3156         return true;
3157     }
3158     mop = finalize_memop_asimd(s, a->sz);
3159     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3160     do_fp_ld(s, a->rt, clean_addr, mop);
3161     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3162     return true;
3163 }
3164
3165 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3166                              TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3167                              bool is_store, MemOp memop)
3168 {
3169     TCGv_i64 tcg_rm;
3170
3171     if (a->rn == 31) {
3172         gen_check_sp_alignment(s);
3173     }
3174     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3175
3176     tcg_rm = read_cpu_reg(s, a->rm, 1);
3177     ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3178
3179     tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3180     *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3181 }
3182
3183 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3184 {
3185     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3186     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3187     MemOp memop;
3188
3189     if (extract32(a->opt, 1, 1) == 0) {
3190         return false;
3191     }
3192
3193     memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3194     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3195     tcg_rt = cpu_reg(s, a->rt);
3196     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3197               a->ext, true, a->rt, iss_sf, false);
3198     return true;
3199 }
3200
3201 static bool trans_STR(DisasContext *s, arg_ldst *a)
3202 {
3203     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3204     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3205     MemOp memop;
3206
3207     if (extract32(a->opt, 1, 1) == 0) {
3208         return false;
3209     }
3210
3211     memop = finalize_memop(s, a->sz);
3212     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3213     tcg_rt = cpu_reg(s, a->rt);
3214     do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3215     return true;
3216 }
3217
3218 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3219 {
3220     TCGv_i64 clean_addr, dirty_addr;
3221     MemOp memop;
3222
3223     if (extract32(a->opt, 1, 1) == 0) {
3224         return false;
3225     }
3226
3227     if (!fp_access_check(s)) {
3228         return true;
3229     }
3230
3231     memop = finalize_memop_asimd(s, a->sz);
3232     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3233     do_fp_ld(s, a->rt, clean_addr, memop);
3234     return true;
3235 }
3236
3237 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3238 {
3239     TCGv_i64 clean_addr, dirty_addr;
3240     MemOp memop;
3241
3242     if (extract32(a->opt, 1, 1) == 0) {
3243         return false;
3244     }
3245
3246     if (!fp_access_check(s)) {
3247         return true;
3248     }
3249
3250     memop = finalize_memop_asimd(s, a->sz);
3251     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3252     do_fp_st(s, a->rt, clean_addr, memop);
3253     return true;
3254 }
3255
3256
3257 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3258                          int sign, bool invert)
3259 {
3260     MemOp mop = a->sz | sign;
3261     TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3262
3263     if (a->rn == 31) {
3264         gen_check_sp_alignment(s);
3265     }
3266     mop = check_atomic_align(s, a->rn, mop);
3267     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3268                                 a->rn != 31, mop);
3269     tcg_rs = read_cpu_reg(s, a->rs, true);
3270     tcg_rt = cpu_reg(s, a->rt);
3271     if (invert) {
3272         tcg_gen_not_i64(tcg_rs, tcg_rs);
3273     }
3274     /*
3275      * The tcg atomic primitives are all full barriers.  Therefore we
3276      * can ignore the Acquire and Release bits of this instruction.
3277      */
3278     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3279
3280     if (mop & MO_SIGN) {
3281         switch (a->sz) {
3282         case MO_8:
3283             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3284             break;
3285         case MO_16:
3286             tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3287             break;
3288         case MO_32:
3289             tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3290             break;
3291         case MO_64:
3292             break;
3293         default:
3294             g_assert_not_reached();
3295         }
3296     }
3297     return true;
3298 }
3299
3300 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3301 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3302 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3303 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3304 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3305 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3306 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3307 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3308 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3309
3310 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3311 {
3312     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3313     TCGv_i64 clean_addr;
3314     MemOp mop;
3315
3316     if (!dc_isar_feature(aa64_atomics, s) ||
3317         !dc_isar_feature(aa64_rcpc_8_3, s)) {
3318         return false;
3319     }
3320     if (a->rn == 31) {
3321         gen_check_sp_alignment(s);
3322     }
3323     mop = check_atomic_align(s, a->rn, a->sz);
3324     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3325                                 a->rn != 31, mop);
3326     /*
3327      * LDAPR* are a special case because they are a simple load, not a
3328      * fetch-and-do-something op.
3329      * The architectural consistency requirements here are weaker than
3330      * full load-acquire (we only need "load-acquire processor consistent"),
3331      * but we choose to implement them as full LDAQ.
3332      */
3333     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3334               true, a->rt, iss_sf, true);
3335     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3336     return true;
3337 }
3338
3339 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3340 {
3341     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3342     MemOp memop;
3343
3344     /* Load with pointer authentication */
3345     if (!dc_isar_feature(aa64_pauth, s)) {
3346         return false;
3347     }
3348
3349     if (a->rn == 31) {
3350         gen_check_sp_alignment(s);
3351     }
3352     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3353
3354     if (s->pauth_active) {
3355         if (!a->m) {
3356             gen_helper_autda_combined(dirty_addr, cpu_env, dirty_addr,
3357                                       tcg_constant_i64(0));
3358         } else {
3359             gen_helper_autdb_combined(dirty_addr, cpu_env, dirty_addr,
3360                                       tcg_constant_i64(0));
3361         }
3362     }
3363
3364     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3365
3366     memop = finalize_memop(s, MO_64);
3367
3368     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3369     clean_addr = gen_mte_check1(s, dirty_addr, false,
3370                                 a->w || a->rn != 31, memop);
3371
3372     tcg_rt = cpu_reg(s, a->rt);
3373     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3374               /* extend */ false, /* iss_valid */ !a->w,
3375               /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3376
3377     if (a->w) {
3378         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3379     }
3380     return true;
3381 }
3382
3383 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3384 {
3385     TCGv_i64 clean_addr, dirty_addr;
3386     MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3387     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3388
3389     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3390         return false;
3391     }
3392
3393     if (a->rn == 31) {
3394         gen_check_sp_alignment(s);
3395     }
3396
3397     mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3398     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3399     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3400     clean_addr = clean_data_tbi(s, dirty_addr);
3401
3402     /*
3403      * Load-AcquirePC semantics; we implement as the slightly more
3404      * restrictive Load-Acquire.
3405      */
3406     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3407               a->rt, iss_sf, true);
3408     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3409     return true;
3410 }
3411
3412 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3413 {
3414     TCGv_i64 clean_addr, dirty_addr;
3415     MemOp mop = a->sz;
3416     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3417
3418     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3419         return false;
3420     }
3421
3422     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3423
3424     if (a->rn == 31) {
3425         gen_check_sp_alignment(s);
3426     }
3427
3428     mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3429     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3430     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3431     clean_addr = clean_data_tbi(s, dirty_addr);
3432
3433     /* Store-Release semantics */
3434     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3435     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3436     return true;
3437 }
3438
3439 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3440 {
3441     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3442     MemOp endian, align, mop;
3443
3444     int total;    /* total bytes */
3445     int elements; /* elements per vector */
3446     int r;
3447     int size = a->sz;
3448
3449     if (!a->p && a->rm != 0) {
3450         /* For non-postindexed accesses the Rm field must be 0 */
3451         return false;
3452     }
3453     if (size == 3 && !a->q && a->selem != 1) {
3454         return false;
3455     }
3456     if (!fp_access_check(s)) {
3457         return true;
3458     }
3459
3460     if (a->rn == 31) {
3461         gen_check_sp_alignment(s);
3462     }
3463
3464     /* For our purposes, bytes are always little-endian.  */
3465     endian = s->be_data;
3466     if (size == 0) {
3467         endian = MO_LE;
3468     }
3469
3470     total = a->rpt * a->selem * (a->q ? 16 : 8);
3471     tcg_rn = cpu_reg_sp(s, a->rn);
3472
3473     /*
3474      * Issue the MTE check vs the logical repeat count, before we
3475      * promote consecutive little-endian elements below.
3476      */
3477     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3478                                 finalize_memop_asimd(s, size));
3479
3480     /*
3481      * Consecutive little-endian elements from a single register
3482      * can be promoted to a larger little-endian operation.
3483      */
3484     align = MO_ALIGN;
3485     if (a->selem == 1 && endian == MO_LE) {
3486         align = pow2_align(size);
3487         size = 3;
3488     }
3489     if (!s->align_mem) {
3490         align = 0;
3491     }
3492     mop = endian | size | align;
3493
3494     elements = (a->q ? 16 : 8) >> size;
3495     tcg_ebytes = tcg_constant_i64(1 << size);
3496     for (r = 0; r < a->rpt; r++) {
3497         int e;
3498         for (e = 0; e < elements; e++) {
3499             int xs;
3500             for (xs = 0; xs < a->selem; xs++) {
3501                 int tt = (a->rt + r + xs) % 32;
3502                 do_vec_ld(s, tt, e, clean_addr, mop);
3503                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3504             }
3505         }
3506     }
3507
3508     /*
3509      * For non-quad operations, setting a slice of the low 64 bits of
3510      * the register clears the high 64 bits (in the ARM ARM pseudocode
3511      * this is implicit in the fact that 'rval' is a 64 bit wide
3512      * variable).  For quad operations, we might still need to zero
3513      * the high bits of SVE.
3514      */
3515     for (r = 0; r < a->rpt * a->selem; r++) {
3516         int tt = (a->rt + r) % 32;
3517         clear_vec_high(s, a->q, tt);
3518     }
3519
3520     if (a->p) {
3521         if (a->rm == 31) {
3522             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3523         } else {
3524             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3525         }
3526     }
3527     return true;
3528 }
3529
3530 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3531 {
3532     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3533     MemOp endian, align, mop;
3534
3535     int total;    /* total bytes */
3536     int elements; /* elements per vector */
3537     int r;
3538     int size = a->sz;
3539
3540     if (!a->p && a->rm != 0) {
3541         /* For non-postindexed accesses the Rm field must be 0 */
3542         return false;
3543     }
3544     if (size == 3 && !a->q && a->selem != 1) {
3545         return false;
3546     }
3547     if (!fp_access_check(s)) {
3548         return true;
3549     }
3550
3551     if (a->rn == 31) {
3552         gen_check_sp_alignment(s);
3553     }
3554
3555     /* For our purposes, bytes are always little-endian.  */
3556     endian = s->be_data;
3557     if (size == 0) {
3558         endian = MO_LE;
3559     }
3560
3561     total = a->rpt * a->selem * (a->q ? 16 : 8);
3562     tcg_rn = cpu_reg_sp(s, a->rn);
3563
3564     /*
3565      * Issue the MTE check vs the logical repeat count, before we
3566      * promote consecutive little-endian elements below.
3567      */
3568     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
3569                                 finalize_memop_asimd(s, size));
3570
3571     /*
3572      * Consecutive little-endian elements from a single register
3573      * can be promoted to a larger little-endian operation.
3574      */
3575     align = MO_ALIGN;
3576     if (a->selem == 1 && endian == MO_LE) {
3577         align = pow2_align(size);
3578         size = 3;
3579     }
3580     if (!s->align_mem) {
3581         align = 0;
3582     }
3583     mop = endian | size | align;
3584
3585     elements = (a->q ? 16 : 8) >> size;
3586     tcg_ebytes = tcg_constant_i64(1 << size);
3587     for (r = 0; r < a->rpt; r++) {
3588         int e;
3589         for (e = 0; e < elements; e++) {
3590             int xs;
3591             for (xs = 0; xs < a->selem; xs++) {
3592                 int tt = (a->rt + r + xs) % 32;
3593                 do_vec_st(s, tt, e, clean_addr, mop);
3594                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3595             }
3596         }
3597     }
3598
3599     if (a->p) {
3600         if (a->rm == 31) {
3601             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3602         } else {
3603             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3604         }
3605     }
3606     return true;
3607 }
3608
3609 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
3610 {
3611     int xs, total, rt;
3612     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3613     MemOp mop;
3614
3615     if (!a->p && a->rm != 0) {
3616         return false;
3617     }
3618     if (!fp_access_check(s)) {
3619         return true;
3620     }
3621
3622     if (a->rn == 31) {
3623         gen_check_sp_alignment(s);
3624     }
3625
3626     total = a->selem << a->scale;
3627     tcg_rn = cpu_reg_sp(s, a->rn);
3628
3629     mop = finalize_memop_asimd(s, a->scale);
3630     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
3631                                 total, mop);
3632
3633     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3634     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3635         do_vec_st(s, rt, a->index, clean_addr, mop);
3636         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3637     }
3638
3639     if (a->p) {
3640         if (a->rm == 31) {
3641             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3642         } else {
3643             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3644         }
3645     }
3646     return true;
3647 }
3648
3649 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
3650 {
3651     int xs, total, rt;
3652     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3653     MemOp mop;
3654
3655     if (!a->p && a->rm != 0) {
3656         return false;
3657     }
3658     if (!fp_access_check(s)) {
3659         return true;
3660     }
3661
3662     if (a->rn == 31) {
3663         gen_check_sp_alignment(s);
3664     }
3665
3666     total = a->selem << a->scale;
3667     tcg_rn = cpu_reg_sp(s, a->rn);
3668
3669     mop = finalize_memop_asimd(s, a->scale);
3670     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3671                                 total, mop);
3672
3673     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3674     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3675         do_vec_ld(s, rt, a->index, clean_addr, mop);
3676         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3677     }
3678
3679     if (a->p) {
3680         if (a->rm == 31) {
3681             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3682         } else {
3683             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3684         }
3685     }
3686     return true;
3687 }
3688
3689 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
3690 {
3691     int xs, total, rt;
3692     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3693     MemOp mop;
3694
3695     if (!a->p && a->rm != 0) {
3696         return false;
3697     }
3698     if (!fp_access_check(s)) {
3699         return true;
3700     }
3701
3702     if (a->rn == 31) {
3703         gen_check_sp_alignment(s);
3704     }
3705
3706     total = a->selem << a->scale;
3707     tcg_rn = cpu_reg_sp(s, a->rn);
3708
3709     mop = finalize_memop_asimd(s, a->scale);
3710     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3711                                 total, mop);
3712
3713     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3714     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3715         /* Load and replicate to all elements */
3716         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3717
3718         tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3719         tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
3720                              (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
3721         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3722     }
3723
3724     if (a->p) {
3725         if (a->rm == 31) {
3726             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3727         } else {
3728             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3729         }
3730     }
3731     return true;
3732 }
3733
3734 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
3735 {
3736     TCGv_i64 addr, clean_addr, tcg_rt;
3737     int size = 4 << s->dcz_blocksize;
3738
3739     if (!dc_isar_feature(aa64_mte, s)) {
3740         return false;
3741     }
3742     if (s->current_el == 0) {
3743         return false;
3744     }
3745
3746     if (a->rn == 31) {
3747         gen_check_sp_alignment(s);
3748     }
3749
3750     addr = read_cpu_reg_sp(s, a->rn, true);
3751     tcg_gen_addi_i64(addr, addr, a->imm);
3752     tcg_rt = cpu_reg(s, a->rt);
3753
3754     if (s->ata) {
3755         gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
3756     }
3757     /*
3758      * The non-tags portion of STZGM is mostly like DC_ZVA,
3759      * except the alignment happens before the access.
3760      */
3761     clean_addr = clean_data_tbi(s, addr);
3762     tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3763     gen_helper_dc_zva(cpu_env, clean_addr);
3764     return true;
3765 }
3766
3767 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
3768 {
3769     TCGv_i64 addr, clean_addr, tcg_rt;
3770
3771     if (!dc_isar_feature(aa64_mte, s)) {
3772         return false;
3773     }
3774     if (s->current_el == 0) {
3775         return false;
3776     }
3777
3778     if (a->rn == 31) {
3779         gen_check_sp_alignment(s);
3780     }
3781
3782     addr = read_cpu_reg_sp(s, a->rn, true);
3783     tcg_gen_addi_i64(addr, addr, a->imm);
3784     tcg_rt = cpu_reg(s, a->rt);
3785
3786     if (s->ata) {
3787         gen_helper_stgm(cpu_env, addr, tcg_rt);
3788     } else {
3789         MMUAccessType acc = MMU_DATA_STORE;
3790         int size = 4 << s->gm_blocksize;
3791
3792         clean_addr = clean_data_tbi(s, addr);
3793         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3794         gen_probe_access(s, clean_addr, acc, size);
3795     }
3796     return true;
3797 }
3798
3799 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
3800 {
3801     TCGv_i64 addr, clean_addr, tcg_rt;
3802
3803     if (!dc_isar_feature(aa64_mte, s)) {
3804         return false;
3805     }
3806     if (s->current_el == 0) {
3807         return false;
3808     }
3809
3810     if (a->rn == 31) {
3811         gen_check_sp_alignment(s);
3812     }
3813
3814     addr = read_cpu_reg_sp(s, a->rn, true);
3815     tcg_gen_addi_i64(addr, addr, a->imm);
3816     tcg_rt = cpu_reg(s, a->rt);
3817
3818     if (s->ata) {
3819         gen_helper_ldgm(tcg_rt, cpu_env, addr);
3820     } else {
3821         MMUAccessType acc = MMU_DATA_LOAD;
3822         int size = 4 << s->gm_blocksize;
3823
3824         clean_addr = clean_data_tbi(s, addr);
3825         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3826         gen_probe_access(s, clean_addr, acc, size);
3827         /* The result tags are zeros.  */
3828         tcg_gen_movi_i64(tcg_rt, 0);
3829     }
3830     return true;
3831 }
3832
3833 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
3834 {
3835     TCGv_i64 addr, clean_addr, tcg_rt;
3836
3837     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3838         return false;
3839     }
3840
3841     if (a->rn == 31) {
3842         gen_check_sp_alignment(s);
3843     }
3844
3845     addr = read_cpu_reg_sp(s, a->rn, true);
3846     if (!a->p) {
3847         /* pre-index or signed offset */
3848         tcg_gen_addi_i64(addr, addr, a->imm);
3849     }
3850
3851     tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
3852     tcg_rt = cpu_reg(s, a->rt);
3853     if (s->ata) {
3854         gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt);
3855     } else {
3856         /*
3857          * Tag access disabled: we must check for aborts on the load
3858          * load from [rn+offset], and then insert a 0 tag into rt.
3859          */
3860         clean_addr = clean_data_tbi(s, addr);
3861         gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
3862         gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
3863     }
3864
3865     if (a->w) {
3866         /* pre-index or post-index */
3867         if (a->p) {
3868             /* post-index */
3869             tcg_gen_addi_i64(addr, addr, a->imm);
3870         }
3871         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
3872     }
3873     return true;
3874 }
3875
3876 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
3877 {
3878     TCGv_i64 addr, tcg_rt;
3879
3880     if (a->rn == 31) {
3881         gen_check_sp_alignment(s);
3882     }
3883
3884     addr = read_cpu_reg_sp(s, a->rn, true);
3885     if (!a->p) {
3886         /* pre-index or signed offset */
3887         tcg_gen_addi_i64(addr, addr, a->imm);
3888     }
3889     tcg_rt = cpu_reg_sp(s, a->rt);
3890     if (!s->ata) {
3891         /*
3892          * For STG and ST2G, we need to check alignment and probe memory.
3893          * TODO: For STZG and STZ2G, we could rely on the stores below,
3894          * at least for system mode; user-only won't enforce alignment.
3895          */
3896         if (is_pair) {
3897             gen_helper_st2g_stub(cpu_env, addr);
3898         } else {
3899             gen_helper_stg_stub(cpu_env, addr);
3900         }
3901     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3902         if (is_pair) {
3903             gen_helper_st2g_parallel(cpu_env, addr, tcg_rt);
3904         } else {
3905             gen_helper_stg_parallel(cpu_env, addr, tcg_rt);
3906         }
3907     } else {
3908         if (is_pair) {
3909             gen_helper_st2g(cpu_env, addr, tcg_rt);
3910         } else {
3911             gen_helper_stg(cpu_env, addr, tcg_rt);
3912         }
3913     }
3914
3915     if (is_zero) {
3916         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
3917         TCGv_i64 zero64 = tcg_constant_i64(0);
3918         TCGv_i128 zero128 = tcg_temp_new_i128();
3919         int mem_index = get_mem_index(s);
3920         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
3921
3922         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
3923
3924         /* This is 1 or 2 atomic 16-byte operations. */
3925         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
3926         if (is_pair) {
3927             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
3928             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
3929         }
3930     }
3931
3932     if (a->w) {
3933         /* pre-index or post-index */
3934         if (a->p) {
3935             /* post-index */
3936             tcg_gen_addi_i64(addr, addr, a->imm);
3937         }
3938         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
3939     }
3940     return true;
3941 }
3942
3943 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
3944 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
3945 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
3946 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
3947
3948 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
3949
3950 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
3951                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
3952 {
3953     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
3954     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
3955     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
3956
3957     fn(tcg_rd, tcg_rn, tcg_imm);
3958     if (!a->sf) {
3959         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3960     }
3961     return true;
3962 }
3963
3964 /*
3965  * PC-rel. addressing
3966  */
3967
3968 static bool trans_ADR(DisasContext *s, arg_ri *a)
3969 {
3970     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
3971     return true;
3972 }
3973
3974 static bool trans_ADRP(DisasContext *s, arg_ri *a)
3975 {
3976     int64_t offset = (int64_t)a->imm << 12;
3977
3978     /* The page offset is ok for CF_PCREL. */
3979     offset -= s->pc_curr & 0xfff;
3980     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
3981     return true;
3982 }
3983
3984 /*
3985  * Add/subtract (immediate)
3986  */
3987 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
3988 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
3989 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
3990 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
3991
3992 /*
3993  * Add/subtract (immediate, with tags)
3994  */
3995
3996 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
3997                                       bool sub_op)
3998 {
3999     TCGv_i64 tcg_rn, tcg_rd;
4000     int imm;
4001
4002     imm = a->uimm6 << LOG2_TAG_GRANULE;
4003     if (sub_op) {
4004         imm = -imm;
4005     }
4006
4007     tcg_rn = cpu_reg_sp(s, a->rn);
4008     tcg_rd = cpu_reg_sp(s, a->rd);
4009
4010     if (s->ata) {
4011         gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn,
4012                            tcg_constant_i32(imm),
4013                            tcg_constant_i32(a->uimm4));
4014     } else {
4015         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4016         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4017     }
4018     return true;
4019 }
4020
4021 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4022 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4023
/*
 * Replicate an e-bit value across a 64-bit integer.
 *
 * @mask must hold the value in its bottom e bits with all higher bits
 * zero; @e must be non-zero.  The result has that value copied into
 * every e-bit element of the 64-bit return value.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    /* Each pass doubles the number of populated bits. */
    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }
    return mask;
}
4037
4038 /*
4039  * Logical (immediate)
4040  */
4041
/*
 * Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask.
 *
 * Returns false if immn/imms/immr form a reserved value (i.e. one that
 * should cause a guest UNDEF exception).  Returns true if they are valid,
 * in which case the decoded bit pattern is stored in *result.
 *
 * The patterns produced are 64-bit values made of identical elements of
 * size e = 2, 4, 8, 16, 32 or 64 bits.  Each element holds a run of
 * between 1 and e-1 set bits, rotated within the element by between 0
 * and e-1 bits.
 *
 * Element size and run length are encoded in immn (1 bit) and imms
 * (6 bits, shown in binary) as follows:
 *   64 bit elements: immn = 1, imms = <length of run - 1>
 *   32 bit elements: immn = 0, imms = 0 : <length of run - 1>
 *   16 bit elements: immn = 0, imms = 10 : <length of run - 1>
 *    8 bit elements: immn = 0, imms = 110 : <length of run - 1>
 *    4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
 *    2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
 * The only combination not covered is immn = 0, imms = 11111x, which is
 * reserved.  A <length of run - 1> field of all-ones is also reserved.
 *
 * In all cases the rotation is by immr % e (and immr is 6 bits).
 */
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                            unsigned int imms, unsigned int immr)
{
    unsigned int esize, emask, run, rot;
    uint64_t elt;
    int log2_esize;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The highest set bit of immn:NOT(imms) gives log2 of the element size. */
    log2_esize = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (log2_esize < 1) {
        /* This is the immn == 0, imms == 11111x (binary) case. */
        return false;
    }

    esize = 1 << log2_esize;
    emask = esize - 1;
    run = imms & emask;
    rot = immr & emask;

    /* <length of run - 1> mustn't be all-ones. */
    if (run == emask) {
        return false;
    }

    /*
     * Build one element: run + 1 set bits, rotated right by rot within
     * the esize-bit element...
     */
    elt = MAKE_64BIT_MASK(0, run + 1);
    if (rot != 0) {
        elt = (elt >> rot) | (elt << (esize - rot));
        elt &= MAKE_64BIT_MASK(0, esize);
    }

    /* ...then replicate the element over the whole 64 bit value. */
    *result = bitfield_replicate(elt, esize);
    return true;
}
4108
4109 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4110                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4111 {
4112     TCGv_i64 tcg_rd, tcg_rn;
4113     uint64_t imm;
4114
4115     /* Some immediate field values are reserved. */
4116     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4117                                 extract32(a->dbm, 0, 6),
4118                                 extract32(a->dbm, 6, 6))) {
4119         return false;
4120     }
4121     if (!a->sf) {
4122         imm &= 0xffffffffull;
4123     }
4124
4125     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4126     tcg_rn = cpu_reg(s, a->rn);
4127
4128     fn(tcg_rd, tcg_rn, imm);
4129     if (set_cc) {
4130         gen_logic_CC(a->sf, tcg_rd);
4131     }
4132     if (!a->sf) {
4133         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4134     }
4135     return true;
4136 }
4137
4138 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4139 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4140 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4141 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4142
4143 /*
4144  * Move wide (immediate)
4145  */
4146
4147 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4148 {
4149     int pos = a->hw << 4;
4150     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4151     return true;
4152 }
4153
4154 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4155 {
4156     int pos = a->hw << 4;
4157     uint64_t imm = a->imm;
4158
4159     imm = ~(imm << pos);
4160     if (!a->sf) {
4161         imm = (uint32_t)imm;
4162     }
4163     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4164     return true;
4165 }
4166
4167 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4168 {
4169     int pos = a->hw << 4;
4170     TCGv_i64 tcg_rd, tcg_im;
4171
4172     tcg_rd = cpu_reg(s, a->rd);
4173     tcg_im = tcg_constant_i64(a->imm);
4174     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4175     if (!a->sf) {
4176         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4177     }
4178     return true;
4179 }
4180
4181 /*
4182  * Bitfield
4183  */
4184
4185 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4186 {
4187     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4188     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4189     unsigned int bitsize = a->sf ? 64 : 32;
4190     unsigned int ri = a->immr;
4191     unsigned int si = a->imms;
4192     unsigned int pos, len;
4193
4194     if (si >= ri) {
4195         /* Wd<s-r:0> = Wn<s:r> */
4196         len = (si - ri) + 1;
4197         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4198         if (!a->sf) {
4199             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4200         }
4201     } else {
4202         /* Wd<32+s-r,32-r> = Wn<s:0> */
4203         len = si + 1;
4204         pos = (bitsize - ri) & (bitsize - 1);
4205
4206         if (len < ri) {
4207             /*
4208              * Sign extend the destination field from len to fill the
4209              * balance of the word.  Let the deposit below insert all
4210              * of those sign bits.
4211              */
4212             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4213             len = ri;
4214         }
4215
4216         /*
4217          * We start with zero, and we haven't modified any bits outside
4218          * bitsize, therefore no final zero-extension is unneeded for !sf.
4219          */
4220         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4221     }
4222     return true;
4223 }
4224
4225 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4226 {
4227     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4228     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4229     unsigned int bitsize = a->sf ? 64 : 32;
4230     unsigned int ri = a->immr;
4231     unsigned int si = a->imms;
4232     unsigned int pos, len;
4233
4234     tcg_rd = cpu_reg(s, a->rd);
4235     tcg_tmp = read_cpu_reg(s, a->rn, 1);
4236
4237     if (si >= ri) {
4238         /* Wd<s-r:0> = Wn<s:r> */
4239         len = (si - ri) + 1;
4240         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4241     } else {
4242         /* Wd<32+s-r,32-r> = Wn<s:0> */
4243         len = si + 1;
4244         pos = (bitsize - ri) & (bitsize - 1);
4245         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4246     }
4247     return true;
4248 }
4249
4250 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4251 {
4252     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4253     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4254     unsigned int bitsize = a->sf ? 64 : 32;
4255     unsigned int ri = a->immr;
4256     unsigned int si = a->imms;
4257     unsigned int pos, len;
4258
4259     tcg_rd = cpu_reg(s, a->rd);
4260     tcg_tmp = read_cpu_reg(s, a->rn, 1);
4261
4262     if (si >= ri) {
4263         /* Wd<s-r:0> = Wn<s:r> */
4264         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4265         len = (si - ri) + 1;
4266         pos = 0;
4267     } else {
4268         /* Wd<32+s-r,32-r> = Wn<s:0> */
4269         len = si + 1;
4270         pos = (bitsize - ri) & (bitsize - 1);
4271     }
4272
4273     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4274     if (!a->sf) {
4275         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4276     }
4277     return true;
4278 }
4279
4280 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4281 {
4282     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4283
4284     tcg_rd = cpu_reg(s, a->rd);
4285
4286     if (unlikely(a->imm == 0)) {
4287         /*
4288          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4289          * so an extract from bit 0 is a special case.
4290          */
4291         if (a->sf) {
4292             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4293         } else {
4294             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4295         }
4296     } else {
4297         tcg_rm = cpu_reg(s, a->rm);
4298         tcg_rn = cpu_reg(s, a->rn);
4299
4300         if (a->sf) {
4301             /* Specialization to ROR happens in EXTRACT2.  */
4302             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4303         } else {
4304             TCGv_i32 t0 = tcg_temp_new_i32();
4305
4306             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4307             if (a->rm == a->rn) {
4308                 tcg_gen_rotri_i32(t0, t0, a->imm);
4309             } else {
4310                 TCGv_i32 t1 = tcg_temp_new_i32();
4311                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4312                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4313             }
4314             tcg_gen_extu_i32_i64(tcg_rd, t0);
4315         }
4316     }
4317     return true;
4318 }
4319
4320 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
4321  * Note that it is the caller's responsibility to ensure that the
4322  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4323  * mandated semantics for out of range shifts.
4324  */
4325 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4326                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4327 {
4328     switch (shift_type) {
4329     case A64_SHIFT_TYPE_LSL:
4330         tcg_gen_shl_i64(dst, src, shift_amount);
4331         break;
4332     case A64_SHIFT_TYPE_LSR:
4333         tcg_gen_shr_i64(dst, src, shift_amount);
4334         break;
4335     case A64_SHIFT_TYPE_ASR:
4336         if (!sf) {
4337             tcg_gen_ext32s_i64(dst, src);
4338         }
4339         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4340         break;
4341     case A64_SHIFT_TYPE_ROR:
4342         if (sf) {
4343             tcg_gen_rotr_i64(dst, src, shift_amount);
4344         } else {
4345             TCGv_i32 t0, t1;
4346             t0 = tcg_temp_new_i32();
4347             t1 = tcg_temp_new_i32();
4348             tcg_gen_extrl_i64_i32(t0, src);
4349             tcg_gen_extrl_i64_i32(t1, shift_amount);
4350             tcg_gen_rotr_i32(t0, t0, t1);
4351             tcg_gen_extu_i32_i64(dst, t0);
4352         }
4353         break;
4354     default:
4355         assert(FALSE); /* all shift types should be handled */
4356         break;
4357     }
4358
4359     if (!sf) { /* zero extend final result */
4360         tcg_gen_ext32u_i64(dst, dst);
4361     }
4362 }
4363
4364 /* Shift a TCGv src by immediate, put result in dst.
4365  * The shift amount must be in range (this should always be true as the
4366  * relevant instructions will UNDEF on bad shift immediates).
4367  */
4368 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4369                           enum a64_shift_type shift_type, unsigned int shift_i)
4370 {
4371     assert(shift_i < (sf ? 64 : 32));
4372
4373     if (shift_i == 0) {
4374         tcg_gen_mov_i64(dst, src);
4375     } else {
4376         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
4377     }
4378 }
4379
4380 /* Logical (shifted register)
4381  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4382  * +----+-----+-----------+-------+---+------+--------+------+------+
4383  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4384  * +----+-----+-----------+-------+---+------+--------+------+------+
4385  */
4386 static void disas_logic_reg(DisasContext *s, uint32_t insn)
4387 {
4388     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4389     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4390
4391     sf = extract32(insn, 31, 1);
4392     opc = extract32(insn, 29, 2);
4393     shift_type = extract32(insn, 22, 2);
4394     invert = extract32(insn, 21, 1);
4395     rm = extract32(insn, 16, 5);
4396     shift_amount = extract32(insn, 10, 6);
4397     rn = extract32(insn, 5, 5);
4398     rd = extract32(insn, 0, 5);
4399
4400     if (!sf && (shift_amount & (1 << 5))) {
4401         unallocated_encoding(s);
4402         return;
4403     }
4404
4405     tcg_rd = cpu_reg(s, rd);
4406
4407     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4408         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4409          * register-register MOV and MVN, so it is worth special casing.
4410          */
4411         tcg_rm = cpu_reg(s, rm);
4412         if (invert) {
4413             tcg_gen_not_i64(tcg_rd, tcg_rm);
4414             if (!sf) {
4415                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4416             }
4417         } else {
4418             if (sf) {
4419                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
4420             } else {
4421                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4422             }
4423         }
4424         return;
4425     }
4426
4427     tcg_rm = read_cpu_reg(s, rm, sf);
4428
4429     if (shift_amount) {
4430         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4431     }
4432
4433     tcg_rn = cpu_reg(s, rn);
4434
4435     switch (opc | (invert << 2)) {
4436     case 0: /* AND */
4437     case 3: /* ANDS */
4438         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4439         break;
4440     case 1: /* ORR */
4441         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4442         break;
4443     case 2: /* EOR */
4444         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4445         break;
4446     case 4: /* BIC */
4447     case 7: /* BICS */
4448         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4449         break;
4450     case 5: /* ORN */
4451         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4452         break;
4453     case 6: /* EON */
4454         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4455         break;
4456     default:
4457         assert(FALSE);
4458         break;
4459     }
4460
4461     if (!sf) {
4462         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4463     }
4464
4465     if (opc == 3) {
4466         gen_logic_CC(sf, tcg_rd);
4467     }
4468 }
4469
4470 /*
4471  * Add/subtract (extended register)
4472  *
4473  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4474  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4475  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4476  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4477  *
4478  *  sf: 0 -> 32bit, 1 -> 64bit
4479  *  op: 0 -> add  , 1 -> sub
4480  *   S: 1 -> set flags
4481  * opt: 00
4482  * option: extension type (see DecodeRegExtend)
4483  * imm3: optional shift to Rm
4484  *
4485  * Rd = Rn + LSL(extend(Rm), amount)
4486  */
4487 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4488 {
4489     int rd = extract32(insn, 0, 5);
4490     int rn = extract32(insn, 5, 5);
4491     int imm3 = extract32(insn, 10, 3);
4492     int option = extract32(insn, 13, 3);
4493     int rm = extract32(insn, 16, 5);
4494     int opt = extract32(insn, 22, 2);
4495     bool setflags = extract32(insn, 29, 1);
4496     bool sub_op = extract32(insn, 30, 1);
4497     bool sf = extract32(insn, 31, 1);
4498
4499     TCGv_i64 tcg_rm, tcg_rn; /* temps */
4500     TCGv_i64 tcg_rd;
4501     TCGv_i64 tcg_result;
4502
4503     if (imm3 > 4 || opt != 0) {
4504         unallocated_encoding(s);
4505         return;
4506     }
4507
4508     /* non-flag setting ops may use SP */
4509     if (!setflags) {
4510         tcg_rd = cpu_reg_sp(s, rd);
4511     } else {
4512         tcg_rd = cpu_reg(s, rd);
4513     }
4514     tcg_rn = read_cpu_reg_sp(s, rn, sf);
4515
4516     tcg_rm = read_cpu_reg(s, rm, sf);
4517     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4518
4519     tcg_result = tcg_temp_new_i64();
4520
4521     if (!setflags) {
4522         if (sub_op) {
4523             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4524         } else {
4525             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4526         }
4527     } else {
4528         if (sub_op) {
4529             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4530         } else {
4531             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4532         }
4533     }
4534
4535     if (sf) {
4536         tcg_gen_mov_i64(tcg_rd, tcg_result);
4537     } else {
4538         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4539     }
4540 }
4541
4542 /*
4543  * Add/subtract (shifted register)
4544  *
4545  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4546  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4547  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4548  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4549  *
4550  *    sf: 0 -> 32bit, 1 -> 64bit
4551  *    op: 0 -> add  , 1 -> sub
4552  *     S: 1 -> set flags
4553  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4554  *  imm6: Shift amount to apply to Rm before the add/sub
4555  */
4556 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4557 {
4558     int rd = extract32(insn, 0, 5);
4559     int rn = extract32(insn, 5, 5);
4560     int imm6 = extract32(insn, 10, 6);
4561     int rm = extract32(insn, 16, 5);
4562     int shift_type = extract32(insn, 22, 2);
4563     bool setflags = extract32(insn, 29, 1);
4564     bool sub_op = extract32(insn, 30, 1);
4565     bool sf = extract32(insn, 31, 1);
4566
4567     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4568     TCGv_i64 tcg_rn, tcg_rm;
4569     TCGv_i64 tcg_result;
4570
4571     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4572         unallocated_encoding(s);
4573         return;
4574     }
4575
4576     tcg_rn = read_cpu_reg(s, rn, sf);
4577     tcg_rm = read_cpu_reg(s, rm, sf);
4578
4579     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4580
4581     tcg_result = tcg_temp_new_i64();
4582
4583     if (!setflags) {
4584         if (sub_op) {
4585             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4586         } else {
4587             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4588         }
4589     } else {
4590         if (sub_op) {
4591             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4592         } else {
4593             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4594         }
4595     }
4596
4597     if (sf) {
4598         tcg_gen_mov_i64(tcg_rd, tcg_result);
4599     } else {
4600         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4601     }
4602 }
4603
4604 /* Data-processing (3 source)
4605  *
4606  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4607  *  +--+------+-----------+------+------+----+------+------+------+
4608  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4609  *  +--+------+-----------+------+------+----+------+------+------+
4610  */
4611 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4612 {
4613     int rd = extract32(insn, 0, 5);
4614     int rn = extract32(insn, 5, 5);
4615     int ra = extract32(insn, 10, 5);
4616     int rm = extract32(insn, 16, 5);
4617     int op_id = (extract32(insn, 29, 3) << 4) |
4618         (extract32(insn, 21, 3) << 1) |
4619         extract32(insn, 15, 1);
4620     bool sf = extract32(insn, 31, 1);
4621     bool is_sub = extract32(op_id, 0, 1);
4622     bool is_high = extract32(op_id, 2, 1);
4623     bool is_signed = false;
4624     TCGv_i64 tcg_op1;
4625     TCGv_i64 tcg_op2;
4626     TCGv_i64 tcg_tmp;
4627
4628     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4629     switch (op_id) {
4630     case 0x42: /* SMADDL */
4631     case 0x43: /* SMSUBL */
4632     case 0x44: /* SMULH */
4633         is_signed = true;
4634         break;
4635     case 0x0: /* MADD (32bit) */
4636     case 0x1: /* MSUB (32bit) */
4637     case 0x40: /* MADD (64bit) */
4638     case 0x41: /* MSUB (64bit) */
4639     case 0x4a: /* UMADDL */
4640     case 0x4b: /* UMSUBL */
4641     case 0x4c: /* UMULH */
4642         break;
4643     default:
4644         unallocated_encoding(s);
4645         return;
4646     }
4647
4648     if (is_high) {
4649         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4650         TCGv_i64 tcg_rd = cpu_reg(s, rd);
4651         TCGv_i64 tcg_rn = cpu_reg(s, rn);
4652         TCGv_i64 tcg_rm = cpu_reg(s, rm);
4653
4654         if (is_signed) {
4655             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4656         } else {
4657             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4658         }
4659         return;
4660     }
4661
4662     tcg_op1 = tcg_temp_new_i64();
4663     tcg_op2 = tcg_temp_new_i64();
4664     tcg_tmp = tcg_temp_new_i64();
4665
4666     if (op_id < 0x42) {
4667         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4668         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4669     } else {
4670         if (is_signed) {
4671             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4672             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4673         } else {
4674             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4675             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4676         }
4677     }
4678
4679     if (ra == 31 && !is_sub) {
4680         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4681         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4682     } else {
4683         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4684         if (is_sub) {
4685             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4686         } else {
4687             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4688         }
4689     }
4690
4691     if (!sf) {
4692         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4693     }
4694 }
4695
4696 /* Add/subtract (with carry)
4697  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
4698  * +--+--+--+------------------------+------+-------------+------+-----+
4699  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
4700  * +--+--+--+------------------------+------+-------------+------+-----+
4701  */
4702
4703 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4704 {
4705     unsigned int sf, op, setflags, rm, rn, rd;
4706     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4707
4708     sf = extract32(insn, 31, 1);
4709     op = extract32(insn, 30, 1);
4710     setflags = extract32(insn, 29, 1);
4711     rm = extract32(insn, 16, 5);
4712     rn = extract32(insn, 5, 5);
4713     rd = extract32(insn, 0, 5);
4714
4715     tcg_rd = cpu_reg(s, rd);
4716     tcg_rn = cpu_reg(s, rn);
4717
4718     if (op) {
4719         tcg_y = tcg_temp_new_i64();
4720         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4721     } else {
4722         tcg_y = cpu_reg(s, rm);
4723     }
4724
4725     if (setflags) {
4726         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4727     } else {
4728         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4729     }
4730 }
4731
4732 /*
4733  * Rotate right into flags
4734  *  31 30 29                21       15          10      5  4      0
4735  * +--+--+--+-----------------+--------+-----------+------+--+------+
4736  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
4737  * +--+--+--+-----------------+--------+-----------+------+--+------+
4738  */
4739 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
4740 {
4741     int mask = extract32(insn, 0, 4);
4742     int o2 = extract32(insn, 4, 1);
4743     int rn = extract32(insn, 5, 5);
4744     int imm6 = extract32(insn, 15, 6);
4745     int sf_op_s = extract32(insn, 29, 3);
4746     TCGv_i64 tcg_rn;
4747     TCGv_i32 nzcv;
4748
4749     if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
4750         unallocated_encoding(s);
4751         return;
4752     }
4753
4754     tcg_rn = read_cpu_reg(s, rn, 1);
4755     tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
4756
4757     nzcv = tcg_temp_new_i32();
4758     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
4759
4760     if (mask & 8) { /* N */
4761         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
4762     }
4763     if (mask & 4) { /* Z */
4764         tcg_gen_not_i32(cpu_ZF, nzcv);
4765         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
4766     }
4767     if (mask & 2) { /* C */
4768         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
4769     }
4770     if (mask & 1) { /* V */
4771         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
4772     }
4773 }
4774
4775 /*
4776  * Evaluate into flags
4777  *  31 30 29                21        15   14        10      5  4      0
4778  * +--+--+--+-----------------+---------+----+---------+------+--+------+
4779  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
4780  * +--+--+--+-----------------+---------+----+---------+------+--+------+
4781  */
4782 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
4783 {
4784     int o3_mask = extract32(insn, 0, 5);
4785     int rn = extract32(insn, 5, 5);
4786     int o2 = extract32(insn, 15, 6);
4787     int sz = extract32(insn, 14, 1);
4788     int sf_op_s = extract32(insn, 29, 3);
4789     TCGv_i32 tmp;
4790     int shift;
4791
4792     if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
4793         !dc_isar_feature(aa64_condm_4, s)) {
4794         unallocated_encoding(s);
4795         return;
4796     }
4797     shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
4798
4799     tmp = tcg_temp_new_i32();
4800     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
4801     tcg_gen_shli_i32(cpu_NF, tmp, shift);
4802     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
4803     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
4804     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
4805 }
4806
4807 /* Conditional compare (immediate / register)
4808  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
4809  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4810  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
4811  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4812  *        [1]                             y                [0]       [0]
4813  */
4814 static void disas_cc(DisasContext *s, uint32_t insn)
4815 {
4816     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
4817     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
4818     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
4819     DisasCompare c;
4820
4821     if (!extract32(insn, 29, 1)) {
4822         unallocated_encoding(s);
4823         return;
4824     }
4825     if (insn & (1 << 10 | 1 << 4)) {
4826         unallocated_encoding(s);
4827         return;
4828     }
4829     sf = extract32(insn, 31, 1);
4830     op = extract32(insn, 30, 1);
4831     is_imm = extract32(insn, 11, 1);
4832     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
4833     cond = extract32(insn, 12, 4);
4834     rn = extract32(insn, 5, 5);
4835     nzcv = extract32(insn, 0, 4);
4836
4837     /* Set T0 = !COND.  */
4838     tcg_t0 = tcg_temp_new_i32();
4839     arm_test_cc(&c, cond);
4840     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
4841
4842     /* Load the arguments for the new comparison.  */
4843     if (is_imm) {
4844         tcg_y = tcg_temp_new_i64();
4845         tcg_gen_movi_i64(tcg_y, y);
4846     } else {
4847         tcg_y = cpu_reg(s, y);
4848     }
4849     tcg_rn = cpu_reg(s, rn);
4850
4851     /* Set the flags for the new comparison.  */
4852     tcg_tmp = tcg_temp_new_i64();
4853     if (op) {
4854         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4855     } else {
4856         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4857     }
4858
4859     /* If COND was false, force the flags to #nzcv.  Compute two masks
4860      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
4861      * For tcg hosts that support ANDC, we can make do with just T1.
4862      * In either case, allow the tcg optimizer to delete any unused mask.
4863      */
4864     tcg_t1 = tcg_temp_new_i32();
4865     tcg_t2 = tcg_temp_new_i32();
4866     tcg_gen_neg_i32(tcg_t1, tcg_t0);
4867     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
4868
4869     if (nzcv & 8) { /* N */
4870         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
4871     } else {
4872         if (TCG_TARGET_HAS_andc_i32) {
4873             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
4874         } else {
4875             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
4876         }
4877     }
4878     if (nzcv & 4) { /* Z */
4879         if (TCG_TARGET_HAS_andc_i32) {
4880             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
4881         } else {
4882             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
4883         }
4884     } else {
4885         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
4886     }
4887     if (nzcv & 2) { /* C */
4888         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
4889     } else {
4890         if (TCG_TARGET_HAS_andc_i32) {
4891             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
4892         } else {
4893             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
4894         }
4895     }
4896     if (nzcv & 1) { /* V */
4897         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
4898     } else {
4899         if (TCG_TARGET_HAS_andc_i32) {
4900             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
4901         } else {
4902             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
4903         }
4904     }
4905 }
4906
4907 /* Conditional select
4908  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
4909  * +----+----+---+-----------------+------+------+-----+------+------+
4910  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
4911  * +----+----+---+-----------------+------+------+-----+------+------+
4912  */
4913 static void disas_cond_select(DisasContext *s, uint32_t insn)
4914 {
4915     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
4916     TCGv_i64 tcg_rd, zero;
4917     DisasCompare64 c;
4918
4919     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
4920         /* S == 1 or op2<1> == 1 */
4921         unallocated_encoding(s);
4922         return;
4923     }
4924     sf = extract32(insn, 31, 1);
4925     else_inv = extract32(insn, 30, 1);
4926     rm = extract32(insn, 16, 5);
4927     cond = extract32(insn, 12, 4);
4928     else_inc = extract32(insn, 10, 1);
4929     rn = extract32(insn, 5, 5);
4930     rd = extract32(insn, 0, 5);
4931
4932     tcg_rd = cpu_reg(s, rd);
4933
4934     a64_test_cc(&c, cond);
4935     zero = tcg_constant_i64(0);
4936
4937     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
4938         /* CSET & CSETM.  */
4939         if (else_inv) {
4940             tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
4941                                    tcg_rd, c.value, zero);
4942         } else {
4943             tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
4944                                 tcg_rd, c.value, zero);
4945         }
4946     } else {
4947         TCGv_i64 t_true = cpu_reg(s, rn);
4948         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
4949         if (else_inv && else_inc) {
4950             tcg_gen_neg_i64(t_false, t_false);
4951         } else if (else_inv) {
4952             tcg_gen_not_i64(t_false, t_false);
4953         } else if (else_inc) {
4954             tcg_gen_addi_i64(t_false, t_false, 1);
4955         }
4956         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
4957     }
4958
4959     if (!sf) {
4960         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4961     }
4962 }
4963
4964 static void handle_clz(DisasContext *s, unsigned int sf,
4965                        unsigned int rn, unsigned int rd)
4966 {
4967     TCGv_i64 tcg_rd, tcg_rn;
4968     tcg_rd = cpu_reg(s, rd);
4969     tcg_rn = cpu_reg(s, rn);
4970
4971     if (sf) {
4972         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
4973     } else {
4974         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4975         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4976         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
4977         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4978     }
4979 }
4980
4981 static void handle_cls(DisasContext *s, unsigned int sf,
4982                        unsigned int rn, unsigned int rd)
4983 {
4984     TCGv_i64 tcg_rd, tcg_rn;
4985     tcg_rd = cpu_reg(s, rd);
4986     tcg_rn = cpu_reg(s, rn);
4987
4988     if (sf) {
4989         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
4990     } else {
4991         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4992         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4993         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
4994         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4995     }
4996 }
4997
4998 static void handle_rbit(DisasContext *s, unsigned int sf,
4999                         unsigned int rn, unsigned int rd)
5000 {
5001     TCGv_i64 tcg_rd, tcg_rn;
5002     tcg_rd = cpu_reg(s, rd);
5003     tcg_rn = cpu_reg(s, rn);
5004
5005     if (sf) {
5006         gen_helper_rbit64(tcg_rd, tcg_rn);
5007     } else {
5008         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5009         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5010         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
5011         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5012     }
5013 }
5014
5015 /* REV with sf==1, opcode==3 ("REV64") */
5016 static void handle_rev64(DisasContext *s, unsigned int sf,
5017                          unsigned int rn, unsigned int rd)
5018 {
5019     if (!sf) {
5020         unallocated_encoding(s);
5021         return;
5022     }
5023     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
5024 }
5025
5026 /* REV with sf==0, opcode==2
5027  * REV32 (sf==1, opcode==2)
5028  */
5029 static void handle_rev32(DisasContext *s, unsigned int sf,
5030                          unsigned int rn, unsigned int rd)
5031 {
5032     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5033     TCGv_i64 tcg_rn = cpu_reg(s, rn);
5034
5035     if (sf) {
5036         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
5037         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
5038     } else {
5039         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
5040     }
5041 }
5042
5043 /* REV16 (opcode==1) */
5044 static void handle_rev16(DisasContext *s, unsigned int sf,
5045                          unsigned int rn, unsigned int rd)
5046 {
5047     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5048     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5049     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5050     TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
5051
5052     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
5053     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
5054     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
5055     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
5056     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
5057 }
5058
5059 /* Data-processing (1 source)
5060  *   31  30  29  28             21 20     16 15    10 9    5 4    0
5061  * +----+---+---+-----------------+---------+--------+------+------+
5062  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
5063  * +----+---+---+-----------------+---------+--------+------+------+
5064  */
5065 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
5066 {
5067     unsigned int sf, opcode, opcode2, rn, rd;
5068     TCGv_i64 tcg_rd;
5069
5070     if (extract32(insn, 29, 1)) {
5071         unallocated_encoding(s);
5072         return;
5073     }
5074
5075     sf = extract32(insn, 31, 1);
5076     opcode = extract32(insn, 10, 6);
5077     opcode2 = extract32(insn, 16, 5);
5078     rn = extract32(insn, 5, 5);
5079     rd = extract32(insn, 0, 5);
5080
5081 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
5082
5083     switch (MAP(sf, opcode2, opcode)) {
5084     case MAP(0, 0x00, 0x00): /* RBIT */
5085     case MAP(1, 0x00, 0x00):
5086         handle_rbit(s, sf, rn, rd);
5087         break;
5088     case MAP(0, 0x00, 0x01): /* REV16 */
5089     case MAP(1, 0x00, 0x01):
5090         handle_rev16(s, sf, rn, rd);
5091         break;
5092     case MAP(0, 0x00, 0x02): /* REV/REV32 */
5093     case MAP(1, 0x00, 0x02):
5094         handle_rev32(s, sf, rn, rd);
5095         break;
5096     case MAP(1, 0x00, 0x03): /* REV64 */
5097         handle_rev64(s, sf, rn, rd);
5098         break;
5099     case MAP(0, 0x00, 0x04): /* CLZ */
5100     case MAP(1, 0x00, 0x04):
5101         handle_clz(s, sf, rn, rd);
5102         break;
5103     case MAP(0, 0x00, 0x05): /* CLS */
5104     case MAP(1, 0x00, 0x05):
5105         handle_cls(s, sf, rn, rd);
5106         break;
5107     case MAP(1, 0x01, 0x00): /* PACIA */
5108         if (s->pauth_active) {
5109             tcg_rd = cpu_reg(s, rd);
5110             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5111         } else if (!dc_isar_feature(aa64_pauth, s)) {
5112             goto do_unallocated;
5113         }
5114         break;
5115     case MAP(1, 0x01, 0x01): /* PACIB */
5116         if (s->pauth_active) {
5117             tcg_rd = cpu_reg(s, rd);
5118             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5119         } else if (!dc_isar_feature(aa64_pauth, s)) {
5120             goto do_unallocated;
5121         }
5122         break;
5123     case MAP(1, 0x01, 0x02): /* PACDA */
5124         if (s->pauth_active) {
5125             tcg_rd = cpu_reg(s, rd);
5126             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5127         } else if (!dc_isar_feature(aa64_pauth, s)) {
5128             goto do_unallocated;
5129         }
5130         break;
5131     case MAP(1, 0x01, 0x03): /* PACDB */
5132         if (s->pauth_active) {
5133             tcg_rd = cpu_reg(s, rd);
5134             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5135         } else if (!dc_isar_feature(aa64_pauth, s)) {
5136             goto do_unallocated;
5137         }
5138         break;
5139     case MAP(1, 0x01, 0x04): /* AUTIA */
5140         if (s->pauth_active) {
5141             tcg_rd = cpu_reg(s, rd);
5142             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5143         } else if (!dc_isar_feature(aa64_pauth, s)) {
5144             goto do_unallocated;
5145         }
5146         break;
5147     case MAP(1, 0x01, 0x05): /* AUTIB */
5148         if (s->pauth_active) {
5149             tcg_rd = cpu_reg(s, rd);
5150             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5151         } else if (!dc_isar_feature(aa64_pauth, s)) {
5152             goto do_unallocated;
5153         }
5154         break;
5155     case MAP(1, 0x01, 0x06): /* AUTDA */
5156         if (s->pauth_active) {
5157             tcg_rd = cpu_reg(s, rd);
5158             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5159         } else if (!dc_isar_feature(aa64_pauth, s)) {
5160             goto do_unallocated;
5161         }
5162         break;
5163     case MAP(1, 0x01, 0x07): /* AUTDB */
5164         if (s->pauth_active) {
5165             tcg_rd = cpu_reg(s, rd);
5166             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5167         } else if (!dc_isar_feature(aa64_pauth, s)) {
5168             goto do_unallocated;
5169         }
5170         break;
5171     case MAP(1, 0x01, 0x08): /* PACIZA */
5172         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5173             goto do_unallocated;
5174         } else if (s->pauth_active) {
5175             tcg_rd = cpu_reg(s, rd);
5176             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5177         }
5178         break;
5179     case MAP(1, 0x01, 0x09): /* PACIZB */
5180         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5181             goto do_unallocated;
5182         } else if (s->pauth_active) {
5183             tcg_rd = cpu_reg(s, rd);
5184             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5185         }
5186         break;
5187     case MAP(1, 0x01, 0x0a): /* PACDZA */
5188         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5189             goto do_unallocated;
5190         } else if (s->pauth_active) {
5191             tcg_rd = cpu_reg(s, rd);
5192             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5193         }
5194         break;
5195     case MAP(1, 0x01, 0x0b): /* PACDZB */
5196         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5197             goto do_unallocated;
5198         } else if (s->pauth_active) {
5199             tcg_rd = cpu_reg(s, rd);
5200             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5201         }
5202         break;
5203     case MAP(1, 0x01, 0x0c): /* AUTIZA */
5204         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5205             goto do_unallocated;
5206         } else if (s->pauth_active) {
5207             tcg_rd = cpu_reg(s, rd);
5208             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5209         }
5210         break;
5211     case MAP(1, 0x01, 0x0d): /* AUTIZB */
5212         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5213             goto do_unallocated;
5214         } else if (s->pauth_active) {
5215             tcg_rd = cpu_reg(s, rd);
5216             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5217         }
5218         break;
5219     case MAP(1, 0x01, 0x0e): /* AUTDZA */
5220         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5221             goto do_unallocated;
5222         } else if (s->pauth_active) {
5223             tcg_rd = cpu_reg(s, rd);
5224             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5225         }
5226         break;
5227     case MAP(1, 0x01, 0x0f): /* AUTDZB */
5228         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5229             goto do_unallocated;
5230         } else if (s->pauth_active) {
5231             tcg_rd = cpu_reg(s, rd);
5232             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5233         }
5234         break;
5235     case MAP(1, 0x01, 0x10): /* XPACI */
5236         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5237             goto do_unallocated;
5238         } else if (s->pauth_active) {
5239             tcg_rd = cpu_reg(s, rd);
5240             gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
5241         }
5242         break;
5243     case MAP(1, 0x01, 0x11): /* XPACD */
5244         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5245             goto do_unallocated;
5246         } else if (s->pauth_active) {
5247             tcg_rd = cpu_reg(s, rd);
5248             gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
5249         }
5250         break;
5251     default:
5252     do_unallocated:
5253         unallocated_encoding(s);
5254         break;
5255     }
5256
5257 #undef MAP
5258 }
5259
5260 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5261                        unsigned int rm, unsigned int rn, unsigned int rd)
5262 {
5263     TCGv_i64 tcg_n, tcg_m, tcg_rd;
5264     tcg_rd = cpu_reg(s, rd);
5265
5266     if (!sf && is_signed) {
5267         tcg_n = tcg_temp_new_i64();
5268         tcg_m = tcg_temp_new_i64();
5269         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5270         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5271     } else {
5272         tcg_n = read_cpu_reg(s, rn, sf);
5273         tcg_m = read_cpu_reg(s, rm, sf);
5274     }
5275
5276     if (is_signed) {
5277         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5278     } else {
5279         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5280     }
5281
5282     if (!sf) { /* zero extend final result */
5283         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5284     }
5285 }
5286
5287 /* LSLV, LSRV, ASRV, RORV */
5288 static void handle_shift_reg(DisasContext *s,
5289                              enum a64_shift_type shift_type, unsigned int sf,
5290                              unsigned int rm, unsigned int rn, unsigned int rd)
5291 {
5292     TCGv_i64 tcg_shift = tcg_temp_new_i64();
5293     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5294     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5295
5296     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5297     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5298 }
5299
5300 /* CRC32[BHWX], CRC32C[BHWX] */
5301 static void handle_crc32(DisasContext *s,
5302                          unsigned int sf, unsigned int sz, bool crc32c,
5303                          unsigned int rm, unsigned int rn, unsigned int rd)
5304 {
5305     TCGv_i64 tcg_acc, tcg_val;
5306     TCGv_i32 tcg_bytes;
5307
5308     if (!dc_isar_feature(aa64_crc32, s)
5309         || (sf == 1 && sz != 3)
5310         || (sf == 0 && sz == 3)) {
5311         unallocated_encoding(s);
5312         return;
5313     }
5314
5315     if (sz == 3) {
5316         tcg_val = cpu_reg(s, rm);
5317     } else {
5318         uint64_t mask;
5319         switch (sz) {
5320         case 0:
5321             mask = 0xFF;
5322             break;
5323         case 1:
5324             mask = 0xFFFF;
5325             break;
5326         case 2:
5327             mask = 0xFFFFFFFF;
5328             break;
5329         default:
5330             g_assert_not_reached();
5331         }
5332         tcg_val = tcg_temp_new_i64();
5333         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5334     }
5335
5336     tcg_acc = cpu_reg(s, rn);
5337     tcg_bytes = tcg_constant_i32(1 << sz);
5338
5339     if (crc32c) {
5340         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5341     } else {
5342         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5343     }
5344 }
5345
5346 /* Data-processing (2 source)
5347  *   31   30  29 28             21 20  16 15    10 9    5 4    0
5348  * +----+---+---+-----------------+------+--------+------+------+
5349  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
5350  * +----+---+---+-----------------+------+--------+------+------+
5351  */
5352 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5353 {
5354     unsigned int sf, rm, opcode, rn, rd, setflag;
5355     sf = extract32(insn, 31, 1);
5356     setflag = extract32(insn, 29, 1);
5357     rm = extract32(insn, 16, 5);
5358     opcode = extract32(insn, 10, 6);
5359     rn = extract32(insn, 5, 5);
5360     rd = extract32(insn, 0, 5);
5361
5362     if (setflag && opcode != 0) {
5363         unallocated_encoding(s);
5364         return;
5365     }
5366
5367     switch (opcode) {
5368     case 0: /* SUBP(S) */
5369         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5370             goto do_unallocated;
5371         } else {
5372             TCGv_i64 tcg_n, tcg_m, tcg_d;
5373
5374             tcg_n = read_cpu_reg_sp(s, rn, true);
5375             tcg_m = read_cpu_reg_sp(s, rm, true);
5376             tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
5377             tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
5378             tcg_d = cpu_reg(s, rd);
5379
5380             if (setflag) {
5381                 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
5382             } else {
5383                 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
5384             }
5385         }
5386         break;
5387     case 2: /* UDIV */
5388         handle_div(s, false, sf, rm, rn, rd);
5389         break;
5390     case 3: /* SDIV */
5391         handle_div(s, true, sf, rm, rn, rd);
5392         break;
5393     case 4: /* IRG */
5394         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5395             goto do_unallocated;
5396         }
5397         if (s->ata) {
5398             gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
5399                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
5400         } else {
5401             gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
5402                                              cpu_reg_sp(s, rn));
5403         }
5404         break;
5405     case 5: /* GMI */
5406         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5407             goto do_unallocated;
5408         } else {
5409             TCGv_i64 t = tcg_temp_new_i64();
5410
5411             tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
5412             tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
5413             tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
5414         }
5415         break;
5416     case 8: /* LSLV */
5417         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5418         break;
5419     case 9: /* LSRV */
5420         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5421         break;
5422     case 10: /* ASRV */
5423         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5424         break;
5425     case 11: /* RORV */
5426         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5427         break;
5428     case 12: /* PACGA */
5429         if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5430             goto do_unallocated;
5431         }
5432         gen_helper_pacga(cpu_reg(s, rd), cpu_env,
5433                          cpu_reg(s, rn), cpu_reg_sp(s, rm));
5434         break;
5435     case 16:
5436     case 17:
5437     case 18:
5438     case 19:
5439     case 20:
5440     case 21:
5441     case 22:
5442     case 23: /* CRC32 */
5443     {
5444         int sz = extract32(opcode, 0, 2);
5445         bool crc32c = extract32(opcode, 2, 1);
5446         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5447         break;
5448     }
5449     default:
5450     do_unallocated:
5451         unallocated_encoding(s);
5452         break;
5453     }
5454 }
5455
5456 /*
5457  * Data processing - register
5458  *  31  30 29  28      25    21  20  16      10         0
5459  * +--+---+--+---+-------+-----+-------+-------+---------+
5460  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5461  * +--+---+--+---+-------+-----+-------+-------+---------+
5462  */
5463 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5464 {
5465     int op0 = extract32(insn, 30, 1);
5466     int op1 = extract32(insn, 28, 1);
5467     int op2 = extract32(insn, 21, 4);
5468     int op3 = extract32(insn, 10, 6);
5469
5470     if (!op1) {
5471         if (op2 & 8) {
5472             if (op2 & 1) {
5473                 /* Add/sub (extended register) */
5474                 disas_add_sub_ext_reg(s, insn);
5475             } else {
5476                 /* Add/sub (shifted register) */
5477                 disas_add_sub_reg(s, insn);
5478             }
5479         } else {
5480             /* Logical (shifted register) */
5481             disas_logic_reg(s, insn);
5482         }
5483         return;
5484     }
5485
5486     switch (op2) {
5487     case 0x0:
5488         switch (op3) {
5489         case 0x00: /* Add/subtract (with carry) */
5490             disas_adc_sbc(s, insn);
5491             break;
5492
5493         case 0x01: /* Rotate right into flags */
5494         case 0x21:
5495             disas_rotate_right_into_flags(s, insn);
5496             break;
5497
5498         case 0x02: /* Evaluate into flags */
5499         case 0x12:
5500         case 0x22:
5501         case 0x32:
5502             disas_evaluate_into_flags(s, insn);
5503             break;
5504
5505         default:
5506             goto do_unallocated;
5507         }
5508         break;
5509
5510     case 0x2: /* Conditional compare */
5511         disas_cc(s, insn); /* both imm and reg forms */
5512         break;
5513
5514     case 0x4: /* Conditional select */
5515         disas_cond_select(s, insn);
5516         break;
5517
5518     case 0x6: /* Data-processing */
5519         if (op0) {    /* (1 source) */
5520             disas_data_proc_1src(s, insn);
5521         } else {      /* (2 source) */
5522             disas_data_proc_2src(s, insn);
5523         }
5524         break;
5525     case 0x8 ... 0xf: /* (3 source) */
5526         disas_data_proc_3src(s, insn);
5527         break;
5528
5529     default:
5530     do_unallocated:
5531         unallocated_encoding(s);
5532         break;
5533     }
5534 }
5535
5536 static void handle_fp_compare(DisasContext *s, int size,
5537                               unsigned int rn, unsigned int rm,
5538                               bool cmp_with_zero, bool signal_all_nans)
5539 {
5540     TCGv_i64 tcg_flags = tcg_temp_new_i64();
5541     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
5542
5543     if (size == MO_64) {
5544         TCGv_i64 tcg_vn, tcg_vm;
5545
5546         tcg_vn = read_fp_dreg(s, rn);
5547         if (cmp_with_zero) {
5548             tcg_vm = tcg_constant_i64(0);
5549         } else {
5550             tcg_vm = read_fp_dreg(s, rm);
5551         }
5552         if (signal_all_nans) {
5553             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5554         } else {
5555             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5556         }
5557     } else {
5558         TCGv_i32 tcg_vn = tcg_temp_new_i32();
5559         TCGv_i32 tcg_vm = tcg_temp_new_i32();
5560
5561         read_vec_element_i32(s, tcg_vn, rn, 0, size);
5562         if (cmp_with_zero) {
5563             tcg_gen_movi_i32(tcg_vm, 0);
5564         } else {
5565             read_vec_element_i32(s, tcg_vm, rm, 0, size);
5566         }
5567
5568         switch (size) {
5569         case MO_32:
5570             if (signal_all_nans) {
5571                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5572             } else {
5573                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5574             }
5575             break;
5576         case MO_16:
5577             if (signal_all_nans) {
5578                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5579             } else {
5580                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5581             }
5582             break;
5583         default:
5584             g_assert_not_reached();
5585         }
5586     }
5587
5588     gen_set_nzcv(tcg_flags);
5589 }
5590
5591 /* Floating point compare
5592  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5593  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5594  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5595  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5596  */
5597 static void disas_fp_compare(DisasContext *s, uint32_t insn)
5598 {
5599     unsigned int mos, type, rm, op, rn, opc, op2r;
5600     int size;
5601
5602     mos = extract32(insn, 29, 3);
5603     type = extract32(insn, 22, 2);
5604     rm = extract32(insn, 16, 5);
5605     op = extract32(insn, 14, 2);
5606     rn = extract32(insn, 5, 5);
5607     opc = extract32(insn, 3, 2);
5608     op2r = extract32(insn, 0, 3);
5609
5610     if (mos || op || op2r) {
5611         unallocated_encoding(s);
5612         return;
5613     }
5614
5615     switch (type) {
5616     case 0:
5617         size = MO_32;
5618         break;
5619     case 1:
5620         size = MO_64;
5621         break;
5622     case 3:
5623         size = MO_16;
5624         if (dc_isar_feature(aa64_fp16, s)) {
5625             break;
5626         }
5627         /* fallthru */
5628     default:
5629         unallocated_encoding(s);
5630         return;
5631     }
5632
5633     if (!fp_access_check(s)) {
5634         return;
5635     }
5636
5637     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5638 }
5639
5640 /* Floating point conditional compare
5641  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
5642  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5643  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
5644  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5645  */
5646 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
5647 {
5648     unsigned int mos, type, rm, cond, rn, op, nzcv;
5649     TCGLabel *label_continue = NULL;
5650     int size;
5651
5652     mos = extract32(insn, 29, 3);
5653     type = extract32(insn, 22, 2);
5654     rm = extract32(insn, 16, 5);
5655     cond = extract32(insn, 12, 4);
5656     rn = extract32(insn, 5, 5);
5657     op = extract32(insn, 4, 1);
5658     nzcv = extract32(insn, 0, 4);
5659
5660     if (mos) {
5661         unallocated_encoding(s);
5662         return;
5663     }
5664
5665     switch (type) {
5666     case 0:
5667         size = MO_32;
5668         break;
5669     case 1:
5670         size = MO_64;
5671         break;
5672     case 3:
5673         size = MO_16;
5674         if (dc_isar_feature(aa64_fp16, s)) {
5675             break;
5676         }
5677         /* fallthru */
5678     default:
5679         unallocated_encoding(s);
5680         return;
5681     }
5682
5683     if (!fp_access_check(s)) {
5684         return;
5685     }
5686
5687     if (cond < 0x0e) { /* not always */
5688         TCGLabel *label_match = gen_new_label();
5689         label_continue = gen_new_label();
5690         arm_gen_test_cc(cond, label_match);
5691         /* nomatch: */
5692         gen_set_nzcv(tcg_constant_i64(nzcv << 28));
5693         tcg_gen_br(label_continue);
5694         gen_set_label(label_match);
5695     }
5696
5697     handle_fp_compare(s, size, rn, rm, false, op);
5698
5699     if (cond < 0x0e) {
5700         gen_set_label(label_continue);
5701     }
5702 }
5703
5704 /* Floating point conditional select
5705  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
5706  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5707  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
5708  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5709  */
5710 static void disas_fp_csel(DisasContext *s, uint32_t insn)
5711 {
5712     unsigned int mos, type, rm, cond, rn, rd;
5713     TCGv_i64 t_true, t_false;
5714     DisasCompare64 c;
5715     MemOp sz;
5716
5717     mos = extract32(insn, 29, 3);
5718     type = extract32(insn, 22, 2);
5719     rm = extract32(insn, 16, 5);
5720     cond = extract32(insn, 12, 4);
5721     rn = extract32(insn, 5, 5);
5722     rd = extract32(insn, 0, 5);
5723
5724     if (mos) {
5725         unallocated_encoding(s);
5726         return;
5727     }
5728
5729     switch (type) {
5730     case 0:
5731         sz = MO_32;
5732         break;
5733     case 1:
5734         sz = MO_64;
5735         break;
5736     case 3:
5737         sz = MO_16;
5738         if (dc_isar_feature(aa64_fp16, s)) {
5739             break;
5740         }
5741         /* fallthru */
5742     default:
5743         unallocated_encoding(s);
5744         return;
5745     }
5746
5747     if (!fp_access_check(s)) {
5748         return;
5749     }
5750
5751     /* Zero extend sreg & hreg inputs to 64 bits now.  */
5752     t_true = tcg_temp_new_i64();
5753     t_false = tcg_temp_new_i64();
5754     read_vec_element(s, t_true, rn, 0, sz);
5755     read_vec_element(s, t_false, rm, 0, sz);
5756
5757     a64_test_cc(&c, cond);
5758     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
5759                         t_true, t_false);
5760
5761     /* Note that sregs & hregs write back zeros to the high bits,
5762        and we've already done the zero-extension.  */
5763     write_fp_dreg(s, rd, t_true);
5764 }
5765
5766 /* Floating-point data-processing (1 source) - half precision */
5767 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
5768 {
5769     TCGv_ptr fpst = NULL;
5770     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
5771     TCGv_i32 tcg_res = tcg_temp_new_i32();
5772
5773     switch (opcode) {
5774     case 0x0: /* FMOV */
5775         tcg_gen_mov_i32(tcg_res, tcg_op);
5776         break;
5777     case 0x1: /* FABS */
5778         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
5779         break;
5780     case 0x2: /* FNEG */
5781         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
5782         break;
5783     case 0x3: /* FSQRT */
5784         fpst = fpstatus_ptr(FPST_FPCR_F16);
5785         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
5786         break;
5787     case 0x8: /* FRINTN */
5788     case 0x9: /* FRINTP */
5789     case 0xa: /* FRINTM */
5790     case 0xb: /* FRINTZ */
5791     case 0xc: /* FRINTA */
5792     {
5793         TCGv_i32 tcg_rmode;
5794
5795         fpst = fpstatus_ptr(FPST_FPCR_F16);
5796         tcg_rmode = gen_set_rmode(opcode & 7, fpst);
5797         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5798         gen_restore_rmode(tcg_rmode, fpst);
5799         break;
5800     }
5801     case 0xe: /* FRINTX */
5802         fpst = fpstatus_ptr(FPST_FPCR_F16);
5803         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
5804         break;
5805     case 0xf: /* FRINTI */
5806         fpst = fpstatus_ptr(FPST_FPCR_F16);
5807         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5808         break;
5809     default:
5810         g_assert_not_reached();
5811     }
5812
5813     write_fp_sreg(s, rd, tcg_res);
5814 }
5815
5816 /* Floating-point data-processing (1 source) - single precision */
5817 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
5818 {
5819     void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
5820     TCGv_i32 tcg_op, tcg_res;
5821     TCGv_ptr fpst;
5822     int rmode = -1;
5823
5824     tcg_op = read_fp_sreg(s, rn);
5825     tcg_res = tcg_temp_new_i32();
5826
5827     switch (opcode) {
5828     case 0x0: /* FMOV */
5829         tcg_gen_mov_i32(tcg_res, tcg_op);
5830         goto done;
5831     case 0x1: /* FABS */
5832         gen_helper_vfp_abss(tcg_res, tcg_op);
5833         goto done;
5834     case 0x2: /* FNEG */
5835         gen_helper_vfp_negs(tcg_res, tcg_op);
5836         goto done;
5837     case 0x3: /* FSQRT */
5838         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
5839         goto done;
5840     case 0x6: /* BFCVT */
5841         gen_fpst = gen_helper_bfcvt;
5842         break;
5843     case 0x8: /* FRINTN */
5844     case 0x9: /* FRINTP */
5845     case 0xa: /* FRINTM */
5846     case 0xb: /* FRINTZ */
5847     case 0xc: /* FRINTA */
5848         rmode = opcode & 7;
5849         gen_fpst = gen_helper_rints;
5850         break;
5851     case 0xe: /* FRINTX */
5852         gen_fpst = gen_helper_rints_exact;
5853         break;
5854     case 0xf: /* FRINTI */
5855         gen_fpst = gen_helper_rints;
5856         break;
5857     case 0x10: /* FRINT32Z */
5858         rmode = FPROUNDING_ZERO;
5859         gen_fpst = gen_helper_frint32_s;
5860         break;
5861     case 0x11: /* FRINT32X */
5862         gen_fpst = gen_helper_frint32_s;
5863         break;
5864     case 0x12: /* FRINT64Z */
5865         rmode = FPROUNDING_ZERO;
5866         gen_fpst = gen_helper_frint64_s;
5867         break;
5868     case 0x13: /* FRINT64X */
5869         gen_fpst = gen_helper_frint64_s;
5870         break;
5871     default:
5872         g_assert_not_reached();
5873     }
5874
5875     fpst = fpstatus_ptr(FPST_FPCR);
5876     if (rmode >= 0) {
5877         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
5878         gen_fpst(tcg_res, tcg_op, fpst);
5879         gen_restore_rmode(tcg_rmode, fpst);
5880     } else {
5881         gen_fpst(tcg_res, tcg_op, fpst);
5882     }
5883
5884  done:
5885     write_fp_sreg(s, rd, tcg_res);
5886 }
5887
5888 /* Floating-point data-processing (1 source) - double precision */
5889 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
5890 {
5891     void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
5892     TCGv_i64 tcg_op, tcg_res;
5893     TCGv_ptr fpst;
5894     int rmode = -1;
5895
5896     switch (opcode) {
5897     case 0x0: /* FMOV */
5898         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
5899         return;
5900     }
5901
5902     tcg_op = read_fp_dreg(s, rn);
5903     tcg_res = tcg_temp_new_i64();
5904
5905     switch (opcode) {
5906     case 0x1: /* FABS */
5907         gen_helper_vfp_absd(tcg_res, tcg_op);
5908         goto done;
5909     case 0x2: /* FNEG */
5910         gen_helper_vfp_negd(tcg_res, tcg_op);
5911         goto done;
5912     case 0x3: /* FSQRT */
5913         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
5914         goto done;
5915     case 0x8: /* FRINTN */
5916     case 0x9: /* FRINTP */
5917     case 0xa: /* FRINTM */
5918     case 0xb: /* FRINTZ */
5919     case 0xc: /* FRINTA */
5920         rmode = opcode & 7;
5921         gen_fpst = gen_helper_rintd;
5922         break;
5923     case 0xe: /* FRINTX */
5924         gen_fpst = gen_helper_rintd_exact;
5925         break;
5926     case 0xf: /* FRINTI */
5927         gen_fpst = gen_helper_rintd;
5928         break;
5929     case 0x10: /* FRINT32Z */
5930         rmode = FPROUNDING_ZERO;
5931         gen_fpst = gen_helper_frint32_d;
5932         break;
5933     case 0x11: /* FRINT32X */
5934         gen_fpst = gen_helper_frint32_d;
5935         break;
5936     case 0x12: /* FRINT64Z */
5937         rmode = FPROUNDING_ZERO;
5938         gen_fpst = gen_helper_frint64_d;
5939         break;
5940     case 0x13: /* FRINT64X */
5941         gen_fpst = gen_helper_frint64_d;
5942         break;
5943     default:
5944         g_assert_not_reached();
5945     }
5946
5947     fpst = fpstatus_ptr(FPST_FPCR);
5948     if (rmode >= 0) {
5949         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
5950         gen_fpst(tcg_res, tcg_op, fpst);
5951         gen_restore_rmode(tcg_rmode, fpst);
5952     } else {
5953         gen_fpst(tcg_res, tcg_op, fpst);
5954     }
5955
5956  done:
5957     write_fp_dreg(s, rd, tcg_res);
5958 }
5959
5960 static void handle_fp_fcvt(DisasContext *s, int opcode,
5961                            int rd, int rn, int dtype, int ntype)
5962 {
5963     switch (ntype) {
5964     case 0x0:
5965     {
5966         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5967         if (dtype == 1) {
5968             /* Single to double */
5969             TCGv_i64 tcg_rd = tcg_temp_new_i64();
5970             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
5971             write_fp_dreg(s, rd, tcg_rd);
5972         } else {
5973             /* Single to half */
5974             TCGv_i32 tcg_rd = tcg_temp_new_i32();
5975             TCGv_i32 ahp = get_ahp_flag();
5976             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
5977
5978             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5979             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5980             write_fp_sreg(s, rd, tcg_rd);
5981         }
5982         break;
5983     }
5984     case 0x1:
5985     {
5986         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
5987         TCGv_i32 tcg_rd = tcg_temp_new_i32();
5988         if (dtype == 0) {
5989             /* Double to single */
5990             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
5991         } else {
5992             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
5993             TCGv_i32 ahp = get_ahp_flag();
5994             /* Double to half */
5995             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5996             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5997         }
5998         write_fp_sreg(s, rd, tcg_rd);
5999         break;
6000     }
6001     case 0x3:
6002     {
6003         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6004         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
6005         TCGv_i32 tcg_ahp = get_ahp_flag();
6006         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
6007         if (dtype == 0) {
6008             /* Half to single */
6009             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6010             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6011             write_fp_sreg(s, rd, tcg_rd);
6012         } else {
6013             /* Half to double */
6014             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6015             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6016             write_fp_dreg(s, rd, tcg_rd);
6017         }
6018         break;
6019     }
6020     default:
6021         g_assert_not_reached();
6022     }
6023 }
6024
6025 /* Floating point data-processing (1 source)
6026  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
6027  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6028  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
6029  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6030  */
6031 static void disas_fp_1src(DisasContext *s, uint32_t insn)
6032 {
6033     int mos = extract32(insn, 29, 3);
6034     int type = extract32(insn, 22, 2);
6035     int opcode = extract32(insn, 15, 6);
6036     int rn = extract32(insn, 5, 5);
6037     int rd = extract32(insn, 0, 5);
6038
6039     if (mos) {
6040         goto do_unallocated;
6041     }
6042
6043     switch (opcode) {
6044     case 0x4: case 0x5: case 0x7:
6045     {
6046         /* FCVT between half, single and double precision */
6047         int dtype = extract32(opcode, 0, 2);
6048         if (type == 2 || dtype == type) {
6049             goto do_unallocated;
6050         }
6051         if (!fp_access_check(s)) {
6052             return;
6053         }
6054
6055         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
6056         break;
6057     }
6058
6059     case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
6060         if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
6061             goto do_unallocated;
6062         }
6063         /* fall through */
6064     case 0x0 ... 0x3:
6065     case 0x8 ... 0xc:
6066     case 0xe ... 0xf:
6067         /* 32-to-32 and 64-to-64 ops */
6068         switch (type) {
6069         case 0:
6070             if (!fp_access_check(s)) {
6071                 return;
6072             }
6073             handle_fp_1src_single(s, opcode, rd, rn);
6074             break;
6075         case 1:
6076             if (!fp_access_check(s)) {
6077                 return;
6078             }
6079             handle_fp_1src_double(s, opcode, rd, rn);
6080             break;
6081         case 3:
6082             if (!dc_isar_feature(aa64_fp16, s)) {
6083                 goto do_unallocated;
6084             }
6085
6086             if (!fp_access_check(s)) {
6087                 return;
6088             }
6089             handle_fp_1src_half(s, opcode, rd, rn);
6090             break;
6091         default:
6092             goto do_unallocated;
6093         }
6094         break;
6095
6096     case 0x6:
6097         switch (type) {
6098         case 1: /* BFCVT */
6099             if (!dc_isar_feature(aa64_bf16, s)) {
6100                 goto do_unallocated;
6101             }
6102             if (!fp_access_check(s)) {
6103                 return;
6104             }
6105             handle_fp_1src_single(s, opcode, rd, rn);
6106             break;
6107         default:
6108             goto do_unallocated;
6109         }
6110         break;
6111
6112     default:
6113     do_unallocated:
6114         unallocated_encoding(s);
6115         break;
6116     }
6117 }
6118
6119 /* Floating-point data-processing (2 source) - single precision */
6120 static void handle_fp_2src_single(DisasContext *s, int opcode,
6121                                   int rd, int rn, int rm)
6122 {
6123     TCGv_i32 tcg_op1;
6124     TCGv_i32 tcg_op2;
6125     TCGv_i32 tcg_res;
6126     TCGv_ptr fpst;
6127
6128     tcg_res = tcg_temp_new_i32();
6129     fpst = fpstatus_ptr(FPST_FPCR);
6130     tcg_op1 = read_fp_sreg(s, rn);
6131     tcg_op2 = read_fp_sreg(s, rm);
6132
6133     switch (opcode) {
6134     case 0x0: /* FMUL */
6135         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6136         break;
6137     case 0x1: /* FDIV */
6138         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6139         break;
6140     case 0x2: /* FADD */
6141         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6142         break;
6143     case 0x3: /* FSUB */
6144         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6145         break;
6146     case 0x4: /* FMAX */
6147         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6148         break;
6149     case 0x5: /* FMIN */
6150         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6151         break;
6152     case 0x6: /* FMAXNM */
6153         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6154         break;
6155     case 0x7: /* FMINNM */
6156         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6157         break;
6158     case 0x8: /* FNMUL */
6159         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6160         gen_helper_vfp_negs(tcg_res, tcg_res);
6161         break;
6162     }
6163
6164     write_fp_sreg(s, rd, tcg_res);
6165 }
6166
6167 /* Floating-point data-processing (2 source) - double precision */
6168 static void handle_fp_2src_double(DisasContext *s, int opcode,
6169                                   int rd, int rn, int rm)
6170 {
6171     TCGv_i64 tcg_op1;
6172     TCGv_i64 tcg_op2;
6173     TCGv_i64 tcg_res;
6174     TCGv_ptr fpst;
6175
6176     tcg_res = tcg_temp_new_i64();
6177     fpst = fpstatus_ptr(FPST_FPCR);
6178     tcg_op1 = read_fp_dreg(s, rn);
6179     tcg_op2 = read_fp_dreg(s, rm);
6180
6181     switch (opcode) {
6182     case 0x0: /* FMUL */
6183         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6184         break;
6185     case 0x1: /* FDIV */
6186         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6187         break;
6188     case 0x2: /* FADD */
6189         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6190         break;
6191     case 0x3: /* FSUB */
6192         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6193         break;
6194     case 0x4: /* FMAX */
6195         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6196         break;
6197     case 0x5: /* FMIN */
6198         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6199         break;
6200     case 0x6: /* FMAXNM */
6201         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6202         break;
6203     case 0x7: /* FMINNM */
6204         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6205         break;
6206     case 0x8: /* FNMUL */
6207         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6208         gen_helper_vfp_negd(tcg_res, tcg_res);
6209         break;
6210     }
6211
6212     write_fp_dreg(s, rd, tcg_res);
6213 }
6214
6215 /* Floating-point data-processing (2 source) - half precision */
6216 static void handle_fp_2src_half(DisasContext *s, int opcode,
6217                                 int rd, int rn, int rm)
6218 {
6219     TCGv_i32 tcg_op1;
6220     TCGv_i32 tcg_op2;
6221     TCGv_i32 tcg_res;
6222     TCGv_ptr fpst;
6223
6224     tcg_res = tcg_temp_new_i32();
6225     fpst = fpstatus_ptr(FPST_FPCR_F16);
6226     tcg_op1 = read_fp_hreg(s, rn);
6227     tcg_op2 = read_fp_hreg(s, rm);
6228
6229     switch (opcode) {
6230     case 0x0: /* FMUL */
6231         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6232         break;
6233     case 0x1: /* FDIV */
6234         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6235         break;
6236     case 0x2: /* FADD */
6237         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6238         break;
6239     case 0x3: /* FSUB */
6240         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6241         break;
6242     case 0x4: /* FMAX */
6243         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6244         break;
6245     case 0x5: /* FMIN */
6246         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6247         break;
6248     case 0x6: /* FMAXNM */
6249         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6250         break;
6251     case 0x7: /* FMINNM */
6252         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6253         break;
6254     case 0x8: /* FNMUL */
6255         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6256         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6257         break;
6258     default:
6259         g_assert_not_reached();
6260     }
6261
6262     write_fp_sreg(s, rd, tcg_res);
6263 }
6264
6265 /* Floating point data-processing (2 source)
6266  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6267  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6268  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6269  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6270  */
6271 static void disas_fp_2src(DisasContext *s, uint32_t insn)
6272 {
6273     int mos = extract32(insn, 29, 3);
6274     int type = extract32(insn, 22, 2);
6275     int rd = extract32(insn, 0, 5);
6276     int rn = extract32(insn, 5, 5);
6277     int rm = extract32(insn, 16, 5);
6278     int opcode = extract32(insn, 12, 4);
6279
6280     if (opcode > 8 || mos) {
6281         unallocated_encoding(s);
6282         return;
6283     }
6284
6285     switch (type) {
6286     case 0:
6287         if (!fp_access_check(s)) {
6288             return;
6289         }
6290         handle_fp_2src_single(s, opcode, rd, rn, rm);
6291         break;
6292     case 1:
6293         if (!fp_access_check(s)) {
6294             return;
6295         }
6296         handle_fp_2src_double(s, opcode, rd, rn, rm);
6297         break;
6298     case 3:
6299         if (!dc_isar_feature(aa64_fp16, s)) {
6300             unallocated_encoding(s);
6301             return;
6302         }
6303         if (!fp_access_check(s)) {
6304             return;
6305         }
6306         handle_fp_2src_half(s, opcode, rd, rn, rm);
6307         break;
6308     default:
6309         unallocated_encoding(s);
6310     }
6311 }
6312
6313 /* Floating-point data-processing (3 source) - single precision */
6314 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6315                                   int rd, int rn, int rm, int ra)
6316 {
6317     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6318     TCGv_i32 tcg_res = tcg_temp_new_i32();
6319     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6320
6321     tcg_op1 = read_fp_sreg(s, rn);
6322     tcg_op2 = read_fp_sreg(s, rm);
6323     tcg_op3 = read_fp_sreg(s, ra);
6324
6325     /* These are fused multiply-add, and must be done as one
6326      * floating point operation with no rounding between the
6327      * multiplication and addition steps.
6328      * NB that doing the negations here as separate steps is
6329      * correct : an input NaN should come out with its sign bit
6330      * flipped if it is a negated-input.
6331      */
6332     if (o1 == true) {
6333         gen_helper_vfp_negs(tcg_op3, tcg_op3);
6334     }
6335
6336     if (o0 != o1) {
6337         gen_helper_vfp_negs(tcg_op1, tcg_op1);
6338     }
6339
6340     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6341
6342     write_fp_sreg(s, rd, tcg_res);
6343 }
6344
6345 /* Floating-point data-processing (3 source) - double precision */
6346 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6347                                   int rd, int rn, int rm, int ra)
6348 {
6349     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6350     TCGv_i64 tcg_res = tcg_temp_new_i64();
6351     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6352
6353     tcg_op1 = read_fp_dreg(s, rn);
6354     tcg_op2 = read_fp_dreg(s, rm);
6355     tcg_op3 = read_fp_dreg(s, ra);
6356
6357     /* These are fused multiply-add, and must be done as one
6358      * floating point operation with no rounding between the
6359      * multiplication and addition steps.
6360      * NB that doing the negations here as separate steps is
6361      * correct : an input NaN should come out with its sign bit
6362      * flipped if it is a negated-input.
6363      */
6364     if (o1 == true) {
6365         gen_helper_vfp_negd(tcg_op3, tcg_op3);
6366     }
6367
6368     if (o0 != o1) {
6369         gen_helper_vfp_negd(tcg_op1, tcg_op1);
6370     }
6371
6372     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6373
6374     write_fp_dreg(s, rd, tcg_res);
6375 }
6376
6377 /* Floating-point data-processing (3 source) - half precision */
6378 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6379                                 int rd, int rn, int rm, int ra)
6380 {
6381     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6382     TCGv_i32 tcg_res = tcg_temp_new_i32();
6383     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
6384
6385     tcg_op1 = read_fp_hreg(s, rn);
6386     tcg_op2 = read_fp_hreg(s, rm);
6387     tcg_op3 = read_fp_hreg(s, ra);
6388
6389     /* These are fused multiply-add, and must be done as one
6390      * floating point operation with no rounding between the
6391      * multiplication and addition steps.
6392      * NB that doing the negations here as separate steps is
6393      * correct : an input NaN should come out with its sign bit
6394      * flipped if it is a negated-input.
6395      */
6396     if (o1 == true) {
6397         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6398     }
6399
6400     if (o0 != o1) {
6401         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6402     }
6403
6404     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6405
6406     write_fp_sreg(s, rd, tcg_res);
6407 }
6408
6409 /* Floating point data-processing (3 source)
6410  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6411  * +---+---+---+-----------+------+----+------+----+------+------+------+
6412  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6413  * +---+---+---+-----------+------+----+------+----+------+------+------+
6414  */
6415 static void disas_fp_3src(DisasContext *s, uint32_t insn)
6416 {
6417     int mos = extract32(insn, 29, 3);
6418     int type = extract32(insn, 22, 2);
6419     int rd = extract32(insn, 0, 5);
6420     int rn = extract32(insn, 5, 5);
6421     int ra = extract32(insn, 10, 5);
6422     int rm = extract32(insn, 16, 5);
6423     bool o0 = extract32(insn, 15, 1);
6424     bool o1 = extract32(insn, 21, 1);
6425
6426     if (mos) {
6427         unallocated_encoding(s);
6428         return;
6429     }
6430
6431     switch (type) {
6432     case 0:
6433         if (!fp_access_check(s)) {
6434             return;
6435         }
6436         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6437         break;
6438     case 1:
6439         if (!fp_access_check(s)) {
6440             return;
6441         }
6442         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6443         break;
6444     case 3:
6445         if (!dc_isar_feature(aa64_fp16, s)) {
6446             unallocated_encoding(s);
6447             return;
6448         }
6449         if (!fp_access_check(s)) {
6450             return;
6451         }
6452         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6453         break;
6454     default:
6455         unallocated_encoding(s);
6456     }
6457 }
6458
6459 /* Floating point immediate
6460  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6461  * +---+---+---+-----------+------+---+------------+-------+------+------+
6462  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6463  * +---+---+---+-----------+------+---+------------+-------+------+------+
6464  */
6465 static void disas_fp_imm(DisasContext *s, uint32_t insn)
6466 {
6467     int rd = extract32(insn, 0, 5);
6468     int imm5 = extract32(insn, 5, 5);
6469     int imm8 = extract32(insn, 13, 8);
6470     int type = extract32(insn, 22, 2);
6471     int mos = extract32(insn, 29, 3);
6472     uint64_t imm;
6473     MemOp sz;
6474
6475     if (mos || imm5) {
6476         unallocated_encoding(s);
6477         return;
6478     }
6479
6480     switch (type) {
6481     case 0:
6482         sz = MO_32;
6483         break;
6484     case 1:
6485         sz = MO_64;
6486         break;
6487     case 3:
6488         sz = MO_16;
6489         if (dc_isar_feature(aa64_fp16, s)) {
6490             break;
6491         }
6492         /* fallthru */
6493     default:
6494         unallocated_encoding(s);
6495         return;
6496     }
6497
6498     if (!fp_access_check(s)) {
6499         return;
6500     }
6501
6502     imm = vfp_expand_imm(sz, imm8);
6503     write_fp_dreg(s, rd, tcg_constant_i64(imm));
6504 }
6505
6506 /* Handle floating point <=> fixed point conversions. Note that we can
6507  * also deal with fp <=> integer conversions as a special case (scale == 64)
6508  * OPTME: consider handling that special case specially or at least skipping
6509  * the call to scalbn in the helpers for zero shifts.
6510  */
6511 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6512                            bool itof, int rmode, int scale, int sf, int type)
6513 {
6514     bool is_signed = !(opcode & 1);
6515     TCGv_ptr tcg_fpstatus;
6516     TCGv_i32 tcg_shift, tcg_single;
6517     TCGv_i64 tcg_double;
6518
6519     tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
6520
6521     tcg_shift = tcg_constant_i32(64 - scale);
6522
6523     if (itof) {
6524         TCGv_i64 tcg_int = cpu_reg(s, rn);
6525         if (!sf) {
6526             TCGv_i64 tcg_extend = tcg_temp_new_i64();
6527
6528             if (is_signed) {
6529                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6530             } else {
6531                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6532             }
6533
6534             tcg_int = tcg_extend;
6535         }
6536
6537         switch (type) {
6538         case 1: /* float64 */
6539             tcg_double = tcg_temp_new_i64();
6540             if (is_signed) {
6541                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6542                                      tcg_shift, tcg_fpstatus);
6543             } else {
6544                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6545                                      tcg_shift, tcg_fpstatus);
6546             }
6547             write_fp_dreg(s, rd, tcg_double);
6548             break;
6549
6550         case 0: /* float32 */
6551             tcg_single = tcg_temp_new_i32();
6552             if (is_signed) {
6553                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6554                                      tcg_shift, tcg_fpstatus);
6555             } else {
6556                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6557                                      tcg_shift, tcg_fpstatus);
6558             }
6559             write_fp_sreg(s, rd, tcg_single);
6560             break;
6561
6562         case 3: /* float16 */
6563             tcg_single = tcg_temp_new_i32();
6564             if (is_signed) {
6565                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6566                                      tcg_shift, tcg_fpstatus);
6567             } else {
6568                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6569                                      tcg_shift, tcg_fpstatus);
6570             }
6571             write_fp_sreg(s, rd, tcg_single);
6572             break;
6573
6574         default:
6575             g_assert_not_reached();
6576         }
6577     } else {
6578         TCGv_i64 tcg_int = cpu_reg(s, rd);
6579         TCGv_i32 tcg_rmode;
6580
6581         if (extract32(opcode, 2, 1)) {
6582             /* There are too many rounding modes to all fit into rmode,
6583              * so FCVTA[US] is a special case.
6584              */
6585             rmode = FPROUNDING_TIEAWAY;
6586         }
6587
6588         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
6589
6590         switch (type) {
6591         case 1: /* float64 */
6592             tcg_double = read_fp_dreg(s, rn);
6593             if (is_signed) {
6594                 if (!sf) {
6595                     gen_helper_vfp_tosld(tcg_int, tcg_double,
6596                                          tcg_shift, tcg_fpstatus);
6597                 } else {
6598                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
6599                                          tcg_shift, tcg_fpstatus);
6600                 }
6601             } else {
6602                 if (!sf) {
6603                     gen_helper_vfp_tould(tcg_int, tcg_double,
6604                                          tcg_shift, tcg_fpstatus);
6605                 } else {
6606                     gen_helper_vfp_touqd(tcg_int, tcg_double,
6607                                          tcg_shift, tcg_fpstatus);
6608                 }
6609             }
6610             if (!sf) {
6611                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
6612             }
6613             break;
6614
6615         case 0: /* float32 */
6616             tcg_single = read_fp_sreg(s, rn);
6617             if (sf) {
6618                 if (is_signed) {
6619                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
6620                                          tcg_shift, tcg_fpstatus);
6621                 } else {
6622                     gen_helper_vfp_touqs(tcg_int, tcg_single,
6623                                          tcg_shift, tcg_fpstatus);
6624                 }
6625             } else {
6626                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6627                 if (is_signed) {
6628                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
6629                                          tcg_shift, tcg_fpstatus);
6630                 } else {
6631                     gen_helper_vfp_touls(tcg_dest, tcg_single,
6632                                          tcg_shift, tcg_fpstatus);
6633                 }
6634                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6635             }
6636             break;
6637
6638         case 3: /* float16 */
6639             tcg_single = read_fp_sreg(s, rn);
6640             if (sf) {
6641                 if (is_signed) {
6642                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
6643                                          tcg_shift, tcg_fpstatus);
6644                 } else {
6645                     gen_helper_vfp_touqh(tcg_int, tcg_single,
6646                                          tcg_shift, tcg_fpstatus);
6647                 }
6648             } else {
6649                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6650                 if (is_signed) {
6651                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
6652                                          tcg_shift, tcg_fpstatus);
6653                 } else {
6654                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
6655                                          tcg_shift, tcg_fpstatus);
6656                 }
6657                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6658             }
6659             break;
6660
6661         default:
6662             g_assert_not_reached();
6663         }
6664
6665         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
6666     }
6667 }
6668
6669 /* Floating point <-> fixed point conversions
6670  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
6671  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6672  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
6673  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6674  */
6675 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
6676 {
6677     int rd = extract32(insn, 0, 5);
6678     int rn = extract32(insn, 5, 5);
6679     int scale = extract32(insn, 10, 6);
6680     int opcode = extract32(insn, 16, 3);
6681     int rmode = extract32(insn, 19, 2);
6682     int type = extract32(insn, 22, 2);
6683     bool sbit = extract32(insn, 29, 1);
6684     bool sf = extract32(insn, 31, 1);
6685     bool itof;
6686
6687     if (sbit || (!sf && scale < 32)) {
6688         unallocated_encoding(s);
6689         return;
6690     }
6691
6692     switch (type) {
6693     case 0: /* float32 */
6694     case 1: /* float64 */
6695         break;
6696     case 3: /* float16 */
6697         if (dc_isar_feature(aa64_fp16, s)) {
6698             break;
6699         }
6700         /* fallthru */
6701     default:
6702         unallocated_encoding(s);
6703         return;
6704     }
6705
6706     switch ((rmode << 3) | opcode) {
6707     case 0x2: /* SCVTF */
6708     case 0x3: /* UCVTF */
6709         itof = true;
6710         break;
6711     case 0x18: /* FCVTZS */
6712     case 0x19: /* FCVTZU */
6713         itof = false;
6714         break;
6715     default:
6716         unallocated_encoding(s);
6717         return;
6718     }
6719
6720     if (!fp_access_check(s)) {
6721         return;
6722     }
6723
6724     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
6725 }
6726
6727 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
6728 {
6729     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
6730      * without conversion.
6731      */
6732
6733     if (itof) {
6734         TCGv_i64 tcg_rn = cpu_reg(s, rn);
6735         TCGv_i64 tmp;
6736
6737         switch (type) {
6738         case 0:
6739             /* 32 bit */
6740             tmp = tcg_temp_new_i64();
6741             tcg_gen_ext32u_i64(tmp, tcg_rn);
6742             write_fp_dreg(s, rd, tmp);
6743             break;
6744         case 1:
6745             /* 64 bit */
6746             write_fp_dreg(s, rd, tcg_rn);
6747             break;
6748         case 2:
6749             /* 64 bit to top half. */
6750             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
6751             clear_vec_high(s, true, rd);
6752             break;
6753         case 3:
6754             /* 16 bit */
6755             tmp = tcg_temp_new_i64();
6756             tcg_gen_ext16u_i64(tmp, tcg_rn);
6757             write_fp_dreg(s, rd, tmp);
6758             break;
6759         default:
6760             g_assert_not_reached();
6761         }
6762     } else {
6763         TCGv_i64 tcg_rd = cpu_reg(s, rd);
6764
6765         switch (type) {
6766         case 0:
6767             /* 32 bit */
6768             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
6769             break;
6770         case 1:
6771             /* 64 bit */
6772             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
6773             break;
6774         case 2:
6775             /* 64 bits from top half */
6776             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
6777             break;
6778         case 3:
6779             /* 16 bit */
6780             tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
6781             break;
6782         default:
6783             g_assert_not_reached();
6784         }
6785     }
6786 }
6787
6788 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
6789 {
6790     TCGv_i64 t = read_fp_dreg(s, rn);
6791     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
6792
6793     gen_helper_fjcvtzs(t, t, fpstatus);
6794
6795     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
6796     tcg_gen_extrh_i64_i32(cpu_ZF, t);
6797     tcg_gen_movi_i32(cpu_CF, 0);
6798     tcg_gen_movi_i32(cpu_NF, 0);
6799     tcg_gen_movi_i32(cpu_VF, 0);
6800 }
6801
6802 /* Floating point <-> integer conversions
6803  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
6804  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6805  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
6806  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6807  */
6808 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
6809 {
6810     int rd = extract32(insn, 0, 5);
6811     int rn = extract32(insn, 5, 5);
6812     int opcode = extract32(insn, 16, 3);
6813     int rmode = extract32(insn, 19, 2);
6814     int type = extract32(insn, 22, 2);
6815     bool sbit = extract32(insn, 29, 1);
6816     bool sf = extract32(insn, 31, 1);
6817     bool itof = false;
6818
6819     if (sbit) {
6820         goto do_unallocated;
6821     }
6822
6823     switch (opcode) {
6824     case 2: /* SCVTF */
6825     case 3: /* UCVTF */
6826         itof = true;
6827         /* fallthru */
6828     case 4: /* FCVTAS */
6829     case 5: /* FCVTAU */
6830         if (rmode != 0) {
6831             goto do_unallocated;
6832         }
6833         /* fallthru */
6834     case 0: /* FCVT[NPMZ]S */
6835     case 1: /* FCVT[NPMZ]U */
6836         switch (type) {
6837         case 0: /* float32 */
6838         case 1: /* float64 */
6839             break;
6840         case 3: /* float16 */
6841             if (!dc_isar_feature(aa64_fp16, s)) {
6842                 goto do_unallocated;
6843             }
6844             break;
6845         default:
6846             goto do_unallocated;
6847         }
6848         if (!fp_access_check(s)) {
6849             return;
6850         }
6851         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
6852         break;
6853
6854     default:
6855         switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
6856         case 0b01100110: /* FMOV half <-> 32-bit int */
6857         case 0b01100111:
6858         case 0b11100110: /* FMOV half <-> 64-bit int */
6859         case 0b11100111:
6860             if (!dc_isar_feature(aa64_fp16, s)) {
6861                 goto do_unallocated;
6862             }
6863             /* fallthru */
6864         case 0b00000110: /* FMOV 32-bit */
6865         case 0b00000111:
6866         case 0b10100110: /* FMOV 64-bit */
6867         case 0b10100111:
6868         case 0b11001110: /* FMOV top half of 128-bit */
6869         case 0b11001111:
6870             if (!fp_access_check(s)) {
6871                 return;
6872             }
6873             itof = opcode & 1;
6874             handle_fmov(s, rd, rn, type, itof);
6875             break;
6876
6877         case 0b00111110: /* FJCVTZS */
6878             if (!dc_isar_feature(aa64_jscvt, s)) {
6879                 goto do_unallocated;
6880             } else if (fp_access_check(s)) {
6881                 handle_fjcvtzs(s, rd, rn);
6882             }
6883             break;
6884
6885         default:
6886         do_unallocated:
6887             unallocated_encoding(s);
6888             return;
6889         }
6890         break;
6891     }
6892 }
6893
6894 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
6895  *   31  30  29 28     25 24                          0
6896  * +---+---+---+---------+-----------------------------+
6897  * |   | 0 |   | 1 1 1 1 |                             |
6898  * +---+---+---+---------+-----------------------------+
6899  */
6900 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
6901 {
6902     if (extract32(insn, 24, 1)) {
6903         /* Floating point data-processing (3 source) */
6904         disas_fp_3src(s, insn);
6905     } else if (extract32(insn, 21, 1) == 0) {
6906         /* Floating point to fixed point conversions */
6907         disas_fp_fixed_conv(s, insn);
6908     } else {
6909         switch (extract32(insn, 10, 2)) {
6910         case 1:
6911             /* Floating point conditional compare */
6912             disas_fp_ccomp(s, insn);
6913             break;
6914         case 2:
6915             /* Floating point data-processing (2 source) */
6916             disas_fp_2src(s, insn);
6917             break;
6918         case 3:
6919             /* Floating point conditional select */
6920             disas_fp_csel(s, insn);
6921             break;
6922         case 0:
6923             switch (ctz32(extract32(insn, 12, 4))) {
6924             case 0: /* [15:12] == xxx1 */
6925                 /* Floating point immediate */
6926                 disas_fp_imm(s, insn);
6927                 break;
6928             case 1: /* [15:12] == xx10 */
6929                 /* Floating point compare */
6930                 disas_fp_compare(s, insn);
6931                 break;
6932             case 2: /* [15:12] == x100 */
6933                 /* Floating point data-processing (1 source) */
6934                 disas_fp_1src(s, insn);
6935                 break;
6936             case 3: /* [15:12] == 1000 */
6937                 unallocated_encoding(s);
6938                 break;
6939             default: /* [15:12] == 0000 */
6940                 /* Floating point <-> integer conversions */
6941                 disas_fp_int_conv(s, insn);
6942                 break;
6943             }
6944             break;
6945         }
6946     }
6947 }
6948
6949 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
6950                      int pos)
6951 {
6952     /* Extract 64 bits from the middle of two concatenated 64 bit
6953      * vector register slices left:right. The extracted bits start
6954      * at 'pos' bits into the right (least significant) side.
6955      * We return the result in tcg_right, and guarantee not to
6956      * trash tcg_left.
6957      */
6958     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
6959     assert(pos > 0 && pos < 64);
6960
6961     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
6962     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
6963     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
6964 }
6965
6966 /* EXT
6967  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
6968  * +---+---+-------------+-----+---+------+---+------+---+------+------+
6969  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
6970  * +---+---+-------------+-----+---+------+---+------+---+------+------+
6971  */
6972 static void disas_simd_ext(DisasContext *s, uint32_t insn)
6973 {
6974     int is_q = extract32(insn, 30, 1);
6975     int op2 = extract32(insn, 22, 2);
6976     int imm4 = extract32(insn, 11, 4);
6977     int rm = extract32(insn, 16, 5);
6978     int rn = extract32(insn, 5, 5);
6979     int rd = extract32(insn, 0, 5);
6980     int pos = imm4 << 3;
6981     TCGv_i64 tcg_resl, tcg_resh;
6982
6983     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
6984         unallocated_encoding(s);
6985         return;
6986     }
6987
6988     if (!fp_access_check(s)) {
6989         return;
6990     }
6991
6992     tcg_resh = tcg_temp_new_i64();
6993     tcg_resl = tcg_temp_new_i64();
6994
6995     /* Vd gets bits starting at pos bits into Vm:Vn. This is
6996      * either extracting 128 bits from a 128:128 concatenation, or
6997      * extracting 64 bits from a 64:64 concatenation.
6998      */
6999     if (!is_q) {
7000         read_vec_element(s, tcg_resl, rn, 0, MO_64);
7001         if (pos != 0) {
7002             read_vec_element(s, tcg_resh, rm, 0, MO_64);
7003             do_ext64(s, tcg_resh, tcg_resl, pos);
7004         }
7005     } else {
7006         TCGv_i64 tcg_hh;
7007         typedef struct {
7008             int reg;
7009             int elt;
7010         } EltPosns;
7011         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
7012         EltPosns *elt = eltposns;
7013
7014         if (pos >= 64) {
7015             elt++;
7016             pos -= 64;
7017         }
7018
7019         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
7020         elt++;
7021         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
7022         elt++;
7023         if (pos != 0) {
7024             do_ext64(s, tcg_resh, tcg_resl, pos);
7025             tcg_hh = tcg_temp_new_i64();
7026             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
7027             do_ext64(s, tcg_hh, tcg_resh, pos);
7028         }
7029     }
7030
7031     write_vec_element(s, tcg_resl, rd, 0, MO_64);
7032     if (is_q) {
7033         write_vec_element(s, tcg_resh, rd, 1, MO_64);
7034     }
7035     clear_vec_high(s, is_q, rd);
7036 }
7037
7038 /* TBL/TBX
7039  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
7040  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7041  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
7042  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7043  */
7044 static void disas_simd_tb(DisasContext *s, uint32_t insn)
7045 {
7046     int op2 = extract32(insn, 22, 2);
7047     int is_q = extract32(insn, 30, 1);
7048     int rm = extract32(insn, 16, 5);
7049     int rn = extract32(insn, 5, 5);
7050     int rd = extract32(insn, 0, 5);
7051     int is_tbx = extract32(insn, 12, 1);
7052     int len = (extract32(insn, 13, 2) + 1) * 16;
7053
7054     if (op2 != 0) {
7055         unallocated_encoding(s);
7056         return;
7057     }
7058
7059     if (!fp_access_check(s)) {
7060         return;
7061     }
7062
7063     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
7064                        vec_full_reg_offset(s, rm), cpu_env,
7065                        is_q ? 16 : 8, vec_full_reg_size(s),
7066                        (len << 6) | (is_tbx << 5) | rn,
7067                        gen_helper_simd_tblx);
7068 }
7069
7070 /* ZIP/UZP/TRN
7071  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
7072  * +---+---+-------------+------+---+------+---+------------------+------+
7073  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
7074  * +---+---+-------------+------+---+------+---+------------------+------+
7075  */
7076 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
7077 {
7078     int rd = extract32(insn, 0, 5);
7079     int rn = extract32(insn, 5, 5);
7080     int rm = extract32(insn, 16, 5);
7081     int size = extract32(insn, 22, 2);
7082     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
7083      * bit 2 indicates 1 vs 2 variant of the insn.
7084      */
7085     int opcode = extract32(insn, 12, 2);
7086     bool part = extract32(insn, 14, 1);
7087     bool is_q = extract32(insn, 30, 1);
7088     int esize = 8 << size;
7089     int i;
7090     int datasize = is_q ? 128 : 64;
7091     int elements = datasize / esize;
7092     TCGv_i64 tcg_res[2], tcg_ele;
7093
7094     if (opcode == 0 || (size == 3 && !is_q)) {
7095         unallocated_encoding(s);
7096         return;
7097     }
7098
7099     if (!fp_access_check(s)) {
7100         return;
7101     }
7102
7103     tcg_res[0] = tcg_temp_new_i64();
7104     tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
7105     tcg_ele = tcg_temp_new_i64();
7106
7107     for (i = 0; i < elements; i++) {
7108         int o, w;
7109
7110         switch (opcode) {
7111         case 1: /* UZP1/2 */
7112         {
7113             int midpoint = elements / 2;
7114             if (i < midpoint) {
7115                 read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
7116             } else {
7117                 read_vec_element(s, tcg_ele, rm,
7118                                  2 * (i - midpoint) + part, size);
7119             }
7120             break;
7121         }
7122         case 2: /* TRN1/2 */
7123             if (i & 1) {
7124                 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
7125             } else {
7126                 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
7127             }
7128             break;
7129         case 3: /* ZIP1/2 */
7130         {
7131             int base = part * elements / 2;
7132             if (i & 1) {
7133                 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
7134             } else {
7135                 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
7136             }
7137             break;
7138         }
7139         default:
7140             g_assert_not_reached();
7141         }
7142
7143         w = (i * esize) / 64;
7144         o = (i * esize) % 64;
7145         if (o == 0) {
7146             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
7147         } else {
7148             tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
7149             tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
7150         }
7151     }
7152
7153     for (i = 0; i <= is_q; ++i) {
7154         write_vec_element(s, tcg_res[i], rd, i, MO_64);
7155     }
7156     clear_vec_high(s, is_q, rd);
7157 }
7158
7159 /*
7160  * do_reduction_op helper
7161  *
7162  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7163  * important for correct NaN propagation that we do these
7164  * operations in exactly the order specified by the pseudocode.
7165  *
7166  * This is a recursive function, TCG temps should be freed by the
7167  * calling function once it is done with the values.
7168  */
7169 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7170                                 int esize, int size, int vmap, TCGv_ptr fpst)
7171 {
7172     if (esize == size) {
7173         int element;
7174         MemOp msize = esize == 16 ? MO_16 : MO_32;
7175         TCGv_i32 tcg_elem;
7176
7177         /* We should have one register left here */
7178         assert(ctpop8(vmap) == 1);
7179         element = ctz32(vmap);
7180         assert(element < 8);
7181
7182         tcg_elem = tcg_temp_new_i32();
7183         read_vec_element_i32(s, tcg_elem, rn, element, msize);
7184         return tcg_elem;
7185     } else {
7186         int bits = size / 2;
7187         int shift = ctpop8(vmap) / 2;
7188         int vmap_lo = (vmap >> shift) & vmap;
7189         int vmap_hi = (vmap & ~vmap_lo);
7190         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7191
7192         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7193         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7194         tcg_res = tcg_temp_new_i32();
7195
7196         switch (fpopcode) {
7197         case 0x0c: /* fmaxnmv half-precision */
7198             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7199             break;
7200         case 0x0f: /* fmaxv half-precision */
7201             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7202             break;
7203         case 0x1c: /* fminnmv half-precision */
7204             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7205             break;
7206         case 0x1f: /* fminv half-precision */
7207             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7208             break;
7209         case 0x2c: /* fmaxnmv */
7210             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7211             break;
7212         case 0x2f: /* fmaxv */
7213             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7214             break;
7215         case 0x3c: /* fminnmv */
7216             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7217             break;
7218         case 0x3f: /* fminv */
7219             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7220             break;
7221         default:
7222             g_assert_not_reached();
7223         }
7224         return tcg_res;
7225     }
7226 }
7227
7228 /* AdvSIMD across lanes
7229  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7230  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7231  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7232  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7233  */
7234 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7235 {
7236     int rd = extract32(insn, 0, 5);
7237     int rn = extract32(insn, 5, 5);
7238     int size = extract32(insn, 22, 2);
7239     int opcode = extract32(insn, 12, 5);
7240     bool is_q = extract32(insn, 30, 1);
7241     bool is_u = extract32(insn, 29, 1);
7242     bool is_fp = false;
7243     bool is_min = false;
7244     int esize;
7245     int elements;
7246     int i;
7247     TCGv_i64 tcg_res, tcg_elt;
7248
7249     switch (opcode) {
7250     case 0x1b: /* ADDV */
7251         if (is_u) {
7252             unallocated_encoding(s);
7253             return;
7254         }
7255         /* fall through */
7256     case 0x3: /* SADDLV, UADDLV */
7257     case 0xa: /* SMAXV, UMAXV */
7258     case 0x1a: /* SMINV, UMINV */
7259         if (size == 3 || (size == 2 && !is_q)) {
7260             unallocated_encoding(s);
7261             return;
7262         }
7263         break;
7264     case 0xc: /* FMAXNMV, FMINNMV */
7265     case 0xf: /* FMAXV, FMINV */
7266         /* Bit 1 of size field encodes min vs max and the actual size
7267          * depends on the encoding of the U bit. If not set (and FP16
7268          * enabled) then we do half-precision float instead of single
7269          * precision.
7270          */
7271         is_min = extract32(size, 1, 1);
7272         is_fp = true;
7273         if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7274             size = 1;
7275         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7276             unallocated_encoding(s);
7277             return;
7278         } else {
7279             size = 2;
7280         }
7281         break;
7282     default:
7283         unallocated_encoding(s);
7284         return;
7285     }
7286
7287     if (!fp_access_check(s)) {
7288         return;
7289     }
7290
7291     esize = 8 << size;
7292     elements = (is_q ? 128 : 64) / esize;
7293
7294     tcg_res = tcg_temp_new_i64();
7295     tcg_elt = tcg_temp_new_i64();
7296
7297     /* These instructions operate across all lanes of a vector
7298      * to produce a single result. We can guarantee that a 64
7299      * bit intermediate is sufficient:
7300      *  + for [US]ADDLV the maximum element size is 32 bits, and
7301      *    the result type is 64 bits
7302      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7303      *    same as the element size, which is 32 bits at most
7304      * For the integer operations we can choose to work at 64
7305      * or 32 bits and truncate at the end; for simplicity
7306      * we use 64 bits always. The floating point
7307      * ops do require 32 bit intermediates, though.
7308      */
7309     if (!is_fp) {
7310         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7311
7312         for (i = 1; i < elements; i++) {
7313             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7314
7315             switch (opcode) {
7316             case 0x03: /* SADDLV / UADDLV */
7317             case 0x1b: /* ADDV */
7318                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7319                 break;
7320             case 0x0a: /* SMAXV / UMAXV */
7321                 if (is_u) {
7322                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7323                 } else {
7324                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7325                 }
7326                 break;
7327             case 0x1a: /* SMINV / UMINV */
7328                 if (is_u) {
7329                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7330                 } else {
7331                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7332                 }
7333                 break;
7334             default:
7335                 g_assert_not_reached();
7336             }
7337
7338         }
7339     } else {
7340         /* Floating point vector reduction ops which work across 32
7341          * bit (single) or 16 bit (half-precision) intermediates.
7342          * Note that correct NaN propagation requires that we do these
7343          * operations in exactly the order specified by the pseudocode.
7344          */
7345         TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7346         int fpopcode = opcode | is_min << 4 | is_u << 5;
7347         int vmap = (1 << elements) - 1;
7348         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7349                                              (is_q ? 128 : 64), vmap, fpst);
7350         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7351     }
7352
7353     /* Now truncate the result to the width required for the final output */
7354     if (opcode == 0x03) {
7355         /* SADDLV, UADDLV: result is 2*esize */
7356         size++;
7357     }
7358
7359     switch (size) {
7360     case 0:
7361         tcg_gen_ext8u_i64(tcg_res, tcg_res);
7362         break;
7363     case 1:
7364         tcg_gen_ext16u_i64(tcg_res, tcg_res);
7365         break;
7366     case 2:
7367         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7368         break;
7369     case 3:
7370         break;
7371     default:
7372         g_assert_not_reached();
7373     }
7374
7375     write_fp_dreg(s, rd, tcg_res);
7376 }
7377
7378 /* DUP (Element, Vector)
7379  *
7380  *  31  30   29              21 20    16 15        10  9    5 4    0
7381  * +---+---+-------------------+--------+-------------+------+------+
7382  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7383  * +---+---+-------------------+--------+-------------+------+------+
7384  *
7385  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7386  */
7387 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7388                              int imm5)
7389 {
7390     int size = ctz32(imm5);
7391     int index;
7392
7393     if (size > 3 || (size == 3 && !is_q)) {
7394         unallocated_encoding(s);
7395         return;
7396     }
7397
7398     if (!fp_access_check(s)) {
7399         return;
7400     }
7401
7402     index = imm5 >> (size + 1);
7403     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7404                          vec_reg_offset(s, rn, index, size),
7405                          is_q ? 16 : 8, vec_full_reg_size(s));
7406 }
7407
7408 /* DUP (element, scalar)
7409  *  31                   21 20    16 15        10  9    5 4    0
7410  * +-----------------------+--------+-------------+------+------+
7411  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7412  * +-----------------------+--------+-------------+------+------+
7413  */
7414 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7415                               int imm5)
7416 {
7417     int size = ctz32(imm5);
7418     int index;
7419     TCGv_i64 tmp;
7420
7421     if (size > 3) {
7422         unallocated_encoding(s);
7423         return;
7424     }
7425
7426     if (!fp_access_check(s)) {
7427         return;
7428     }
7429
7430     index = imm5 >> (size + 1);
7431
7432     /* This instruction just extracts the specified element and
7433      * zero-extends it into the bottom of the destination register.
7434      */
7435     tmp = tcg_temp_new_i64();
7436     read_vec_element(s, tmp, rn, index, size);
7437     write_fp_dreg(s, rd, tmp);
7438 }
7439
7440 /* DUP (General)
7441  *
7442  *  31  30   29              21 20    16 15        10  9    5 4    0
7443  * +---+---+-------------------+--------+-------------+------+------+
7444  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7445  * +---+---+-------------------+--------+-------------+------+------+
7446  *
7447  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7448  */
7449 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7450                              int imm5)
7451 {
7452     int size = ctz32(imm5);
7453     uint32_t dofs, oprsz, maxsz;
7454
7455     if (size > 3 || ((size == 3) && !is_q)) {
7456         unallocated_encoding(s);
7457         return;
7458     }
7459
7460     if (!fp_access_check(s)) {
7461         return;
7462     }
7463
7464     dofs = vec_full_reg_offset(s, rd);
7465     oprsz = is_q ? 16 : 8;
7466     maxsz = vec_full_reg_size(s);
7467
7468     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7469 }
7470
7471 /* INS (Element)
7472  *
7473  *  31                   21 20    16 15  14    11  10 9    5 4    0
7474  * +-----------------------+--------+------------+---+------+------+
7475  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7476  * +-----------------------+--------+------------+---+------+------+
7477  *
7478  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7479  * index: encoded in imm5<4:size+1>
7480  */
7481 static void handle_simd_inse(DisasContext *s, int rd, int rn,
7482                              int imm4, int imm5)
7483 {
7484     int size = ctz32(imm5);
7485     int src_index, dst_index;
7486     TCGv_i64 tmp;
7487
7488     if (size > 3) {
7489         unallocated_encoding(s);
7490         return;
7491     }
7492
7493     if (!fp_access_check(s)) {
7494         return;
7495     }
7496
7497     dst_index = extract32(imm5, 1+size, 5);
7498     src_index = extract32(imm4, size, 4);
7499
7500     tmp = tcg_temp_new_i64();
7501
7502     read_vec_element(s, tmp, rn, src_index, size);
7503     write_vec_element(s, tmp, rd, dst_index, size);
7504
7505     /* INS is considered a 128-bit write for SVE. */
7506     clear_vec_high(s, true, rd);
7507 }
7508
7509
7510 /* INS (General)
7511  *
7512  *  31                   21 20    16 15        10  9    5 4    0
7513  * +-----------------------+--------+-------------+------+------+
7514  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
7515  * +-----------------------+--------+-------------+------+------+
7516  *
7517  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7518  * index: encoded in imm5<4:size+1>
7519  */
7520 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7521 {
7522     int size = ctz32(imm5);
7523     int idx;
7524
7525     if (size > 3) {
7526         unallocated_encoding(s);
7527         return;
7528     }
7529
7530     if (!fp_access_check(s)) {
7531         return;
7532     }
7533
7534     idx = extract32(imm5, 1 + size, 4 - size);
7535     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7536
7537     /* INS is considered a 128-bit write for SVE. */
7538     clear_vec_high(s, true, rd);
7539 }
7540
7541 /*
7542  * UMOV (General)
7543  * SMOV (General)
7544  *
7545  *  31  30   29              21 20    16 15    12   10 9    5 4    0
7546  * +---+---+-------------------+--------+-------------+------+------+
7547  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
7548  * +---+---+-------------------+--------+-------------+------+------+
7549  *
7550  * U: unsigned when set
7551  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7552  */
7553 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7554                                   int rn, int rd, int imm5)
7555 {
7556     int size = ctz32(imm5);
7557     int element;
7558     TCGv_i64 tcg_rd;
7559
7560     /* Check for UnallocatedEncodings */
7561     if (is_signed) {
7562         if (size > 2 || (size == 2 && !is_q)) {
7563             unallocated_encoding(s);
7564             return;
7565         }
7566     } else {
7567         if (size > 3
7568             || (size < 3 && is_q)
7569             || (size == 3 && !is_q)) {
7570             unallocated_encoding(s);
7571             return;
7572         }
7573     }
7574
7575     if (!fp_access_check(s)) {
7576         return;
7577     }
7578
7579     element = extract32(imm5, 1+size, 4);
7580
7581     tcg_rd = cpu_reg(s, rd);
7582     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7583     if (is_signed && !is_q) {
7584         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7585     }
7586 }
7587
7588 /* AdvSIMD copy
7589  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
7590  * +---+---+----+-----------------+------+---+------+---+------+------+
7591  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7592  * +---+---+----+-----------------+------+---+------+---+------+------+
7593  */
7594 static void disas_simd_copy(DisasContext *s, uint32_t insn)
7595 {
7596     int rd = extract32(insn, 0, 5);
7597     int rn = extract32(insn, 5, 5);
7598     int imm4 = extract32(insn, 11, 4);
7599     int op = extract32(insn, 29, 1);
7600     int is_q = extract32(insn, 30, 1);
7601     int imm5 = extract32(insn, 16, 5);
7602
7603     if (op) {
7604         if (is_q) {
7605             /* INS (element) */
7606             handle_simd_inse(s, rd, rn, imm4, imm5);
7607         } else {
7608             unallocated_encoding(s);
7609         }
7610     } else {
7611         switch (imm4) {
7612         case 0:
7613             /* DUP (element - vector) */
7614             handle_simd_dupe(s, is_q, rd, rn, imm5);
7615             break;
7616         case 1:
7617             /* DUP (general) */
7618             handle_simd_dupg(s, is_q, rd, rn, imm5);
7619             break;
7620         case 3:
7621             if (is_q) {
7622                 /* INS (general) */
7623                 handle_simd_insg(s, rd, rn, imm5);
7624             } else {
7625                 unallocated_encoding(s);
7626             }
7627             break;
7628         case 5:
7629         case 7:
7630             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7631             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7632             break;
7633         default:
7634             unallocated_encoding(s);
7635             break;
7636         }
7637     }
7638 }
7639
7640 /* AdvSIMD modified immediate
7641  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
7642  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7643  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
7644  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7645  *
7646  * There are a number of operations that can be carried out here:
7647  *   MOVI - move (shifted) imm into register
7648  *   MVNI - move inverted (shifted) imm into register
7649  *   ORR  - bitwise OR of (shifted) imm with register
7650  *   BIC  - bitwise clear of (shifted) imm with register
7651  * With ARMv8.2 we also have:
7652  *   FMOV half-precision
7653  */
7654 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
7655 {
7656     int rd = extract32(insn, 0, 5);
7657     int cmode = extract32(insn, 12, 4);
7658     int o2 = extract32(insn, 11, 1);
7659     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
7660     bool is_neg = extract32(insn, 29, 1);
7661     bool is_q = extract32(insn, 30, 1);
7662     uint64_t imm = 0;
7663
7664     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
7665         /* Check for FMOV (vector, immediate) - half-precision */
7666         if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
7667             unallocated_encoding(s);
7668             return;
7669         }
7670     }
7671
7672     if (!fp_access_check(s)) {
7673         return;
7674     }
7675
7676     if (cmode == 15 && o2 && !is_neg) {
7677         /* FMOV (vector, immediate) - half-precision */
7678         imm = vfp_expand_imm(MO_16, abcdefgh);
7679         /* now duplicate across the lanes */
7680         imm = dup_const(MO_16, imm);
7681     } else {
7682         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
7683     }
7684
7685     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
7686         /* MOVI or MVNI, with MVNI negation handled above.  */
7687         tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
7688                              vec_full_reg_size(s), imm);
7689     } else {
7690         /* ORR or BIC, with BIC negation to AND handled above.  */
7691         if (is_neg) {
7692             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
7693         } else {
7694             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
7695         }
7696     }
7697 }
7698
7699 /* AdvSIMD scalar copy
7700  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
7701  * +-----+----+-----------------+------+---+------+---+------+------+
7702  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7703  * +-----+----+-----------------+------+---+------+---+------+------+
7704  */
7705 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7706 {
7707     int rd = extract32(insn, 0, 5);
7708     int rn = extract32(insn, 5, 5);
7709     int imm4 = extract32(insn, 11, 4);
7710     int imm5 = extract32(insn, 16, 5);
7711     int op = extract32(insn, 29, 1);
7712
7713     if (op != 0 || imm4 != 0) {
7714         unallocated_encoding(s);
7715         return;
7716     }
7717
7718     /* DUP (element, scalar) */
7719     handle_simd_dupes(s, rd, rn, imm5);
7720 }
7721
7722 /* AdvSIMD scalar pairwise
7723  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7724  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7725  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7726  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7727  */
7728 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
7729 {
7730     int u = extract32(insn, 29, 1);
7731     int size = extract32(insn, 22, 2);
7732     int opcode = extract32(insn, 12, 5);
7733     int rn = extract32(insn, 5, 5);
7734     int rd = extract32(insn, 0, 5);
7735     TCGv_ptr fpst;
7736
7737     /* For some ops (the FP ones), size[1] is part of the encoding.
7738      * For ADDP strictly it is not but size[1] is always 1 for valid
7739      * encodings.
7740      */
7741     opcode |= (extract32(size, 1, 1) << 5);
7742
7743     switch (opcode) {
7744     case 0x3b: /* ADDP */
7745         if (u || size != 3) {
7746             unallocated_encoding(s);
7747             return;
7748         }
7749         if (!fp_access_check(s)) {
7750             return;
7751         }
7752
7753         fpst = NULL;
7754         break;
7755     case 0xc: /* FMAXNMP */
7756     case 0xd: /* FADDP */
7757     case 0xf: /* FMAXP */
7758     case 0x2c: /* FMINNMP */
7759     case 0x2f: /* FMINP */
7760         /* FP op, size[0] is 32 or 64 bit*/
7761         if (!u) {
7762             if (!dc_isar_feature(aa64_fp16, s)) {
7763                 unallocated_encoding(s);
7764                 return;
7765             } else {
7766                 size = MO_16;
7767             }
7768         } else {
7769             size = extract32(size, 0, 1) ? MO_64 : MO_32;
7770         }
7771
7772         if (!fp_access_check(s)) {
7773             return;
7774         }
7775
7776         fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7777         break;
7778     default:
7779         unallocated_encoding(s);
7780         return;
7781     }
7782
7783     if (size == MO_64) {
7784         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7785         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7786         TCGv_i64 tcg_res = tcg_temp_new_i64();
7787
7788         read_vec_element(s, tcg_op1, rn, 0, MO_64);
7789         read_vec_element(s, tcg_op2, rn, 1, MO_64);
7790
7791         switch (opcode) {
7792         case 0x3b: /* ADDP */
7793             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
7794             break;
7795         case 0xc: /* FMAXNMP */
7796             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7797             break;
7798         case 0xd: /* FADDP */
7799             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7800             break;
7801         case 0xf: /* FMAXP */
7802             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7803             break;
7804         case 0x2c: /* FMINNMP */
7805             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7806             break;
7807         case 0x2f: /* FMINP */
7808             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7809             break;
7810         default:
7811             g_assert_not_reached();
7812         }
7813
7814         write_fp_dreg(s, rd, tcg_res);
7815     } else {
7816         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7817         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7818         TCGv_i32 tcg_res = tcg_temp_new_i32();
7819
7820         read_vec_element_i32(s, tcg_op1, rn, 0, size);
7821         read_vec_element_i32(s, tcg_op2, rn, 1, size);
7822
7823         if (size == MO_16) {
7824             switch (opcode) {
7825             case 0xc: /* FMAXNMP */
7826                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7827                 break;
7828             case 0xd: /* FADDP */
7829                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
7830                 break;
7831             case 0xf: /* FMAXP */
7832                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
7833                 break;
7834             case 0x2c: /* FMINNMP */
7835                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7836                 break;
7837             case 0x2f: /* FMINP */
7838                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
7839                 break;
7840             default:
7841                 g_assert_not_reached();
7842             }
7843         } else {
7844             switch (opcode) {
7845             case 0xc: /* FMAXNMP */
7846                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7847                 break;
7848             case 0xd: /* FADDP */
7849                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7850                 break;
7851             case 0xf: /* FMAXP */
7852                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7853                 break;
7854             case 0x2c: /* FMINNMP */
7855                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7856                 break;
7857             case 0x2f: /* FMINP */
7858                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7859                 break;
7860             default:
7861                 g_assert_not_reached();
7862             }
7863         }
7864
7865         write_fp_sreg(s, rd, tcg_res);
7866     }
7867 }
7868
7869 /*
7870  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
7871  *
7872  * This code is handles the common shifting code and is used by both
7873  * the vector and scalar code.
7874  */
7875 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
7876                                     TCGv_i64 tcg_rnd, bool accumulate,
7877                                     bool is_u, int size, int shift)
7878 {
7879     bool extended_result = false;
7880     bool round = tcg_rnd != NULL;
7881     int ext_lshift = 0;
7882     TCGv_i64 tcg_src_hi;
7883
7884     if (round && size == 3) {
7885         extended_result = true;
7886         ext_lshift = 64 - shift;
7887         tcg_src_hi = tcg_temp_new_i64();
7888     } else if (shift == 64) {
7889         if (!accumulate && is_u) {
7890             /* result is zero */
7891             tcg_gen_movi_i64(tcg_res, 0);
7892             return;
7893         }
7894     }
7895
7896     /* Deal with the rounding step */
7897     if (round) {
7898         if (extended_result) {
7899             TCGv_i64 tcg_zero = tcg_constant_i64(0);
7900             if (!is_u) {
7901                 /* take care of sign extending tcg_res */
7902                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
7903                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7904                                  tcg_src, tcg_src_hi,
7905                                  tcg_rnd, tcg_zero);
7906             } else {
7907                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7908                                  tcg_src, tcg_zero,
7909                                  tcg_rnd, tcg_zero);
7910             }
7911         } else {
7912             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
7913         }
7914     }
7915
7916     /* Now do the shift right */
7917     if (round && extended_result) {
7918         /* extended case, >64 bit precision required */
7919         if (ext_lshift == 0) {
7920             /* special case, only high bits matter */
7921             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
7922         } else {
7923             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7924             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
7925             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
7926         }
7927     } else {
7928         if (is_u) {
7929             if (shift == 64) {
7930                 /* essentially shifting in 64 zeros */
7931                 tcg_gen_movi_i64(tcg_src, 0);
7932             } else {
7933                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7934             }
7935         } else {
7936             if (shift == 64) {
7937                 /* effectively extending the sign-bit */
7938                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
7939             } else {
7940                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
7941             }
7942         }
7943     }
7944
7945     if (accumulate) {
7946         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
7947     } else {
7948         tcg_gen_mov_i64(tcg_res, tcg_src);
7949     }
7950 }
7951
7952 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
7953 static void handle_scalar_simd_shri(DisasContext *s,
7954                                     bool is_u, int immh, int immb,
7955                                     int opcode, int rn, int rd)
7956 {
7957     const int size = 3;
7958     int immhb = immh << 3 | immb;
7959     int shift = 2 * (8 << size) - immhb;
7960     bool accumulate = false;
7961     bool round = false;
7962     bool insert = false;
7963     TCGv_i64 tcg_rn;
7964     TCGv_i64 tcg_rd;
7965     TCGv_i64 tcg_round;
7966
7967     if (!extract32(immh, 3, 1)) {
7968         unallocated_encoding(s);
7969         return;
7970     }
7971
7972     if (!fp_access_check(s)) {
7973         return;
7974     }
7975
7976     switch (opcode) {
7977     case 0x02: /* SSRA / USRA (accumulate) */
7978         accumulate = true;
7979         break;
7980     case 0x04: /* SRSHR / URSHR (rounding) */
7981         round = true;
7982         break;
7983     case 0x06: /* SRSRA / URSRA (accum + rounding) */
7984         accumulate = round = true;
7985         break;
7986     case 0x08: /* SRI */
7987         insert = true;
7988         break;
7989     }
7990
7991     if (round) {
7992         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
7993     } else {
7994         tcg_round = NULL;
7995     }
7996
7997     tcg_rn = read_fp_dreg(s, rn);
7998     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7999
8000     if (insert) {
8001         /* shift count same as element size is valid but does nothing;
8002          * special case to avoid potential shift by 64.
8003          */
8004         int esize = 8 << size;
8005         if (shift != esize) {
8006             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
8007             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
8008         }
8009     } else {
8010         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8011                                 accumulate, is_u, size, shift);
8012     }
8013
8014     write_fp_dreg(s, rd, tcg_rd);
8015 }
8016
8017 /* SHL/SLI - Scalar shift left */
8018 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
8019                                     int immh, int immb, int opcode,
8020                                     int rn, int rd)
8021 {
8022     int size = 32 - clz32(immh) - 1;
8023     int immhb = immh << 3 | immb;
8024     int shift = immhb - (8 << size);
8025     TCGv_i64 tcg_rn;
8026     TCGv_i64 tcg_rd;
8027
8028     if (!extract32(immh, 3, 1)) {
8029         unallocated_encoding(s);
8030         return;
8031     }
8032
8033     if (!fp_access_check(s)) {
8034         return;
8035     }
8036
8037     tcg_rn = read_fp_dreg(s, rn);
8038     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8039
8040     if (insert) {
8041         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8042     } else {
8043         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8044     }
8045
8046     write_fp_dreg(s, rd, tcg_rd);
8047 }
8048
8049 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8050  * (signed/unsigned) narrowing */
8051 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8052                                    bool is_u_shift, bool is_u_narrow,
8053                                    int immh, int immb, int opcode,
8054                                    int rn, int rd)
8055 {
8056     int immhb = immh << 3 | immb;
8057     int size = 32 - clz32(immh) - 1;
8058     int esize = 8 << size;
8059     int shift = (2 * esize) - immhb;
8060     int elements = is_scalar ? 1 : (64 / esize);
8061     bool round = extract32(opcode, 0, 1);
8062     MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8063     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8064     TCGv_i32 tcg_rd_narrowed;
8065     TCGv_i64 tcg_final;
8066
8067     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8068         { gen_helper_neon_narrow_sat_s8,
8069           gen_helper_neon_unarrow_sat8 },
8070         { gen_helper_neon_narrow_sat_s16,
8071           gen_helper_neon_unarrow_sat16 },
8072         { gen_helper_neon_narrow_sat_s32,
8073           gen_helper_neon_unarrow_sat32 },
8074         { NULL, NULL },
8075     };
8076     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8077         gen_helper_neon_narrow_sat_u8,
8078         gen_helper_neon_narrow_sat_u16,
8079         gen_helper_neon_narrow_sat_u32,
8080         NULL
8081     };
8082     NeonGenNarrowEnvFn *narrowfn;
8083
8084     int i;
8085
8086     assert(size < 4);
8087
8088     if (extract32(immh, 3, 1)) {
8089         unallocated_encoding(s);
8090         return;
8091     }
8092
8093     if (!fp_access_check(s)) {
8094         return;
8095     }
8096
8097     if (is_u_shift) {
8098         narrowfn = unsigned_narrow_fns[size];
8099     } else {
8100         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8101     }
8102
8103     tcg_rn = tcg_temp_new_i64();
8104     tcg_rd = tcg_temp_new_i64();
8105     tcg_rd_narrowed = tcg_temp_new_i32();
8106     tcg_final = tcg_temp_new_i64();
8107
8108     if (round) {
8109         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8110     } else {
8111         tcg_round = NULL;
8112     }
8113
8114     for (i = 0; i < elements; i++) {
8115         read_vec_element(s, tcg_rn, rn, i, ldop);
8116         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8117                                 false, is_u_shift, size+1, shift);
8118         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
8119         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8120         if (i == 0) {
8121             tcg_gen_mov_i64(tcg_final, tcg_rd);
8122         } else {
8123             tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8124         }
8125     }
8126
8127     if (!is_q) {
8128         write_vec_element(s, tcg_final, rd, 0, MO_64);
8129     } else {
8130         write_vec_element(s, tcg_final, rd, 1, MO_64);
8131     }
8132     clear_vec_high(s, is_q, rd);
8133 }
8134
8135 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8136 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8137                              bool src_unsigned, bool dst_unsigned,
8138                              int immh, int immb, int rn, int rd)
8139 {
8140     int immhb = immh << 3 | immb;
8141     int size = 32 - clz32(immh) - 1;
8142     int shift = immhb - (8 << size);
8143     int pass;
8144
8145     assert(immh != 0);
8146     assert(!(scalar && is_q));
8147
8148     if (!scalar) {
8149         if (!is_q && extract32(immh, 3, 1)) {
8150             unallocated_encoding(s);
8151             return;
8152         }
8153
8154         /* Since we use the variable-shift helpers we must
8155          * replicate the shift count into each element of
8156          * the tcg_shift value.
8157          */
8158         switch (size) {
8159         case 0:
8160             shift |= shift << 8;
8161             /* fall through */
8162         case 1:
8163             shift |= shift << 16;
8164             break;
8165         case 2:
8166         case 3:
8167             break;
8168         default:
8169             g_assert_not_reached();
8170         }
8171     }
8172
8173     if (!fp_access_check(s)) {
8174         return;
8175     }
8176
8177     if (size == 3) {
8178         TCGv_i64 tcg_shift = tcg_constant_i64(shift);
8179         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8180             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8181             { NULL, gen_helper_neon_qshl_u64 },
8182         };
8183         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8184         int maxpass = is_q ? 2 : 1;
8185
8186         for (pass = 0; pass < maxpass; pass++) {
8187             TCGv_i64 tcg_op = tcg_temp_new_i64();
8188
8189             read_vec_element(s, tcg_op, rn, pass, MO_64);
8190             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8191             write_vec_element(s, tcg_op, rd, pass, MO_64);
8192         }
8193         clear_vec_high(s, is_q, rd);
8194     } else {
8195         TCGv_i32 tcg_shift = tcg_constant_i32(shift);
8196         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8197             {
8198                 { gen_helper_neon_qshl_s8,
8199                   gen_helper_neon_qshl_s16,
8200                   gen_helper_neon_qshl_s32 },
8201                 { gen_helper_neon_qshlu_s8,
8202                   gen_helper_neon_qshlu_s16,
8203                   gen_helper_neon_qshlu_s32 }
8204             }, {
8205                 { NULL, NULL, NULL },
8206                 { gen_helper_neon_qshl_u8,
8207                   gen_helper_neon_qshl_u16,
8208                   gen_helper_neon_qshl_u32 }
8209             }
8210         };
8211         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8212         MemOp memop = scalar ? size : MO_32;
8213         int maxpass = scalar ? 1 : is_q ? 4 : 2;
8214
8215         for (pass = 0; pass < maxpass; pass++) {
8216             TCGv_i32 tcg_op = tcg_temp_new_i32();
8217
8218             read_vec_element_i32(s, tcg_op, rn, pass, memop);
8219             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8220             if (scalar) {
8221                 switch (size) {
8222                 case 0:
8223                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
8224                     break;
8225                 case 1:
8226                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
8227                     break;
8228                 case 2:
8229                     break;
8230                 default:
8231                     g_assert_not_reached();
8232                 }
8233                 write_fp_sreg(s, rd, tcg_op);
8234             } else {
8235                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8236             }
8237         }
8238
8239         if (!scalar) {
8240             clear_vec_high(s, is_q, rd);
8241         }
8242     }
8243 }
8244
8245 /* Common vector code for handling integer to FP conversion */
8246 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8247                                    int elements, int is_signed,
8248                                    int fracbits, int size)
8249 {
8250     TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8251     TCGv_i32 tcg_shift = NULL;
8252
8253     MemOp mop = size | (is_signed ? MO_SIGN : 0);
8254     int pass;
8255
8256     if (fracbits || size == MO_64) {
8257         tcg_shift = tcg_constant_i32(fracbits);
8258     }
8259
8260     if (size == MO_64) {
8261         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8262         TCGv_i64 tcg_double = tcg_temp_new_i64();
8263
8264         for (pass = 0; pass < elements; pass++) {
8265             read_vec_element(s, tcg_int64, rn, pass, mop);
8266
8267             if (is_signed) {
8268                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8269                                      tcg_shift, tcg_fpst);
8270             } else {
8271                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8272                                      tcg_shift, tcg_fpst);
8273             }
8274             if (elements == 1) {
8275                 write_fp_dreg(s, rd, tcg_double);
8276             } else {
8277                 write_vec_element(s, tcg_double, rd, pass, MO_64);
8278             }
8279         }
8280     } else {
8281         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8282         TCGv_i32 tcg_float = tcg_temp_new_i32();
8283
8284         for (pass = 0; pass < elements; pass++) {
8285             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8286
8287             switch (size) {
8288             case MO_32:
8289                 if (fracbits) {
8290                     if (is_signed) {
8291                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
8292                                              tcg_shift, tcg_fpst);
8293                     } else {
8294                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
8295                                              tcg_shift, tcg_fpst);
8296                     }
8297                 } else {
8298                     if (is_signed) {
8299                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8300                     } else {
8301                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8302                     }
8303                 }
8304                 break;
8305             case MO_16:
8306                 if (fracbits) {
8307                     if (is_signed) {
8308                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8309                                              tcg_shift, tcg_fpst);
8310                     } else {
8311                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8312                                              tcg_shift, tcg_fpst);
8313                     }
8314                 } else {
8315                     if (is_signed) {
8316                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8317                     } else {
8318                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8319                     }
8320                 }
8321                 break;
8322             default:
8323                 g_assert_not_reached();
8324             }
8325
8326             if (elements == 1) {
8327                 write_fp_sreg(s, rd, tcg_float);
8328             } else {
8329                 write_vec_element_i32(s, tcg_float, rd, pass, size);
8330             }
8331         }
8332     }
8333
8334     clear_vec_high(s, elements << size == 16, rd);
8335 }
8336
8337 /* UCVTF/SCVTF - Integer to FP conversion */
8338 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8339                                          bool is_q, bool is_u,
8340                                          int immh, int immb, int opcode,
8341                                          int rn, int rd)
8342 {
8343     int size, elements, fracbits;
8344     int immhb = immh << 3 | immb;
8345
8346     if (immh & 8) {
8347         size = MO_64;
8348         if (!is_scalar && !is_q) {
8349             unallocated_encoding(s);
8350             return;
8351         }
8352     } else if (immh & 4) {
8353         size = MO_32;
8354     } else if (immh & 2) {
8355         size = MO_16;
8356         if (!dc_isar_feature(aa64_fp16, s)) {
8357             unallocated_encoding(s);
8358             return;
8359         }
8360     } else {
8361         /* immh == 0 would be a failure of the decode logic */
8362         g_assert(immh == 1);
8363         unallocated_encoding(s);
8364         return;
8365     }
8366
8367     if (is_scalar) {
8368         elements = 1;
8369     } else {
8370         elements = (8 << is_q) >> size;
8371     }
8372     fracbits = (16 << size) - immhb;
8373
8374     if (!fp_access_check(s)) {
8375         return;
8376     }
8377
8378     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8379 }
8380
8381 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
8382 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8383                                          bool is_q, bool is_u,
8384                                          int immh, int immb, int rn, int rd)
8385 {
8386     int immhb = immh << 3 | immb;
8387     int pass, size, fracbits;
8388     TCGv_ptr tcg_fpstatus;
8389     TCGv_i32 tcg_rmode, tcg_shift;
8390
8391     if (immh & 0x8) {
8392         size = MO_64;
8393         if (!is_scalar && !is_q) {
8394             unallocated_encoding(s);
8395             return;
8396         }
8397     } else if (immh & 0x4) {
8398         size = MO_32;
8399     } else if (immh & 0x2) {
8400         size = MO_16;
8401         if (!dc_isar_feature(aa64_fp16, s)) {
8402             unallocated_encoding(s);
8403             return;
8404         }
8405     } else {
8406         /* Should have split out AdvSIMD modified immediate earlier.  */
8407         assert(immh == 1);
8408         unallocated_encoding(s);
8409         return;
8410     }
8411
8412     if (!fp_access_check(s)) {
8413         return;
8414     }
8415
8416     assert(!(is_scalar && is_q));
8417
8418     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8419     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
8420     fracbits = (16 << size) - immhb;
8421     tcg_shift = tcg_constant_i32(fracbits);
8422
8423     if (size == MO_64) {
8424         int maxpass = is_scalar ? 1 : 2;
8425
8426         for (pass = 0; pass < maxpass; pass++) {
8427             TCGv_i64 tcg_op = tcg_temp_new_i64();
8428
8429             read_vec_element(s, tcg_op, rn, pass, MO_64);
8430             if (is_u) {
8431                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8432             } else {
8433                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8434             }
8435             write_vec_element(s, tcg_op, rd, pass, MO_64);
8436         }
8437         clear_vec_high(s, is_q, rd);
8438     } else {
8439         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8440         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8441
8442         switch (size) {
8443         case MO_16:
8444             if (is_u) {
8445                 fn = gen_helper_vfp_touhh;
8446             } else {
8447                 fn = gen_helper_vfp_toshh;
8448             }
8449             break;
8450         case MO_32:
8451             if (is_u) {
8452                 fn = gen_helper_vfp_touls;
8453             } else {
8454                 fn = gen_helper_vfp_tosls;
8455             }
8456             break;
8457         default:
8458             g_assert_not_reached();
8459         }
8460
8461         for (pass = 0; pass < maxpass; pass++) {
8462             TCGv_i32 tcg_op = tcg_temp_new_i32();
8463
8464             read_vec_element_i32(s, tcg_op, rn, pass, size);
8465             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8466             if (is_scalar) {
8467                 write_fp_sreg(s, rd, tcg_op);
8468             } else {
8469                 write_vec_element_i32(s, tcg_op, rd, pass, size);
8470             }
8471         }
8472         if (!is_scalar) {
8473             clear_vec_high(s, is_q, rd);
8474         }
8475     }
8476
8477     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8478 }
8479
8480 /* AdvSIMD scalar shift by immediate
8481  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
8482  * +-----+---+-------------+------+------+--------+---+------+------+
8483  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8484  * +-----+---+-------------+------+------+--------+---+------+------+
8485  *
8486  * This is the scalar version so it works on a fixed sized registers
8487  */
8488 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8489 {
8490     int rd = extract32(insn, 0, 5);
8491     int rn = extract32(insn, 5, 5);
8492     int opcode = extract32(insn, 11, 5);
8493     int immb = extract32(insn, 16, 3);
8494     int immh = extract32(insn, 19, 4);
8495     bool is_u = extract32(insn, 29, 1);
8496
8497     if (immh == 0) {
8498         unallocated_encoding(s);
8499         return;
8500     }
8501
8502     switch (opcode) {
8503     case 0x08: /* SRI */
8504         if (!is_u) {
8505             unallocated_encoding(s);
8506             return;
8507         }
8508         /* fall through */
8509     case 0x00: /* SSHR / USHR */
8510     case 0x02: /* SSRA / USRA */
8511     case 0x04: /* SRSHR / URSHR */
8512     case 0x06: /* SRSRA / URSRA */
8513         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8514         break;
8515     case 0x0a: /* SHL / SLI */
8516         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8517         break;
8518     case 0x1c: /* SCVTF, UCVTF */
8519         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8520                                      opcode, rn, rd);
8521         break;
8522     case 0x10: /* SQSHRUN, SQSHRUN2 */
8523     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8524         if (!is_u) {
8525             unallocated_encoding(s);
8526             return;
8527         }
8528         handle_vec_simd_sqshrn(s, true, false, false, true,
8529                                immh, immb, opcode, rn, rd);
8530         break;
8531     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
8532     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8533         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8534                                immh, immb, opcode, rn, rd);
8535         break;
8536     case 0xc: /* SQSHLU */
8537         if (!is_u) {
8538             unallocated_encoding(s);
8539             return;
8540         }
8541         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8542         break;
8543     case 0xe: /* SQSHL, UQSHL */
8544         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8545         break;
8546     case 0x1f: /* FCVTZS, FCVTZU */
8547         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8548         break;
8549     default:
8550         unallocated_encoding(s);
8551         break;
8552     }
8553 }
8554
8555 /* AdvSIMD scalar three different
8556  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8557  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8558  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8559  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8560  */
8561 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8562 {
8563     bool is_u = extract32(insn, 29, 1);
8564     int size = extract32(insn, 22, 2);
8565     int opcode = extract32(insn, 12, 4);
8566     int rm = extract32(insn, 16, 5);
8567     int rn = extract32(insn, 5, 5);
8568     int rd = extract32(insn, 0, 5);
8569
8570     if (is_u) {
8571         unallocated_encoding(s);
8572         return;
8573     }
8574
8575     switch (opcode) {
8576     case 0x9: /* SQDMLAL, SQDMLAL2 */
8577     case 0xb: /* SQDMLSL, SQDMLSL2 */
8578     case 0xd: /* SQDMULL, SQDMULL2 */
8579         if (size == 0 || size == 3) {
8580             unallocated_encoding(s);
8581             return;
8582         }
8583         break;
8584     default:
8585         unallocated_encoding(s);
8586         return;
8587     }
8588
8589     if (!fp_access_check(s)) {
8590         return;
8591     }
8592
8593     if (size == 2) {
8594         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8595         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8596         TCGv_i64 tcg_res = tcg_temp_new_i64();
8597
8598         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8599         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8600
8601         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
8602         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
8603
8604         switch (opcode) {
8605         case 0xd: /* SQDMULL, SQDMULL2 */
8606             break;
8607         case 0xb: /* SQDMLSL, SQDMLSL2 */
8608             tcg_gen_neg_i64(tcg_res, tcg_res);
8609             /* fall through */
8610         case 0x9: /* SQDMLAL, SQDMLAL2 */
8611             read_vec_element(s, tcg_op1, rd, 0, MO_64);
8612             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
8613                                               tcg_res, tcg_op1);
8614             break;
8615         default:
8616             g_assert_not_reached();
8617         }
8618
8619         write_fp_dreg(s, rd, tcg_res);
8620     } else {
8621         TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8622         TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8623         TCGv_i64 tcg_res = tcg_temp_new_i64();
8624
8625         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8626         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
8627
8628         switch (opcode) {
8629         case 0xd: /* SQDMULL, SQDMULL2 */
8630             break;
8631         case 0xb: /* SQDMLSL, SQDMLSL2 */
8632             gen_helper_neon_negl_u32(tcg_res, tcg_res);
8633             /* fall through */
8634         case 0x9: /* SQDMLAL, SQDMLAL2 */
8635         {
8636             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8637             read_vec_element(s, tcg_op3, rd, 0, MO_32);
8638             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
8639                                               tcg_res, tcg_op3);
8640             break;
8641         }
8642         default:
8643             g_assert_not_reached();
8644         }
8645
8646         tcg_gen_ext32u_i64(tcg_res, tcg_res);
8647         write_fp_dreg(s, rd, tcg_res);
8648     }
8649 }
8650
8651 static void handle_3same_64(DisasContext *s, int opcode, bool u,
8652                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8653 {
8654     /* Handle 64x64->64 opcodes which are shared between the scalar
8655      * and vector 3-same groups. We cover every opcode where size == 3
8656      * is valid in either the three-reg-same (integer, not pairwise)
8657      * or scalar-three-reg-same groups.
8658      */
8659     TCGCond cond;
8660
8661     switch (opcode) {
8662     case 0x1: /* SQADD */
8663         if (u) {
8664             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8665         } else {
8666             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8667         }
8668         break;
8669     case 0x5: /* SQSUB */
8670         if (u) {
8671             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8672         } else {
8673             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8674         }
8675         break;
8676     case 0x6: /* CMGT, CMHI */
8677         cond = u ? TCG_COND_GTU : TCG_COND_GT;
8678     do_cmop:
8679         /* 64 bit integer comparison, result = test ? -1 : 0. */
8680         tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8681         break;
8682     case 0x7: /* CMGE, CMHS */
8683         cond = u ? TCG_COND_GEU : TCG_COND_GE;
8684         goto do_cmop;
8685     case 0x11: /* CMTST, CMEQ */
8686         if (u) {
8687             cond = TCG_COND_EQ;
8688             goto do_cmop;
8689         }
8690         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8691         break;
8692     case 0x8: /* SSHL, USHL */
8693         if (u) {
8694             gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
8695         } else {
8696             gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
8697         }
8698         break;
8699     case 0x9: /* SQSHL, UQSHL */
8700         if (u) {
8701             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8702         } else {
8703             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8704         }
8705         break;
8706     case 0xa: /* SRSHL, URSHL */
8707         if (u) {
8708             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8709         } else {
8710             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8711         }
8712         break;
8713     case 0xb: /* SQRSHL, UQRSHL */
8714         if (u) {
8715             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8716         } else {
8717             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8718         }
8719         break;
8720     case 0x10: /* ADD, SUB */
8721         if (u) {
8722             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8723         } else {
8724             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8725         }
8726         break;
8727     default:
8728         g_assert_not_reached();
8729     }
8730 }
8731
8732 /* Handle the 3-same-operands float operations; shared by the scalar
8733  * and vector encodings. The caller must filter out any encodings
8734  * not allocated for the encoding it is dealing with.
8735  */
8736 static void handle_3same_float(DisasContext *s, int size, int elements,
8737                                int fpopcode, int rd, int rn, int rm)
8738 {
8739     int pass;
8740     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8741
8742     for (pass = 0; pass < elements; pass++) {
8743         if (size) {
8744             /* Double */
8745             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8746             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8747             TCGv_i64 tcg_res = tcg_temp_new_i64();
8748
8749             read_vec_element(s, tcg_op1, rn, pass, MO_64);
8750             read_vec_element(s, tcg_op2, rm, pass, MO_64);
8751
8752             switch (fpopcode) {
8753             case 0x39: /* FMLS */
8754                 /* As usual for ARM, separate negation for fused multiply-add */
8755                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
8756                 /* fall through */
8757             case 0x19: /* FMLA */
8758                 read_vec_element(s, tcg_res, rd, pass, MO_64);
8759                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
8760                                        tcg_res, fpst);
8761                 break;
8762             case 0x18: /* FMAXNM */
8763                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8764                 break;
8765             case 0x1a: /* FADD */
8766                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8767                 break;
8768             case 0x1b: /* FMULX */
8769                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
8770                 break;
8771             case 0x1c: /* FCMEQ */
8772                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8773                 break;
8774             case 0x1e: /* FMAX */
8775                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8776                 break;
8777             case 0x1f: /* FRECPS */
8778                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8779                 break;
8780             case 0x38: /* FMINNM */
8781                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8782                 break;
8783             case 0x3a: /* FSUB */
8784                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8785                 break;
8786             case 0x3e: /* FMIN */
8787                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8788                 break;
8789             case 0x3f: /* FRSQRTS */
8790                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8791                 break;
8792             case 0x5b: /* FMUL */
8793                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
8794                 break;
8795             case 0x5c: /* FCMGE */
8796                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8797                 break;
8798             case 0x5d: /* FACGE */
8799                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8800                 break;
8801             case 0x5f: /* FDIV */
8802                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
8803                 break;
8804             case 0x7a: /* FABD */
8805                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8806                 gen_helper_vfp_absd(tcg_res, tcg_res);
8807                 break;
8808             case 0x7c: /* FCMGT */
8809                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8810                 break;
8811             case 0x7d: /* FACGT */
8812                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8813                 break;
8814             default:
8815                 g_assert_not_reached();
8816             }
8817
8818             write_vec_element(s, tcg_res, rd, pass, MO_64);
8819         } else {
8820             /* Single */
8821             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8822             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8823             TCGv_i32 tcg_res = tcg_temp_new_i32();
8824
8825             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
8826             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
8827
8828             switch (fpopcode) {
8829             case 0x39: /* FMLS */
8830                 /* As usual for ARM, separate negation for fused multiply-add */
8831                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
8832                 /* fall through */
8833             case 0x19: /* FMLA */
8834                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8835                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
8836                                        tcg_res, fpst);
8837                 break;
8838             case 0x1a: /* FADD */
8839                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8840                 break;
8841             case 0x1b: /* FMULX */
8842                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
8843                 break;
8844             case 0x1c: /* FCMEQ */
8845                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8846                 break;
8847             case 0x1e: /* FMAX */
8848                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8849                 break;
8850             case 0x1f: /* FRECPS */
8851                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8852                 break;
8853             case 0x18: /* FMAXNM */
8854                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8855                 break;
8856             case 0x38: /* FMINNM */
8857                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8858                 break;
8859             case 0x3a: /* FSUB */
8860                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8861                 break;
8862             case 0x3e: /* FMIN */
8863                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8864                 break;
8865             case 0x3f: /* FRSQRTS */
8866                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8867                 break;
8868             case 0x5b: /* FMUL */
8869                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
8870                 break;
8871             case 0x5c: /* FCMGE */
8872                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8873                 break;
8874             case 0x5d: /* FACGE */
8875                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8876                 break;
8877             case 0x5f: /* FDIV */
8878                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
8879                 break;
8880             case 0x7a: /* FABD */
8881                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8882                 gen_helper_vfp_abss(tcg_res, tcg_res);
8883                 break;
8884             case 0x7c: /* FCMGT */
8885                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8886                 break;
8887             case 0x7d: /* FACGT */
8888                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8889                 break;
8890             default:
8891                 g_assert_not_reached();
8892             }
8893
8894             if (elements == 1) {
8895                 /* scalar single so clear high part */
8896                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8897
8898                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
8899                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
8900             } else {
8901                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8902             }
8903         }
8904     }
8905
8906     clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
8907 }
8908
8909 /* AdvSIMD scalar three same
8910  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
8911  * +-----+---+-----------+------+---+------+--------+---+------+------+
8912  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
8913  * +-----+---+-----------+------+---+------+--------+---+------+------+
8914  */
8915 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
8916 {
8917     int rd = extract32(insn, 0, 5);
8918     int rn = extract32(insn, 5, 5);
8919     int opcode = extract32(insn, 11, 5);
8920     int rm = extract32(insn, 16, 5);
8921     int size = extract32(insn, 22, 2);
8922     bool u = extract32(insn, 29, 1);
8923     TCGv_i64 tcg_rd;
8924
8925     if (opcode >= 0x18) {
8926         /* Floating point: U, size[1] and opcode indicate operation */
8927         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
8928         switch (fpopcode) {
8929         case 0x1b: /* FMULX */
8930         case 0x1f: /* FRECPS */
8931         case 0x3f: /* FRSQRTS */
8932         case 0x5d: /* FACGE */
8933         case 0x7d: /* FACGT */
8934         case 0x1c: /* FCMEQ */
8935         case 0x5c: /* FCMGE */
8936         case 0x7c: /* FCMGT */
8937         case 0x7a: /* FABD */
8938             break;
8939         default:
8940             unallocated_encoding(s);
8941             return;
8942         }
8943
8944         if (!fp_access_check(s)) {
8945             return;
8946         }
8947
8948         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
8949         return;
8950     }
8951
8952     switch (opcode) {
8953     case 0x1: /* SQADD, UQADD */
8954     case 0x5: /* SQSUB, UQSUB */
8955     case 0x9: /* SQSHL, UQSHL */
8956     case 0xb: /* SQRSHL, UQRSHL */
8957         break;
8958     case 0x8: /* SSHL, USHL */
8959     case 0xa: /* SRSHL, URSHL */
8960     case 0x6: /* CMGT, CMHI */
8961     case 0x7: /* CMGE, CMHS */
8962     case 0x11: /* CMTST, CMEQ */
8963     case 0x10: /* ADD, SUB (vector) */
8964         if (size != 3) {
8965             unallocated_encoding(s);
8966             return;
8967         }
8968         break;
8969     case 0x16: /* SQDMULH, SQRDMULH (vector) */
8970         if (size != 1 && size != 2) {
8971             unallocated_encoding(s);
8972             return;
8973         }
8974         break;
8975     default:
8976         unallocated_encoding(s);
8977         return;
8978     }
8979
8980     if (!fp_access_check(s)) {
8981         return;
8982     }
8983
8984     tcg_rd = tcg_temp_new_i64();
8985
8986     if (size == 3) {
8987         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8988         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
8989
8990         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
8991     } else {
8992         /* Do a single operation on the lowest element in the vector.
8993          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
8994          * no side effects for all these operations.
8995          * OPTME: special-purpose helpers would avoid doing some
8996          * unnecessary work in the helper for the 8 and 16 bit cases.
8997          */
8998         NeonGenTwoOpEnvFn *genenvfn;
8999         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9000         TCGv_i32 tcg_rm = tcg_temp_new_i32();
9001         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9002
9003         read_vec_element_i32(s, tcg_rn, rn, 0, size);
9004         read_vec_element_i32(s, tcg_rm, rm, 0, size);
9005
9006         switch (opcode) {
9007         case 0x1: /* SQADD, UQADD */
9008         {
9009             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9010                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9011                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9012                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9013             };
9014             genenvfn = fns[size][u];
9015             break;
9016         }
9017         case 0x5: /* SQSUB, UQSUB */
9018         {
9019             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9020                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9021                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9022                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9023             };
9024             genenvfn = fns[size][u];
9025             break;
9026         }
9027         case 0x9: /* SQSHL, UQSHL */
9028         {
9029             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9030                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9031                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9032                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9033             };
9034             genenvfn = fns[size][u];
9035             break;
9036         }
9037         case 0xb: /* SQRSHL, UQRSHL */
9038         {
9039             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9040                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9041                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9042                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9043             };
9044             genenvfn = fns[size][u];
9045             break;
9046         }
9047         case 0x16: /* SQDMULH, SQRDMULH */
9048         {
9049             static NeonGenTwoOpEnvFn * const fns[2][2] = {
9050                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9051                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9052             };
9053             assert(size == 1 || size == 2);
9054             genenvfn = fns[size - 1][u];
9055             break;
9056         }
9057         default:
9058             g_assert_not_reached();
9059         }
9060
9061         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
9062         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9063     }
9064
9065     write_fp_dreg(s, rd, tcg_rd);
9066 }
9067
9068 /* AdvSIMD scalar three same FP16
9069  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9070  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9071  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9072  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9073  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9074  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9075  */
9076 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9077                                                   uint32_t insn)
9078 {
9079     int rd = extract32(insn, 0, 5);
9080     int rn = extract32(insn, 5, 5);
9081     int opcode = extract32(insn, 11, 3);
9082     int rm = extract32(insn, 16, 5);
9083     bool u = extract32(insn, 29, 1);
9084     bool a = extract32(insn, 23, 1);
9085     int fpopcode = opcode | (a << 3) |  (u << 4);
9086     TCGv_ptr fpst;
9087     TCGv_i32 tcg_op1;
9088     TCGv_i32 tcg_op2;
9089     TCGv_i32 tcg_res;
9090
9091     switch (fpopcode) {
9092     case 0x03: /* FMULX */
9093     case 0x04: /* FCMEQ (reg) */
9094     case 0x07: /* FRECPS */
9095     case 0x0f: /* FRSQRTS */
9096     case 0x14: /* FCMGE (reg) */
9097     case 0x15: /* FACGE */
9098     case 0x1a: /* FABD */
9099     case 0x1c: /* FCMGT (reg) */
9100     case 0x1d: /* FACGT */
9101         break;
9102     default:
9103         unallocated_encoding(s);
9104         return;
9105     }
9106
9107     if (!dc_isar_feature(aa64_fp16, s)) {
9108         unallocated_encoding(s);
9109     }
9110
9111     if (!fp_access_check(s)) {
9112         return;
9113     }
9114
9115     fpst = fpstatus_ptr(FPST_FPCR_F16);
9116
9117     tcg_op1 = read_fp_hreg(s, rn);
9118     tcg_op2 = read_fp_hreg(s, rm);
9119     tcg_res = tcg_temp_new_i32();
9120
9121     switch (fpopcode) {
9122     case 0x03: /* FMULX */
9123         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9124         break;
9125     case 0x04: /* FCMEQ (reg) */
9126         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9127         break;
9128     case 0x07: /* FRECPS */
9129         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9130         break;
9131     case 0x0f: /* FRSQRTS */
9132         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9133         break;
9134     case 0x14: /* FCMGE (reg) */
9135         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9136         break;
9137     case 0x15: /* FACGE */
9138         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9139         break;
9140     case 0x1a: /* FABD */
9141         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9142         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9143         break;
9144     case 0x1c: /* FCMGT (reg) */
9145         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9146         break;
9147     case 0x1d: /* FACGT */
9148         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9149         break;
9150     default:
9151         g_assert_not_reached();
9152     }
9153
9154     write_fp_sreg(s, rd, tcg_res);
9155 }
9156
9157 /* AdvSIMD scalar three same extra
9158  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9159  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9160  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9161  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9162  */
9163 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9164                                                    uint32_t insn)
9165 {
9166     int rd = extract32(insn, 0, 5);
9167     int rn = extract32(insn, 5, 5);
9168     int opcode = extract32(insn, 11, 4);
9169     int rm = extract32(insn, 16, 5);
9170     int size = extract32(insn, 22, 2);
9171     bool u = extract32(insn, 29, 1);
9172     TCGv_i32 ele1, ele2, ele3;
9173     TCGv_i64 res;
9174     bool feature;
9175
9176     switch (u * 16 + opcode) {
9177     case 0x10: /* SQRDMLAH (vector) */
9178     case 0x11: /* SQRDMLSH (vector) */
9179         if (size != 1 && size != 2) {
9180             unallocated_encoding(s);
9181             return;
9182         }
9183         feature = dc_isar_feature(aa64_rdm, s);
9184         break;
9185     default:
9186         unallocated_encoding(s);
9187         return;
9188     }
9189     if (!feature) {
9190         unallocated_encoding(s);
9191         return;
9192     }
9193     if (!fp_access_check(s)) {
9194         return;
9195     }
9196
9197     /* Do a single operation on the lowest element in the vector.
9198      * We use the standard Neon helpers and rely on 0 OP 0 == 0
9199      * with no side effects for all these operations.
9200      * OPTME: special-purpose helpers would avoid doing some
9201      * unnecessary work in the helper for the 16 bit cases.
9202      */
9203     ele1 = tcg_temp_new_i32();
9204     ele2 = tcg_temp_new_i32();
9205     ele3 = tcg_temp_new_i32();
9206
9207     read_vec_element_i32(s, ele1, rn, 0, size);
9208     read_vec_element_i32(s, ele2, rm, 0, size);
9209     read_vec_element_i32(s, ele3, rd, 0, size);
9210
9211     switch (opcode) {
9212     case 0x0: /* SQRDMLAH */
9213         if (size == 1) {
9214             gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9215         } else {
9216             gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9217         }
9218         break;
9219     case 0x1: /* SQRDMLSH */
9220         if (size == 1) {
9221             gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9222         } else {
9223             gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9224         }
9225         break;
9226     default:
9227         g_assert_not_reached();
9228     }
9229
9230     res = tcg_temp_new_i64();
9231     tcg_gen_extu_i32_i64(res, ele3);
9232     write_fp_dreg(s, rd, res);
9233 }
9234
9235 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9236                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9237                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9238 {
9239     /* Handle 64->64 opcodes which are shared between the scalar and
9240      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9241      * is valid in either group and also the double-precision fp ops.
9242      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
9243      * requires them.
9244      */
9245     TCGCond cond;
9246
9247     switch (opcode) {
9248     case 0x4: /* CLS, CLZ */
9249         if (u) {
9250             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9251         } else {
9252             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9253         }
9254         break;
9255     case 0x5: /* NOT */
9256         /* This opcode is shared with CNT and RBIT but we have earlier
9257          * enforced that size == 3 if and only if this is the NOT insn.
9258          */
9259         tcg_gen_not_i64(tcg_rd, tcg_rn);
9260         break;
9261     case 0x7: /* SQABS, SQNEG */
9262         if (u) {
9263             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
9264         } else {
9265             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
9266         }
9267         break;
9268     case 0xa: /* CMLT */
9269         cond = TCG_COND_LT;
9270     do_cmop:
9271         /* 64 bit integer comparison against zero, result is test ? -1 : 0. */
9272         tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0));
9273         break;
9274     case 0x8: /* CMGT, CMGE */
9275         cond = u ? TCG_COND_GE : TCG_COND_GT;
9276         goto do_cmop;
9277     case 0x9: /* CMEQ, CMLE */
9278         cond = u ? TCG_COND_LE : TCG_COND_EQ;
9279         goto do_cmop;
9280     case 0xb: /* ABS, NEG */
9281         if (u) {
9282             tcg_gen_neg_i64(tcg_rd, tcg_rn);
9283         } else {
9284             tcg_gen_abs_i64(tcg_rd, tcg_rn);
9285         }
9286         break;
9287     case 0x2f: /* FABS */
9288         gen_helper_vfp_absd(tcg_rd, tcg_rn);
9289         break;
9290     case 0x6f: /* FNEG */
9291         gen_helper_vfp_negd(tcg_rd, tcg_rn);
9292         break;
9293     case 0x7f: /* FSQRT */
9294         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
9295         break;
9296     case 0x1a: /* FCVTNS */
9297     case 0x1b: /* FCVTMS */
9298     case 0x1c: /* FCVTAS */
9299     case 0x3a: /* FCVTPS */
9300     case 0x3b: /* FCVTZS */
9301         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9302         break;
9303     case 0x5a: /* FCVTNU */
9304     case 0x5b: /* FCVTMU */
9305     case 0x5c: /* FCVTAU */
9306     case 0x7a: /* FCVTPU */
9307     case 0x7b: /* FCVTZU */
9308         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9309         break;
9310     case 0x18: /* FRINTN */
9311     case 0x19: /* FRINTM */
9312     case 0x38: /* FRINTP */
9313     case 0x39: /* FRINTZ */
9314     case 0x58: /* FRINTA */
9315     case 0x79: /* FRINTI */
9316         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9317         break;
9318     case 0x59: /* FRINTX */
9319         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9320         break;
9321     case 0x1e: /* FRINT32Z */
9322     case 0x5e: /* FRINT32X */
9323         gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9324         break;
9325     case 0x1f: /* FRINT64Z */
9326     case 0x5f: /* FRINT64X */
9327         gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9328         break;
9329     default:
9330         g_assert_not_reached();
9331     }
9332 }
9333
9334 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9335                                    bool is_scalar, bool is_u, bool is_q,
9336                                    int size, int rn, int rd)
9337 {
9338     bool is_double = (size == MO_64);
9339     TCGv_ptr fpst;
9340
9341     if (!fp_access_check(s)) {
9342         return;
9343     }
9344
9345     fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9346
9347     if (is_double) {
9348         TCGv_i64 tcg_op = tcg_temp_new_i64();
9349         TCGv_i64 tcg_zero = tcg_constant_i64(0);
9350         TCGv_i64 tcg_res = tcg_temp_new_i64();
9351         NeonGenTwoDoubleOpFn *genfn;
9352         bool swap = false;
9353         int pass;
9354
9355         switch (opcode) {
9356         case 0x2e: /* FCMLT (zero) */
9357             swap = true;
9358             /* fallthrough */
9359         case 0x2c: /* FCMGT (zero) */
9360             genfn = gen_helper_neon_cgt_f64;
9361             break;
9362         case 0x2d: /* FCMEQ (zero) */
9363             genfn = gen_helper_neon_ceq_f64;
9364             break;
9365         case 0x6d: /* FCMLE (zero) */
9366             swap = true;
9367             /* fall through */
9368         case 0x6c: /* FCMGE (zero) */
9369             genfn = gen_helper_neon_cge_f64;
9370             break;
9371         default:
9372             g_assert_not_reached();
9373         }
9374
9375         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9376             read_vec_element(s, tcg_op, rn, pass, MO_64);
9377             if (swap) {
9378                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9379             } else {
9380                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9381             }
9382             write_vec_element(s, tcg_res, rd, pass, MO_64);
9383         }
9384
9385         clear_vec_high(s, !is_scalar, rd);
9386     } else {
9387         TCGv_i32 tcg_op = tcg_temp_new_i32();
9388         TCGv_i32 tcg_zero = tcg_constant_i32(0);
9389         TCGv_i32 tcg_res = tcg_temp_new_i32();
9390         NeonGenTwoSingleOpFn *genfn;
9391         bool swap = false;
9392         int pass, maxpasses;
9393
9394         if (size == MO_16) {
9395             switch (opcode) {
9396             case 0x2e: /* FCMLT (zero) */
9397                 swap = true;
9398                 /* fall through */
9399             case 0x2c: /* FCMGT (zero) */
9400                 genfn = gen_helper_advsimd_cgt_f16;
9401                 break;
9402             case 0x2d: /* FCMEQ (zero) */
9403                 genfn = gen_helper_advsimd_ceq_f16;
9404                 break;
9405             case 0x6d: /* FCMLE (zero) */
9406                 swap = true;
9407                 /* fall through */
9408             case 0x6c: /* FCMGE (zero) */
9409                 genfn = gen_helper_advsimd_cge_f16;
9410                 break;
9411             default:
9412                 g_assert_not_reached();
9413             }
9414         } else {
9415             switch (opcode) {
9416             case 0x2e: /* FCMLT (zero) */
9417                 swap = true;
9418                 /* fall through */
9419             case 0x2c: /* FCMGT (zero) */
9420                 genfn = gen_helper_neon_cgt_f32;
9421                 break;
9422             case 0x2d: /* FCMEQ (zero) */
9423                 genfn = gen_helper_neon_ceq_f32;
9424                 break;
9425             case 0x6d: /* FCMLE (zero) */
9426                 swap = true;
9427                 /* fall through */
9428             case 0x6c: /* FCMGE (zero) */
9429                 genfn = gen_helper_neon_cge_f32;
9430                 break;
9431             default:
9432                 g_assert_not_reached();
9433             }
9434         }
9435
9436         if (is_scalar) {
9437             maxpasses = 1;
9438         } else {
9439             int vector_size = 8 << is_q;
9440             maxpasses = vector_size >> size;
9441         }
9442
9443         for (pass = 0; pass < maxpasses; pass++) {
9444             read_vec_element_i32(s, tcg_op, rn, pass, size);
9445             if (swap) {
9446                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9447             } else {
9448                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9449             }
9450             if (is_scalar) {
9451                 write_fp_sreg(s, rd, tcg_res);
9452             } else {
9453                 write_vec_element_i32(s, tcg_res, rd, pass, size);
9454             }
9455         }
9456
9457         if (!is_scalar) {
9458             clear_vec_high(s, is_q, rd);
9459         }
9460     }
9461 }
9462
9463 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9464                                     bool is_scalar, bool is_u, bool is_q,
9465                                     int size, int rn, int rd)
9466 {
9467     bool is_double = (size == 3);
9468     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9469
9470     if (is_double) {
9471         TCGv_i64 tcg_op = tcg_temp_new_i64();
9472         TCGv_i64 tcg_res = tcg_temp_new_i64();
9473         int pass;
9474
9475         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9476             read_vec_element(s, tcg_op, rn, pass, MO_64);
9477             switch (opcode) {
9478             case 0x3d: /* FRECPE */
9479                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9480                 break;
9481             case 0x3f: /* FRECPX */
9482                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9483                 break;
9484             case 0x7d: /* FRSQRTE */
9485                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9486                 break;
9487             default:
9488                 g_assert_not_reached();
9489             }
9490             write_vec_element(s, tcg_res, rd, pass, MO_64);
9491         }
9492         clear_vec_high(s, !is_scalar, rd);
9493     } else {
9494         TCGv_i32 tcg_op = tcg_temp_new_i32();
9495         TCGv_i32 tcg_res = tcg_temp_new_i32();
9496         int pass, maxpasses;
9497
9498         if (is_scalar) {
9499             maxpasses = 1;
9500         } else {
9501             maxpasses = is_q ? 4 : 2;
9502         }
9503
9504         for (pass = 0; pass < maxpasses; pass++) {
9505             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9506
9507             switch (opcode) {
9508             case 0x3c: /* URECPE */
9509                 gen_helper_recpe_u32(tcg_res, tcg_op);
9510                 break;
9511             case 0x3d: /* FRECPE */
9512                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9513                 break;
9514             case 0x3f: /* FRECPX */
9515                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9516                 break;
9517             case 0x7d: /* FRSQRTE */
9518                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9519                 break;
9520             default:
9521                 g_assert_not_reached();
9522             }
9523
9524             if (is_scalar) {
9525                 write_fp_sreg(s, rd, tcg_res);
9526             } else {
9527                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9528             }
9529         }
9530         if (!is_scalar) {
9531             clear_vec_high(s, is_q, rd);
9532         }
9533     }
9534 }
9535
9536 static void handle_2misc_narrow(DisasContext *s, bool scalar,
9537                                 int opcode, bool u, bool is_q,
9538                                 int size, int rn, int rd)
9539 {
9540     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9541      * in the source becomes a size element in the destination).
9542      */
9543     int pass;
9544     TCGv_i32 tcg_res[2];
9545     int destelt = is_q ? 2 : 0;
9546     int passes = scalar ? 1 : 2;
9547
9548     if (scalar) {
9549         tcg_res[1] = tcg_constant_i32(0);
9550     }
9551
9552     for (pass = 0; pass < passes; pass++) {
9553         TCGv_i64 tcg_op = tcg_temp_new_i64();
9554         NeonGenNarrowFn *genfn = NULL;
9555         NeonGenNarrowEnvFn *genenvfn = NULL;
9556
9557         if (scalar) {
9558             read_vec_element(s, tcg_op, rn, pass, size + 1);
9559         } else {
9560             read_vec_element(s, tcg_op, rn, pass, MO_64);
9561         }
9562         tcg_res[pass] = tcg_temp_new_i32();
9563
9564         switch (opcode) {
9565         case 0x12: /* XTN, SQXTUN */
9566         {
9567             static NeonGenNarrowFn * const xtnfns[3] = {
9568                 gen_helper_neon_narrow_u8,
9569                 gen_helper_neon_narrow_u16,
9570                 tcg_gen_extrl_i64_i32,
9571             };
9572             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9573                 gen_helper_neon_unarrow_sat8,
9574                 gen_helper_neon_unarrow_sat16,
9575                 gen_helper_neon_unarrow_sat32,
9576             };
9577             if (u) {
9578                 genenvfn = sqxtunfns[size];
9579             } else {
9580                 genfn = xtnfns[size];
9581             }
9582             break;
9583         }
9584         case 0x14: /* SQXTN, UQXTN */
9585         {
9586             static NeonGenNarrowEnvFn * const fns[3][2] = {
9587                 { gen_helper_neon_narrow_sat_s8,
9588                   gen_helper_neon_narrow_sat_u8 },
9589                 { gen_helper_neon_narrow_sat_s16,
9590                   gen_helper_neon_narrow_sat_u16 },
9591                 { gen_helper_neon_narrow_sat_s32,
9592                   gen_helper_neon_narrow_sat_u32 },
9593             };
9594             genenvfn = fns[size][u];
9595             break;
9596         }
9597         case 0x16: /* FCVTN, FCVTN2 */
9598             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9599             if (size == 2) {
9600                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
9601             } else {
9602                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9603                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9604                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9605                 TCGv_i32 ahp = get_ahp_flag();
9606
9607                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9608                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9609                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9610                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9611             }
9612             break;
9613         case 0x36: /* BFCVTN, BFCVTN2 */
9614             {
9615                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9616                 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
9617             }
9618             break;
9619         case 0x56:  /* FCVTXN, FCVTXN2 */
9620             /* 64 bit to 32 bit float conversion
9621              * with von Neumann rounding (round to odd)
9622              */
9623             assert(size == 2);
9624             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9625             break;
9626         default:
9627             g_assert_not_reached();
9628         }
9629
9630         if (genfn) {
9631             genfn(tcg_res[pass], tcg_op);
9632         } else if (genenvfn) {
9633             genenvfn(tcg_res[pass], cpu_env, tcg_op);
9634         }
9635     }
9636
9637     for (pass = 0; pass < 2; pass++) {
9638         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9639     }
9640     clear_vec_high(s, is_q, rd);
9641 }
9642
9643 /* Remaining saturating accumulating ops */
9644 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9645                                 bool is_q, int size, int rn, int rd)
9646 {
9647     bool is_double = (size == 3);
9648
9649     if (is_double) {
9650         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9651         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9652         int pass;
9653
9654         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9655             read_vec_element(s, tcg_rn, rn, pass, MO_64);
9656             read_vec_element(s, tcg_rd, rd, pass, MO_64);
9657
9658             if (is_u) { /* USQADD */
9659                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9660             } else { /* SUQADD */
9661                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9662             }
9663             write_vec_element(s, tcg_rd, rd, pass, MO_64);
9664         }
9665         clear_vec_high(s, !is_scalar, rd);
9666     } else {
9667         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9668         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9669         int pass, maxpasses;
9670
9671         if (is_scalar) {
9672             maxpasses = 1;
9673         } else {
9674             maxpasses = is_q ? 4 : 2;
9675         }
9676
9677         for (pass = 0; pass < maxpasses; pass++) {
9678             if (is_scalar) {
9679                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9680                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9681             } else {
9682                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9683                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9684             }
9685
9686             if (is_u) { /* USQADD */
9687                 switch (size) {
9688                 case 0:
9689                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9690                     break;
9691                 case 1:
9692                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9693                     break;
9694                 case 2:
9695                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9696                     break;
9697                 default:
9698                     g_assert_not_reached();
9699                 }
9700             } else { /* SUQADD */
9701                 switch (size) {
9702                 case 0:
9703                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9704                     break;
9705                 case 1:
9706                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9707                     break;
9708                 case 2:
9709                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9710                     break;
9711                 default:
9712                     g_assert_not_reached();
9713                 }
9714             }
9715
9716             if (is_scalar) {
9717                 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
9718             }
9719             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9720         }
9721         clear_vec_high(s, is_q, rd);
9722     }
9723 }
9724
9725 /* AdvSIMD scalar two reg misc
9726  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9727  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9728  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9729  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9730  */
9731 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
9732 {
9733     int rd = extract32(insn, 0, 5);
9734     int rn = extract32(insn, 5, 5);
9735     int opcode = extract32(insn, 12, 5);
9736     int size = extract32(insn, 22, 2);
9737     bool u = extract32(insn, 29, 1);
9738     bool is_fcvt = false;
9739     int rmode;
9740     TCGv_i32 tcg_rmode;
9741     TCGv_ptr tcg_fpstatus;
9742
9743     switch (opcode) {
9744     case 0x3: /* USQADD / SUQADD*/
9745         if (!fp_access_check(s)) {
9746             return;
9747         }
9748         handle_2misc_satacc(s, true, u, false, size, rn, rd);
9749         return;
9750     case 0x7: /* SQABS / SQNEG */
9751         break;
9752     case 0xa: /* CMLT */
9753         if (u) {
9754             unallocated_encoding(s);
9755             return;
9756         }
9757         /* fall through */
9758     case 0x8: /* CMGT, CMGE */
9759     case 0x9: /* CMEQ, CMLE */
9760     case 0xb: /* ABS, NEG */
9761         if (size != 3) {
9762             unallocated_encoding(s);
9763             return;
9764         }
9765         break;
9766     case 0x12: /* SQXTUN */
9767         if (!u) {
9768             unallocated_encoding(s);
9769             return;
9770         }
9771         /* fall through */
9772     case 0x14: /* SQXTN, UQXTN */
9773         if (size == 3) {
9774             unallocated_encoding(s);
9775             return;
9776         }
9777         if (!fp_access_check(s)) {
9778             return;
9779         }
9780         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
9781         return;
9782     case 0xc ... 0xf:
9783     case 0x16 ... 0x1d:
9784     case 0x1f:
9785         /* Floating point: U, size[1] and opcode indicate operation;
9786          * size[0] indicates single or double precision.
9787          */
9788         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9789         size = extract32(size, 0, 1) ? 3 : 2;
9790         switch (opcode) {
9791         case 0x2c: /* FCMGT (zero) */
9792         case 0x2d: /* FCMEQ (zero) */
9793         case 0x2e: /* FCMLT (zero) */
9794         case 0x6c: /* FCMGE (zero) */
9795         case 0x6d: /* FCMLE (zero) */
9796             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
9797             return;
9798         case 0x1d: /* SCVTF */
9799         case 0x5d: /* UCVTF */
9800         {
9801             bool is_signed = (opcode == 0x1d);
9802             if (!fp_access_check(s)) {
9803                 return;
9804             }
9805             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
9806             return;
9807         }
9808         case 0x3d: /* FRECPE */
9809         case 0x3f: /* FRECPX */
9810         case 0x7d: /* FRSQRTE */
9811             if (!fp_access_check(s)) {
9812                 return;
9813             }
9814             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
9815             return;
9816         case 0x1a: /* FCVTNS */
9817         case 0x1b: /* FCVTMS */
9818         case 0x3a: /* FCVTPS */
9819         case 0x3b: /* FCVTZS */
9820         case 0x5a: /* FCVTNU */
9821         case 0x5b: /* FCVTMU */
9822         case 0x7a: /* FCVTPU */
9823         case 0x7b: /* FCVTZU */
9824             is_fcvt = true;
9825             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9826             break;
9827         case 0x1c: /* FCVTAS */
9828         case 0x5c: /* FCVTAU */
9829             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
9830             is_fcvt = true;
9831             rmode = FPROUNDING_TIEAWAY;
9832             break;
9833         case 0x56: /* FCVTXN, FCVTXN2 */
9834             if (size == 2) {
9835                 unallocated_encoding(s);
9836                 return;
9837             }
9838             if (!fp_access_check(s)) {
9839                 return;
9840             }
9841             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
9842             return;
9843         default:
9844             unallocated_encoding(s);
9845             return;
9846         }
9847         break;
9848     default:
9849         unallocated_encoding(s);
9850         return;
9851     }
9852
9853     if (!fp_access_check(s)) {
9854         return;
9855     }
9856
9857     if (is_fcvt) {
9858         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
9859         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
9860     } else {
9861         tcg_fpstatus = NULL;
9862         tcg_rmode = NULL;
9863     }
9864
9865     if (size == 3) {
9866         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9867         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9868
9869         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
9870         write_fp_dreg(s, rd, tcg_rd);
9871     } else {
9872         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9873         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9874
9875         read_vec_element_i32(s, tcg_rn, rn, 0, size);
9876
9877         switch (opcode) {
9878         case 0x7: /* SQABS, SQNEG */
9879         {
9880             NeonGenOneOpEnvFn *genfn;
9881             static NeonGenOneOpEnvFn * const fns[3][2] = {
9882                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
9883                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
9884                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
9885             };
9886             genfn = fns[size][u];
9887             genfn(tcg_rd, cpu_env, tcg_rn);
9888             break;
9889         }
9890         case 0x1a: /* FCVTNS */
9891         case 0x1b: /* FCVTMS */
9892         case 0x1c: /* FCVTAS */
9893         case 0x3a: /* FCVTPS */
9894         case 0x3b: /* FCVTZS */
9895             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
9896                                  tcg_fpstatus);
9897             break;
9898         case 0x5a: /* FCVTNU */
9899         case 0x5b: /* FCVTMU */
9900         case 0x5c: /* FCVTAU */
9901         case 0x7a: /* FCVTPU */
9902         case 0x7b: /* FCVTZU */
9903             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
9904                                  tcg_fpstatus);
9905             break;
9906         default:
9907             g_assert_not_reached();
9908         }
9909
9910         write_fp_sreg(s, rd, tcg_rd);
9911     }
9912
9913     if (is_fcvt) {
9914         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9915     }
9916 }
9917
9918 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
9919 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
9920                                  int immh, int immb, int opcode, int rn, int rd)
9921 {
9922     int size = 32 - clz32(immh) - 1;
9923     int immhb = immh << 3 | immb;
9924     int shift = 2 * (8 << size) - immhb;
9925     GVecGen2iFn *gvec_fn;
9926
9927     if (extract32(immh, 3, 1) && !is_q) {
9928         unallocated_encoding(s);
9929         return;
9930     }
9931     tcg_debug_assert(size <= 3);
9932
9933     if (!fp_access_check(s)) {
9934         return;
9935     }
9936
9937     switch (opcode) {
9938     case 0x02: /* SSRA / USRA (accumulate) */
9939         gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
9940         break;
9941
9942     case 0x08: /* SRI */
9943         gvec_fn = gen_gvec_sri;
9944         break;
9945
9946     case 0x00: /* SSHR / USHR */
9947         if (is_u) {
9948             if (shift == 8 << size) {
9949                 /* Shift count the same size as element size produces zero.  */
9950                 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
9951                                      is_q ? 16 : 8, vec_full_reg_size(s), 0);
9952                 return;
9953             }
9954             gvec_fn = tcg_gen_gvec_shri;
9955         } else {
9956             /* Shift count the same size as element size produces all sign.  */
9957             if (shift == 8 << size) {
9958                 shift -= 1;
9959             }
9960             gvec_fn = tcg_gen_gvec_sari;
9961         }
9962         break;
9963
9964     case 0x04: /* SRSHR / URSHR (rounding) */
9965         gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
9966         break;
9967
9968     case 0x06: /* SRSRA / URSRA (accum + rounding) */
9969         gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
9970         break;
9971
9972     default:
9973         g_assert_not_reached();
9974     }
9975
9976     gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
9977 }
9978
9979 /* SHL/SLI - Vector shift left */
9980 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
9981                                  int immh, int immb, int opcode, int rn, int rd)
9982 {
9983     int size = 32 - clz32(immh) - 1;
9984     int immhb = immh << 3 | immb;
9985     int shift = immhb - (8 << size);
9986
9987     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
9988     assert(size >= 0 && size <= 3);
9989
9990     if (extract32(immh, 3, 1) && !is_q) {
9991         unallocated_encoding(s);
9992         return;
9993     }
9994
9995     if (!fp_access_check(s)) {
9996         return;
9997     }
9998
9999     if (insert) {
10000         gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10001     } else {
10002         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10003     }
10004 }
10005
10006 /* USHLL/SHLL - Vector shift left with widening */
10007 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10008                                  int immh, int immb, int opcode, int rn, int rd)
10009 {
10010     int size = 32 - clz32(immh) - 1;
10011     int immhb = immh << 3 | immb;
10012     int shift = immhb - (8 << size);
10013     int dsize = 64;
10014     int esize = 8 << size;
10015     int elements = dsize/esize;
10016     TCGv_i64 tcg_rn = tcg_temp_new_i64();
10017     TCGv_i64 tcg_rd = tcg_temp_new_i64();
10018     int i;
10019
10020     if (size >= 3) {
10021         unallocated_encoding(s);
10022         return;
10023     }
10024
10025     if (!fp_access_check(s)) {
10026         return;
10027     }
10028
10029     /* For the LL variants the store is larger than the load,
10030      * so if rd == rn we would overwrite parts of our input.
10031      * So load everything right now and use shifts in the main loop.
10032      */
10033     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10034
10035     for (i = 0; i < elements; i++) {
10036         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
10037         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10038         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10039         write_vec_element(s, tcg_rd, rd, i, size + 1);
10040     }
10041 }
10042
10043 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10044 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10045                                  int immh, int immb, int opcode, int rn, int rd)
10046 {
10047     int immhb = immh << 3 | immb;
10048     int size = 32 - clz32(immh) - 1;
10049     int dsize = 64;
10050     int esize = 8 << size;
10051     int elements = dsize/esize;
10052     int shift = (2 * esize) - immhb;
10053     bool round = extract32(opcode, 0, 1);
10054     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10055     TCGv_i64 tcg_round;
10056     int i;
10057
10058     if (extract32(immh, 3, 1)) {
10059         unallocated_encoding(s);
10060         return;
10061     }
10062
10063     if (!fp_access_check(s)) {
10064         return;
10065     }
10066
10067     tcg_rn = tcg_temp_new_i64();
10068     tcg_rd = tcg_temp_new_i64();
10069     tcg_final = tcg_temp_new_i64();
10070     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10071
10072     if (round) {
10073         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
10074     } else {
10075         tcg_round = NULL;
10076     }
10077
10078     for (i = 0; i < elements; i++) {
10079         read_vec_element(s, tcg_rn, rn, i, size+1);
10080         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10081                                 false, true, size+1, shift);
10082
10083         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10084     }
10085
10086     if (!is_q) {
10087         write_vec_element(s, tcg_final, rd, 0, MO_64);
10088     } else {
10089         write_vec_element(s, tcg_final, rd, 1, MO_64);
10090     }
10091
10092     clear_vec_high(s, is_q, rd);
10093 }
10094
10095
10096 /* AdvSIMD shift by immediate
10097  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10098  * +---+---+---+-------------+------+------+--------+---+------+------+
10099  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10100  * +---+---+---+-------------+------+------+--------+---+------+------+
10101  */
10102 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10103 {
10104     int rd = extract32(insn, 0, 5);
10105     int rn = extract32(insn, 5, 5);
10106     int opcode = extract32(insn, 11, 5);
10107     int immb = extract32(insn, 16, 3);
10108     int immh = extract32(insn, 19, 4);
10109     bool is_u = extract32(insn, 29, 1);
10110     bool is_q = extract32(insn, 30, 1);
10111
10112     /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10113     assert(immh != 0);
10114
10115     switch (opcode) {
10116     case 0x08: /* SRI */
10117         if (!is_u) {
10118             unallocated_encoding(s);
10119             return;
10120         }
10121         /* fall through */
10122     case 0x00: /* SSHR / USHR */
10123     case 0x02: /* SSRA / USRA (accumulate) */
10124     case 0x04: /* SRSHR / URSHR (rounding) */
10125     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10126         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10127         break;
10128     case 0x0a: /* SHL / SLI */
10129         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10130         break;
10131     case 0x10: /* SHRN */
10132     case 0x11: /* RSHRN / SQRSHRUN */
10133         if (is_u) {
10134             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10135                                    opcode, rn, rd);
10136         } else {
10137             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10138         }
10139         break;
10140     case 0x12: /* SQSHRN / UQSHRN */
10141     case 0x13: /* SQRSHRN / UQRSHRN */
10142         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10143                                opcode, rn, rd);
10144         break;
10145     case 0x14: /* SSHLL / USHLL */
10146         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10147         break;
10148     case 0x1c: /* SCVTF / UCVTF */
10149         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10150                                      opcode, rn, rd);
10151         break;
10152     case 0xc: /* SQSHLU */
10153         if (!is_u) {
10154             unallocated_encoding(s);
10155             return;
10156         }
10157         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10158         break;
10159     case 0xe: /* SQSHL, UQSHL */
10160         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10161         break;
10162     case 0x1f: /* FCVTZS/ FCVTZU */
10163         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10164         return;
10165     default:
10166         unallocated_encoding(s);
10167         return;
10168     }
10169 }
10170
10171 /* Generate code to do a "long" addition or subtraction, ie one done in
10172  * TCGv_i64 on vector lanes twice the width specified by size.
10173  */
10174 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10175                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10176 {
10177     static NeonGenTwo64OpFn * const fns[3][2] = {
10178         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10179         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10180         { tcg_gen_add_i64, tcg_gen_sub_i64 },
10181     };
10182     NeonGenTwo64OpFn *genfn;
10183     assert(size < 3);
10184
10185     genfn = fns[size][is_sub];
10186     genfn(tcg_res, tcg_op1, tcg_op2);
10187 }
10188
10189 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10190                                 int opcode, int rd, int rn, int rm)
10191 {
10192     /* 3-reg-different widening insns: 64 x 64 -> 128 */
10193     TCGv_i64 tcg_res[2];
10194     int pass, accop;
10195
10196     tcg_res[0] = tcg_temp_new_i64();
10197     tcg_res[1] = tcg_temp_new_i64();
10198
10199     /* Does this op do an adding accumulate, a subtracting accumulate,
10200      * or no accumulate at all?
10201      */
10202     switch (opcode) {
10203     case 5:
10204     case 8:
10205     case 9:
10206         accop = 1;
10207         break;
10208     case 10:
10209     case 11:
10210         accop = -1;
10211         break;
10212     default:
10213         accop = 0;
10214         break;
10215     }
10216
10217     if (accop != 0) {
10218         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10219         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10220     }
10221
10222     /* size == 2 means two 32x32->64 operations; this is worth special
10223      * casing because we can generally handle it inline.
10224      */
10225     if (size == 2) {
10226         for (pass = 0; pass < 2; pass++) {
10227             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10228             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10229             TCGv_i64 tcg_passres;
10230             MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10231
10232             int elt = pass + is_q * 2;
10233
10234             read_vec_element(s, tcg_op1, rn, elt, memop);
10235             read_vec_element(s, tcg_op2, rm, elt, memop);
10236
10237             if (accop == 0) {
10238                 tcg_passres = tcg_res[pass];
10239             } else {
10240                 tcg_passres = tcg_temp_new_i64();
10241             }
10242
10243             switch (opcode) {
10244             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10245                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10246                 break;
10247             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10248                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10249                 break;
10250             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10251             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10252             {
10253                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10254                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10255
10256                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10257                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
10258                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10259                                     tcg_passres,
10260                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10261                 break;
10262             }
10263             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10264             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10265             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10266                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10267                 break;
10268             case 9: /* SQDMLAL, SQDMLAL2 */
10269             case 11: /* SQDMLSL, SQDMLSL2 */
10270             case 13: /* SQDMULL, SQDMULL2 */
10271                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10272                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10273                                                   tcg_passres, tcg_passres);
10274                 break;
10275             default:
10276                 g_assert_not_reached();
10277             }
10278
10279             if (opcode == 9 || opcode == 11) {
10280                 /* saturating accumulate ops */
10281                 if (accop < 0) {
10282                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10283                 }
10284                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10285                                                   tcg_res[pass], tcg_passres);
10286             } else if (accop > 0) {
10287                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10288             } else if (accop < 0) {
10289                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10290             }
10291         }
10292     } else {
10293         /* size 0 or 1, generally helper functions */
10294         for (pass = 0; pass < 2; pass++) {
10295             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10296             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10297             TCGv_i64 tcg_passres;
10298             int elt = pass + is_q * 2;
10299
10300             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10301             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10302
10303             if (accop == 0) {
10304                 tcg_passres = tcg_res[pass];
10305             } else {
10306                 tcg_passres = tcg_temp_new_i64();
10307             }
10308
10309             switch (opcode) {
10310             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10311             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10312             {
10313                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10314                 static NeonGenWidenFn * const widenfns[2][2] = {
10315                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10316                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10317                 };
10318                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10319
10320                 widenfn(tcg_op2_64, tcg_op2);
10321                 widenfn(tcg_passres, tcg_op1);
10322                 gen_neon_addl(size, (opcode == 2), tcg_passres,
10323                               tcg_passres, tcg_op2_64);
10324                 break;
10325             }
10326             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10327             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10328                 if (size == 0) {
10329                     if (is_u) {
10330                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10331                     } else {
10332                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10333                     }
10334                 } else {
10335                     if (is_u) {
10336                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10337                     } else {
10338                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10339                     }
10340                 }
10341                 break;
10342             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10343             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10344             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10345                 if (size == 0) {
10346                     if (is_u) {
10347                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10348                     } else {
10349                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10350                     }
10351                 } else {
10352                     if (is_u) {
10353                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10354                     } else {
10355                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10356                     }
10357                 }
10358                 break;
10359             case 9: /* SQDMLAL, SQDMLAL2 */
10360             case 11: /* SQDMLSL, SQDMLSL2 */
10361             case 13: /* SQDMULL, SQDMULL2 */
10362                 assert(size == 1);
10363                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10364                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10365                                                   tcg_passres, tcg_passres);
10366                 break;
10367             default:
10368                 g_assert_not_reached();
10369             }
10370
10371             if (accop != 0) {
10372                 if (opcode == 9 || opcode == 11) {
10373                     /* saturating accumulate ops */
10374                     if (accop < 0) {
10375                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10376                     }
10377                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10378                                                       tcg_res[pass],
10379                                                       tcg_passres);
10380                 } else {
10381                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
10382                                   tcg_res[pass], tcg_passres);
10383                 }
10384             }
10385         }
10386     }
10387
10388     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10389     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10390 }
10391
10392 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10393                             int opcode, int rd, int rn, int rm)
10394 {
10395     TCGv_i64 tcg_res[2];
10396     int part = is_q ? 2 : 0;
10397     int pass;
10398
10399     for (pass = 0; pass < 2; pass++) {
10400         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10401         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10402         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10403         static NeonGenWidenFn * const widenfns[3][2] = {
10404             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10405             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10406             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10407         };
10408         NeonGenWidenFn *widenfn = widenfns[size][is_u];
10409
10410         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10411         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10412         widenfn(tcg_op2_wide, tcg_op2);
10413         tcg_res[pass] = tcg_temp_new_i64();
10414         gen_neon_addl(size, (opcode == 3),
10415                       tcg_res[pass], tcg_op1, tcg_op2_wide);
10416     }
10417
10418     for (pass = 0; pass < 2; pass++) {
10419         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10420     }
10421 }
10422
10423 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10424 {
10425     tcg_gen_addi_i64(in, in, 1U << 31);
10426     tcg_gen_extrh_i64_i32(res, in);
10427 }
10428
10429 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10430                                  int opcode, int rd, int rn, int rm)
10431 {
10432     TCGv_i32 tcg_res[2];
10433     int part = is_q ? 2 : 0;
10434     int pass;
10435
10436     for (pass = 0; pass < 2; pass++) {
10437         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10438         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10439         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10440         static NeonGenNarrowFn * const narrowfns[3][2] = {
10441             { gen_helper_neon_narrow_high_u8,
10442               gen_helper_neon_narrow_round_high_u8 },
10443             { gen_helper_neon_narrow_high_u16,
10444               gen_helper_neon_narrow_round_high_u16 },
10445             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10446         };
10447         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10448
10449         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10450         read_vec_element(s, tcg_op2, rm, pass, MO_64);
10451
10452         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10453
10454         tcg_res[pass] = tcg_temp_new_i32();
10455         gennarrow(tcg_res[pass], tcg_wideres);
10456     }
10457
10458     for (pass = 0; pass < 2; pass++) {
10459         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10460     }
10461     clear_vec_high(s, is_q, rd);
10462 }
10463
10464 /* AdvSIMD three different
10465  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10466  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10467  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10468  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10469  */
10470 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10471 {
10472     /* Instructions in this group fall into three basic classes
10473      * (in each case with the operation working on each element in
10474      * the input vectors):
10475      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10476      *     128 bit input)
10477      * (2) wide 64 x 128 -> 128
10478      * (3) narrowing 128 x 128 -> 64
10479      * Here we do initial decode, catch unallocated cases and
10480      * dispatch to separate functions for each class.
10481      */
10482     int is_q = extract32(insn, 30, 1);
10483     int is_u = extract32(insn, 29, 1);
10484     int size = extract32(insn, 22, 2);
10485     int opcode = extract32(insn, 12, 4);
10486     int rm = extract32(insn, 16, 5);
10487     int rn = extract32(insn, 5, 5);
10488     int rd = extract32(insn, 0, 5);
10489
10490     switch (opcode) {
10491     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10492     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10493         /* 64 x 128 -> 128 */
10494         if (size == 3) {
10495             unallocated_encoding(s);
10496             return;
10497         }
10498         if (!fp_access_check(s)) {
10499             return;
10500         }
10501         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10502         break;
10503     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10504     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10505         /* 128 x 128 -> 64 */
10506         if (size == 3) {
10507             unallocated_encoding(s);
10508             return;
10509         }
10510         if (!fp_access_check(s)) {
10511             return;
10512         }
10513         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10514         break;
10515     case 14: /* PMULL, PMULL2 */
10516         if (is_u) {
10517             unallocated_encoding(s);
10518             return;
10519         }
10520         switch (size) {
10521         case 0: /* PMULL.P8 */
10522             if (!fp_access_check(s)) {
10523                 return;
10524             }
10525             /* The Q field specifies lo/hi half input for this insn.  */
10526             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10527                              gen_helper_neon_pmull_h);
10528             break;
10529
10530         case 3: /* PMULL.P64 */
10531             if (!dc_isar_feature(aa64_pmull, s)) {
10532                 unallocated_encoding(s);
10533                 return;
10534             }
10535             if (!fp_access_check(s)) {
10536                 return;
10537             }
10538             /* The Q field specifies lo/hi half input for this insn.  */
10539             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10540                              gen_helper_gvec_pmull_q);
10541             break;
10542
10543         default:
10544             unallocated_encoding(s);
10545             break;
10546         }
10547         return;
10548     case 9: /* SQDMLAL, SQDMLAL2 */
10549     case 11: /* SQDMLSL, SQDMLSL2 */
10550     case 13: /* SQDMULL, SQDMULL2 */
10551         if (is_u || size == 0) {
10552             unallocated_encoding(s);
10553             return;
10554         }
10555         /* fall through */
10556     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10557     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10558     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10559     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10560     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10561     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10562     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10563         /* 64 x 64 -> 128 */
10564         if (size == 3) {
10565             unallocated_encoding(s);
10566             return;
10567         }
10568         if (!fp_access_check(s)) {
10569             return;
10570         }
10571
10572         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10573         break;
10574     default:
10575         /* opcode 15 not allocated */
10576         unallocated_encoding(s);
10577         break;
10578     }
10579 }
10580
10581 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10582 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10583 {
10584     int rd = extract32(insn, 0, 5);
10585     int rn = extract32(insn, 5, 5);
10586     int rm = extract32(insn, 16, 5);
10587     int size = extract32(insn, 22, 2);
10588     bool is_u = extract32(insn, 29, 1);
10589     bool is_q = extract32(insn, 30, 1);
10590
10591     if (!fp_access_check(s)) {
10592         return;
10593     }
10594
10595     switch (size + 4 * is_u) {
10596     case 0: /* AND */
10597         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10598         return;
10599     case 1: /* BIC */
10600         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10601         return;
10602     case 2: /* ORR */
10603         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10604         return;
10605     case 3: /* ORN */
10606         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10607         return;
10608     case 4: /* EOR */
10609         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10610         return;
10611
10612     case 5: /* BSL bitwise select */
10613         gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10614         return;
10615     case 6: /* BIT, bitwise insert if true */
10616         gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10617         return;
10618     case 7: /* BIF, bitwise insert if false */
10619         gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10620         return;
10621
10622     default:
10623         g_assert_not_reached();
10624     }
10625 }
10626
10627 /* Pairwise op subgroup of C3.6.16.
10628  *
10629  * This is called directly or via the handle_3same_float for float pairwise
10630  * operations where the opcode and size are calculated differently.
10631  */
10632 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10633                                    int size, int rn, int rm, int rd)
10634 {
10635     TCGv_ptr fpst;
10636     int pass;
10637
10638     /* Floating point operations need fpst */
10639     if (opcode >= 0x58) {
10640         fpst = fpstatus_ptr(FPST_FPCR);
10641     } else {
10642         fpst = NULL;
10643     }
10644
10645     if (!fp_access_check(s)) {
10646         return;
10647     }
10648
10649     /* These operations work on the concatenated rm:rn, with each pair of
10650      * adjacent elements being operated on to produce an element in the result.
10651      */
10652     if (size == 3) {
10653         TCGv_i64 tcg_res[2];
10654
10655         for (pass = 0; pass < 2; pass++) {
10656             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10657             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10658             int passreg = (pass == 0) ? rn : rm;
10659
10660             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10661             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10662             tcg_res[pass] = tcg_temp_new_i64();
10663
10664             switch (opcode) {
10665             case 0x17: /* ADDP */
10666                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10667                 break;
10668             case 0x58: /* FMAXNMP */
10669                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10670                 break;
10671             case 0x5a: /* FADDP */
10672                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10673                 break;
10674             case 0x5e: /* FMAXP */
10675                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10676                 break;
10677             case 0x78: /* FMINNMP */
10678                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10679                 break;
10680             case 0x7e: /* FMINP */
10681                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10682                 break;
10683             default:
10684                 g_assert_not_reached();
10685             }
10686         }
10687
10688         for (pass = 0; pass < 2; pass++) {
10689             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10690         }
10691     } else {
10692         int maxpass = is_q ? 4 : 2;
10693         TCGv_i32 tcg_res[4];
10694
10695         for (pass = 0; pass < maxpass; pass++) {
10696             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10697             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10698             NeonGenTwoOpFn *genfn = NULL;
10699             int passreg = pass < (maxpass / 2) ? rn : rm;
10700             int passelt = (is_q && (pass & 1)) ? 2 : 0;
10701
10702             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10703             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10704             tcg_res[pass] = tcg_temp_new_i32();
10705
10706             switch (opcode) {
10707             case 0x17: /* ADDP */
10708             {
10709                 static NeonGenTwoOpFn * const fns[3] = {
10710                     gen_helper_neon_padd_u8,
10711                     gen_helper_neon_padd_u16,
10712                     tcg_gen_add_i32,
10713                 };
10714                 genfn = fns[size];
10715                 break;
10716             }
10717             case 0x14: /* SMAXP, UMAXP */
10718             {
10719                 static NeonGenTwoOpFn * const fns[3][2] = {
10720                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10721                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10722                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10723                 };
10724                 genfn = fns[size][u];
10725                 break;
10726             }
10727             case 0x15: /* SMINP, UMINP */
10728             {
10729                 static NeonGenTwoOpFn * const fns[3][2] = {
10730                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10731                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10732                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10733                 };
10734                 genfn = fns[size][u];
10735                 break;
10736             }
10737             /* The FP operations are all on single floats (32 bit) */
10738             case 0x58: /* FMAXNMP */
10739                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10740                 break;
10741             case 0x5a: /* FADDP */
10742                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10743                 break;
10744             case 0x5e: /* FMAXP */
10745                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10746                 break;
10747             case 0x78: /* FMINNMP */
10748                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10749                 break;
10750             case 0x7e: /* FMINP */
10751                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10752                 break;
10753             default:
10754                 g_assert_not_reached();
10755             }
10756
10757             /* FP ops called directly, otherwise call now */
10758             if (genfn) {
10759                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
10760             }
10761         }
10762
10763         for (pass = 0; pass < maxpass; pass++) {
10764             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10765         }
10766         clear_vec_high(s, is_q, rd);
10767     }
10768 }
10769
10770 /* Floating point op subgroup of C3.6.16. */
10771 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
10772 {
10773     /* For floating point ops, the U, size[1] and opcode bits
10774      * together indicate the operation. size[0] indicates single
10775      * or double.
10776      */
10777     int fpopcode = extract32(insn, 11, 5)
10778         | (extract32(insn, 23, 1) << 5)
10779         | (extract32(insn, 29, 1) << 6);
10780     int is_q = extract32(insn, 30, 1);
10781     int size = extract32(insn, 22, 1);
10782     int rm = extract32(insn, 16, 5);
10783     int rn = extract32(insn, 5, 5);
10784     int rd = extract32(insn, 0, 5);
10785
10786     int datasize = is_q ? 128 : 64;
10787     int esize = 32 << size;
10788     int elements = datasize / esize;
10789
10790     if (size == 1 && !is_q) {
10791         unallocated_encoding(s);
10792         return;
10793     }
10794
10795     switch (fpopcode) {
10796     case 0x58: /* FMAXNMP */
10797     case 0x5a: /* FADDP */
10798     case 0x5e: /* FMAXP */
10799     case 0x78: /* FMINNMP */
10800     case 0x7e: /* FMINP */
10801         if (size && !is_q) {
10802             unallocated_encoding(s);
10803             return;
10804         }
10805         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
10806                                rn, rm, rd);
10807         return;
10808     case 0x1b: /* FMULX */
10809     case 0x1f: /* FRECPS */
10810     case 0x3f: /* FRSQRTS */
10811     case 0x5d: /* FACGE */
10812     case 0x7d: /* FACGT */
10813     case 0x19: /* FMLA */
10814     case 0x39: /* FMLS */
10815     case 0x18: /* FMAXNM */
10816     case 0x1a: /* FADD */
10817     case 0x1c: /* FCMEQ */
10818     case 0x1e: /* FMAX */
10819     case 0x38: /* FMINNM */
10820     case 0x3a: /* FSUB */
10821     case 0x3e: /* FMIN */
10822     case 0x5b: /* FMUL */
10823     case 0x5c: /* FCMGE */
10824     case 0x5f: /* FDIV */
10825     case 0x7a: /* FABD */
10826     case 0x7c: /* FCMGT */
10827         if (!fp_access_check(s)) {
10828             return;
10829         }
10830         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
10831         return;
10832
10833     case 0x1d: /* FMLAL  */
10834     case 0x3d: /* FMLSL  */
10835     case 0x59: /* FMLAL2 */
10836     case 0x79: /* FMLSL2 */
10837         if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
10838             unallocated_encoding(s);
10839             return;
10840         }
10841         if (fp_access_check(s)) {
10842             int is_s = extract32(insn, 23, 1);
10843             int is_2 = extract32(insn, 29, 1);
10844             int data = (is_2 << 1) | is_s;
10845             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
10846                                vec_full_reg_offset(s, rn),
10847                                vec_full_reg_offset(s, rm), cpu_env,
10848                                is_q ? 16 : 8, vec_full_reg_size(s),
10849                                data, gen_helper_gvec_fmlal_a64);
10850         }
10851         return;
10852
10853     default:
10854         unallocated_encoding(s);
10855         return;
10856     }
10857 }
10858
10859 /* Integer op subgroup of C3.6.16. */
10860 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
10861 {
10862     int is_q = extract32(insn, 30, 1);
10863     int u = extract32(insn, 29, 1);
10864     int size = extract32(insn, 22, 2);
10865     int opcode = extract32(insn, 11, 5);
10866     int rm = extract32(insn, 16, 5);
10867     int rn = extract32(insn, 5, 5);
10868     int rd = extract32(insn, 0, 5);
10869     int pass;
10870     TCGCond cond;
10871
10872     switch (opcode) {
10873     case 0x13: /* MUL, PMUL */
10874         if (u && size != 0) {
10875             unallocated_encoding(s);
10876             return;
10877         }
10878         /* fall through */
10879     case 0x0: /* SHADD, UHADD */
10880     case 0x2: /* SRHADD, URHADD */
10881     case 0x4: /* SHSUB, UHSUB */
10882     case 0xc: /* SMAX, UMAX */
10883     case 0xd: /* SMIN, UMIN */
10884     case 0xe: /* SABD, UABD */
10885     case 0xf: /* SABA, UABA */
10886     case 0x12: /* MLA, MLS */
10887         if (size == 3) {
10888             unallocated_encoding(s);
10889             return;
10890         }
10891         break;
10892     case 0x16: /* SQDMULH, SQRDMULH */
10893         if (size == 0 || size == 3) {
10894             unallocated_encoding(s);
10895             return;
10896         }
10897         break;
10898     default:
10899         if (size == 3 && !is_q) {
10900             unallocated_encoding(s);
10901             return;
10902         }
10903         break;
10904     }
10905
10906     if (!fp_access_check(s)) {
10907         return;
10908     }
10909
10910     switch (opcode) {
10911     case 0x01: /* SQADD, UQADD */
10912         if (u) {
10913             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
10914         } else {
10915             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
10916         }
10917         return;
10918     case 0x05: /* SQSUB, UQSUB */
10919         if (u) {
10920             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
10921         } else {
10922             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
10923         }
10924         return;
10925     case 0x08: /* SSHL, USHL */
10926         if (u) {
10927             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
10928         } else {
10929             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
10930         }
10931         return;
10932     case 0x0c: /* SMAX, UMAX */
10933         if (u) {
10934             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
10935         } else {
10936             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
10937         }
10938         return;
10939     case 0x0d: /* SMIN, UMIN */
10940         if (u) {
10941             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
10942         } else {
10943             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
10944         }
10945         return;
10946     case 0xe: /* SABD, UABD */
10947         if (u) {
10948             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
10949         } else {
10950             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
10951         }
10952         return;
10953     case 0xf: /* SABA, UABA */
10954         if (u) {
10955             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
10956         } else {
10957             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
10958         }
10959         return;
10960     case 0x10: /* ADD, SUB */
10961         if (u) {
10962             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
10963         } else {
10964             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
10965         }
10966         return;
10967     case 0x13: /* MUL, PMUL */
10968         if (!u) { /* MUL */
10969             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
10970         } else {  /* PMUL */
10971             gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
10972         }
10973         return;
10974     case 0x12: /* MLA, MLS */
10975         if (u) {
10976             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
10977         } else {
10978             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
10979         }
10980         return;
10981     case 0x16: /* SQDMULH, SQRDMULH */
10982         {
10983             static gen_helper_gvec_3_ptr * const fns[2][2] = {
10984                 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
10985                 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
10986             };
10987             gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
10988         }
10989         return;
10990     case 0x11:
10991         if (!u) { /* CMTST */
10992             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
10993             return;
10994         }
10995         /* else CMEQ */
10996         cond = TCG_COND_EQ;
10997         goto do_gvec_cmp;
10998     case 0x06: /* CMGT, CMHI */
10999         cond = u ? TCG_COND_GTU : TCG_COND_GT;
11000         goto do_gvec_cmp;
11001     case 0x07: /* CMGE, CMHS */
11002         cond = u ? TCG_COND_GEU : TCG_COND_GE;
11003     do_gvec_cmp:
11004         tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11005                          vec_full_reg_offset(s, rn),
11006                          vec_full_reg_offset(s, rm),
11007                          is_q ? 16 : 8, vec_full_reg_size(s));
11008         return;
11009     }
11010
11011     if (size == 3) {
11012         assert(is_q);
11013         for (pass = 0; pass < 2; pass++) {
11014             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11015             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11016             TCGv_i64 tcg_res = tcg_temp_new_i64();
11017
11018             read_vec_element(s, tcg_op1, rn, pass, MO_64);
11019             read_vec_element(s, tcg_op2, rm, pass, MO_64);
11020
11021             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11022
11023             write_vec_element(s, tcg_res, rd, pass, MO_64);
11024         }
11025     } else {
11026         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11027             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11028             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11029             TCGv_i32 tcg_res = tcg_temp_new_i32();
11030             NeonGenTwoOpFn *genfn = NULL;
11031             NeonGenTwoOpEnvFn *genenvfn = NULL;
11032
11033             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11034             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11035
11036             switch (opcode) {
11037             case 0x0: /* SHADD, UHADD */
11038             {
11039                 static NeonGenTwoOpFn * const fns[3][2] = {
11040                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11041                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11042                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11043                 };
11044                 genfn = fns[size][u];
11045                 break;
11046             }
11047             case 0x2: /* SRHADD, URHADD */
11048             {
11049                 static NeonGenTwoOpFn * const fns[3][2] = {
11050                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11051                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11052                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11053                 };
11054                 genfn = fns[size][u];
11055                 break;
11056             }
11057             case 0x4: /* SHSUB, UHSUB */
11058             {
11059                 static NeonGenTwoOpFn * const fns[3][2] = {
11060                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11061                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11062                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11063                 };
11064                 genfn = fns[size][u];
11065                 break;
11066             }
11067             case 0x9: /* SQSHL, UQSHL */
11068             {
11069                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11070                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11071                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11072                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11073                 };
11074                 genenvfn = fns[size][u];
11075                 break;
11076             }
11077             case 0xa: /* SRSHL, URSHL */
11078             {
11079                 static NeonGenTwoOpFn * const fns[3][2] = {
11080                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11081                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11082                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11083                 };
11084                 genfn = fns[size][u];
11085                 break;
11086             }
11087             case 0xb: /* SQRSHL, UQRSHL */
11088             {
11089                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11090                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11091                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11092                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11093                 };
11094                 genenvfn = fns[size][u];
11095                 break;
11096             }
11097             default:
11098                 g_assert_not_reached();
11099             }
11100
11101             if (genenvfn) {
11102                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
11103             } else {
11104                 genfn(tcg_res, tcg_op1, tcg_op2);
11105             }
11106
11107             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11108         }
11109     }
11110     clear_vec_high(s, is_q, rd);
11111 }
11112
11113 /* AdvSIMD three same
11114  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11115  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11116  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11117  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11118  */
11119 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11120 {
11121     int opcode = extract32(insn, 11, 5);
11122
11123     switch (opcode) {
11124     case 0x3: /* logic ops */
11125         disas_simd_3same_logic(s, insn);
11126         break;
11127     case 0x17: /* ADDP */
11128     case 0x14: /* SMAXP, UMAXP */
11129     case 0x15: /* SMINP, UMINP */
11130     {
11131         /* Pairwise operations */
11132         int is_q = extract32(insn, 30, 1);
11133         int u = extract32(insn, 29, 1);
11134         int size = extract32(insn, 22, 2);
11135         int rm = extract32(insn, 16, 5);
11136         int rn = extract32(insn, 5, 5);
11137         int rd = extract32(insn, 0, 5);
11138         if (opcode == 0x17) {
11139             if (u || (size == 3 && !is_q)) {
11140                 unallocated_encoding(s);
11141                 return;
11142             }
11143         } else {
11144             if (size == 3) {
11145                 unallocated_encoding(s);
11146                 return;
11147             }
11148         }
11149         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11150         break;
11151     }
11152     case 0x18 ... 0x31:
11153         /* floating point ops, sz[1] and U are part of opcode */
11154         disas_simd_3same_float(s, insn);
11155         break;
11156     default:
11157         disas_simd_3same_int(s, insn);
11158         break;
11159     }
11160 }
11161
11162 /*
11163  * Advanced SIMD three same (ARMv8.2 FP16 variants)
11164  *
11165  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11166  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11167  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11168  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11169  *
11170  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11171  * (register), FACGE, FABD, FCMGT (register) and FACGT.
11172  *
11173  */
11174 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11175 {
11176     int opcode = extract32(insn, 11, 3);
11177     int u = extract32(insn, 29, 1);
11178     int a = extract32(insn, 23, 1);
11179     int is_q = extract32(insn, 30, 1);
11180     int rm = extract32(insn, 16, 5);
11181     int rn = extract32(insn, 5, 5);
11182     int rd = extract32(insn, 0, 5);
11183     /*
11184      * For these floating point ops, the U, a and opcode bits
11185      * together indicate the operation.
11186      */
11187     int fpopcode = opcode | (a << 3) | (u << 4);
11188     int datasize = is_q ? 128 : 64;
11189     int elements = datasize / 16;
11190     bool pairwise;
11191     TCGv_ptr fpst;
11192     int pass;
11193
11194     switch (fpopcode) {
11195     case 0x0: /* FMAXNM */
11196     case 0x1: /* FMLA */
11197     case 0x2: /* FADD */
11198     case 0x3: /* FMULX */
11199     case 0x4: /* FCMEQ */
11200     case 0x6: /* FMAX */
11201     case 0x7: /* FRECPS */
11202     case 0x8: /* FMINNM */
11203     case 0x9: /* FMLS */
11204     case 0xa: /* FSUB */
11205     case 0xe: /* FMIN */
11206     case 0xf: /* FRSQRTS */
11207     case 0x13: /* FMUL */
11208     case 0x14: /* FCMGE */
11209     case 0x15: /* FACGE */
11210     case 0x17: /* FDIV */
11211     case 0x1a: /* FABD */
11212     case 0x1c: /* FCMGT */
11213     case 0x1d: /* FACGT */
11214         pairwise = false;
11215         break;
11216     case 0x10: /* FMAXNMP */
11217     case 0x12: /* FADDP */
11218     case 0x16: /* FMAXP */
11219     case 0x18: /* FMINNMP */
11220     case 0x1e: /* FMINP */
11221         pairwise = true;
11222         break;
11223     default:
11224         unallocated_encoding(s);
11225         return;
11226     }
11227
11228     if (!dc_isar_feature(aa64_fp16, s)) {
11229         unallocated_encoding(s);
11230         return;
11231     }
11232
11233     if (!fp_access_check(s)) {
11234         return;
11235     }
11236
11237     fpst = fpstatus_ptr(FPST_FPCR_F16);
11238
11239     if (pairwise) {
11240         int maxpass = is_q ? 8 : 4;
11241         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11242         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11243         TCGv_i32 tcg_res[8];
11244
11245         for (pass = 0; pass < maxpass; pass++) {
11246             int passreg = pass < (maxpass / 2) ? rn : rm;
11247             int passelt = (pass << 1) & (maxpass - 1);
11248
11249             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11250             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11251             tcg_res[pass] = tcg_temp_new_i32();
11252
11253             switch (fpopcode) {
11254             case 0x10: /* FMAXNMP */
11255                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11256                                            fpst);
11257                 break;
11258             case 0x12: /* FADDP */
11259                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11260                 break;
11261             case 0x16: /* FMAXP */
11262                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11263                 break;
11264             case 0x18: /* FMINNMP */
11265                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11266                                            fpst);
11267                 break;
11268             case 0x1e: /* FMINP */
11269                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11270                 break;
11271             default:
11272                 g_assert_not_reached();
11273             }
11274         }
11275
11276         for (pass = 0; pass < maxpass; pass++) {
11277             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11278         }
11279     } else {
11280         for (pass = 0; pass < elements; pass++) {
11281             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11282             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11283             TCGv_i32 tcg_res = tcg_temp_new_i32();
11284
11285             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11286             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11287
11288             switch (fpopcode) {
11289             case 0x0: /* FMAXNM */
11290                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11291                 break;
11292             case 0x1: /* FMLA */
11293                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11294                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11295                                            fpst);
11296                 break;
11297             case 0x2: /* FADD */
11298                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11299                 break;
11300             case 0x3: /* FMULX */
11301                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11302                 break;
11303             case 0x4: /* FCMEQ */
11304                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11305                 break;
11306             case 0x6: /* FMAX */
11307                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11308                 break;
11309             case 0x7: /* FRECPS */
11310                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11311                 break;
11312             case 0x8: /* FMINNM */
11313                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11314                 break;
11315             case 0x9: /* FMLS */
11316                 /* As usual for ARM, separate negation for fused multiply-add */
11317                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11318                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11319                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11320                                            fpst);
11321                 break;
11322             case 0xa: /* FSUB */
11323                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11324                 break;
11325             case 0xe: /* FMIN */
11326                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11327                 break;
11328             case 0xf: /* FRSQRTS */
11329                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11330                 break;
11331             case 0x13: /* FMUL */
11332                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11333                 break;
11334             case 0x14: /* FCMGE */
11335                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11336                 break;
11337             case 0x15: /* FACGE */
11338                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11339                 break;
11340             case 0x17: /* FDIV */
11341                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11342                 break;
11343             case 0x1a: /* FABD */
11344                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11345                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11346                 break;
11347             case 0x1c: /* FCMGT */
11348                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11349                 break;
11350             case 0x1d: /* FACGT */
11351                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11352                 break;
11353             default:
11354                 g_assert_not_reached();
11355             }
11356
11357             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11358         }
11359     }
11360
11361     clear_vec_high(s, is_q, rd);
11362 }
11363
11364 /* AdvSIMD three same extra
11365  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11366  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11367  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11368  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11369  */
11370 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11371 {
11372     int rd = extract32(insn, 0, 5);
11373     int rn = extract32(insn, 5, 5);
11374     int opcode = extract32(insn, 11, 4);
11375     int rm = extract32(insn, 16, 5);
11376     int size = extract32(insn, 22, 2);
11377     bool u = extract32(insn, 29, 1);
11378     bool is_q = extract32(insn, 30, 1);
11379     bool feature;
11380     int rot;
11381
11382     switch (u * 16 + opcode) {
11383     case 0x10: /* SQRDMLAH (vector) */
11384     case 0x11: /* SQRDMLSH (vector) */
11385         if (size != 1 && size != 2) {
11386             unallocated_encoding(s);
11387             return;
11388         }
11389         feature = dc_isar_feature(aa64_rdm, s);
11390         break;
11391     case 0x02: /* SDOT (vector) */
11392     case 0x12: /* UDOT (vector) */
11393         if (size != MO_32) {
11394             unallocated_encoding(s);
11395             return;
11396         }
11397         feature = dc_isar_feature(aa64_dp, s);
11398         break;
11399     case 0x03: /* USDOT */
11400         if (size != MO_32) {
11401             unallocated_encoding(s);
11402             return;
11403         }
11404         feature = dc_isar_feature(aa64_i8mm, s);
11405         break;
11406     case 0x04: /* SMMLA */
11407     case 0x14: /* UMMLA */
11408     case 0x05: /* USMMLA */
11409         if (!is_q || size != MO_32) {
11410             unallocated_encoding(s);
11411             return;
11412         }
11413         feature = dc_isar_feature(aa64_i8mm, s);
11414         break;
11415     case 0x18: /* FCMLA, #0 */
11416     case 0x19: /* FCMLA, #90 */
11417     case 0x1a: /* FCMLA, #180 */
11418     case 0x1b: /* FCMLA, #270 */
11419     case 0x1c: /* FCADD, #90 */
11420     case 0x1e: /* FCADD, #270 */
11421         if (size == 0
11422             || (size == 1 && !dc_isar_feature(aa64_fp16, s))
11423             || (size == 3 && !is_q)) {
11424             unallocated_encoding(s);
11425             return;
11426         }
11427         feature = dc_isar_feature(aa64_fcma, s);
11428         break;
11429     case 0x1d: /* BFMMLA */
11430         if (size != MO_16 || !is_q) {
11431             unallocated_encoding(s);
11432             return;
11433         }
11434         feature = dc_isar_feature(aa64_bf16, s);
11435         break;
11436     case 0x1f:
11437         switch (size) {
11438         case 1: /* BFDOT */
11439         case 3: /* BFMLAL{B,T} */
11440             feature = dc_isar_feature(aa64_bf16, s);
11441             break;
11442         default:
11443             unallocated_encoding(s);
11444             return;
11445         }
11446         break;
11447     default:
11448         unallocated_encoding(s);
11449         return;
11450     }
11451     if (!feature) {
11452         unallocated_encoding(s);
11453         return;
11454     }
11455     if (!fp_access_check(s)) {
11456         return;
11457     }
11458
11459     switch (opcode) {
11460     case 0x0: /* SQRDMLAH (vector) */
11461         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
11462         return;
11463
11464     case 0x1: /* SQRDMLSH (vector) */
11465         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
11466         return;
11467
11468     case 0x2: /* SDOT / UDOT */
11469         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
11470                          u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11471         return;
11472
11473     case 0x3: /* USDOT */
11474         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
11475         return;
11476
11477     case 0x04: /* SMMLA, UMMLA */
11478         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
11479                          u ? gen_helper_gvec_ummla_b
11480                          : gen_helper_gvec_smmla_b);
11481         return;
11482     case 0x05: /* USMMLA */
11483         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
11484         return;
11485
11486     case 0x8: /* FCMLA, #0 */
11487     case 0x9: /* FCMLA, #90 */
11488     case 0xa: /* FCMLA, #180 */
11489     case 0xb: /* FCMLA, #270 */
11490         rot = extract32(opcode, 0, 2);
11491         switch (size) {
11492         case 1:
11493             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
11494                               gen_helper_gvec_fcmlah);
11495             break;
11496         case 2:
11497             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11498                               gen_helper_gvec_fcmlas);
11499             break;
11500         case 3:
11501             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11502                               gen_helper_gvec_fcmlad);
11503             break;
11504         default:
11505             g_assert_not_reached();
11506         }
11507         return;
11508
11509     case 0xc: /* FCADD, #90 */
11510     case 0xe: /* FCADD, #270 */
11511         rot = extract32(opcode, 1, 1);
11512         switch (size) {
11513         case 1:
11514             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11515                               gen_helper_gvec_fcaddh);
11516             break;
11517         case 2:
11518             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11519                               gen_helper_gvec_fcadds);
11520             break;
11521         case 3:
11522             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11523                               gen_helper_gvec_fcaddd);
11524             break;
11525         default:
11526             g_assert_not_reached();
11527         }
11528         return;
11529
11530     case 0xd: /* BFMMLA */
11531         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
11532         return;
11533     case 0xf:
11534         switch (size) {
11535         case 1: /* BFDOT */
11536             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
11537             break;
11538         case 3: /* BFMLAL{B,T} */
11539             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
11540                               gen_helper_gvec_bfmlal);
11541             break;
11542         default:
11543             g_assert_not_reached();
11544         }
11545         return;
11546
11547     default:
11548         g_assert_not_reached();
11549     }
11550 }
11551
11552 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11553                                   int size, int rn, int rd)
11554 {
11555     /* Handle 2-reg-misc ops which are widening (so each size element
11556      * in the source becomes a 2*size element in the destination.
11557      * The only instruction like this is FCVTL.
11558      */
11559     int pass;
11560
11561     if (size == 3) {
11562         /* 32 -> 64 bit fp conversion */
11563         TCGv_i64 tcg_res[2];
11564         int srcelt = is_q ? 2 : 0;
11565
11566         for (pass = 0; pass < 2; pass++) {
11567             TCGv_i32 tcg_op = tcg_temp_new_i32();
11568             tcg_res[pass] = tcg_temp_new_i64();
11569
11570             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11571             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11572         }
11573         for (pass = 0; pass < 2; pass++) {
11574             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11575         }
11576     } else {
11577         /* 16 -> 32 bit fp conversion */
11578         int srcelt = is_q ? 4 : 0;
11579         TCGv_i32 tcg_res[4];
11580         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
11581         TCGv_i32 ahp = get_ahp_flag();
11582
11583         for (pass = 0; pass < 4; pass++) {
11584             tcg_res[pass] = tcg_temp_new_i32();
11585
11586             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11587             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11588                                            fpst, ahp);
11589         }
11590         for (pass = 0; pass < 4; pass++) {
11591             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11592         }
11593     }
11594 }
11595
11596 static void handle_rev(DisasContext *s, int opcode, bool u,
11597                        bool is_q, int size, int rn, int rd)
11598 {
11599     int op = (opcode << 1) | u;
11600     int opsz = op + size;
11601     int grp_size = 3 - opsz;
11602     int dsize = is_q ? 128 : 64;
11603     int i;
11604
11605     if (opsz >= 3) {
11606         unallocated_encoding(s);
11607         return;
11608     }
11609
11610     if (!fp_access_check(s)) {
11611         return;
11612     }
11613
11614     if (size == 0) {
11615         /* Special case bytes, use bswap op on each group of elements */
11616         int groups = dsize / (8 << grp_size);
11617
11618         for (i = 0; i < groups; i++) {
11619             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11620
11621             read_vec_element(s, tcg_tmp, rn, i, grp_size);
11622             switch (grp_size) {
11623             case MO_16:
11624                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11625                 break;
11626             case MO_32:
11627                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11628                 break;
11629             case MO_64:
11630                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11631                 break;
11632             default:
11633                 g_assert_not_reached();
11634             }
11635             write_vec_element(s, tcg_tmp, rd, i, grp_size);
11636         }
11637         clear_vec_high(s, is_q, rd);
11638     } else {
11639         int revmask = (1 << grp_size) - 1;
11640         int esize = 8 << size;
11641         int elements = dsize / esize;
11642         TCGv_i64 tcg_rn = tcg_temp_new_i64();
11643         TCGv_i64 tcg_rd[2];
11644
11645         for (i = 0; i < 2; i++) {
11646             tcg_rd[i] = tcg_temp_new_i64();
11647             tcg_gen_movi_i64(tcg_rd[i], 0);
11648         }
11649
11650         for (i = 0; i < elements; i++) {
11651             int e_rev = (i & 0xf) ^ revmask;
11652             int w = (e_rev * esize) / 64;
11653             int o = (e_rev * esize) % 64;
11654
11655             read_vec_element(s, tcg_rn, rn, i, size);
11656             tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
11657         }
11658
11659         for (i = 0; i < 2; i++) {
11660             write_vec_element(s, tcg_rd[i], rd, i, MO_64);
11661         }
11662         clear_vec_high(s, true, rd);
11663     }
11664 }
11665
11666 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11667                                   bool is_q, int size, int rn, int rd)
11668 {
11669     /* Implement the pairwise operations from 2-misc:
11670      * SADDLP, UADDLP, SADALP, UADALP.
11671      * These all add pairs of elements in the input to produce a
11672      * double-width result element in the output (possibly accumulating).
11673      */
11674     bool accum = (opcode == 0x6);
11675     int maxpass = is_q ? 2 : 1;
11676     int pass;
11677     TCGv_i64 tcg_res[2];
11678
11679     if (size == 2) {
11680         /* 32 + 32 -> 64 op */
11681         MemOp memop = size + (u ? 0 : MO_SIGN);
11682
11683         for (pass = 0; pass < maxpass; pass++) {
11684             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11685             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11686
11687             tcg_res[pass] = tcg_temp_new_i64();
11688
11689             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11690             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11691             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11692             if (accum) {
11693                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11694                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11695             }
11696         }
11697     } else {
11698         for (pass = 0; pass < maxpass; pass++) {
11699             TCGv_i64 tcg_op = tcg_temp_new_i64();
11700             NeonGenOne64OpFn *genfn;
11701             static NeonGenOne64OpFn * const fns[2][2] = {
11702                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11703                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
11704             };
11705
11706             genfn = fns[size][u];
11707
11708             tcg_res[pass] = tcg_temp_new_i64();
11709
11710             read_vec_element(s, tcg_op, rn, pass, MO_64);
11711             genfn(tcg_res[pass], tcg_op);
11712
11713             if (accum) {
11714                 read_vec_element(s, tcg_op, rd, pass, MO_64);
11715                 if (size == 0) {
11716                     gen_helper_neon_addl_u16(tcg_res[pass],
11717                                              tcg_res[pass], tcg_op);
11718                 } else {
11719                     gen_helper_neon_addl_u32(tcg_res[pass],
11720                                              tcg_res[pass], tcg_op);
11721                 }
11722             }
11723         }
11724     }
11725     if (!is_q) {
11726         tcg_res[1] = tcg_constant_i64(0);
11727     }
11728     for (pass = 0; pass < 2; pass++) {
11729         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11730     }
11731 }
11732
11733 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11734 {
11735     /* Implement SHLL and SHLL2 */
11736     int pass;
11737     int part = is_q ? 2 : 0;
11738     TCGv_i64 tcg_res[2];
11739
11740     for (pass = 0; pass < 2; pass++) {
11741         static NeonGenWidenFn * const widenfns[3] = {
11742             gen_helper_neon_widen_u8,
11743             gen_helper_neon_widen_u16,
11744             tcg_gen_extu_i32_i64,
11745         };
11746         NeonGenWidenFn *widenfn = widenfns[size];
11747         TCGv_i32 tcg_op = tcg_temp_new_i32();
11748
11749         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11750         tcg_res[pass] = tcg_temp_new_i64();
11751         widenfn(tcg_res[pass], tcg_op);
11752         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11753     }
11754
11755     for (pass = 0; pass < 2; pass++) {
11756         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11757     }
11758 }
11759
11760 /* AdvSIMD two reg misc
11761  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11762  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11763  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11764  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11765  */
11766 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11767 {
11768     int size = extract32(insn, 22, 2);
11769     int opcode = extract32(insn, 12, 5);
11770     bool u = extract32(insn, 29, 1);
11771     bool is_q = extract32(insn, 30, 1);
11772     int rn = extract32(insn, 5, 5);
11773     int rd = extract32(insn, 0, 5);
11774     bool need_fpstatus = false;
11775     int rmode = -1;
11776     TCGv_i32 tcg_rmode;
11777     TCGv_ptr tcg_fpstatus;
11778
11779     switch (opcode) {
11780     case 0x0: /* REV64, REV32 */
11781     case 0x1: /* REV16 */
11782         handle_rev(s, opcode, u, is_q, size, rn, rd);
11783         return;
11784     case 0x5: /* CNT, NOT, RBIT */
11785         if (u && size == 0) {
11786             /* NOT */
11787             break;
11788         } else if (u && size == 1) {
11789             /* RBIT */
11790             break;
11791         } else if (!u && size == 0) {
11792             /* CNT */
11793             break;
11794         }
11795         unallocated_encoding(s);
11796         return;
11797     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11798     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11799         if (size == 3) {
11800             unallocated_encoding(s);
11801             return;
11802         }
11803         if (!fp_access_check(s)) {
11804             return;
11805         }
11806
11807         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11808         return;
11809     case 0x4: /* CLS, CLZ */
11810         if (size == 3) {
11811             unallocated_encoding(s);
11812             return;
11813         }
11814         break;
11815     case 0x2: /* SADDLP, UADDLP */
11816     case 0x6: /* SADALP, UADALP */
11817         if (size == 3) {
11818             unallocated_encoding(s);
11819             return;
11820         }
11821         if (!fp_access_check(s)) {
11822             return;
11823         }
11824         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
11825         return;
11826     case 0x13: /* SHLL, SHLL2 */
11827         if (u == 0 || size == 3) {
11828             unallocated_encoding(s);
11829             return;
11830         }
11831         if (!fp_access_check(s)) {
11832             return;
11833         }
11834         handle_shll(s, is_q, size, rn, rd);
11835         return;
11836     case 0xa: /* CMLT */
11837         if (u == 1) {
11838             unallocated_encoding(s);
11839             return;
11840         }
11841         /* fall through */
11842     case 0x8: /* CMGT, CMGE */
11843     case 0x9: /* CMEQ, CMLE */
11844     case 0xb: /* ABS, NEG */
11845         if (size == 3 && !is_q) {
11846             unallocated_encoding(s);
11847             return;
11848         }
11849         break;
11850     case 0x3: /* SUQADD, USQADD */
11851         if (size == 3 && !is_q) {
11852             unallocated_encoding(s);
11853             return;
11854         }
11855         if (!fp_access_check(s)) {
11856             return;
11857         }
11858         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
11859         return;
11860     case 0x7: /* SQABS, SQNEG */
11861         if (size == 3 && !is_q) {
11862             unallocated_encoding(s);
11863             return;
11864         }
11865         break;
11866     case 0xc ... 0xf:
11867     case 0x16 ... 0x1f:
11868     {
11869         /* Floating point: U, size[1] and opcode indicate operation;
11870          * size[0] indicates single or double precision.
11871          */
11872         int is_double = extract32(size, 0, 1);
11873         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
11874         size = is_double ? 3 : 2;
11875         switch (opcode) {
11876         case 0x2f: /* FABS */
11877         case 0x6f: /* FNEG */
11878             if (size == 3 && !is_q) {
11879                 unallocated_encoding(s);
11880                 return;
11881             }
11882             break;
11883         case 0x1d: /* SCVTF */
11884         case 0x5d: /* UCVTF */
11885         {
11886             bool is_signed = (opcode == 0x1d) ? true : false;
11887             int elements = is_double ? 2 : is_q ? 4 : 2;
11888             if (is_double && !is_q) {
11889                 unallocated_encoding(s);
11890                 return;
11891             }
11892             if (!fp_access_check(s)) {
11893                 return;
11894             }
11895             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
11896             return;
11897         }
11898         case 0x2c: /* FCMGT (zero) */
11899         case 0x2d: /* FCMEQ (zero) */
11900         case 0x2e: /* FCMLT (zero) */
11901         case 0x6c: /* FCMGE (zero) */
11902         case 0x6d: /* FCMLE (zero) */
11903             if (size == 3 && !is_q) {
11904                 unallocated_encoding(s);
11905                 return;
11906             }
11907             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
11908             return;
11909         case 0x7f: /* FSQRT */
11910             if (size == 3 && !is_q) {
11911                 unallocated_encoding(s);
11912                 return;
11913             }
11914             break;
11915         case 0x1a: /* FCVTNS */
11916         case 0x1b: /* FCVTMS */
11917         case 0x3a: /* FCVTPS */
11918         case 0x3b: /* FCVTZS */
11919         case 0x5a: /* FCVTNU */
11920         case 0x5b: /* FCVTMU */
11921         case 0x7a: /* FCVTPU */
11922         case 0x7b: /* FCVTZU */
11923             need_fpstatus = true;
11924             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11925             if (size == 3 && !is_q) {
11926                 unallocated_encoding(s);
11927                 return;
11928             }
11929             break;
11930         case 0x5c: /* FCVTAU */
11931         case 0x1c: /* FCVTAS */
11932             need_fpstatus = true;
11933             rmode = FPROUNDING_TIEAWAY;
11934             if (size == 3 && !is_q) {
11935                 unallocated_encoding(s);
11936                 return;
11937             }
11938             break;
11939         case 0x3c: /* URECPE */
11940             if (size == 3) {
11941                 unallocated_encoding(s);
11942                 return;
11943             }
11944             /* fall through */
11945         case 0x3d: /* FRECPE */
11946         case 0x7d: /* FRSQRTE */
11947             if (size == 3 && !is_q) {
11948                 unallocated_encoding(s);
11949                 return;
11950             }
11951             if (!fp_access_check(s)) {
11952                 return;
11953             }
11954             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
11955             return;
11956         case 0x56: /* FCVTXN, FCVTXN2 */
11957             if (size == 2) {
11958                 unallocated_encoding(s);
11959                 return;
11960             }
11961             /* fall through */
11962         case 0x16: /* FCVTN, FCVTN2 */
11963             /* handle_2misc_narrow does a 2*size -> size operation, but these
11964              * instructions encode the source size rather than dest size.
11965              */
11966             if (!fp_access_check(s)) {
11967                 return;
11968             }
11969             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
11970             return;
11971         case 0x36: /* BFCVTN, BFCVTN2 */
11972             if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
11973                 unallocated_encoding(s);
11974                 return;
11975             }
11976             if (!fp_access_check(s)) {
11977                 return;
11978             }
11979             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
11980             return;
11981         case 0x17: /* FCVTL, FCVTL2 */
11982             if (!fp_access_check(s)) {
11983                 return;
11984             }
11985             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
11986             return;
11987         case 0x18: /* FRINTN */
11988         case 0x19: /* FRINTM */
11989         case 0x38: /* FRINTP */
11990         case 0x39: /* FRINTZ */
11991             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11992             /* fall through */
11993         case 0x59: /* FRINTX */
11994         case 0x79: /* FRINTI */
11995             need_fpstatus = true;
11996             if (size == 3 && !is_q) {
11997                 unallocated_encoding(s);
11998                 return;
11999             }
12000             break;
12001         case 0x58: /* FRINTA */
12002             rmode = FPROUNDING_TIEAWAY;
12003             need_fpstatus = true;
12004             if (size == 3 && !is_q) {
12005                 unallocated_encoding(s);
12006                 return;
12007             }
12008             break;
12009         case 0x7c: /* URSQRTE */
12010             if (size == 3) {
12011                 unallocated_encoding(s);
12012                 return;
12013             }
12014             break;
12015         case 0x1e: /* FRINT32Z */
12016         case 0x1f: /* FRINT64Z */
12017             rmode = FPROUNDING_ZERO;
12018             /* fall through */
12019         case 0x5e: /* FRINT32X */
12020         case 0x5f: /* FRINT64X */
12021             need_fpstatus = true;
12022             if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12023                 unallocated_encoding(s);
12024                 return;
12025             }
12026             break;
12027         default:
12028             unallocated_encoding(s);
12029             return;
12030         }
12031         break;
12032     }
12033     default:
12034         unallocated_encoding(s);
12035         return;
12036     }
12037
12038     if (!fp_access_check(s)) {
12039         return;
12040     }
12041
12042     if (need_fpstatus || rmode >= 0) {
12043         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
12044     } else {
12045         tcg_fpstatus = NULL;
12046     }
12047     if (rmode >= 0) {
12048         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12049     } else {
12050         tcg_rmode = NULL;
12051     }
12052
12053     switch (opcode) {
12054     case 0x5:
12055         if (u && size == 0) { /* NOT */
12056             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12057             return;
12058         }
12059         break;
12060     case 0x8: /* CMGT, CMGE */
12061         if (u) {
12062             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
12063         } else {
12064             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
12065         }
12066         return;
12067     case 0x9: /* CMEQ, CMLE */
12068         if (u) {
12069             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
12070         } else {
12071             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
12072         }
12073         return;
12074     case 0xa: /* CMLT */
12075         gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
12076         return;
12077     case 0xb:
12078         if (u) { /* ABS, NEG */
12079             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12080         } else {
12081             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12082         }
12083         return;
12084     }
12085
12086     if (size == 3) {
12087         /* All 64-bit element operations can be shared with scalar 2misc */
12088         int pass;
12089
12090         /* Coverity claims (size == 3 && !is_q) has been eliminated
12091          * from all paths leading to here.
12092          */
12093         tcg_debug_assert(is_q);
12094         for (pass = 0; pass < 2; pass++) {
12095             TCGv_i64 tcg_op = tcg_temp_new_i64();
12096             TCGv_i64 tcg_res = tcg_temp_new_i64();
12097
12098             read_vec_element(s, tcg_op, rn, pass, MO_64);
12099
12100             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12101                             tcg_rmode, tcg_fpstatus);
12102
12103             write_vec_element(s, tcg_res, rd, pass, MO_64);
12104         }
12105     } else {
12106         int pass;
12107
12108         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12109             TCGv_i32 tcg_op = tcg_temp_new_i32();
12110             TCGv_i32 tcg_res = tcg_temp_new_i32();
12111
12112             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12113
12114             if (size == 2) {
12115                 /* Special cases for 32 bit elements */
12116                 switch (opcode) {
12117                 case 0x4: /* CLS */
12118                     if (u) {
12119                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12120                     } else {
12121                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
12122                     }
12123                     break;
12124                 case 0x7: /* SQABS, SQNEG */
12125                     if (u) {
12126                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12127                     } else {
12128                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12129                     }
12130                     break;
12131                 case 0x2f: /* FABS */
12132                     gen_helper_vfp_abss(tcg_res, tcg_op);
12133                     break;
12134                 case 0x6f: /* FNEG */
12135                     gen_helper_vfp_negs(tcg_res, tcg_op);
12136                     break;
12137                 case 0x7f: /* FSQRT */
12138                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12139                     break;
12140                 case 0x1a: /* FCVTNS */
12141                 case 0x1b: /* FCVTMS */
12142                 case 0x1c: /* FCVTAS */
12143                 case 0x3a: /* FCVTPS */
12144                 case 0x3b: /* FCVTZS */
12145                     gen_helper_vfp_tosls(tcg_res, tcg_op,
12146                                          tcg_constant_i32(0), tcg_fpstatus);
12147                     break;
12148                 case 0x5a: /* FCVTNU */
12149                 case 0x5b: /* FCVTMU */
12150                 case 0x5c: /* FCVTAU */
12151                 case 0x7a: /* FCVTPU */
12152                 case 0x7b: /* FCVTZU */
12153                     gen_helper_vfp_touls(tcg_res, tcg_op,
12154                                          tcg_constant_i32(0), tcg_fpstatus);
12155                     break;
12156                 case 0x18: /* FRINTN */
12157                 case 0x19: /* FRINTM */
12158                 case 0x38: /* FRINTP */
12159                 case 0x39: /* FRINTZ */
12160                 case 0x58: /* FRINTA */
12161                 case 0x79: /* FRINTI */
12162                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12163                     break;
12164                 case 0x59: /* FRINTX */
12165                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12166                     break;
12167                 case 0x7c: /* URSQRTE */
12168                     gen_helper_rsqrte_u32(tcg_res, tcg_op);
12169                     break;
12170                 case 0x1e: /* FRINT32Z */
12171                 case 0x5e: /* FRINT32X */
12172                     gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12173                     break;
12174                 case 0x1f: /* FRINT64Z */
12175                 case 0x5f: /* FRINT64X */
12176                     gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12177                     break;
12178                 default:
12179                     g_assert_not_reached();
12180                 }
12181             } else {
12182                 /* Use helpers for 8 and 16 bit elements */
12183                 switch (opcode) {
12184                 case 0x5: /* CNT, RBIT */
12185                     /* For these two insns size is part of the opcode specifier
12186                      * (handled earlier); they always operate on byte elements.
12187                      */
12188                     if (u) {
12189                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12190                     } else {
12191                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12192                     }
12193                     break;
12194                 case 0x7: /* SQABS, SQNEG */
12195                 {
12196                     NeonGenOneOpEnvFn *genfn;
12197                     static NeonGenOneOpEnvFn * const fns[2][2] = {
12198                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12199                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12200                     };
12201                     genfn = fns[size][u];
12202                     genfn(tcg_res, cpu_env, tcg_op);
12203                     break;
12204                 }
12205                 case 0x4: /* CLS, CLZ */
12206                     if (u) {
12207                         if (size == 0) {
12208                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
12209                         } else {
12210                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
12211                         }
12212                     } else {
12213                         if (size == 0) {
12214                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
12215                         } else {
12216                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
12217                         }
12218                     }
12219                     break;
12220                 default:
12221                     g_assert_not_reached();
12222                 }
12223             }
12224
12225             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12226         }
12227     }
12228     clear_vec_high(s, is_q, rd);
12229
12230     if (tcg_rmode) {
12231         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12232     }
12233 }
12234
12235 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12236  *
12237  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12238  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12239  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12240  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12241  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12242  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12243  *
12244  * This actually covers two groups where scalar access is governed by
12245  * bit 28. A bunch of the instructions (float to integral) only exist
12246  * in the vector form and are un-allocated for the scalar decode. Also
12247  * in the scalar decode Q is always 1.
12248  */
12249 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12250 {
12251     int fpop, opcode, a, u;
12252     int rn, rd;
12253     bool is_q;
12254     bool is_scalar;
12255     bool only_in_vector = false;
12256
12257     int pass;
12258     TCGv_i32 tcg_rmode = NULL;
12259     TCGv_ptr tcg_fpstatus = NULL;
12260     bool need_fpst = true;
12261     int rmode = -1;
12262
12263     if (!dc_isar_feature(aa64_fp16, s)) {
12264         unallocated_encoding(s);
12265         return;
12266     }
12267
12268     rd = extract32(insn, 0, 5);
12269     rn = extract32(insn, 5, 5);
12270
12271     a = extract32(insn, 23, 1);
12272     u = extract32(insn, 29, 1);
12273     is_scalar = extract32(insn, 28, 1);
12274     is_q = extract32(insn, 30, 1);
12275
12276     opcode = extract32(insn, 12, 5);
12277     fpop = deposit32(opcode, 5, 1, a);
12278     fpop = deposit32(fpop, 6, 1, u);
12279
12280     switch (fpop) {
12281     case 0x1d: /* SCVTF */
12282     case 0x5d: /* UCVTF */
12283     {
12284         int elements;
12285
12286         if (is_scalar) {
12287             elements = 1;
12288         } else {
12289             elements = (is_q ? 8 : 4);
12290         }
12291
12292         if (!fp_access_check(s)) {
12293             return;
12294         }
12295         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12296         return;
12297     }
12298     break;
12299     case 0x2c: /* FCMGT (zero) */
12300     case 0x2d: /* FCMEQ (zero) */
12301     case 0x2e: /* FCMLT (zero) */
12302     case 0x6c: /* FCMGE (zero) */
12303     case 0x6d: /* FCMLE (zero) */
12304         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12305         return;
12306     case 0x3d: /* FRECPE */
12307     case 0x3f: /* FRECPX */
12308         break;
12309     case 0x18: /* FRINTN */
12310         only_in_vector = true;
12311         rmode = FPROUNDING_TIEEVEN;
12312         break;
12313     case 0x19: /* FRINTM */
12314         only_in_vector = true;
12315         rmode = FPROUNDING_NEGINF;
12316         break;
12317     case 0x38: /* FRINTP */
12318         only_in_vector = true;
12319         rmode = FPROUNDING_POSINF;
12320         break;
12321     case 0x39: /* FRINTZ */
12322         only_in_vector = true;
12323         rmode = FPROUNDING_ZERO;
12324         break;
12325     case 0x58: /* FRINTA */
12326         only_in_vector = true;
12327         rmode = FPROUNDING_TIEAWAY;
12328         break;
12329     case 0x59: /* FRINTX */
12330     case 0x79: /* FRINTI */
12331         only_in_vector = true;
12332         /* current rounding mode */
12333         break;
12334     case 0x1a: /* FCVTNS */
12335         rmode = FPROUNDING_TIEEVEN;
12336         break;
12337     case 0x1b: /* FCVTMS */
12338         rmode = FPROUNDING_NEGINF;
12339         break;
12340     case 0x1c: /* FCVTAS */
12341         rmode = FPROUNDING_TIEAWAY;
12342         break;
12343     case 0x3a: /* FCVTPS */
12344         rmode = FPROUNDING_POSINF;
12345         break;
12346     case 0x3b: /* FCVTZS */
12347         rmode = FPROUNDING_ZERO;
12348         break;
12349     case 0x5a: /* FCVTNU */
12350         rmode = FPROUNDING_TIEEVEN;
12351         break;
12352     case 0x5b: /* FCVTMU */
12353         rmode = FPROUNDING_NEGINF;
12354         break;
12355     case 0x5c: /* FCVTAU */
12356         rmode = FPROUNDING_TIEAWAY;
12357         break;
12358     case 0x7a: /* FCVTPU */
12359         rmode = FPROUNDING_POSINF;
12360         break;
12361     case 0x7b: /* FCVTZU */
12362         rmode = FPROUNDING_ZERO;
12363         break;
12364     case 0x2f: /* FABS */
12365     case 0x6f: /* FNEG */
12366         need_fpst = false;
12367         break;
12368     case 0x7d: /* FRSQRTE */
12369     case 0x7f: /* FSQRT (vector) */
12370         break;
12371     default:
12372         unallocated_encoding(s);
12373         return;
12374     }
12375
12376
12377     /* Check additional constraints for the scalar encoding */
12378     if (is_scalar) {
12379         if (!is_q) {
12380             unallocated_encoding(s);
12381             return;
12382         }
12383         /* FRINTxx is only in the vector form */
12384         if (only_in_vector) {
12385             unallocated_encoding(s);
12386             return;
12387         }
12388     }
12389
12390     if (!fp_access_check(s)) {
12391         return;
12392     }
12393
12394     if (rmode >= 0 || need_fpst) {
12395         tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
12396     }
12397
12398     if (rmode >= 0) {
12399         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12400     }
12401
12402     if (is_scalar) {
12403         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12404         TCGv_i32 tcg_res = tcg_temp_new_i32();
12405
12406         switch (fpop) {
12407         case 0x1a: /* FCVTNS */
12408         case 0x1b: /* FCVTMS */
12409         case 0x1c: /* FCVTAS */
12410         case 0x3a: /* FCVTPS */
12411         case 0x3b: /* FCVTZS */
12412             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12413             break;
12414         case 0x3d: /* FRECPE */
12415             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12416             break;
12417         case 0x3f: /* FRECPX */
12418             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12419             break;
12420         case 0x5a: /* FCVTNU */
12421         case 0x5b: /* FCVTMU */
12422         case 0x5c: /* FCVTAU */
12423         case 0x7a: /* FCVTPU */
12424         case 0x7b: /* FCVTZU */
12425             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12426             break;
12427         case 0x6f: /* FNEG */
12428             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12429             break;
12430         case 0x7d: /* FRSQRTE */
12431             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12432             break;
12433         default:
12434             g_assert_not_reached();
12435         }
12436
12437         /* limit any sign extension going on */
12438         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12439         write_fp_sreg(s, rd, tcg_res);
12440     } else {
12441         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12442             TCGv_i32 tcg_op = tcg_temp_new_i32();
12443             TCGv_i32 tcg_res = tcg_temp_new_i32();
12444
12445             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12446
12447             switch (fpop) {
12448             case 0x1a: /* FCVTNS */
12449             case 0x1b: /* FCVTMS */
12450             case 0x1c: /* FCVTAS */
12451             case 0x3a: /* FCVTPS */
12452             case 0x3b: /* FCVTZS */
12453                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12454                 break;
12455             case 0x3d: /* FRECPE */
12456                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12457                 break;
12458             case 0x5a: /* FCVTNU */
12459             case 0x5b: /* FCVTMU */
12460             case 0x5c: /* FCVTAU */
12461             case 0x7a: /* FCVTPU */
12462             case 0x7b: /* FCVTZU */
12463                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12464                 break;
12465             case 0x18: /* FRINTN */
12466             case 0x19: /* FRINTM */
12467             case 0x38: /* FRINTP */
12468             case 0x39: /* FRINTZ */
12469             case 0x58: /* FRINTA */
12470             case 0x79: /* FRINTI */
12471                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12472                 break;
12473             case 0x59: /* FRINTX */
12474                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12475                 break;
12476             case 0x2f: /* FABS */
12477                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12478                 break;
12479             case 0x6f: /* FNEG */
12480                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12481                 break;
12482             case 0x7d: /* FRSQRTE */
12483                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12484                 break;
12485             case 0x7f: /* FSQRT */
12486                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12487                 break;
12488             default:
12489                 g_assert_not_reached();
12490             }
12491
12492             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12493         }
12494
12495         clear_vec_high(s, is_q, rd);
12496     }
12497
12498     if (tcg_rmode) {
12499         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12500     }
12501 }
12502
12503 /* AdvSIMD scalar x indexed element
12504  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12505  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12506  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12507  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12508  * AdvSIMD vector x indexed element
12509  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12510  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12511  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12512  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
       *
       * Decode and translate one instruction from either of the two encoding
       * groups drawn above.
       *
       * @s:    translation context for the instruction being decoded
       * @insn: the 32-bit instruction word
       *
       * The operation is selected by the combined value 16 * U + opcode.
       * The function works in stages:
       *  1. classify the operation and reject unallocated combinations;
       *  2. turn size/H/L/M into an element size, an element index and a
       *     (possibly 5-bit) Rm register number;
       *  3. call fp_access_check() and return early if it fails;
       *  4. emit the operation, either through a single out-of-line gvec
       *     helper or through one of three per-element code paths
       *     (64-bit elements, non-widening 16/32-bit, widening "long" ops).
       * Every rejected encoding ends in unallocated_encoding(s) followed by
       * an immediate return.
12513  */
12514 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12515 {
12516     /* This encoding has two kinds of instruction:
12517      *  normal, where we perform elt x idxelt => elt for each
12518      *     element in the vector
12519      *  long, where we perform elt x idxelt and generate a result of
12520      *     double the width of the input element
12521      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12522      */
          /* Bit 28 is the bit that differs between the two layouts above. */
12523     bool is_scalar = extract32(insn, 28, 1);
12524     bool is_q = extract32(insn, 30, 1);
12525     bool u = extract32(insn, 29, 1);
12526     int size = extract32(insn, 22, 2);
12527     int l = extract32(insn, 21, 1);
12528     int m = extract32(insn, 20, 1);
12529     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12530     int rm = extract32(insn, 16, 4);
12531     int opcode = extract32(insn, 12, 4);
12532     int h = extract32(insn, 11, 1);
12533     int rn = extract32(insn, 5, 5);
12534     int rd = extract32(insn, 0, 5);
12535     bool is_long = false;
          /* 0 = not handled as fp below, 1 = ordinary fp, 2 = complex fp (FCMLA) */
12536     int is_fp = 0;
12537     bool is_fp16 = false;
12538     int index;
12539     TCGv_ptr fpst;
12540
          /*
           * Pass 1 over U:opcode. Reject combinations that are not allocated
           * (scalar forms of vector-only operations, missing ISA features) and
           * classify everything else through is_long / is_fp. Some cases also
           * overwrite 'size' with the element size that the index decoding
           * further down should use.
           */
12541     switch (16 * u + opcode) {
12542     case 0x08: /* MUL */
12543     case 0x10: /* MLA */
12544     case 0x14: /* MLS */
12545         if (is_scalar) {
12546             unallocated_encoding(s);
12547             return;
12548         }
12549         break;
12550     case 0x02: /* SMLAL, SMLAL2 */
12551     case 0x12: /* UMLAL, UMLAL2 */
12552     case 0x06: /* SMLSL, SMLSL2 */
12553     case 0x16: /* UMLSL, UMLSL2 */
12554     case 0x0a: /* SMULL, SMULL2 */
12555     case 0x1a: /* UMULL, UMULL2 */
12556         if (is_scalar) {
12557             unallocated_encoding(s);
12558             return;
12559         }
12560         is_long = true;
12561         break;
          /* Unlike the group above, these long ops are not rejected for is_scalar. */
12562     case 0x03: /* SQDMLAL, SQDMLAL2 */
12563     case 0x07: /* SQDMLSL, SQDMLSL2 */
12564     case 0x0b: /* SQDMULL, SQDMULL2 */
12565         is_long = true;
12566         break;
12567     case 0x0c: /* SQDMULH */
12568     case 0x0d: /* SQRDMULH */
12569         break;
12570     case 0x01: /* FMLA */
12571     case 0x05: /* FMLS */
12572     case 0x09: /* FMUL */
12573     case 0x19: /* FMULX */
12574         is_fp = 1;
12575         break;
12576     case 0x1d: /* SQRDMLAH */
12577     case 0x1f: /* SQRDMLSH */
12578         if (!dc_isar_feature(aa64_rdm, s)) {
12579             unallocated_encoding(s);
12580             return;
12581         }
12582         break;
12583     case 0x0e: /* SDOT */
12584     case 0x1e: /* UDOT */
12585         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12586             unallocated_encoding(s);
12587             return;
12588         }
12589         break;
          /*
           * U = 0, opcode = 0xf: the raw size field selects between four
           * different instructions. Each case replaces 'size' with the element
           * size used for index decoding; pass 2 re-reads the raw field from
           * insn to tell the instructions apart again.
           */
12590     case 0x0f:
12591         switch (size) {
12592         case 0: /* SUDOT */
12593         case 2: /* USDOT */
12594             if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
12595                 unallocated_encoding(s);
12596                 return;
12597             }
12598             size = MO_32;
12599             break;
12600         case 1: /* BFDOT */
12601             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12602                 unallocated_encoding(s);
12603                 return;
12604             }
12605             size = MO_32;
12606             break;
12607         case 3: /* BFMLAL{B,T} */
12608             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12609                 unallocated_encoding(s);
12610                 return;
12611             }
12612             /* can't set is_fp without other incorrect size checks */
12613             size = MO_16;
12614             break;
12615         default:
12616             unallocated_encoding(s);
12617             return;
12618         }
12619         break;
12620     case 0x11: /* FCMLA #0 */
12621     case 0x13: /* FCMLA #90 */
12622     case 0x15: /* FCMLA #180 */
12623     case 0x17: /* FCMLA #270 */
12624         if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12625             unallocated_encoding(s);
12626             return;
12627         }
12628         is_fp = 2;
12629         break;
12630     case 0x00: /* FMLAL */
12631     case 0x04: /* FMLSL */
12632     case 0x18: /* FMLAL2 */
12633     case 0x1c: /* FMLSL2 */
12634         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12635             unallocated_encoding(s);
12636             return;
12637         }
12638         size = MO_16;
12639         /* is_fp, but we pass cpu_env not fp_status.  */
12640         break;
12641     default:
12642         unallocated_encoding(s);
12643         return;
12644     }
12645
          /*
           * Validate 'size' according to the class chosen above, and work out
           * whether the operation needs the half-precision feature.
           */
12646     switch (is_fp) {
12647     case 1: /* normal fp */
12648         /* convert insn encoded size to MemOp size */
12649         switch (size) {
12650         case 0: /* half-precision */
12651             size = MO_16;
12652             is_fp16 = true;
12653             break;
12654         case MO_32: /* single precision */
12655         case MO_64: /* double precision */
12656             break;
12657         default:
12658             unallocated_encoding(s);
12659             return;
12660         }
12661         break;
12662
12663     case 2: /* complex fp */
12664         /* Each indexable element is a complex pair.  */
12665         size += 1;
              /*
               * After the increment, MO_32 is the half-precision form (it sets
               * is_fp16) and MO_64 is the only other accepted form. The
               * half-precision form additionally rejects H == 1 when Q == 0.
               */
12666         switch (size) {
12667         case MO_32:
12668             if (h && !is_q) {
12669                 unallocated_encoding(s);
12670                 return;
12671             }
12672             is_fp16 = true;
12673             break;
12674         case MO_64:
12675             break;
12676         default:
12677             unallocated_encoding(s);
12678             return;
12679         }
12680         break;
12681
          /*
           * Besides the plain integer operations, this branch is also taken by
           * the dot-product, BFMLAL and FMLAL/FMLSL cases, which left is_fp at 0
           * in pass 1. Only MO_8 and MO_64 are rejected here.
           */
12682     default: /* integer */
12683         switch (size) {
12684         case MO_8:
12685         case MO_64:
12686             unallocated_encoding(s);
12687             return;
12688         }
12689         break;
12690     }
12691     if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12692         unallocated_encoding(s);
12693         return;
12694     }
12695
12696     /* Given MemOp size, adjust register and indexing.  */
          /*
           * MO_16: 3-bit index H:L:M; Rm keeps its 4-bit value.
           * MO_32: 2-bit index H:L; M becomes bit 4 of Rm.
           * MO_64: 1-bit index H; M becomes bit 4 of Rm; L must be 0 and Q
           *        must be 1, otherwise the encoding is unallocated.
           */
12697     switch (size) {
12698     case MO_16:
12699         index = h << 2 | l << 1 | m;
12700         break;
12701     case MO_32:
12702         index = h << 1 | l;
12703         rm |= m << 4;
12704         break;
12705     case MO_64:
12706         if (l || !is_q) {
12707             unallocated_encoding(s);
12708             return;
12709         }
12710         index = h;
12711         rm |= m << 4;
12712         break;
12713     default:
12714         g_assert_not_reached();
12715     }
12716
          /* All decode-time rejections are done; nothing below is emitted if this fails. */
12717     if (!fp_access_check(s)) {
12718         return;
12719     }
12720
          /*
           * Only operations with is_fp != 0 get a float-status pointer, chosen by
           * is_fp16. For everything else fpst is NULL.
           */
12721     if (is_fp) {
12722         fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
12723     } else {
12724         fpst = NULL;
12725     }
12726
          /*
           * Pass 2 over U:opcode: operations implemented by one out-of-line gvec
           * helper are emitted here and return. Any opcode without a case, or
           * one that breaks out, continues to the per-element code below.
           */
12727     switch (16 * u + opcode) {
12728     case 0x0e: /* SDOT */
12729     case 0x1e: /* UDOT */
12730         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12731                          u ? gen_helper_gvec_udot_idx_b
12732                          : gen_helper_gvec_sdot_idx_b);
12733         return;
12734     case 0x0f:
              /* 'size' was overwritten in pass 1, so re-read the raw field. */
12735         switch (extract32(insn, 22, 2)) {
12736         case 0: /* SUDOT */
12737             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12738                              gen_helper_gvec_sudot_idx_b);
12739             return;
12740         case 1: /* BFDOT */
12741             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12742                              gen_helper_gvec_bfdot_idx);
12743             return;
12744         case 2: /* USDOT */
12745             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12746                              gen_helper_gvec_usdot_idx_b);
12747             return;
12748         case 3: /* BFMLAL{B,T} */
                  /*
                   * A literal 1 is passed where the sibling calls pass is_q,
                   * and the Q bit is folded into the low bit of the data
                   * argument instead.
                   * NOTE(review): presumably Q selects the B vs T form inside
                   * the helper - confirm against gen_helper_gvec_bfmlal_idx.
                   */
12749             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
12750                               gen_helper_gvec_bfmlal_idx);
12751             return;
12752         }
12753         g_assert_not_reached();
12754     case 0x11: /* FCMLA #0 */
12755     case 0x13: /* FCMLA #90 */
12756     case 0x15: /* FCMLA #180 */
12757     case 0x17: /* FCMLA #270 */
12758         {
                  /* rot is insn bits [14:13]; data packs it below the index. */
12759             int rot = extract32(insn, 13, 2);
12760             int data = (index << 2) | rot;
12761             tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
12762                                vec_full_reg_offset(s, rn),
12763                                vec_full_reg_offset(s, rm),
12764                                vec_full_reg_offset(s, rd), fpst,
12765                                is_q ? 16 : 8, vec_full_reg_size(s), data,
12766                                size == MO_64
12767                                ? gen_helper_gvec_fcmlas_idx
12768                                : gen_helper_gvec_fcmlah_idx);
12769         }
12770         return;
12771
12772     case 0x00: /* FMLAL */
12773     case 0x04: /* FMLSL */
12774     case 0x18: /* FMLAL2 */
12775     case 0x1c: /* FMLSL2 */
12776         {
                  /*
                   * is_s is opcode bit 2, which is set for the two FMLSL
                   * opcodes and clear for the two FMLAL ones; is_2 is the U
                   * bit, which is set for the "2" variants. data packs both
                   * below the index. The helper is given cpu_env, not fpst.
                   */
12777             int is_s = extract32(opcode, 2, 1);
12778             int is_2 = u;
12779             int data = (index << 2) | (is_2 << 1) | is_s;
12780             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12781                                vec_full_reg_offset(s, rn),
12782                                vec_full_reg_offset(s, rm), cpu_env,
12783                                is_q ? 16 : 8, vec_full_reg_size(s),
12784                                data, gen_helper_gvec_fmlal_idx_a64);
12785         }
12786         return;
12787
          /*
           * NOTE(review): pass 1 rejects is_scalar for MUL/MLA/MLS and never sets
           * is_long for them, so the guards in the next three cases look always
           * true at this point. If so, their 'break' statements and the
           * MUL/MLA/MLS case in the per-element loop further down are
           * unreachable - confirm before relying on either path.
           *
           * The helper tables are indexed by size - 1. Integer operations only
           * get this far with MO_16 or MO_32, so (assuming MO_16 == 1 - TODO
           * confirm) the _d entries are never selected from here.
           */
12788     case 0x08: /* MUL */
12789         if (!is_long && !is_scalar) {
12790             static gen_helper_gvec_3 * const fns[3] = {
12791                 gen_helper_gvec_mul_idx_h,
12792                 gen_helper_gvec_mul_idx_s,
12793                 gen_helper_gvec_mul_idx_d,
12794             };
12795             tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
12796                                vec_full_reg_offset(s, rn),
12797                                vec_full_reg_offset(s, rm),
12798                                is_q ? 16 : 8, vec_full_reg_size(s),
12799                                index, fns[size - 1]);
12800             return;
12801         }
12802         break;
12803
12804     case 0x10: /* MLA */
12805         if (!is_long && !is_scalar) {
12806             static gen_helper_gvec_4 * const fns[3] = {
12807                 gen_helper_gvec_mla_idx_h,
12808                 gen_helper_gvec_mla_idx_s,
12809                 gen_helper_gvec_mla_idx_d,
12810             };
12811             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
12812                                vec_full_reg_offset(s, rn),
12813                                vec_full_reg_offset(s, rm),
12814                                vec_full_reg_offset(s, rd),
12815                                is_q ? 16 : 8, vec_full_reg_size(s),
12816                                index, fns[size - 1]);
12817             return;
12818         }
12819         break;
12820
12821     case 0x14: /* MLS */
12822         if (!is_long && !is_scalar) {
12823             static gen_helper_gvec_4 * const fns[3] = {
12824                 gen_helper_gvec_mls_idx_h,
12825                 gen_helper_gvec_mls_idx_s,
12826                 gen_helper_gvec_mls_idx_d,
12827             };
12828             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
12829                                vec_full_reg_offset(s, rn),
12830                                vec_full_reg_offset(s, rm),
12831                                vec_full_reg_offset(s, rd),
12832                                is_q ? 16 : 8, vec_full_reg_size(s),
12833                                index, fns[size - 1]);
12834             return;
12835         }
12836         break;
12837     }
12838
          /*
           * Per-element code paths. The first handles 64-bit elements; the
           * assert below states that only non-long fp operations with Q set
           * are expected here.
           * NOTE(review): the literal 3 is used where the decode above used
           * MO_64 (likewise 1 and 2 for MO_16 and MO_32 further down);
           * presumably these are the MemOp enum values - confirm.
           */
12839     if (size == 3) {
12840         TCGv_i64 tcg_idx = tcg_temp_new_i64();
12841         int pass;
12842
12843         assert(is_fp && is_q && !is_long);
12844
              /* The indexed element of Rm is loaded once and reused by every pass. */
12845         read_vec_element(s, tcg_idx, rm, index, MO_64);
12846
12847         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12848             TCGv_i64 tcg_op = tcg_temp_new_i64();
12849             TCGv_i64 tcg_res = tcg_temp_new_i64();
12850
12851             read_vec_element(s, tcg_op, rn, pass, MO_64);
12852
12853             switch (16 * u + opcode) {
12854             case 0x05: /* FMLS */
12855                 /* As usual for ARM, separate negation for fused multiply-add */
12856                 gen_helper_vfp_negd(tcg_op, tcg_op);
12857                 /* fall through */
12858             case 0x01: /* FMLA */
                      /* Rd is the accumulator: read it, then multiply-add into it. */
12859                 read_vec_element(s, tcg_res, rd, pass, MO_64);
12860                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
12861                 break;
12862             case 0x09: /* FMUL */
12863                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
12864                 break;
12865             case 0x19: /* FMULX */
12866                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
12867                 break;
12868             default:
12869                 g_assert_not_reached();
12870             }
12871
12872             write_vec_element(s, tcg_res, rd, pass, MO_64);
12873         }
12874
12875         clear_vec_high(s, !is_scalar, rd);
12876     } else if (!is_long) {
12877         /* 32 bit floating point, or 16 or 32 bit integer.
12878          * For the 16 bit scalar case we use the usual Neon helpers and
12879          * rely on the fact that 0 op 0 == 0 with no side effects.
12880          */
12881         TCGv_i32 tcg_idx = tcg_temp_new_i32();
12882         int pass, maxpasses;
12883
              /*
               * Vector forms are processed one 32-bit chunk per pass (the loop
               * reads Rn with MO_32): two passes without Q, four with Q. The
               * scalar form needs a single pass.
               */
12884         if (is_scalar) {
12885             maxpasses = 1;
12886         } else {
12887             maxpasses = is_q ? 4 : 2;
12888         }
12889
12890         read_vec_element_i32(s, tcg_idx, rm, index, size);
12891
12892         if (size == 1 && !is_scalar) {
12893             /* The simplest way to handle the 16x16 indexed ops is to duplicate
12894              * the index into both halves of the 32 bit tcg_idx and then use
12895              * the usual Neon helpers.
12896              */
12897             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
12898         }
12899
12900         for (pass = 0; pass < maxpasses; pass++) {
12901             TCGv_i32 tcg_op = tcg_temp_new_i32();
12902             TCGv_i32 tcg_res = tcg_temp_new_i32();
12903
12904             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
12905
12906             switch (16 * u + opcode) {
12907             case 0x08: /* MUL */
12908             case 0x10: /* MLA */
12909             case 0x14: /* MLS */
12910             {
                      /*
                       * fns[size - 1][is_sub]: the row picks the 16-bit Neon
                       * helpers or the plain 32-bit TCG ops, the column picks
                       * add (MLA) or subtract (MLS).
                       */
12911                 static NeonGenTwoOpFn * const fns[2][2] = {
12912                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
12913                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
12914                 };
12915                 NeonGenTwoOpFn *genfn;
12916                 bool is_sub = opcode == 0x4;
12917
12918                 if (size == 1) {
12919                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
12920                 } else {
12921                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
12922                 }
                      /* Plain MUL has no accumulate step. */
12923                 if (opcode == 0x8) {
12924                     break;
12925                 }
                      /* tcg_op is reused to hold the accumulator read from Rd. */
12926                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
12927                 genfn = fns[size - 1][is_sub];
12928                 genfn(tcg_res, tcg_op, tcg_res);
12929                 break;
12930             }
12931             case 0x05: /* FMLS */
12932             case 0x01: /* FMLA */
12933                 read_vec_element_i32(s, tcg_res, rd, pass,
12934                                      is_scalar ? size : MO_32);
12935                 switch (size) {
12936                 case 1:
12937                     if (opcode == 0x5) {
12938                         /* As usual for ARM, separate negation for fused
12939                          * multiply-add */
                              /* The mask flips bit 15 of each 16-bit half. */
12940                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
12941                     }
12942                     if (is_scalar) {
12943                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
12944                                                    tcg_res, fpst);
12945                     } else {
12946                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
12947                                                     tcg_res, fpst);
12948                     }
12949                     break;
12950                 case 2:
12951                     if (opcode == 0x5) {
12952                         /* As usual for ARM, separate negation for
12953                          * fused multiply-add */
12954                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
12955                     }
12956                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
12957                                            tcg_res, fpst);
12958                     break;
12959                 default:
12960                     g_assert_not_reached();
12961                 }
12962                 break;
12963             case 0x09: /* FMUL */
12964                 switch (size) {
12965                 case 1:
12966                     if (is_scalar) {
12967                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
12968                                                 tcg_idx, fpst);
12969                     } else {
12970                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
12971                                                  tcg_idx, fpst);
12972                     }
12973                     break;
12974                 case 2:
12975                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
12976                     break;
12977                 default:
12978                     g_assert_not_reached();
12979                 }
12980                 break;
12981             case 0x19: /* FMULX */
12982                 switch (size) {
12983                 case 1:
12984                     if (is_scalar) {
12985                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
12986                                                  tcg_idx, fpst);
12987                     } else {
12988                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
12989                                                   tcg_idx, fpst);
12990                     }
12991                     break;
12992                 case 2:
12993                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
12994                     break;
12995                 default:
12996                     g_assert_not_reached();
12997                 }
12998                 break;
                  /* The saturating helpers below are given cpu_env as an argument. */
12999             case 0x0c: /* SQDMULH */
13000                 if (size == 1) {
13001                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13002                                                tcg_op, tcg_idx);
13003                 } else {
13004                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13005                                                tcg_op, tcg_idx);
13006                 }
13007                 break;
13008             case 0x0d: /* SQRDMULH */
13009                 if (size == 1) {
13010                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13011                                                 tcg_op, tcg_idx);
13012                 } else {
13013                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13014                                                 tcg_op, tcg_idx);
13015                 }
13016                 break;
13017             case 0x1d: /* SQRDMLAH */
13018                 read_vec_element_i32(s, tcg_res, rd, pass,
13019                                      is_scalar ? size : MO_32);
13020                 if (size == 1) {
13021                     gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13022                                                 tcg_op, tcg_idx, tcg_res);
13023                 } else {
13024                     gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13025                                                 tcg_op, tcg_idx, tcg_res);
13026                 }
13027                 break;
13028             case 0x1f: /* SQRDMLSH */
13029                 read_vec_element_i32(s, tcg_res, rd, pass,
13030                                      is_scalar ? size : MO_32);
13031                 if (size == 1) {
13032                     gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13033                                                 tcg_op, tcg_idx, tcg_res);
13034                 } else {
13035                     gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13036                                                 tcg_op, tcg_idx, tcg_res);
13037                 }
13038                 break;
13039             default:
13040                 g_assert_not_reached();
13041             }
13042
13043             if (is_scalar) {
13044                 write_fp_sreg(s, rd, tcg_res);
13045             } else {
13046                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13047             }
13048         }
13049
13050         clear_vec_high(s, is_q, rd);
13051     } else {
13052         /* long ops: 16x16->32 or 32x32->64 */
              /* The two 64-bit halves of the result; both are written to Rd at the end. */
13053         TCGv_i64 tcg_res[2];
13054         int pass;
              /* Opcode bit 0 is set for exactly the SQDM* long opcodes (0x3, 0x7, 0xb). */
13055         bool satop = extract32(opcode, 0, 1);
13056         MemOp memop = MO_32;
13057
              /* Inputs are read as signed for the saturating ops and whenever U == 0. */
13058         if (satop || !u) {
13059             memop |= MO_SIGN;
13060         }
13061
13062         if (size == 2) {
                  /* 32x32->64: inputs are extended to 64 bits on load and multiplied. */
13063             TCGv_i64 tcg_idx = tcg_temp_new_i64();
13064
13065             read_vec_element(s, tcg_idx, rm, index, memop);
13066
13067             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13068                 TCGv_i64 tcg_op = tcg_temp_new_i64();
13069                 TCGv_i64 tcg_passres;
13070                 int passelt;
13071
                      /*
                       * With Q set (the "2" variants) the inputs are 32-bit
                       * elements 2 and 3 of Rn instead of elements 0 and 1.
                       */
13072                 if (is_scalar) {
13073                     passelt = 0;
13074                 } else {
13075                     passelt = pass + (is_q * 2);
13076                 }
13077
13078                 read_vec_element(s, tcg_op, rn, passelt, memop);
13079
13080                 tcg_res[pass] = tcg_temp_new_i64();
13081
                      /*
                       * Opcodes 0xa (SMULL/UMULL) and 0xb (SQDMULL) produce the
                       * product directly; the others need a scratch product
                       * that is then combined with Rd.
                       */
13082                 if (opcode == 0xa || opcode == 0xb) {
13083                     /* Non-accumulating ops */
13084                     tcg_passres = tcg_res[pass];
13085                 } else {
13086                     tcg_passres = tcg_temp_new_i64();
13087                 }
13088
13089                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13090
13091                 if (satop) {
13092                     /* saturating, doubling */
                          /* Doubling is done by adding the product to itself. */
13093                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13094                                                       tcg_passres, tcg_passres);
13095                 }
13096
13097                 if (opcode == 0xa || opcode == 0xb) {
13098                     continue;
13099                 }
13100
13101                 /* Accumulating op: handle accumulate step */
13102                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13103
13104                 switch (opcode) {
13105                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13106                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13107                     break;
13108                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13109                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13110                     break;
13111                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13112                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
13113                     /* fall through */
13114                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13115                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13116                                                       tcg_res[pass],
13117                                                       tcg_passres);
13118                     break;
13119                 default:
13120                     g_assert_not_reached();
13121                 }
13122             }
13123
13124             clear_vec_high(s, !is_scalar, rd);
13125         } else {
                  /* 16x16->32, using the Neon widening-multiply helpers. */
13126             TCGv_i32 tcg_idx = tcg_temp_new_i32();
13127
13128             assert(size == 1);
13129             read_vec_element_i32(s, tcg_idx, rm, index, size);
13130
13131             if (!is_scalar) {
13132                 /* The simplest way to handle the 16x16 indexed ops is to
13133                  * duplicate the index into both halves of the 32 bit tcg_idx
13134                  * and then use the usual Neon helpers.
13135                  */
13136                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13137             }
13138
13139             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13140                 TCGv_i32 tcg_op = tcg_temp_new_i32();
13141                 TCGv_i64 tcg_passres;
13142
                      /*
                       * Vector form: each pass reads one 32-bit chunk of Rn,
                       * taken from chunks 2 and 3 when Q is set.
                       */
13143                 if (is_scalar) {
13144                     read_vec_element_i32(s, tcg_op, rn, pass, size);
13145                 } else {
13146                     read_vec_element_i32(s, tcg_op, rn,
13147                                          pass + (is_q * 2), MO_32);
13148                 }
13149
13150                 tcg_res[pass] = tcg_temp_new_i64();
13151
13152                 if (opcode == 0xa || opcode == 0xb) {
13153                     /* Non-accumulating ops */
13154                     tcg_passres = tcg_res[pass];
13155                 } else {
13156                     tcg_passres = tcg_temp_new_i64();
13157                 }
13158
13159                 if (memop & MO_SIGN) {
13160                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13161                 } else {
13162                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13163                 }
                      /* Saturating doubling: add the product to itself. */
13164                 if (satop) {
13165                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13166                                                       tcg_passres, tcg_passres);
13167                 }
13168
13169                 if (opcode == 0xa || opcode == 0xb) {
13170                     continue;
13171                 }
13172
13173                 /* Accumulating op: handle accumulate step */
13174                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13175
13176                 switch (opcode) {
13177                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13178                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13179                                              tcg_passres);
13180                     break;
13181                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13182                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13183                                              tcg_passres);
13184                     break;
13185                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13186                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13187                     /* fall through */
13188                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13189                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13190                                                       tcg_res[pass],
13191                                                       tcg_passres);
13192                     break;
13193                 default:
13194                     g_assert_not_reached();
13195                 }
13196             }
13197
                  /* Scalar 16x16->32: keep only the low 32 bits of the result. */
13198             if (is_scalar) {
13199                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13200             }
13201         }
13202
              /* Scalar forms ran a single pass, so the upper half is written as zero. */
13203         if (is_scalar) {
13204             tcg_res[1] = tcg_constant_i64(0);
13205         }
13206
13207         for (pass = 0; pass < 2; pass++) {
13208             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13209         }
13210     }
13211 }
13212
13213 /* Crypto AES
13214  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13215  * +-----------------+------+-----------+--------+-----+------+------+
13216  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13217  * +-----------------+------+-----------+--------+-----+------+------+
13218  */
13219 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13220 {
13221     int size = extract32(insn, 22, 2);
13222     int opcode = extract32(insn, 12, 5);
13223     int rn = extract32(insn, 5, 5);
13224     int rd = extract32(insn, 0, 5);
13225     gen_helper_gvec_2 *genfn2 = NULL;
13226     gen_helper_gvec_3 *genfn3 = NULL;
13227
13228     if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13229         unallocated_encoding(s);
13230         return;
13231     }
13232
13233     switch (opcode) {
13234     case 0x4: /* AESE */
13235         genfn3 = gen_helper_crypto_aese;
13236         break;
13237     case 0x6: /* AESMC */
13238         genfn2 = gen_helper_crypto_aesmc;
13239         break;
13240     case 0x5: /* AESD */
13241         genfn3 = gen_helper_crypto_aesd;
13242         break;
13243     case 0x7: /* AESIMC */
13244         genfn2 = gen_helper_crypto_aesimc;
13245         break;
13246     default:
13247         unallocated_encoding(s);
13248         return;
13249     }
13250
13251     if (!fp_access_check(s)) {
13252         return;
13253     }
13254     if (genfn2) {
13255         gen_gvec_op2_ool(s, true, rd, rn, 0, genfn2);
13256     } else {
13257         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, genfn3);
13258     }
13259 }
13260
13261 /* Crypto three-reg SHA
13262  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13263  * +-----------------+------+---+------+---+--------+-----+------+------+
13264  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13265  * +-----------------+------+---+------+---+--------+-----+------+------+
13266  */
13267 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13268 {
13269     int size = extract32(insn, 22, 2);
13270     int opcode = extract32(insn, 12, 3);
13271     int rm = extract32(insn, 16, 5);
13272     int rn = extract32(insn, 5, 5);
13273     int rd = extract32(insn, 0, 5);
13274     gen_helper_gvec_3 *genfn;
13275     bool feature;
13276
13277     if (size != 0) {
13278         unallocated_encoding(s);
13279         return;
13280     }
13281
13282     switch (opcode) {
13283     case 0: /* SHA1C */
13284         genfn = gen_helper_crypto_sha1c;
13285         feature = dc_isar_feature(aa64_sha1, s);
13286         break;
13287     case 1: /* SHA1P */
13288         genfn = gen_helper_crypto_sha1p;
13289         feature = dc_isar_feature(aa64_sha1, s);
13290         break;
13291     case 2: /* SHA1M */
13292         genfn = gen_helper_crypto_sha1m;
13293         feature = dc_isar_feature(aa64_sha1, s);
13294         break;
13295     case 3: /* SHA1SU0 */
13296         genfn = gen_helper_crypto_sha1su0;
13297         feature = dc_isar_feature(aa64_sha1, s);
13298         break;
13299     case 4: /* SHA256H */
13300         genfn = gen_helper_crypto_sha256h;
13301         feature = dc_isar_feature(aa64_sha256, s);
13302         break;
13303     case 5: /* SHA256H2 */
13304         genfn = gen_helper_crypto_sha256h2;
13305         feature = dc_isar_feature(aa64_sha256, s);
13306         break;
13307     case 6: /* SHA256SU1 */
13308         genfn = gen_helper_crypto_sha256su1;
13309         feature = dc_isar_feature(aa64_sha256, s);
13310         break;
13311     default:
13312         unallocated_encoding(s);
13313         return;
13314     }
13315
13316     if (!feature) {
13317         unallocated_encoding(s);
13318         return;
13319     }
13320
13321     if (!fp_access_check(s)) {
13322         return;
13323     }
13324     gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
13325 }
13326
13327 /* Crypto two-reg SHA
13328  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13329  * +-----------------+------+-----------+--------+-----+------+------+
13330  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13331  * +-----------------+------+-----------+--------+-----+------+------+
13332  */
13333 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13334 {
13335     int size = extract32(insn, 22, 2);
13336     int opcode = extract32(insn, 12, 5);
13337     int rn = extract32(insn, 5, 5);
13338     int rd = extract32(insn, 0, 5);
13339     gen_helper_gvec_2 *genfn;
13340     bool feature;
13341
13342     if (size != 0) {
13343         unallocated_encoding(s);
13344         return;
13345     }
13346
13347     switch (opcode) {
13348     case 0: /* SHA1H */
13349         feature = dc_isar_feature(aa64_sha1, s);
13350         genfn = gen_helper_crypto_sha1h;
13351         break;
13352     case 1: /* SHA1SU1 */
13353         feature = dc_isar_feature(aa64_sha1, s);
13354         genfn = gen_helper_crypto_sha1su1;
13355         break;
13356     case 2: /* SHA256SU0 */
13357         feature = dc_isar_feature(aa64_sha256, s);
13358         genfn = gen_helper_crypto_sha256su0;
13359         break;
13360     default:
13361         unallocated_encoding(s);
13362         return;
13363     }
13364
13365     if (!feature) {
13366         unallocated_encoding(s);
13367         return;
13368     }
13369
13370     if (!fp_access_check(s)) {
13371         return;
13372     }
13373     gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
13374 }
13375
/*
 * Emit 64-bit integer TCG ops computing d = n ^ rol(m, 1).
 * Installed as the .fni8 expansion of RAX1 in gen_gvec_rax1().
 */
static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    /* d holds the rotated m as an intermediate before n is folded in. */
    tcg_gen_rotli_i64(d, m, 1);
    tcg_gen_xor_i64(d, d, n);
}
13381
/*
 * Emit host-vector TCG ops computing, per element of size @vece,
 * d = n ^ rol(m, 1).
 * Installed as the .fniv expansion of RAX1 in gen_gvec_rax1().
 */
static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
{
    /* d holds the rotated m as an intermediate before n is folded in. */
    tcg_gen_rotli_vec(vece, d, m, 1);
    tcg_gen_xor_vec(vece, d, d, n);
}
13387
13388 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
13389                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
13390 {
13391     static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
13392     static const GVecGen3 op = {
13393         .fni8 = gen_rax1_i64,
13394         .fniv = gen_rax1_vec,
13395         .opt_opc = vecop_list,
13396         .fno = gen_helper_crypto_rax1,
13397         .vece = MO_64,
13398     };
13399     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
13400 }
13401
13402 /* Crypto three-reg SHA512
13403  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13404  * +-----------------------+------+---+---+-----+--------+------+------+
13405  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13406  * +-----------------------+------+---+---+-----+--------+------+------+
13407  */
13408 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13409 {
13410     int opcode = extract32(insn, 10, 2);
13411     int o =  extract32(insn, 14, 1);
13412     int rm = extract32(insn, 16, 5);
13413     int rn = extract32(insn, 5, 5);
13414     int rd = extract32(insn, 0, 5);
13415     bool feature;
13416     gen_helper_gvec_3 *oolfn = NULL;
13417     GVecGen3Fn *gvecfn = NULL;
13418
13419     if (o == 0) {
13420         switch (opcode) {
13421         case 0: /* SHA512H */
13422             feature = dc_isar_feature(aa64_sha512, s);
13423             oolfn = gen_helper_crypto_sha512h;
13424             break;
13425         case 1: /* SHA512H2 */
13426             feature = dc_isar_feature(aa64_sha512, s);
13427             oolfn = gen_helper_crypto_sha512h2;
13428             break;
13429         case 2: /* SHA512SU1 */
13430             feature = dc_isar_feature(aa64_sha512, s);
13431             oolfn = gen_helper_crypto_sha512su1;
13432             break;
13433         case 3: /* RAX1 */
13434             feature = dc_isar_feature(aa64_sha3, s);
13435             gvecfn = gen_gvec_rax1;
13436             break;
13437         default:
13438             g_assert_not_reached();
13439         }
13440     } else {
13441         switch (opcode) {
13442         case 0: /* SM3PARTW1 */
13443             feature = dc_isar_feature(aa64_sm3, s);
13444             oolfn = gen_helper_crypto_sm3partw1;
13445             break;
13446         case 1: /* SM3PARTW2 */
13447             feature = dc_isar_feature(aa64_sm3, s);
13448             oolfn = gen_helper_crypto_sm3partw2;
13449             break;
13450         case 2: /* SM4EKEY */
13451             feature = dc_isar_feature(aa64_sm4, s);
13452             oolfn = gen_helper_crypto_sm4ekey;
13453             break;
13454         default:
13455             unallocated_encoding(s);
13456             return;
13457         }
13458     }
13459
13460     if (!feature) {
13461         unallocated_encoding(s);
13462         return;
13463     }
13464
13465     if (!fp_access_check(s)) {
13466         return;
13467     }
13468
13469     if (oolfn) {
13470         gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
13471     } else {
13472         gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
13473     }
13474 }
13475
13476 /* Crypto two-reg SHA512
13477  *  31                                     12  11  10  9    5 4    0
13478  * +-----------------------------------------+--------+------+------+
13479  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13480  * +-----------------------------------------+--------+------+------+
13481  */
13482 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13483 {
13484     int opcode = extract32(insn, 10, 2);
13485     int rn = extract32(insn, 5, 5);
13486     int rd = extract32(insn, 0, 5);
13487     bool feature;
13488
13489     switch (opcode) {
13490     case 0: /* SHA512SU0 */
13491         feature = dc_isar_feature(aa64_sha512, s);
13492         break;
13493     case 1: /* SM4E */
13494         feature = dc_isar_feature(aa64_sm4, s);
13495         break;
13496     default:
13497         unallocated_encoding(s);
13498         return;
13499     }
13500
13501     if (!feature) {
13502         unallocated_encoding(s);
13503         return;
13504     }
13505
13506     if (!fp_access_check(s)) {
13507         return;
13508     }
13509
13510     switch (opcode) {
13511     case 0: /* SHA512SU0 */
13512         gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
13513         break;
13514     case 1: /* SM4E */
13515         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
13516         break;
13517     default:
13518         g_assert_not_reached();
13519     }
13520 }
13521
13522 /* Crypto four-register
13523  *  31               23 22 21 20  16 15  14  10 9    5 4    0
13524  * +-------------------+-----+------+---+------+------+------+
13525  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13526  * +-------------------+-----+------+---+------+------+------+
13527  */
13528 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13529 {
13530     int op0 = extract32(insn, 21, 2);
13531     int rm = extract32(insn, 16, 5);
13532     int ra = extract32(insn, 10, 5);
13533     int rn = extract32(insn, 5, 5);
13534     int rd = extract32(insn, 0, 5);
13535     bool feature;
13536
13537     switch (op0) {
13538     case 0: /* EOR3 */
13539     case 1: /* BCAX */
13540         feature = dc_isar_feature(aa64_sha3, s);
13541         break;
13542     case 2: /* SM3SS1 */
13543         feature = dc_isar_feature(aa64_sm3, s);
13544         break;
13545     default:
13546         unallocated_encoding(s);
13547         return;
13548     }
13549
13550     if (!feature) {
13551         unallocated_encoding(s);
13552         return;
13553     }
13554
13555     if (!fp_access_check(s)) {
13556         return;
13557     }
13558
13559     if (op0 < 2) {
13560         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13561         int pass;
13562
13563         tcg_op1 = tcg_temp_new_i64();
13564         tcg_op2 = tcg_temp_new_i64();
13565         tcg_op3 = tcg_temp_new_i64();
13566         tcg_res[0] = tcg_temp_new_i64();
13567         tcg_res[1] = tcg_temp_new_i64();
13568
13569         for (pass = 0; pass < 2; pass++) {
13570             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13571             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13572             read_vec_element(s, tcg_op3, ra, pass, MO_64);
13573
13574             if (op0 == 0) {
13575                 /* EOR3 */
13576                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13577             } else {
13578                 /* BCAX */
13579                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13580             }
13581             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13582         }
13583         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13584         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13585     } else {
13586         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13587
13588         tcg_op1 = tcg_temp_new_i32();
13589         tcg_op2 = tcg_temp_new_i32();
13590         tcg_op3 = tcg_temp_new_i32();
13591         tcg_res = tcg_temp_new_i32();
13592         tcg_zero = tcg_constant_i32(0);
13593
13594         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13595         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13596         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13597
13598         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13599         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13600         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13601         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13602
13603         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13604         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13605         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13606         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13607     }
13608 }
13609
13610 /* Crypto XAR
13611  *  31                   21 20  16 15    10 9    5 4    0
13612  * +-----------------------+------+--------+------+------+
13613  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13614  * +-----------------------+------+--------+------+------+
13615  */
13616 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13617 {
13618     int rm = extract32(insn, 16, 5);
13619     int imm6 = extract32(insn, 10, 6);
13620     int rn = extract32(insn, 5, 5);
13621     int rd = extract32(insn, 0, 5);
13622
13623     if (!dc_isar_feature(aa64_sha3, s)) {
13624         unallocated_encoding(s);
13625         return;
13626     }
13627
13628     if (!fp_access_check(s)) {
13629         return;
13630     }
13631
13632     gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
13633                  vec_full_reg_offset(s, rn),
13634                  vec_full_reg_offset(s, rm), imm6, 16,
13635                  vec_full_reg_size(s));
13636 }
13637
13638 /* Crypto three-reg imm2
13639  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13640  * +-----------------------+------+-----+------+--------+------+------+
13641  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13642  * +-----------------------+------+-----+------+--------+------+------+
13643  */
13644 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13645 {
13646     static gen_helper_gvec_3 * const fns[4] = {
13647         gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
13648         gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
13649     };
13650     int opcode = extract32(insn, 10, 2);
13651     int imm2 = extract32(insn, 12, 2);
13652     int rm = extract32(insn, 16, 5);
13653     int rn = extract32(insn, 5, 5);
13654     int rd = extract32(insn, 0, 5);
13655
13656     if (!dc_isar_feature(aa64_sm3, s)) {
13657         unallocated_encoding(s);
13658         return;
13659     }
13660
13661     if (!fp_access_check(s)) {
13662         return;
13663     }
13664
13665     gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
13666 }
13667
/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 *
 * Each row is { pattern, mask, fn }.  The table is searched by
 * lookup_disas_fn() from disas_data_proc_simd().
 * NOTE(review): presumably a row matches when (insn & mask) == pattern
 * and the first matching row wins -- the ordering note on simd_mod_imm
 * below depends on that; confirm against lookup_disas_fn().
 * The final all-zero row with a NULL fn terminates the table.
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    /* Vector forms */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    /* Scalar forms */
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    /* Crypto */
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
    { 0xce000000, 0xff808000, disas_crypto_four_reg },
    { 0xce800000, 0xffe00000, disas_crypto_xar },
    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
    /* Half-precision (fp16) forms */
    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
    /* Terminator */
    { 0x00000000, 0x00000000, NULL }
};
13709
13710 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13711 {
13712     /* Note that this is called with all non-FP cases from
13713      * table C3-6 so it must UNDEF for entries not specifically
13714      * allocated to instructions in that table.
13715      */
13716     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13717     if (fn) {
13718         fn(s, insn);
13719     } else {
13720         unallocated_encoding(s);
13721     }
13722 }
13723
13724 /* C3.6 Data processing - SIMD and floating point */
13725 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13726 {
13727     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13728         disas_data_proc_fp(s, insn);
13729     } else {
13730         /* SIMD, including crypto */
13731         disas_data_proc_simd(s, insn);
13732     }
13733 }
13734
/*
 * Translator callback that accepts the instruction without emitting any
 * code or touching @s: it returns true unconditionally.
 * NOTE(review): presumably bound to patterns of the decoder invoked via
 * disas_sme_fa64() -- confirm against the decode file.
 */
static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}
13739
/*
 * Translator callback that flags the current instruction as
 * non-streaming by setting s->is_nonstreaming, then returns true.
 * aarch64_tr_translate_insn() clears that flag for every instruction
 * before calling disas_sme_fa64() (when s->sme_trap_nonstreaming is set).
 * NOTE(review): presumably bound to patterns of that decoder -- confirm.
 */
static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}
13745
13746 /**
13747  * is_guarded_page:
13748  * @env: The cpu environment
13749  * @s: The DisasContext
13750  *
13751  * Return true if the page is guarded.
13752  */
13753 static bool is_guarded_page(CPUARMState *env, DisasContext *s)
13754 {
13755     uint64_t addr = s->base.pc_first;
13756 #ifdef CONFIG_USER_ONLY
13757     return page_get_flags(addr) & PAGE_BTI;
13758 #else
13759     CPUTLBEntryFull *full;
13760     void *host;
13761     int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
13762     int flags;
13763
13764     /*
13765      * We test this immediately after reading an insn, which means
13766      * that the TLB entry must be present and valid, and thus this
13767      * access will never raise an exception.
13768      */
13769     flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
13770                               false, &host, &full, 0);
13771     assert(!(flags & TLB_INVALID_MASK));
13772
13773     return full->guarded;
13774 #endif
13775 }
13776
/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * Only a small set of instructions may sit at a branch target on a
 * guarded page:
 *   - branch target identifiers (BTI c, BTI j, BTI jc),
 *   - PACIASP and PACIBSP,
 *   - BRK,
 *   - HLT.
 * Any other instruction results in a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    const bool in_hint_space = (insn & 0xfffff01fu) == 0xd503201fu;

    if (!in_hint_space) {
        /*
         * BRK and HLT are always accepted, which gives priority to
         * the breakpoint exception.
         */
        const uint32_t excp_op = insn & 0xffe0001fu;

        return excp_op == 0xd4200000u /* BRK */
            || excp_op == 0xd4400000u /* HLT */;
    }

    /* HINT space: dispatch on bits [11:5] of the instruction. */
    switch (extract32(insn, 5, 7)) {
    case 0b011001: /* PACIASP */
    case 0b011011: /* PACIBSP */
        /*
         * With SCTLR_ELx.BT set, PACI*SP does not accept btype == 3;
         * with it clear, every btype is accepted.
         */
        if (bt && btype == 3) {
            return false;
        }
        return true;
    case 0b100000: /* BTI */
        /* Plain BTI accepts no btype at all. */
        return false;
    case 0b100010: /* BTI c */
        /* Accepts everything except btype == 3. */
        return btype != 3;
    case 0b100100: /* BTI j */
        /* Accepts everything except btype == 2. */
        return btype != 2;
    case 0b100110: /* BTI jc */
        /* Accepts every btype. */
        return true;
    default:
        break;
    }

    /* Any other hint is not a valid landing pad. */
    return false;
}
13828
13829 /* C3.1 A64 instruction index by encoding */
13830 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
13831 {
13832     switch (extract32(insn, 25, 4)) {
13833     case 0x5:
13834     case 0xd:      /* Data processing - register */
13835         disas_data_proc_reg(s, insn);
13836         break;
13837     case 0x7:
13838     case 0xf:      /* Data processing - SIMD and floating point */
13839         disas_data_proc_simd_fp(s, insn);
13840         break;
13841     default:
13842         unallocated_encoding(s);
13843         break;
13844     }
13845 }
13846
/*
 * Initialise the AArch64 DisasContext that embeds @dcbase before a TB
 * is translated.
 *
 * Most fields are unpacked from the TB flags of dc->base.tb; a few are
 * copied from the ARMCPU / CPUARMState, and the rest are set to fixed
 * values.  Finally dc->base.max_insns is capped so that translation
 * stops at the end of the page, or after a single instruction when
 * architectural single-step is active.
 */
static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    /* The TB flags carry the core MMU index; convert it to the A64 form. */
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    /* The exception level is derived from the MMU index set just above. */
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    /* The VL and SVL flag fields are converted as (field + 1) * 16. */
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata = EX_TBFLAG_A64(tb_flags, ATA);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    /* Values copied from the CPU object rather than from the TB flags. */
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;
    dc->gm_blocksize = arm_cpu->gm_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    dc->lse2 = dc_isar_feature(aa64_lse2, dc);

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;

    /*
     * Bound the number of insns to execute to those left on the page.
     * Assuming TARGET_PAGE_MASK has every bit above the page offset set,
     * -(pc_first | TARGET_PAGE_MASK) is the number of bytes from pc_first
     * to the end of the page; dividing by 4 converts that to instructions.
     */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1.  */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);
}
13937
/* TB-start translator hook: intentionally empty, no code is emitted. */
static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}
13941
13942 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
13943 {
13944     DisasContext *dc = container_of(dcbase, DisasContext, base);
13945     target_ulong pc_arg = dc->base.pc_next;
13946
13947     if (tb_cflags(dcbase->tb) & CF_PCREL) {
13948         pc_arg &= ~TARGET_PAGE_MASK;
13949     }
13950     tcg_gen_insn_start(pc_arg, 0, 0);
13951     dc->insn_start = tcg_last_op();
13952 }
13953
/*
 * Translate the single A64 instruction at s->base.pc_next.
 *
 * The checks are performed in priority order, each one ending the
 * function early when it fires:
 *   1. pending software-step exception,
 *   2. misaligned PC,
 *   3. (instruction fetch)
 *   4. illegal execution state (PSTATE.IL),
 *   5. Branch Target Exception on the first insn of a TB, when the BTI
 *      feature is present.
 * Only then is the instruction handed to the decoders.
 */
static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *s = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    uint64_t pc = s->base.pc_next;
    uint32_t insn;

    /* Singlestep exceptions have the highest priority. */
    if (s->ss_active && !s->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(s->base.num_insns == 1);
        gen_swstep_exception(s, 0, 0);
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = pc + 4;
        return;
    }

    if (pc & 3) {
        /*
         * PC alignment fault.  This has priority over the instruction abort
         * that we would receive from a translation fault via arm_ldl_code.
         * This should only be possible after an indirect branch, at the
         * start of the TB.
         */
        assert(s->base.num_insns == 1);
        gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
        return;
    }

    /* Fetch the instruction word and advance pc_next past it. */
    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

    /* Per-instruction state: no access check has been done yet. */
    s->fp_access_checked = false;
    s->sve_access_checked = false;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /*
             * At the first insn of the TB, compute s->guarded_page.
             * We delayed computing this until successfully reading
             * the first insn of the TB, above.  This (mostly) ensures
             * that the softmmu tlb entry has been populated, and the
             * page table GP bit is available.
             *
             * Note that we need to compute this even if btype == 0,
             * because this value is used for BR instructions later
             * where ENV is not available.
             */
            s->guarded_page = is_guarded_page(env, s);

            /* First insn can have btype set to non-zero.  */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above most
             * everything else.  This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             */
            if (s->btype != 0
                && s->guarded_page
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
                return;
            }
        } else {
            /* Not the first insn: btype must be 0.  */
            tcg_debug_assert(s->btype == 0);
        }
    }

    /*
     * Clear the non-streaming marker for this insn; when non-streaming
     * insns must trap, disas_sme_fa64() is run first (trans_FAIL sets
     * the marker again).
     */
    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

    /*
     * Try the decoders in order; the first one that returns true stops
     * the chain.  If none does, fall back to the legacy decoder.
     */
    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        disas_a64_legacy(s, insn);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}
14066
14067 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
14068 {
14069     DisasContext *dc = container_of(dcbase, DisasContext, base);
14070
14071     if (unlikely(dc->ss_active)) {
14072         /* Note that this means single stepping WFI doesn't halt the CPU.
14073          * For conditional branch insns this is harmless unreachable code as
14074          * gen_goto_tb() has already handled emitting the debug exception
14075          * (and thus a tb-jump is not possible when singlestepping).
14076          */
14077         switch (dc->base.is_jmp) {
14078         default:
14079             gen_a64_update_pc(dc, 4);
14080             /* fall through */
14081         case DISAS_EXIT:
14082         case DISAS_JUMP:
14083             gen_step_complete_exception(dc);
14084             break;
14085         case DISAS_NORETURN:
14086             break;
14087         }
14088     } else {
14089         switch (dc->base.is_jmp) {
14090         case DISAS_NEXT:
14091         case DISAS_TOO_MANY:
14092             gen_goto_tb(dc, 1, 4);
14093             break;
14094         default:
14095         case DISAS_UPDATE_EXIT:
14096             gen_a64_update_pc(dc, 4);
14097             /* fall through */
14098         case DISAS_EXIT:
14099             tcg_gen_exit_tb(NULL, 0);
14100             break;
14101         case DISAS_UPDATE_NOCHAIN:
14102             gen_a64_update_pc(dc, 4);
14103             /* fall through */
14104         case DISAS_JUMP:
14105             tcg_gen_lookup_and_goto_ptr();
14106             break;
14107         case DISAS_NORETURN:
14108         case DISAS_SWI:
14109             break;
14110         case DISAS_WFE:
14111             gen_a64_update_pc(dc, 4);
14112             gen_helper_wfe(cpu_env);
14113             break;
14114         case DISAS_YIELD:
14115             gen_a64_update_pc(dc, 4);
14116             gen_helper_yield(cpu_env);
14117             break;
14118         case DISAS_WFI:
14119             /*
14120              * This is a special case because we don't want to just halt
14121              * the CPU if trying to debug across a WFI.
14122              */
14123             gen_a64_update_pc(dc, 4);
14124             gen_helper_wfi(cpu_env, tcg_constant_i32(4));
14125             /*
14126              * The helper doesn't necessarily throw an exception, but we
14127              * must go back to the main loop to check for interrupts anyway.
14128              */
14129             tcg_gen_exit_tb(NULL, 0);
14130             break;
14131         }
14132     }
14133 }
14134
14135 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14136                                  CPUState *cpu, FILE *logfile)
14137 {
14138     DisasContext *dc = container_of(dcbase, DisasContext, base);
14139
14140     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
14141     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
14142 }
14143
14144 const TranslatorOps aarch64_translator_ops = {
14145     .init_disas_context = aarch64_tr_init_disas_context,
14146     .tb_start           = aarch64_tr_tb_start,
14147     .insn_start         = aarch64_tr_insn_start,
14148     .translate_insn     = aarch64_tr_translate_insn,
14149     .tb_stop            = aarch64_tr_tb_stop,
14150     .disas_log          = aarch64_tr_disas_log,
14151 };