target/arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "tcg-op-gvec.h"
  25 #include "qemu/log.h"
  26 #include "arm_ldst.h"
  27 #include "translate.h"
  28 #include "internals.h"
  29 #include "qemu/host-utils.h"
  30
  31 #include "exec/semihost.h"
  32 #include "exec/gen-icount.h"
  33
  34 #include "exec/helper-proto.h"
  35 #include "exec/helper-gen.h"
  36 #include "exec/log.h"
  37
  38 #include "trace-tcg.h"
  39 #include "translate-a64.h"
  40
/* TCG globals for the 32 X-register slots and the program counter.
 * They are created and bound to CPUARMState in a64_translate_init().
 */
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;
  46
  47 static const char *regnames[] = {
  48     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  49     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  50     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  51     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  52 };
  53
  54 enum a64_shift_type {
  55     A64_SHIFT_TYPE_LSL = 0,
  56     A64_SHIFT_TYPE_LSR = 1,
  57     A64_SHIFT_TYPE_ASR = 2,
  58     A64_SHIFT_TYPE_ROR = 3
  59 };
  60
/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
/* Handler signature: receives the translation context and the 32-bit insn. */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/* One decode table row pairing a pattern/mask with its handler.
 * NOTE(review): presumably a row matches when (insn & mask) == pattern;
 * the lookup code is not in this part of the file - confirm.
 */
typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
  71
/* Function prototype for gen_ functions for calling Neon helpers.
 * The "Env" variants take an additional TCGv_ptr argument right after
 * the first operand (presumably cpu_env - confirm against the callers).
 */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
/* Narrowing takes a 64-bit operand into a 32-bit one; widening the reverse */
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
/* These take a trailing TCGv_ptr (presumably the fp status pointer) */
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
/* Crypto generators take all their operands as pointers */
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
/* Atomic generator: three 64-bit values, then a TCGArg and a TCGMemOp */
typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
  88
  89 /* initialize TCG globals.  */
  90 void a64_translate_init(void)
  91 {
  92     int i;
  93
  94     cpu_pc = tcg_global_mem_new_i64(cpu_env,
  95                                     offsetof(CPUARMState, pc),
  96                                     "pc");
  97     for (i = 0; i < 32; i++) {
  98         cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
  99                                           offsetof(CPUARMState, xregs[i]),
 100                                           regnames[i]);
 101     }
 102
 103     cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
 104         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 105 }
 106
 107 static inline int get_a64_user_mem_index(DisasContext *s)
 108 {
 109     /* Return the core mmu_idx to use for A64 "unprivileged load/store" insns:
 110      *  if EL1, access as if EL0; otherwise access at current EL
 111      */
 112     ARMMMUIdx useridx;
 113
 114     switch (s->mmu_idx) {
 115     case ARMMMUIdx_S12NSE1:
 116         useridx = ARMMMUIdx_S12NSE0;
 117         break;
 118     case ARMMMUIdx_S1SE1:
 119         useridx = ARMMMUIdx_S1SE0;
 120         break;
 121     case ARMMMUIdx_S2NS:
 122         g_assert_not_reached();
 123     default:
 124         useridx = s->mmu_idx;
 125         break;
 126     }
 127     return arm_to_core_mmu_idx(useridx);
 128 }
 129
 130 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 131                             fprintf_function cpu_fprintf, int flags)
 132 {
 133     ARMCPU *cpu = ARM_CPU(cs);
 134     CPUARMState *env = &cpu->env;
 135     uint32_t psr = pstate_read(env);
 136     int i;
 137     int el = arm_current_el(env);
 138     const char *ns_status;
 139
 140     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 141             env->pc, env->xregs[31]);
 142     for (i = 0; i < 31; i++) {
 143         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 144         if ((i % 4) == 3) {
 145             cpu_fprintf(f, "\n");
 146         } else {
 147             cpu_fprintf(f, " ");
 148         }
 149     }
 150
 151     if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
 152         ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
 153     } else {
 154         ns_status = "";
 155     }
 156
 157     cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
 158                 psr,
 159                 psr & PSTATE_N ? 'N' : '-',
 160                 psr & PSTATE_Z ? 'Z' : '-',
 161                 psr & PSTATE_C ? 'C' : '-',
 162                 psr & PSTATE_V ? 'V' : '-',
 163                 ns_status,
 164                 el,
 165                 psr & PSTATE_SP ? 'h' : 't');
 166
 167     if (flags & CPU_DUMP_FPU) {
 168         int numvfpregs = 32;
 169         for (i = 0; i < numvfpregs; i++) {
 170             uint64_t *q = aa64_vfp_qreg(env, i);
 171             uint64_t vlo = q[0];
 172             uint64_t vhi = q[1];
 173             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "%c",
 174                         i, vhi, vlo, (i & 1 ? '\n' : ' '));
 175         }
 176         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 177                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 178     }
 179 }
 180
/* Emit a TCG op that sets the PC global to the constant @val. */
void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}
 185
/* Load the PC from a generic TCG variable.
 *
 * If address tagging is enabled via the TCR TBI bits, then loading
 * an address into the PC will clear out any tag in it:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * We can avoid doing this for relative branches, because the
 * PC + offset can never overflow into the tag bits (assuming
 * that virtual addresses are less than 56 bits wide, as they
 * are currently), but we must handle it for branch-to-register.
 */
 202 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
 203 {
 204
 205     if (s->current_el <= 1) {
 206         /* Test if NEITHER or BOTH TBI values are set.  If so, no need to
 207          * examine bit 55 of address, can just generate code.
 208          * If mixed, then test via generated code
 209          */
 210         if (s->tbi0 && s->tbi1) {
 211             TCGv_i64 tmp_reg = tcg_temp_new_i64();
 212             /* Both bits set, sign extension from bit 55 into [63:56] will
 213              * cover both cases
 214              */
 215             tcg_gen_shli_i64(tmp_reg, src, 8);
 216             tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);
 217             tcg_temp_free_i64(tmp_reg);
 218         } else if (!s->tbi0 && !s->tbi1) {
 219             /* Neither bit set, just load it as-is */
 220             tcg_gen_mov_i64(cpu_pc, src);
 221         } else {
 222             TCGv_i64 tcg_tmpval = tcg_temp_new_i64();
 223             TCGv_i64 tcg_bit55  = tcg_temp_new_i64();
 224             TCGv_i64 tcg_zero   = tcg_const_i64(0);
 225
 226             tcg_gen_andi_i64(tcg_bit55, src, (1ull << 55));
 227
 228             if (s->tbi0) {
 229                 /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */
 230                 tcg_gen_andi_i64(tcg_tmpval, src,
 231                                  0x00FFFFFFFFFFFFFFull);
 232                 tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,
 233                                     tcg_tmpval, src);
 234             } else {
 235                 /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */
 236                 tcg_gen_ori_i64(tcg_tmpval, src,
 237                                 0xFF00000000000000ull);
 238                 tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,
 239                                     tcg_tmpval, src);
 240             }
 241             tcg_temp_free_i64(tcg_zero);
 242             tcg_temp_free_i64(tcg_bit55);
 243             tcg_temp_free_i64(tcg_tmpval);
 244         }
 245     } else {  /* EL > 1 */
 246         if (s->tbi0) {
 247             /* Force tag byte to all zero */
 248             tcg_gen_andi_i64(cpu_pc, src, 0x00FFFFFFFFFFFFFFull);
 249         } else {
 250             /* Load unmodified address */
 251             tcg_gen_mov_i64(cpu_pc, src);
 252         }
 253     }
 254 }
 255
/* A condition plus a 64-bit TCG value to test with it.  Filled in by
 * a64_test_cc(); the value is released by a64_free_cc().
 */
typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;
 260
 261 static void a64_test_cc(DisasCompare64 *c64, int cc)
 262 {
 263     DisasCompare c32;
 264
 265     arm_test_cc(&c32, cc);
 266
 267     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 268        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 269     c64->cond = c32.cond;
 270     c64->value = tcg_temp_new_i64();
 271     tcg_gen_ext_i32_i64(c64->value, c32.value);
 272
 273     arm_free_cc(&c32);
 274 }
 275
/* Release the temporary held in @c64 (allocated by a64_test_cc()). */
static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}
 280
 281 static void gen_exception_internal(int excp)
 282 {
 283     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 284
 285     assert(excp_is_internal(excp));
 286     gen_helper_exception_internal(cpu_env, tcg_excp);
 287     tcg_temp_free_i32(tcg_excp);
 288 }
 289
 290 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 291 {
 292     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 293     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 294     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 295
 296     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 297                                        tcg_syn, tcg_el);
 298     tcg_temp_free_i32(tcg_el);
 299     tcg_temp_free_i32(tcg_syn);
 300     tcg_temp_free_i32(tcg_excp);
 301 }
 302
 303 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 304 {
 305     gen_a64_set_pc_im(s->pc - offset);
 306     gen_exception_internal(excp);
 307     s->base.is_jmp = DISAS_NORETURN;
 308 }
 309
 310 static void gen_exception_insn(DisasContext *s, int offset, int excp,
 311                                uint32_t syndrome, uint32_t target_el)
 312 {
 313     gen_a64_set_pc_im(s->pc - offset);
 314     gen_exception(excp, syndrome, target_el);
 315     s->base.is_jmp = DISAS_NORETURN;
 316 }
 317
 318 static void gen_exception_bkpt_insn(DisasContext *s, int offset,
 319                                     uint32_t syndrome)
 320 {
 321     TCGv_i32 tcg_syn;
 322
 323     gen_a64_set_pc_im(s->pc - offset);
 324     tcg_syn = tcg_const_i32(syndrome);
 325     gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
 326     tcg_temp_free_i32(tcg_syn);
 327     s->base.is_jmp = DISAS_NORETURN;
 328 }
 329
 330 static void gen_ss_advance(DisasContext *s)
 331 {
 332     /* If the singlestep state is Active-not-pending, advance to
 333      * Active-pending.
 334      */
 335     if (s->ss_active) {
 336         s->pstate_ss = 0;
 337         gen_helper_clear_pstate_ss(cpu_env);
 338     }
 339 }
 340
 341 static void gen_step_complete_exception(DisasContext *s)
 342 {
 343     /* We just completed step of an insn. Move from Active-not-pending
 344      * to Active-pending, and then also take the swstep exception.
 345      * This corresponds to making the (IMPDEF) choice to prioritize
 346      * swstep exceptions over asynchronous exceptions taken to an exception
 347      * level where debug is disabled. This choice has the advantage that
 348      * we do not need to maintain internal state corresponding to the
 349      * ISV/EX syndrome bits between completion of the step and generation
 350      * of the exception, and our syndrome information is always correct.
 351      */
 352     gen_ss_advance(s);
 353     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 354                   default_exception_el(s));
 355     s->base.is_jmp = DISAS_NORETURN;
 356 }
 357
 358 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 359 {
 360     /* No direct tb linking with singlestep (either QEMU's or the ARM
 361      * debug architecture kind) or deterministic io
 362      */
 363     if (s->base.singlestep_enabled || s->ss_active ||
 364         (tb_cflags(s->base.tb) & CF_LAST_IO)) {
 365         return false;
 366     }
 367
 368 #ifndef CONFIG_USER_ONLY
 369     /* Only link tbs from inside the same guest page */
 370     if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 371         return false;
 372     }
 373 #endif
 374
 375     return true;
 376 }
 377
 378 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 379 {
 380     TranslationBlock *tb;
 381
 382     tb = s->base.tb;
 383     if (use_goto_tb(s, n, dest)) {
 384         tcg_gen_goto_tb(n);
 385         gen_a64_set_pc_im(dest);
 386         tcg_gen_exit_tb(tb, n);
 387         s->base.is_jmp = DISAS_NORETURN;
 388     } else {
 389         gen_a64_set_pc_im(dest);
 390         if (s->ss_active) {
 391             gen_step_complete_exception(s);
 392         } else if (s->base.singlestep_enabled) {
 393             gen_exception_internal(EXCP_DEBUG);
 394         } else {
 395             tcg_gen_lookup_and_goto_ptr();
 396             s->base.is_jmp = DISAS_NORETURN;
 397         }
 398     }
 399 }
 400
/* Raise EXCP_UDEF with an "uncategorized" syndrome for the current insn.
 * The offset of 4 makes the reported PC s->pc - 4.
 */
void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}
 407
/* Reset the list of auto-freed temporaries to empty.  The slots
 * themselves are only zeroed in CONFIG_DEBUG_TCG builds.
 */
static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
#endif
    s->tmp_a64_count = 0;
}
 415
 416 static void free_tmp_a64(DisasContext *s)
 417 {
 418     int i;
 419     for (i = 0; i < s->tmp_a64_count; i++) {
 420         tcg_temp_free_i64(s->tmp_a64[i]);
 421     }
 422     init_tmp_a64_array(s);
 423 }
 424
 425 TCGv_i64 new_tmp_a64(DisasContext *s)
 426 {
 427     assert(s->tmp_a64_count < TMP_A64_MAX);
 428     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 429 }
 430
 431 TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 432 {
 433     TCGv_i64 t = new_tmp_a64(s);
 434     tcg_gen_movi_i64(t, 0);
 435     return t;
 436 }
 437
/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
 453 TCGv_i64 cpu_reg(DisasContext *s, int reg)
 454 {
 455     if (reg == 31) {
 456         return new_tmp_a64_zero(s);
 457     } else {
 458         return cpu_X[reg];
 459     }
 460 }
 461
/* register access for when 31 == SP: always returns the TCG global,
 * so @s is not used.
 */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
 467
 468 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 469  * representing the register contents. This TCGv is an auto-freed
 470  * temporary so it need not be explicitly freed, and may be modified.
 471  */
 472 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 473 {
 474     TCGv_i64 v = new_tmp_a64(s);
 475     if (reg != 31) {
 476         if (sf) {
 477             tcg_gen_mov_i64(v, cpu_X[reg]);
 478         } else {
 479             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 480         }
 481     } else {
 482         tcg_gen_movi_i64(v, 0);
 483     }
 484     return v;
 485 }
 486
 487 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 488 {
 489     TCGv_i64 v = new_tmp_a64(s);
 490     if (sf) {
 491         tcg_gen_mov_i64(v, cpu_X[reg]);
 492     } else {
 493         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 494     }
 495     return v;
 496 }
 497
/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 * Implemented as element 0 of the given @size in vector register @regno.
 */
static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}
 507
/* Offset of the high half of the 128 bit vector Qn, ie 64-bit
 * element 1 of vector register @regno.
 */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}
 513
 514 /* Convenience accessors for reading and writing single and double
 515  * FP registers. Writing clears the upper parts of the associated
 516  * 128 bit vector register, as required by the architecture.
 517  * Note that unlike the GP register accessors, the values returned
 518  * by the read functions must be manually freed.
 519  */
 520 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 521 {
 522     TCGv_i64 v = tcg_temp_new_i64();
 523
 524     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 525     return v;
 526 }
 527
 528 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 529 {
 530     TCGv_i32 v = tcg_temp_new_i32();
 531
 532     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 533     return v;
 534 }
 535
 536 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
 537 {
 538     TCGv_i32 v = tcg_temp_new_i32();
 539
 540     tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
 541     return v;
 542 }
 543
 544 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 545  * If SVE is not enabled, then there are only 128 bits in the vector.
 546  */
 547 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
 548 {
 549     unsigned ofs = fp_reg_offset(s, rd, MO_64);
 550     unsigned vsz = vec_full_reg_size(s);
 551
 552     if (!is_q) {
 553         TCGv_i64 tcg_zero = tcg_const_i64(0);
 554         tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
 555         tcg_temp_free_i64(tcg_zero);
 556     }
 557     if (vsz > 16) {
 558         tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
 559     }
 560 }
 561
 562 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 563 {
 564     unsigned ofs = fp_reg_offset(s, reg, MO_64);
 565
 566     tcg_gen_st_i64(v, cpu_env, ofs);
 567     clear_vec_high(s, false, reg);
 568 }
 569
 570 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 571 {
 572     TCGv_i64 tmp = tcg_temp_new_i64();
 573
 574     tcg_gen_extu_i32_i64(tmp, v);
 575     write_fp_dreg(s, reg, tmp);
 576     tcg_temp_free_i64(tmp);
 577 }
 578
 579 TCGv_ptr get_fpstatus_ptr(bool is_f16)
 580 {
 581     TCGv_ptr statusptr = tcg_temp_new_ptr();
 582     int offset;
 583
 584     /* In A64 all instructions (both FP and Neon) use the FPCR; there
 585      * is no equivalent of the A32 Neon "standard FPSCR value".
 586      * However half-precision operations operate under a different
 587      * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status.
 588      */
 589     if (is_f16) {
 590         offset = offsetof(CPUARMState, vfp.fp_status_f16);
 591     } else {
 592         offset = offsetof(CPUARMState, vfp.fp_status);
 593     }
 594     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 595     return statusptr;
 596 }
 597
 598 /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
 599 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
 600                          GVecGen2Fn *gvec_fn, int vece)
 601 {
 602     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 603             is_q ? 16 : 8, vec_full_reg_size(s));
 604 }
 605
 606 /* Expand a 2-operand + immediate AdvSIMD vector operation using
 607  * an expander function.
 608  */
 609 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
 610                           int64_t imm, GVecGen2iFn *gvec_fn, int vece)
 611 {
 612     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 613             imm, is_q ? 16 : 8, vec_full_reg_size(s));
 614 }
 615
 616 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
 617 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
 618                          GVecGen3Fn *gvec_fn, int vece)
 619 {
 620     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 621             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
 622 }
 623
 624 /* Expand a 2-operand + immediate AdvSIMD vector operation using
 625  * an op descriptor.
 626  */
 627 static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
 628                           int rn, int64_t imm, const GVecGen2i *gvec_op)
 629 {
 630     tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 631                     is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
 632 }
 633
 634 /* Expand a 3-operand AdvSIMD vector operation using an op descriptor.  */
 635 static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
 636                          int rn, int rm, const GVecGen3 *gvec_op)
 637 {
 638     tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 639                    vec_full_reg_offset(s, rm), is_q ? 16 : 8,
 640                    vec_full_reg_size(s), gvec_op);
 641 }
 642
 643 /* Expand a 3-operand + env pointer operation using
 644  * an out-of-line helper.
 645  */
 646 static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
 647                              int rn, int rm, gen_helper_gvec_3_ptr *fn)
 648 {
 649     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
 650                        vec_full_reg_offset(s, rn),
 651                        vec_full_reg_offset(s, rm), cpu_env,
 652                        is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
 653 }
 654
 655 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
 656  * an out-of-line helper.
 657  */
 658 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
 659                               int rm, bool is_fp16, int data,
 660                               gen_helper_gvec_3_ptr *fn)
 661 {
 662     TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
 663     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
 664                        vec_full_reg_offset(s, rn),
 665                        vec_full_reg_offset(s, rm), fpst,
 666                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 667     tcg_temp_free_ptr(fpst);
 668 }
 669
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    /* Split the result: low half into cpu_ZF, high half into cpu_NF */
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    /* Fold both halves into cpu_ZF so it is zero only if all 64 bits are */
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
 678
 679 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 680 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 681 {
 682     if (sf) {
 683         gen_set_NZ64(result);
 684     } else {
 685         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 686         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 687     }
 688     tcg_gen_movi_i32(cpu_CF, 0);
 689     tcg_gen_movi_i32(cpu_VF, 0);
 690 }
 691
/* dest = T0 + T1; compute C, N, V and Z flags.
 * With @sf set the addition is 64-bit, otherwise it is done on the low
 * 32 bits and the result zero-extended into @dest.
 */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        /* Double-word add with zero high parts: the carry-out of the
         * 64-bit sum ends up in 'flag'.
         */
        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        /* V = (result ^ t0) & ~(t0 ^ t1); its top 32 bits go to cpu_VF.
         * 'tmp' is no longer needed as zero and is reused as scratch.
         */
        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        /* The sum goes straight into cpu_NF, the carry-out into cpu_CF */
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        /* V = (sum ^ t0) & ~(t0 ^ t1), with 'tmp' reused as scratch */
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}
 738
/* dest = T0 - T1; compute C, N, V and Z flags.
 * With @sf set the subtraction is 64-bit, otherwise it is done on the
 * low 32 bits and the result zero-extended into @dest.
 */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        /* C is set when t0 >= t1 as unsigned values */
        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        /* V = (result ^ t0) & (t0 ^ t1); its top 32 bits go to cpu_VF */
        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        /* The difference goes straight into cpu_NF and is copied to ZF */
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        /* V = (difference ^ t0) & (t0 ^ t1) */
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
 785
 786 /* dest = T0 + T1 + CF; do not compute flags. */
 787 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 788 {
 789     TCGv_i64 flag = tcg_temp_new_i64();
 790     tcg_gen_extu_i32_i64(flag, cpu_CF);
 791     tcg_gen_add_i64(dest, t0, t1);
 792     tcg_gen_add_i64(dest, dest, flag);
 793     tcg_temp_free_i64(flag);
 794
 795     if (!sf) {
 796         tcg_gen_ext32u_i64(dest, dest);
 797     }
 798 }
 799
/* dest = T0 + T1 + CF; compute C, N, V and Z flags.
 * With @sf set the addition is 64-bit, otherwise it is done on the low
 * 32 bits and the result zero-extended into @dest.
 */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        /* Two chained double-word adds: first t0 + CF, then + t1, with
         * the carry accumulated in cf_64.  'tmp' supplies the zero
         * high parts.
         */
        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        /* V = (result ^ t0) & ~(t0 ^ t1); its top 32 bits go to cpu_VF.
         * 'tmp' is no longer needed as zero and is reused as scratch.
         */
        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        /* Same two-step add, working directly in cpu_NF and cpu_CF */
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        /* V = (sum ^ t0) & ~(t0 ^ t1), with 'tmp' reused as scratch */
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}
 849
 850 /*
 851  * Load/Store generators
 852  */
 853
 854 /*
 855  * Store from GPR register to memory.
 856  */
 857 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 858                              TCGv_i64 tcg_addr, int size, int memidx,
 859                              bool iss_valid,
 860                              unsigned int iss_srt,
 861                              bool iss_sf, bool iss_ar)
 862 {
 863     g_assert(size <= 3);
 864     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
 865
 866     if (iss_valid) {
 867         uint32_t syn;
 868
 869         syn = syn_data_abort_with_iss(0,
 870                                       size,
 871                                       false,
 872                                       iss_srt,
 873                                       iss_sf,
 874                                       iss_ar,
 875                                       0, 0, 0, 0, 0, false);
 876         disas_set_insn_syndrome(s, syn);
 877     }
 878 }
 879
 880 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 881                       TCGv_i64 tcg_addr, int size,
 882                       bool iss_valid,
 883                       unsigned int iss_srt,
 884                       bool iss_sf, bool iss_ar)
 885 {
 886     do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
 887                      iss_valid, iss_srt, iss_sf, iss_ar);
 888 }
 889
 890 /*
 891  * Load from memory to GPR register
 892  */
 893 static void do_gpr_ld_memidx(DisasContext *s,
 894                              TCGv_i64 dest, TCGv_i64 tcg_addr,
 895                              int size, bool is_signed,
 896                              bool extend, int memidx,
 897                              bool iss_valid, unsigned int iss_srt,
 898                              bool iss_sf, bool iss_ar)
 899 {
 900     TCGMemOp memop = s->be_data + size;
 901
 902     g_assert(size <= 3);
 903
 904     if (is_signed) {
 905         memop += MO_SIGN;
 906     }
 907
 908     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 909
 910     if (extend && is_signed) {
 911         g_assert(size < 3);
 912         tcg_gen_ext32u_i64(dest, dest);
 913     }
 914
 915     if (iss_valid) {
 916         uint32_t syn;
 917
 918         syn = syn_data_abort_with_iss(0,
 919                                       size,
 920                                       is_signed,
 921                                       iss_srt,
 922                                       iss_sf,
 923                                       iss_ar,
 924                                       0, 0, 0, 0, 0, false);
 925         disas_set_insn_syndrome(s, syn);
 926     }
 927 }
 928
 929 static void do_gpr_ld(DisasContext *s,
 930                       TCGv_i64 dest, TCGv_i64 tcg_addr,
 931                       int size, bool is_signed, bool extend,
 932                       bool iss_valid, unsigned int iss_srt,
 933                       bool iss_sf, bool iss_ar)
 934 {
 935     do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
 936                      get_mem_index(s),
 937                      iss_valid, iss_srt, iss_sf, iss_ar);
 938 }
 939
 940 /*
 941  * Store from FP register to memory
 942  */
 943 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 944 {
 945     /* This writes the bottom N bits of a 128 bit wide vector to memory */
 946     TCGv_i64 tmp = tcg_temp_new_i64();
 947     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 948     if (size < 4) {
 949         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
 950                             s->be_data + size);
 951     } else {
 952         bool be = s->be_data == MO_BE;
 953         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 954
 955         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 956         tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 957                             s->be_data | MO_Q);
 958         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
 959         tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 960                             s->be_data | MO_Q);
 961         tcg_temp_free_i64(tcg_hiaddr);
 962     }
 963
 964     tcg_temp_free_i64(tmp);
 965 }
 966
 967 /*
 968  * Load from memory to FP register
 969  */
 970 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 971 {
 972     /* This always zero-extends and writes to a full 128 bit wide vector */
 973     TCGv_i64 tmplo = tcg_temp_new_i64();
 974     TCGv_i64 tmphi;
 975
 976     if (size < 4) {
 977         TCGMemOp memop = s->be_data + size;
 978         tmphi = tcg_const_i64(0);
 979         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 980     } else {
 981         bool be = s->be_data == MO_BE;
 982         TCGv_i64 tcg_hiaddr;
 983
 984         tmphi = tcg_temp_new_i64();
 985         tcg_hiaddr = tcg_temp_new_i64();
 986
 987         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 988         tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 989                             s->be_data | MO_Q);
 990         tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 991                             s->be_data | MO_Q);
 992         tcg_temp_free_i64(tcg_hiaddr);
 993     }
 994
 995     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
 996     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 997
 998     tcg_temp_free_i64(tmplo);
 999     tcg_temp_free_i64(tmphi);
1000
1001     clear_vec_high(s, true, destidx);
1002 }
1003
1004 /*
1005  * Vector load/store helpers.
1006  *
1007  * The principal difference between this and a FP load is that we don't
1008  * zero extend as we are filling a partial chunk of the vector register.
1009  * These functions don't support 128 bit loads/stores, which would be
1010  * normal load/store operations.
1011  *
1012  * The _i32 versions are useful when operating on 32 bit quantities
1013  * (eg for floating point single or using Neon helper functions).
1014  */
1015
1016 /* Get value of an element within a vector register */
1017 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1018                              int element, TCGMemOp memop)
1019 {
1020     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1021     switch (memop) {
1022     case MO_8:
1023         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
1024         break;
1025     case MO_16:
1026         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
1027         break;
1028     case MO_32:
1029         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
1030         break;
1031     case MO_8|MO_SIGN:
1032         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
1033         break;
1034     case MO_16|MO_SIGN:
1035         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
1036         break;
1037     case MO_32|MO_SIGN:
1038         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
1039         break;
1040     case MO_64:
1041     case MO_64|MO_SIGN:
1042         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
1043         break;
1044     default:
1045         g_assert_not_reached();
1046     }
1047 }
1048
1049 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1050                                  int element, TCGMemOp memop)
1051 {
1052     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1053     switch (memop) {
1054     case MO_8:
1055         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1056         break;
1057     case MO_16:
1058         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1059         break;
1060     case MO_8|MO_SIGN:
1061         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1062         break;
1063     case MO_16|MO_SIGN:
1064         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1065         break;
1066     case MO_32:
1067     case MO_32|MO_SIGN:
1068         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1069         break;
1070     default:
1071         g_assert_not_reached();
1072     }
1073 }
1074
1075 /* Set value of an element within a vector register */
1076 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1077                               int element, TCGMemOp memop)
1078 {
1079     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1080     switch (memop) {
1081     case MO_8:
1082         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1083         break;
1084     case MO_16:
1085         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1086         break;
1087     case MO_32:
1088         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1089         break;
1090     case MO_64:
1091         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1092         break;
1093     default:
1094         g_assert_not_reached();
1095     }
1096 }
1097
1098 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1099                                   int destidx, int element, TCGMemOp memop)
1100 {
1101     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1102     switch (memop) {
1103     case MO_8:
1104         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1105         break;
1106     case MO_16:
1107         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1108         break;
1109     case MO_32:
1110         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1111         break;
1112     default:
1113         g_assert_not_reached();
1114     }
1115 }
1116
1117 /* Store from vector register to memory */
1118 static void do_vec_st(DisasContext *s, int srcidx, int element,
1119                       TCGv_i64 tcg_addr, int size)
1120 {
1121     TCGMemOp memop = s->be_data + size;
1122     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1123
1124     read_vec_element(s, tcg_tmp, srcidx, element, size);
1125     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1126
1127     tcg_temp_free_i64(tcg_tmp);
1128 }
1129
1130 /* Load from memory to vector register */
1131 static void do_vec_ld(DisasContext *s, int destidx, int element,
1132                       TCGv_i64 tcg_addr, int size)
1133 {
1134     TCGMemOp memop = s->be_data + size;
1135     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1136
1137     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1138     write_vec_element(s, tcg_tmp, destidx, element, size);
1139
1140     tcg_temp_free_i64(tcg_tmp);
1141 }
1142
1143 /* Check that FP/Neon access is enabled. If it is, return
1144  * true. If not, emit code to generate an appropriate exception,
1145  * and return false; the caller should not emit any code for
1146  * the instruction. Note that this check must happen after all
1147  * unallocated-encoding checks (otherwise the syndrome information
1148  * for the resulting exception will be incorrect).
1149  */
1150 static inline bool fp_access_check(DisasContext *s)
1151 {
1152     assert(!s->fp_access_checked);
1153     s->fp_access_checked = true;
1154
1155     if (!s->fp_excp_el) {
1156         return true;
1157     }
1158
1159     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
1160                        s->fp_excp_el);
1161     return false;
1162 }
1163
1164 /* Check that SVE access is enabled.  If it is, return true.
1165  * If not, emit code to generate an appropriate exception and return false.
1166  */
1167 bool sve_access_check(DisasContext *s)
1168 {
1169     if (s->sve_excp_el) {
1170         gen_exception_insn(s, 4, EXCP_UDEF, syn_sve_access_trap(),
1171                            s->sve_excp_el);
1172         return false;
1173     }
1174     return fp_access_check(s);
1175 }
1176
1177 /*
1178  * This utility function is for doing register extension with an
1179  * optional shift. You will likely want to pass a temporary for the
1180  * destination register. See DecodeRegExtend() in the ARM ARM.
1181  */
1182 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1183                               int option, unsigned int shift)
1184 {
1185     int extsize = extract32(option, 0, 2);
1186     bool is_signed = extract32(option, 2, 1);
1187
1188     if (is_signed) {
1189         switch (extsize) {
1190         case 0:
1191             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1192             break;
1193         case 1:
1194             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1195             break;
1196         case 2:
1197             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1198             break;
1199         case 3:
1200             tcg_gen_mov_i64(tcg_out, tcg_in);
1201             break;
1202         }
1203     } else {
1204         switch (extsize) {
1205         case 0:
1206             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1207             break;
1208         case 1:
1209             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1210             break;
1211         case 2:
1212             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1213             break;
1214         case 3:
1215             tcg_gen_mov_i64(tcg_out, tcg_in);
1216             break;
1217         }
1218     }
1219
1220     if (shift) {
1221         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1222     }
1223 }
1224
1225 static inline void gen_check_sp_alignment(DisasContext *s)
1226 {
1227     /* The AArch64 architecture mandates that (if enabled via PSTATE
1228      * or SCTLR bits) there is a check that SP is 16-aligned on every
1229      * SP-relative load or store (with an exception generated if it is not).
1230      * In line with general QEMU practice regarding misaligned accesses,
1231      * we omit these checks for the sake of guest program performance.
1232      * This function is provided as a hook so we can more easily add these
1233      * checks in future (possibly as a "favour catching guest program bugs
1234      * over speed" user selectable option).
1235      */
1236 }
1237
1238 /*
1239  * This provides a simple table based table lookup decoder. It is
1240  * intended to be used when the relevant bits for decode are too
1241  * awkwardly placed and switch/if based logic would be confusing and
1242  * deeply nested. Since it's a linear search through the table, tables
1243  * should be kept small.
1244  *
1245  * It returns the first handler where insn & mask == pattern, or
1246  * NULL if there is no match.
1247  * The table is terminated by an empty mask (i.e. 0)
1248  */
1249 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1250                                                uint32_t insn)
1251 {
1252     const AArch64DecodeTable *tptr = table;
1253
1254     while (tptr->mask) {
1255         if ((insn & tptr->mask) == tptr->pattern) {
1256             return tptr->disas_fn;
1257         }
1258         tptr++;
1259     }
1260     return NULL;
1261 }
1262
1263 /*
1264  * The instruction disassembly implemented here matches
1265  * the instruction encoding classifications in chapter C4
1266  * of the ARM Architecture Reference Manual (DDI0487B_a);
1267  * classification names and decode diagrams here should generally
1268  * match up with those in the manual.
1269  */
1270
1271 /* Unconditional branch (immediate)
1272  *   31  30       26 25                                  0
1273  * +----+-----------+-------------------------------------+
1274  * | op | 0 0 1 0 1 |                 imm26               |
1275  * +----+-----------+-------------------------------------+
1276  */
1277 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1278 {
1279     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1280
1281     if (insn & (1U << 31)) {
1282         /* BL Branch with link */
1283         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1284     }
1285
1286     /* B Branch / BL Branch with link */
1287     gen_goto_tb(s, 0, addr);
1288 }
1289
1290 /* Compare and branch (immediate)
1291  *   31  30         25  24  23                  5 4      0
1292  * +----+-------------+----+---------------------+--------+
1293  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1294  * +----+-------------+----+---------------------+--------+
1295  */
1296 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1297 {
1298     unsigned int sf, op, rt;
1299     uint64_t addr;
1300     TCGLabel *label_match;
1301     TCGv_i64 tcg_cmp;
1302
1303     sf = extract32(insn, 31, 1);
1304     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1305     rt = extract32(insn, 0, 5);
1306     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1307
1308     tcg_cmp = read_cpu_reg(s, rt, sf);
1309     label_match = gen_new_label();
1310
1311     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1312                         tcg_cmp, 0, label_match);
1313
1314     gen_goto_tb(s, 0, s->pc);
1315     gen_set_label(label_match);
1316     gen_goto_tb(s, 1, addr);
1317 }
1318
1319 /* Test and branch (immediate)
1320  *   31  30         25  24  23   19 18          5 4    0
1321  * +----+-------------+----+-------+-------------+------+
1322  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1323  * +----+-------------+----+-------+-------------+------+
1324  */
1325 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1326 {
1327     unsigned int bit_pos, op, rt;
1328     uint64_t addr;
1329     TCGLabel *label_match;
1330     TCGv_i64 tcg_cmp;
1331
1332     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1333     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1334     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1335     rt = extract32(insn, 0, 5);
1336
1337     tcg_cmp = tcg_temp_new_i64();
1338     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1339     label_match = gen_new_label();
1340     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1341                         tcg_cmp, 0, label_match);
1342     tcg_temp_free_i64(tcg_cmp);
1343     gen_goto_tb(s, 0, s->pc);
1344     gen_set_label(label_match);
1345     gen_goto_tb(s, 1, addr);
1346 }
1347
1348 /* Conditional branch (immediate)
1349  *  31           25  24  23                  5   4  3    0
1350  * +---------------+----+---------------------+----+------+
1351  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1352  * +---------------+----+---------------------+----+------+
1353  */
1354 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1355 {
1356     unsigned int cond;
1357     uint64_t addr;
1358
1359     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1360         unallocated_encoding(s);
1361         return;
1362     }
1363     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1364     cond = extract32(insn, 0, 4);
1365
1366     if (cond < 0x0e) {
1367         /* genuinely conditional branches */
1368         TCGLabel *label_match = gen_new_label();
1369         arm_gen_test_cc(cond, label_match);
1370         gen_goto_tb(s, 0, s->pc);
1371         gen_set_label(label_match);
1372         gen_goto_tb(s, 1, addr);
1373     } else {
1374         /* 0xe and 0xf are both "always" conditions */
1375         gen_goto_tb(s, 0, addr);
1376     }
1377 }
1378
1379 /* HINT instruction group, including various allocated HINTs */
1380 static void handle_hint(DisasContext *s, uint32_t insn,
1381                         unsigned int op1, unsigned int op2, unsigned int crm)
1382 {
1383     unsigned int selector = crm << 3 | op2;
1384
1385     if (op1 != 3) {
1386         unallocated_encoding(s);
1387         return;
1388     }
1389
1390     switch (selector) {
1391     case 0: /* NOP */
1392         return;
1393     case 3: /* WFI */
1394         s->base.is_jmp = DISAS_WFI;
1395         return;
1396         /* When running in MTTCG we don't generate jumps to the yield and
1397          * WFE helpers as it won't affect the scheduling of other vCPUs.
1398          * If we wanted to more completely model WFE/SEV so we don't busy
1399          * spin unnecessarily we would need to do something more involved.
1400          */
1401     case 1: /* YIELD */
1402         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1403             s->base.is_jmp = DISAS_YIELD;
1404         }
1405         return;
1406     case 2: /* WFE */
1407         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1408             s->base.is_jmp = DISAS_WFE;
1409         }
1410         return;
1411     case 4: /* SEV */
1412     case 5: /* SEVL */
1413         /* we treat all as NOP at least for now */
1414         return;
1415     default:
1416         /* default specified as NOP equivalent */
1417         return;
1418     }
1419 }
1420
1421 static void gen_clrex(DisasContext *s, uint32_t insn)
1422 {
1423     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1424 }
1425
1426 /* CLREX, DSB, DMB, ISB */
1427 static void handle_sync(DisasContext *s, uint32_t insn,
1428                         unsigned int op1, unsigned int op2, unsigned int crm)
1429 {
1430     TCGBar bar;
1431
1432     if (op1 != 3) {
1433         unallocated_encoding(s);
1434         return;
1435     }
1436
1437     switch (op2) {
1438     case 2: /* CLREX */
1439         gen_clrex(s, insn);
1440         return;
1441     case 4: /* DSB */
1442     case 5: /* DMB */
1443         switch (crm & 3) {
1444         case 1: /* MBReqTypes_Reads */
1445             bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1446             break;
1447         case 2: /* MBReqTypes_Writes */
1448             bar = TCG_BAR_SC | TCG_MO_ST_ST;
1449             break;
1450         default: /* MBReqTypes_All */
1451             bar = TCG_BAR_SC | TCG_MO_ALL;
1452             break;
1453         }
1454         tcg_gen_mb(bar);
1455         return;
1456     case 6: /* ISB */
1457         /* We need to break the TB after this insn to execute
1458          * a self-modified code correctly and also to take
1459          * any pending interrupts immediately.
1460          */
1461         gen_goto_tb(s, 0, s->pc);
1462         return;
1463     default:
1464         unallocated_encoding(s);
1465         return;
1466     }
1467 }
1468
1469 /* MSR (immediate) - move immediate to processor state field */
1470 static void handle_msr_i(DisasContext *s, uint32_t insn,
1471                          unsigned int op1, unsigned int op2, unsigned int crm)
1472 {
1473     int op = op1 << 3 | op2;
1474     switch (op) {
1475     case 0x05: /* SPSel */
1476         if (s->current_el == 0) {
1477             unallocated_encoding(s);
1478             return;
1479         }
1480         /* fall through */
1481     case 0x1e: /* DAIFSet */
1482     case 0x1f: /* DAIFClear */
1483     {
1484         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1485         TCGv_i32 tcg_op = tcg_const_i32(op);
1486         gen_a64_set_pc_im(s->pc - 4);
1487         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1488         tcg_temp_free_i32(tcg_imm);
1489         tcg_temp_free_i32(tcg_op);
1490         /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1491         gen_a64_set_pc_im(s->pc);
1492         s->base.is_jmp = (op == 0x1f ? DISAS_EXIT : DISAS_JUMP);
1493         break;
1494     }
1495     default:
1496         unallocated_encoding(s);
1497         return;
1498     }
1499 }
1500
1501 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1502 {
1503     TCGv_i32 tmp = tcg_temp_new_i32();
1504     TCGv_i32 nzcv = tcg_temp_new_i32();
1505
1506     /* build bit 31, N */
1507     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1508     /* build bit 30, Z */
1509     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1510     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1511     /* build bit 29, C */
1512     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1513     /* build bit 28, V */
1514     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1515     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1516     /* generate result */
1517     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1518
1519     tcg_temp_free_i32(nzcv);
1520     tcg_temp_free_i32(tmp);
1521 }
1522
1523 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1524
1525 {
1526     TCGv_i32 nzcv = tcg_temp_new_i32();
1527
1528     /* take NZCV from R[t] */
1529     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1530
1531     /* bit 31, N */
1532     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1533     /* bit 30, Z */
1534     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1535     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1536     /* bit 29, C */
1537     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1538     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1539     /* bit 28, V */
1540     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1541     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1542     tcg_temp_free_i32(nzcv);
1543 }
1544
1545 /* MRS - move from system register
1546  * MSR (register) - move to system register
1547  * SYS
1548  * SYSL
1549  * These are all essentially the same insn in 'read' and 'write'
1550  * versions, with varying op0 fields.
1551  */
1552 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1553                        unsigned int op0, unsigned int op1, unsigned int op2,
1554                        unsigned int crn, unsigned int crm, unsigned int rt)
1555 {
1556     const ARMCPRegInfo *ri;
1557     TCGv_i64 tcg_rt;
1558
1559     ri = get_arm_cp_reginfo(s->cp_regs,
1560                             ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1561                                                crn, crm, op0, op1, op2));
1562
1563     if (!ri) {
1564         /* Unknown register; this might be a guest error or a QEMU
1565          * unimplemented feature.
1566          */
1567         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1568                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1569                       isread ? "read" : "write", op0, op1, crn, crm, op2);
1570         unallocated_encoding(s);
1571         return;
1572     }
1573
1574     /* Check access permissions */
1575     if (!cp_access_ok(s->current_el, ri, isread)) {
1576         unallocated_encoding(s);
1577         return;
1578     }
1579
1580     if (ri->accessfn) {
1581         /* Emit code to perform further access permissions checks at
1582          * runtime; this may result in an exception.
1583          */
1584         TCGv_ptr tmpptr;
1585         TCGv_i32 tcg_syn, tcg_isread;
1586         uint32_t syndrome;
1587
1588         gen_a64_set_pc_im(s->pc - 4);
1589         tmpptr = tcg_const_ptr(ri);
1590         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1591         tcg_syn = tcg_const_i32(syndrome);
1592         tcg_isread = tcg_const_i32(isread);
1593         gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1594         tcg_temp_free_ptr(tmpptr);
1595         tcg_temp_free_i32(tcg_syn);
1596         tcg_temp_free_i32(tcg_isread);
1597     }
1598
1599     /* Handle special cases first */
1600     switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1601     case ARM_CP_NOP:
1602         return;
1603     case ARM_CP_NZCV:
1604         tcg_rt = cpu_reg(s, rt);
1605         if (isread) {
1606             gen_get_nzcv(tcg_rt);
1607         } else {
1608             gen_set_nzcv(tcg_rt);
1609         }
1610         return;
1611     case ARM_CP_CURRENTEL:
1612         /* Reads as current EL value from pstate, which is
1613          * guaranteed to be constant by the tb flags.
1614          */
1615         tcg_rt = cpu_reg(s, rt);
1616         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1617         return;
1618     case ARM_CP_DC_ZVA:
1619         /* Writes clear the aligned block of memory which rt points into. */
1620         tcg_rt = cpu_reg(s, rt);
1621         gen_helper_dc_zva(cpu_env, tcg_rt);
1622         return;
1623     default:
1624         break;
1625     }
1626     if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1627         return;
1628     }
1629     if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
1630         return;
1631     }
1632
1633     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1634         gen_io_start();
1635     }
1636
1637     tcg_rt = cpu_reg(s, rt);
1638
1639     if (isread) {
1640         if (ri->type & ARM_CP_CONST) {
1641             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1642         } else if (ri->readfn) {
1643             TCGv_ptr tmpptr;
1644             tmpptr = tcg_const_ptr(ri);
1645             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1646             tcg_temp_free_ptr(tmpptr);
1647         } else {
1648             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1649         }
1650     } else {
1651         if (ri->type & ARM_CP_CONST) {
1652             /* If not forbidden by access permissions, treat as WI */
1653             return;
1654         } else if (ri->writefn) {
1655             TCGv_ptr tmpptr;
1656             tmpptr = tcg_const_ptr(ri);
1657             gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1658             tcg_temp_free_ptr(tmpptr);
1659         } else {
1660             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1661         }
1662     }
1663
1664     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1665         /* I/O operations must end the TB here (whether read or write) */
1666         gen_io_end();
1667         s->base.is_jmp = DISAS_UPDATE;
1668     } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1669         /* We default to ending the TB on a coprocessor register write,
1670          * but allow this to be suppressed by the register definition
1671          * (usually only necessary to work around guest bugs).
1672          */
1673         s->base.is_jmp = DISAS_UPDATE;
1674     }
1675 }
1676
1677 /* System
1678  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1679  * +---------------------+---+-----+-----+-------+-------+-----+------+
1680  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1681  * +---------------------+---+-----+-----+-------+-------+-----+------+
1682  */
1683 static void disas_system(DisasContext *s, uint32_t insn)
1684 {
1685     unsigned int l, op0, op1, crn, crm, op2, rt;
1686     l = extract32(insn, 21, 1);
1687     op0 = extract32(insn, 19, 2);
1688     op1 = extract32(insn, 16, 3);
1689     crn = extract32(insn, 12, 4);
1690     crm = extract32(insn, 8, 4);
1691     op2 = extract32(insn, 5, 3);
1692     rt = extract32(insn, 0, 5);
1693
1694     if (op0 == 0) {
1695         if (l || rt != 31) {
1696             unallocated_encoding(s);
1697             return;
1698         }
1699         switch (crn) {
1700         case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
1701             handle_hint(s, insn, op1, op2, crm);
1702             break;
1703         case 3: /* CLREX, DSB, DMB, ISB */
1704             handle_sync(s, insn, op1, op2, crm);
1705             break;
1706         case 4: /* MSR (immediate) */
1707             handle_msr_i(s, insn, op1, op2, crm);
1708             break;
1709         default:
1710             unallocated_encoding(s);
1711             break;
1712         }
1713         return;
1714     }
1715     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1716 }
1717
1718 /* Exception generation
1719  *
1720  *  31             24 23 21 20                     5 4   2 1  0
1721  * +-----------------+-----+------------------------+-----+----+
1722  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1723  * +-----------------------+------------------------+----------+
1724  */
1725 static void disas_exc(DisasContext *s, uint32_t insn)
1726 {
1727     int opc = extract32(insn, 21, 3);
1728     int op2_ll = extract32(insn, 0, 5);
1729     int imm16 = extract32(insn, 5, 16);
1730     TCGv_i32 tmp;
1731
1732     switch (opc) {
1733     case 0:
1734         /* For SVC, HVC and SMC we advance the single-step state
1735          * machine before taking the exception. This is architecturally
1736          * mandated, to ensure that single-stepping a system call
1737          * instruction works properly.
1738          */
1739         switch (op2_ll) {
1740         case 1:                                                     /* SVC */
1741             gen_ss_advance(s);
1742             gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1743                                default_exception_el(s));
1744             break;
1745         case 2:                                                     /* HVC */
1746             if (s->current_el == 0) {
1747                 unallocated_encoding(s);
1748                 break;
1749             }
1750             /* The pre HVC helper handles cases when HVC gets trapped
1751              * as an undefined insn by runtime configuration.
1752              */
1753             gen_a64_set_pc_im(s->pc - 4);
1754             gen_helper_pre_hvc(cpu_env);
1755             gen_ss_advance(s);
1756             gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1757             break;
1758         case 3:                                                     /* SMC */
1759             if (s->current_el == 0) {
1760                 unallocated_encoding(s);
1761                 break;
1762             }
1763             gen_a64_set_pc_im(s->pc - 4);
1764             tmp = tcg_const_i32(syn_aa64_smc(imm16));
1765             gen_helper_pre_smc(cpu_env, tmp);
1766             tcg_temp_free_i32(tmp);
1767             gen_ss_advance(s);
1768             gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1769             break;
1770         default:
1771             unallocated_encoding(s);
1772             break;
1773         }
1774         break;
1775     case 1:
1776         if (op2_ll != 0) {
1777             unallocated_encoding(s);
1778             break;
1779         }
1780         /* BRK */
1781         gen_exception_bkpt_insn(s, 4, syn_aa64_bkpt(imm16));
1782         break;
1783     case 2:
1784         if (op2_ll != 0) {
1785             unallocated_encoding(s);
1786             break;
1787         }
1788         /* HLT. This has two purposes.
1789          * Architecturally, it is an external halting debug instruction.
1790          * Since QEMU doesn't implement external debug, we treat this as
1791          * it is required for halting debug disabled: it will UNDEF.
1792          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1793          */
1794         if (semihosting_enabled() && imm16 == 0xf000) {
1795 #ifndef CONFIG_USER_ONLY
1796             /* In system mode, don't allow userspace access to semihosting,
1797              * to provide some semblance of security (and for consistency
1798              * with our 32-bit semihosting).
1799              */
1800             if (s->current_el == 0) {
1801                 unsupported_encoding(s, insn);
1802                 break;
1803             }
1804 #endif
1805             gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1806         } else {
1807             unsupported_encoding(s, insn);
1808         }
1809         break;
1810     case 5:
1811         if (op2_ll < 1 || op2_ll > 3) {
1812             unallocated_encoding(s);
1813             break;
1814         }
1815         /* DCPS1, DCPS2, DCPS3 */
1816         unsupported_encoding(s, insn);
1817         break;
1818     default:
1819         unallocated_encoding(s);
1820         break;
1821     }
1822 }
1823
1824 /* Unconditional branch (register)
1825  *  31           25 24   21 20   16 15   10 9    5 4     0
1826  * +---------------+-------+-------+-------+------+-------+
1827  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1828  * +---------------+-------+-------+-------+------+-------+
1829  */
1830 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1831 {
1832     unsigned int opc, op2, op3, rn, op4;
1833
1834     opc = extract32(insn, 21, 4);
1835     op2 = extract32(insn, 16, 5);
1836     op3 = extract32(insn, 10, 6);
1837     rn = extract32(insn, 5, 5);
1838     op4 = extract32(insn, 0, 5);
1839
1840     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1841         unallocated_encoding(s);
1842         return;
1843     }
1844
1845     switch (opc) {
1846     case 0: /* BR */
1847     case 1: /* BLR */
1848     case 2: /* RET */
1849         gen_a64_set_pc(s, cpu_reg(s, rn));
1850         /* BLR also needs to load return address */
1851         if (opc == 1) {
1852             tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1853         }
1854         break;
1855     case 4: /* ERET */
1856         if (s->current_el == 0) {
1857             unallocated_encoding(s);
1858             return;
1859         }
1860         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
1861             gen_io_start();
1862         }
1863         gen_helper_exception_return(cpu_env);
1864         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
1865             gen_io_end();
1866         }
1867         /* Must exit loop to check un-masked IRQs */
1868         s->base.is_jmp = DISAS_EXIT;
1869         return;
1870     case 5: /* DRPS */
1871         if (rn != 0x1f) {
1872             unallocated_encoding(s);
1873         } else {
1874             unsupported_encoding(s, insn);
1875         }
1876         return;
1877     default:
1878         unallocated_encoding(s);
1879         return;
1880     }
1881
1882     s->base.is_jmp = DISAS_JUMP;
1883 }
1884
1885 /* Branches, exception generating and system instructions */
1886 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1887 {
1888     switch (extract32(insn, 25, 7)) {
1889     case 0x0a: case 0x0b:
1890     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1891         disas_uncond_b_imm(s, insn);
1892         break;
1893     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1894         disas_comp_b_imm(s, insn);
1895         break;
1896     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1897         disas_test_b_imm(s, insn);
1898         break;
1899     case 0x2a: /* Conditional branch (immediate) */
1900         disas_cond_b_imm(s, insn);
1901         break;
1902     case 0x6a: /* Exception generation / System */
1903         if (insn & (1 << 24)) {
1904             disas_system(s, insn);
1905         } else {
1906             disas_exc(s, insn);
1907         }
1908         break;
1909     case 0x6b: /* Unconditional branch (register) */
1910         disas_uncond_b_reg(s, insn);
1911         break;
1912     default:
1913         unallocated_encoding(s);
1914         break;
1915     }
1916 }
1917
1918 /*
1919  * Load/Store exclusive instructions are implemented by remembering
1920  * the value/address loaded, and seeing if these are the same
1921  * when the store is performed. This is not actually the architecturally
1922  * mandated semantics, but it works for typical guest code sequences
1923  * and avoids having to monitor regular stores.
1924  *
1925  * The store exclusive uses the atomic cmpxchg primitives to avoid
1926  * races in multi-threaded linux-user and when MTTCG softmmu is
1927  * enabled.
1928  */
1929 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1930                                TCGv_i64 addr, int size, bool is_pair)
1931 {
1932     int idx = get_mem_index(s);
1933     TCGMemOp memop = s->be_data;
1934
1935     g_assert(size <= 3);
1936     if (is_pair) {
1937         g_assert(size >= 2);
1938         if (size == 2) {
1939             /* The pair must be single-copy atomic for the doubleword.  */
1940             memop |= MO_64 | MO_ALIGN;
1941             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
1942             if (s->be_data == MO_LE) {
1943                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
1944                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
1945             } else {
1946                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
1947                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
1948             }
1949         } else {
1950             /* The pair must be single-copy atomic for *each* doubleword, not
1951                the entire quadword, however it must be quadword aligned.  */
1952             memop |= MO_64;
1953             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
1954                                 memop | MO_ALIGN_16);
1955
1956             TCGv_i64 addr2 = tcg_temp_new_i64();
1957             tcg_gen_addi_i64(addr2, addr, 8);
1958             tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
1959             tcg_temp_free_i64(addr2);
1960
1961             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
1962             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
1963         }
1964     } else {
1965         memop |= size | MO_ALIGN;
1966         tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
1967         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
1968     }
1969     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1970 }
1971
1972 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1973                                 TCGv_i64 addr, int size, int is_pair)
1974 {
1975     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1976      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
1977      *     [addr] = {Rt};
1978      *     if (is_pair) {
1979      *         [addr + datasize] = {Rt2};
1980      *     }
1981      *     {Rd} = 0;
1982      * } else {
1983      *     {Rd} = 1;
1984      * }
1985      * env->exclusive_addr = -1;
1986      */
1987     TCGLabel *fail_label = gen_new_label();
1988     TCGLabel *done_label = gen_new_label();
1989     TCGv_i64 tmp;
1990
1991     tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
1992
1993     tmp = tcg_temp_new_i64();
1994     if (is_pair) {
1995         if (size == 2) {
1996             if (s->be_data == MO_LE) {
1997                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
1998             } else {
1999                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2000             }
2001             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2002                                        cpu_exclusive_val, tmp,
2003                                        get_mem_index(s),
2004                                        MO_64 | MO_ALIGN | s->be_data);
2005             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2006         } else if (s->be_data == MO_LE) {
2007             if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2008                 gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
2009                                                         cpu_exclusive_addr,
2010                                                         cpu_reg(s, rt),
2011                                                         cpu_reg(s, rt2));
2012             } else {
2013                 gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
2014                                                cpu_reg(s, rt), cpu_reg(s, rt2));
2015             }
2016         } else {
2017             if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2018                 gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
2019                                                         cpu_exclusive_addr,
2020                                                         cpu_reg(s, rt),
2021                                                         cpu_reg(s, rt2));
2022             } else {
2023                 gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
2024                                                cpu_reg(s, rt), cpu_reg(s, rt2));
2025             }
2026         }
2027     } else {
2028         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2029                                    cpu_reg(s, rt), get_mem_index(s),
2030                                    size | MO_ALIGN | s->be_data);
2031         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2032     }
2033     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2034     tcg_temp_free_i64(tmp);
2035     tcg_gen_br(done_label);
2036
2037     gen_set_label(fail_label);
2038     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2039     gen_set_label(done_label);
2040     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2041 }
2042
2043 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2044                                  int rn, int size)
2045 {
2046     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2047     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2048     int memidx = get_mem_index(s);
2049     TCGv_i64 addr = cpu_reg_sp(s, rn);
2050
2051     if (rn == 31) {
2052         gen_check_sp_alignment(s);
2053     }
2054     tcg_gen_atomic_cmpxchg_i64(tcg_rs, addr, tcg_rs, tcg_rt, memidx,
2055                                size | MO_ALIGN | s->be_data);
2056 }
2057
2058 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2059                                       int rn, int size)
2060 {
2061     TCGv_i64 s1 = cpu_reg(s, rs);
2062     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2063     TCGv_i64 t1 = cpu_reg(s, rt);
2064     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2065     TCGv_i64 addr = cpu_reg_sp(s, rn);
2066     int memidx = get_mem_index(s);
2067
2068     if (rn == 31) {
2069         gen_check_sp_alignment(s);
2070     }
2071
2072     if (size == 2) {
2073         TCGv_i64 cmp = tcg_temp_new_i64();
2074         TCGv_i64 val = tcg_temp_new_i64();
2075
2076         if (s->be_data == MO_LE) {
2077             tcg_gen_concat32_i64(val, t1, t2);
2078             tcg_gen_concat32_i64(cmp, s1, s2);
2079         } else {
2080             tcg_gen_concat32_i64(val, t2, t1);
2081             tcg_gen_concat32_i64(cmp, s2, s1);
2082         }
2083
2084         tcg_gen_atomic_cmpxchg_i64(cmp, addr, cmp, val, memidx,
2085                                    MO_64 | MO_ALIGN | s->be_data);
2086         tcg_temp_free_i64(val);
2087
2088         if (s->be_data == MO_LE) {
2089             tcg_gen_extr32_i64(s1, s2, cmp);
2090         } else {
2091             tcg_gen_extr32_i64(s2, s1, cmp);
2092         }
2093         tcg_temp_free_i64(cmp);
2094     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2095         TCGv_i32 tcg_rs = tcg_const_i32(rs);
2096
2097         if (s->be_data == MO_LE) {
2098             gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
2099         } else {
2100             gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
2101         }
2102         tcg_temp_free_i32(tcg_rs);
2103     } else {
2104         TCGv_i64 d1 = tcg_temp_new_i64();
2105         TCGv_i64 d2 = tcg_temp_new_i64();
2106         TCGv_i64 a2 = tcg_temp_new_i64();
2107         TCGv_i64 c1 = tcg_temp_new_i64();
2108         TCGv_i64 c2 = tcg_temp_new_i64();
2109         TCGv_i64 zero = tcg_const_i64(0);
2110
2111         /* Load the two words, in memory order.  */
2112         tcg_gen_qemu_ld_i64(d1, addr, memidx,
2113                             MO_64 | MO_ALIGN_16 | s->be_data);
2114         tcg_gen_addi_i64(a2, addr, 8);
2115         tcg_gen_qemu_ld_i64(d2, addr, memidx, MO_64 | s->be_data);
2116
2117         /* Compare the two words, also in memory order.  */
2118         tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
2119         tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
2120         tcg_gen_and_i64(c2, c2, c1);
2121
2122         /* If compare equal, write back new data, else write back old data.  */
2123         tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
2124         tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
2125         tcg_gen_qemu_st_i64(c1, addr, memidx, MO_64 | s->be_data);
2126         tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
2127         tcg_temp_free_i64(a2);
2128         tcg_temp_free_i64(c1);
2129         tcg_temp_free_i64(c2);
2130         tcg_temp_free_i64(zero);
2131
2132         /* Write back the data from memory to Rs.  */
2133         tcg_gen_mov_i64(s1, d1);
2134         tcg_gen_mov_i64(s2, d2);
2135         tcg_temp_free_i64(d1);
2136         tcg_temp_free_i64(d2);
2137     }
2138 }
2139
/* Compute the Sixty-Four bit (SF) register-size flag for the ISS.
 * The rule follows the ARMv8 LDR description (shared decode for all
 * encodings):
 *  - signed loads target a 64-bit register when opc<0> is clear,
 *    and a 32-bit register when it is set;
 *  - everything else targets a 64-bit register only for size == 3.
 * Returns true when the target register is 64 bits wide.
 */
static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
{
    if (is_signed) {
        return (opc & 1) == 0;
    }
    return size == 3;
}
2155
2156 /* Load/store exclusive
2157  *
2158  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2159  * +-----+-------------+----+---+----+------+----+-------+------+------+
2160  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2161  * +-----+-------------+----+---+----+------+----+-------+------+------+
2162  *
2163  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2164  *   L: 0 -> store, 1 -> load
2165  *  o2: 0 -> exclusive, 1 -> not
2166  *  o1: 0 -> single register, 1 -> register pair
2167  *  o0: 1 -> load-acquire/store-release, 0 -> not
2168  */
2169 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2170 {
2171     int rt = extract32(insn, 0, 5);
2172     int rn = extract32(insn, 5, 5);
2173     int rt2 = extract32(insn, 10, 5);
2174     int rs = extract32(insn, 16, 5);
2175     int is_lasr = extract32(insn, 15, 1);
2176     int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2177     int size = extract32(insn, 30, 2);
2178     TCGv_i64 tcg_addr;
2179
2180     switch (o2_L_o1_o0) {
2181     case 0x0: /* STXR */
2182     case 0x1: /* STLXR */
2183         if (rn == 31) {
2184             gen_check_sp_alignment(s);
2185         }
2186         if (is_lasr) {
2187             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2188         }
2189         tcg_addr = read_cpu_reg_sp(s, rn, 1);
2190         gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, false);
2191         return;
2192
2193     case 0x4: /* LDXR */
2194     case 0x5: /* LDAXR */
2195         if (rn == 31) {
2196             gen_check_sp_alignment(s);
2197         }
2198         tcg_addr = read_cpu_reg_sp(s, rn, 1);
2199         s->is_ldex = true;
2200         gen_load_exclusive(s, rt, rt2, tcg_addr, size, false);
2201         if (is_lasr) {
2202             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2203         }
2204         return;
2205
2206     case 0x9: /* STLR */
2207         /* Generate ISS for non-exclusive accesses including LASR.  */
2208         if (rn == 31) {
2209             gen_check_sp_alignment(s);
2210         }
2211         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2212         tcg_addr = read_cpu_reg_sp(s, rn, 1);
2213         do_gpr_st(s, cpu_reg(s, rt), tcg_addr, size, true, rt,
2214                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2215         return;
2216
2217     case 0xd: /* LDAR */
2218         /* Generate ISS for non-exclusive accesses including LASR.  */
2219         if (rn == 31) {
2220             gen_check_sp_alignment(s);
2221         }
2222         tcg_addr = read_cpu_reg_sp(s, rn, 1);
2223         do_gpr_ld(s, cpu_reg(s, rt), tcg_addr, size, false, false, true, rt,
2224                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2225         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2226         return;
2227
2228     case 0x2: case 0x3: /* CASP / STXP */
2229         if (size & 2) { /* STXP / STLXP */
2230             if (rn == 31) {
2231                 gen_check_sp_alignment(s);
2232             }
2233             if (is_lasr) {
2234                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2235             }
2236             tcg_addr = read_cpu_reg_sp(s, rn, 1);
2237             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true);
2238             return;
2239         }
2240         if (rt2 == 31
2241             && ((rt | rs) & 1) == 0
2242             && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
2243             /* CASP / CASPL */
2244             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2245             return;
2246         }
2247         break;
2248
2249     case 0x6: case 0x7: /* CASPA / LDXP */
2250         if (size & 2) { /* LDXP / LDAXP */
2251             if (rn == 31) {
2252                 gen_check_sp_alignment(s);
2253             }
2254             tcg_addr = read_cpu_reg_sp(s, rn, 1);
2255             s->is_ldex = true;
2256             gen_load_exclusive(s, rt, rt2, tcg_addr, size, true);
2257             if (is_lasr) {
2258                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2259             }
2260             return;
2261         }
2262         if (rt2 == 31
2263             && ((rt | rs) & 1) == 0
2264             && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
2265             /* CASPA / CASPAL */
2266             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2267             return;
2268         }
2269         break;
2270
2271     case 0xa: /* CAS */
2272     case 0xb: /* CASL */
2273     case 0xe: /* CASA */
2274     case 0xf: /* CASAL */
2275         if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
2276             gen_compare_and_swap(s, rs, rt, rn, size);
2277             return;
2278         }
2279         break;
2280     }
2281     unallocated_encoding(s);
2282 }
2283
2284 /*
2285  * Load register (literal)
2286  *
2287  *  31 30 29   27  26 25 24 23                5 4     0
2288  * +-----+-------+---+-----+-------------------+-------+
2289  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2290  * +-----+-------+---+-----+-------------------+-------+
2291  *
2292  * V: 1 -> vector (simd/fp)
2293  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2294  *                   10-> 32 bit signed, 11 -> prefetch
2295  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2296  */
2297 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2298 {
2299     int rt = extract32(insn, 0, 5);
2300     int64_t imm = sextract32(insn, 5, 19) << 2;
2301     bool is_vector = extract32(insn, 26, 1);
2302     int opc = extract32(insn, 30, 2);
2303     bool is_signed = false;
2304     int size = 2;
2305     TCGv_i64 tcg_rt, tcg_addr;
2306
2307     if (is_vector) {
2308         if (opc == 3) {
2309             unallocated_encoding(s);
2310             return;
2311         }
2312         size = 2 + opc;
2313         if (!fp_access_check(s)) {
2314             return;
2315         }
2316     } else {
2317         if (opc == 3) {
2318             /* PRFM (literal) : prefetch */
2319             return;
2320         }
2321         size = 2 + extract32(opc, 0, 1);
2322         is_signed = extract32(opc, 1, 1);
2323     }
2324
2325     tcg_rt = cpu_reg(s, rt);
2326
2327     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
2328     if (is_vector) {
2329         do_fp_ld(s, rt, tcg_addr, size);
2330     } else {
2331         /* Only unsigned 32bit loads target 32bit registers.  */
2332         bool iss_sf = opc != 0;
2333
2334         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2335                   true, rt, iss_sf, false);
2336     }
2337     tcg_temp_free_i64(tcg_addr);
2338 }
2339
2340 /*
2341  * LDNP (Load Pair - non-temporal hint)
2342  * LDP (Load Pair - non vector)
2343  * LDPSW (Load Pair Signed Word - non vector)
2344  * STNP (Store Pair - non-temporal hint)
2345  * STP (Store Pair - non vector)
2346  * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2347  * LDP (Load Pair of SIMD&FP)
2348  * STNP (Store Pair of SIMD&FP - non-temporal hint)
2349  * STP (Store Pair of SIMD&FP)
2350  *
2351  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2352  * +-----+-------+---+---+-------+---+-----------------------------+
2353  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2354  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2355  *
2356  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2357  *      LDPSW                    01
2358  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2359  *   V: 0 -> GPR, 1 -> Vector
2360  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2361  *      10 -> signed offset, 11 -> pre-index
2362  *   L: 0 -> Store 1 -> Load
2363  *
2364  * Rt, Rt2 = GPR or SIMD registers to be stored
2365  * Rn = general purpose register containing address
2366  * imm7 = signed offset (multiple of 4 or 8 depending on size)
2367  */
2368 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2369 {
2370     int rt = extract32(insn, 0, 5);
2371     int rn = extract32(insn, 5, 5);
2372     int rt2 = extract32(insn, 10, 5);
2373     uint64_t offset = sextract64(insn, 15, 7);
2374     int index = extract32(insn, 23, 2);
2375     bool is_vector = extract32(insn, 26, 1);
2376     bool is_load = extract32(insn, 22, 1);
2377     int opc = extract32(insn, 30, 2);
2378
2379     bool is_signed = false;
2380     bool postindex = false;
2381     bool wback = false;
2382
2383     TCGv_i64 tcg_addr; /* calculated address */
2384     int size;
2385
2386     if (opc == 3) {
2387         unallocated_encoding(s);
2388         return;
2389     }
2390
2391     if (is_vector) {
2392         size = 2 + opc;
2393     } else {
2394         size = 2 + extract32(opc, 1, 1);
2395         is_signed = extract32(opc, 0, 1);
2396         if (!is_load && is_signed) {
2397             unallocated_encoding(s);
2398             return;
2399         }
2400     }
2401
2402     switch (index) {
2403     case 1: /* post-index */
2404         postindex = true;
2405         wback = true;
2406         break;
2407     case 0:
2408         /* signed offset with "non-temporal" hint. Since we don't emulate
2409          * caches we don't care about hints to the cache system about
2410          * data access patterns, and handle this identically to plain
2411          * signed offset.
2412          */
2413         if (is_signed) {
2414             /* There is no non-temporal-hint version of LDPSW */
2415             unallocated_encoding(s);
2416             return;
2417         }
2418         postindex = false;
2419         break;
2420     case 2: /* signed offset, rn not updated */
2421         postindex = false;
2422         break;
2423     case 3: /* pre-index */
2424         postindex = false;
2425         wback = true;
2426         break;
2427     }
2428
2429     if (is_vector && !fp_access_check(s)) {
2430         return;
2431     }
2432
2433     offset <<= size;
2434
2435     if (rn == 31) {
2436         gen_check_sp_alignment(s);
2437     }
2438
2439     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2440
2441     if (!postindex) {
2442         tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2443     }
2444
2445     if (is_vector) {
2446         if (is_load) {
2447             do_fp_ld(s, rt, tcg_addr, size);
2448         } else {
2449             do_fp_st(s, rt, tcg_addr, size);
2450         }
2451         tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2452         if (is_load) {
2453             do_fp_ld(s, rt2, tcg_addr, size);
2454         } else {
2455             do_fp_st(s, rt2, tcg_addr, size);
2456         }
2457     } else {
2458         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2459         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2460
2461         if (is_load) {
2462             TCGv_i64 tmp = tcg_temp_new_i64();
2463
2464             /* Do not modify tcg_rt before recognizing any exception
2465              * from the second load.
2466              */
2467             do_gpr_ld(s, tmp, tcg_addr, size, is_signed, false,
2468                       false, 0, false, false);
2469             tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2470             do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
2471                       false, 0, false, false);
2472
2473             tcg_gen_mov_i64(tcg_rt, tmp);
2474             tcg_temp_free_i64(tmp);
2475         } else {
2476             do_gpr_st(s, tcg_rt, tcg_addr, size,
2477                       false, 0, false, false);
2478             tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2479             do_gpr_st(s, tcg_rt2, tcg_addr, size,
2480                       false, 0, false, false);
2481         }
2482     }
2483
2484     if (wback) {
2485         if (postindex) {
2486             tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2487         } else {
2488             tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2489         }
2490         tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2491     }
2492 }
2493
2494 /*
2495  * Load/store (immediate post-indexed)
2496  * Load/store (immediate pre-indexed)
2497  * Load/store (unscaled immediate)
2498  *
2499  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2500  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2501  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2502  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2503  *
2504  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2505          10 -> unprivileged
2506  * V = 0 -> non-vector
2507  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2508  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2509  */
2510 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2511                                 int opc,
2512                                 int size,
2513                                 int rt,
2514                                 bool is_vector)
2515 {
2516     int rn = extract32(insn, 5, 5);
2517     int imm9 = sextract32(insn, 12, 9);
2518     int idx = extract32(insn, 10, 2);
2519     bool is_signed = false;
2520     bool is_store = false;
2521     bool is_extended = false;
2522     bool is_unpriv = (idx == 2);
2523     bool iss_valid = !is_vector;
2524     bool post_index;
2525     bool writeback;
2526
2527     TCGv_i64 tcg_addr;
2528
2529     if (is_vector) {
2530         size |= (opc & 2) << 1;
2531         if (size > 4 || is_unpriv) {
2532             unallocated_encoding(s);
2533             return;
2534         }
2535         is_store = ((opc & 1) == 0);
2536         if (!fp_access_check(s)) {
2537             return;
2538         }
2539     } else {
2540         if (size == 3 && opc == 2) {
2541             /* PRFM - prefetch */
2542             if (is_unpriv) {
2543                 unallocated_encoding(s);
2544                 return;
2545             }
2546             return;
2547         }
2548         if (opc == 3 && size > 1) {
2549             unallocated_encoding(s);
2550             return;
2551         }
2552         is_store = (opc == 0);
2553         is_signed = extract32(opc, 1, 1);
2554         is_extended = (size < 3) && extract32(opc, 0, 1);
2555     }
2556
2557     switch (idx) {
2558     case 0:
2559     case 2:
2560         post_index = false;
2561         writeback = false;
2562         break;
2563     case 1:
2564         post_index = true;
2565         writeback = true;
2566         break;
2567     case 3:
2568         post_index = false;
2569         writeback = true;
2570         break;
2571     default:
2572         g_assert_not_reached();
2573     }
2574
2575     if (rn == 31) {
2576         gen_check_sp_alignment(s);
2577     }
2578     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2579
2580     if (!post_index) {
2581         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2582     }
2583
2584     if (is_vector) {
2585         if (is_store) {
2586             do_fp_st(s, rt, tcg_addr, size);
2587         } else {
2588             do_fp_ld(s, rt, tcg_addr, size);
2589         }
2590     } else {
2591         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2592         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2593         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2594
2595         if (is_store) {
2596             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
2597                              iss_valid, rt, iss_sf, false);
2598         } else {
2599             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2600                              is_signed, is_extended, memidx,
2601                              iss_valid, rt, iss_sf, false);
2602         }
2603     }
2604
2605     if (writeback) {
2606         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2607         if (post_index) {
2608             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2609         }
2610         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2611     }
2612 }
2613
2614 /*
2615  * Load/store (register offset)
2616  *
2617  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2618  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2619  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2620  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2621  *
2622  * For non-vector:
2623  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2624  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2625  * For vector:
2626  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2627  *   opc<0>: 0 -> store, 1 -> load
2628  * V: 1 -> vector/simd
2629  * opt: extend encoding (see DecodeRegExtend)
2630  * S: if S=1 then scale (essentially index by sizeof(size))
2631  * Rt: register to transfer into/out of
2632  * Rn: address register or SP for base
2633  * Rm: offset register or ZR for offset
2634  */
2635 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2636                                    int opc,
2637                                    int size,
2638                                    int rt,
2639                                    bool is_vector)
2640 {
2641     int rn = extract32(insn, 5, 5);
2642     int shift = extract32(insn, 12, 1);
2643     int rm = extract32(insn, 16, 5);
2644     int opt = extract32(insn, 13, 3);
2645     bool is_signed = false;
2646     bool is_store = false;
2647     bool is_extended = false;
2648
2649     TCGv_i64 tcg_rm;
2650     TCGv_i64 tcg_addr;
2651
2652     if (extract32(opt, 1, 1) == 0) {
2653         unallocated_encoding(s);
2654         return;
2655     }
2656
2657     if (is_vector) {
2658         size |= (opc & 2) << 1;
2659         if (size > 4) {
2660             unallocated_encoding(s);
2661             return;
2662         }
2663         is_store = !extract32(opc, 0, 1);
2664         if (!fp_access_check(s)) {
2665             return;
2666         }
2667     } else {
2668         if (size == 3 && opc == 2) {
2669             /* PRFM - prefetch */
2670             return;
2671         }
2672         if (opc == 3 && size > 1) {
2673             unallocated_encoding(s);
2674             return;
2675         }
2676         is_store = (opc == 0);
2677         is_signed = extract32(opc, 1, 1);
2678         is_extended = (size < 3) && extract32(opc, 0, 1);
2679     }
2680
2681     if (rn == 31) {
2682         gen_check_sp_alignment(s);
2683     }
2684     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2685
2686     tcg_rm = read_cpu_reg(s, rm, 1);
2687     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2688
2689     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2690
2691     if (is_vector) {
2692         if (is_store) {
2693             do_fp_st(s, rt, tcg_addr, size);
2694         } else {
2695             do_fp_ld(s, rt, tcg_addr, size);
2696         }
2697     } else {
2698         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2699         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2700         if (is_store) {
2701             do_gpr_st(s, tcg_rt, tcg_addr, size,
2702                       true, rt, iss_sf, false);
2703         } else {
2704             do_gpr_ld(s, tcg_rt, tcg_addr, size,
2705                       is_signed, is_extended,
2706                       true, rt, iss_sf, false);
2707         }
2708     }
2709 }
2710
2711 /*
2712  * Load/store (unsigned immediate)
2713  *
2714  * 31 30 29   27  26 25 24 23 22 21        10 9     5
2715  * +----+-------+---+-----+-----+------------+-------+------+
2716  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2717  * +----+-------+---+-----+-----+------------+-------+------+
2718  *
2719  * For non-vector:
2720  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2721  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2722  * For vector:
2723  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2724  *   opc<0>: 0 -> store, 1 -> load
2725  * Rn: base address register (inc SP)
2726  * Rt: target register
2727  */
2728 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2729                                         int opc,
2730                                         int size,
2731                                         int rt,
2732                                         bool is_vector)
2733 {
2734     int rn = extract32(insn, 5, 5);
2735     unsigned int imm12 = extract32(insn, 10, 12);
2736     unsigned int offset;
2737
2738     TCGv_i64 tcg_addr;
2739
2740     bool is_store;
2741     bool is_signed = false;
2742     bool is_extended = false;
2743
2744     if (is_vector) {
2745         size |= (opc & 2) << 1;
2746         if (size > 4) {
2747             unallocated_encoding(s);
2748             return;
2749         }
2750         is_store = !extract32(opc, 0, 1);
2751         if (!fp_access_check(s)) {
2752             return;
2753         }
2754     } else {
2755         if (size == 3 && opc == 2) {
2756             /* PRFM - prefetch */
2757             return;
2758         }
2759         if (opc == 3 && size > 1) {
2760             unallocated_encoding(s);
2761             return;
2762         }
2763         is_store = (opc == 0);
2764         is_signed = extract32(opc, 1, 1);
2765         is_extended = (size < 3) && extract32(opc, 0, 1);
2766     }
2767
2768     if (rn == 31) {
2769         gen_check_sp_alignment(s);
2770     }
2771     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2772     offset = imm12 << size;
2773     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2774
2775     if (is_vector) {
2776         if (is_store) {
2777             do_fp_st(s, rt, tcg_addr, size);
2778         } else {
2779             do_fp_ld(s, rt, tcg_addr, size);
2780         }
2781     } else {
2782         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2783         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2784         if (is_store) {
2785             do_gpr_st(s, tcg_rt, tcg_addr, size,
2786                       true, rt, iss_sf, false);
2787         } else {
2788             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
2789                       true, rt, iss_sf, false);
2790         }
2791     }
2792 }
2793
2794 /* Atomic memory operations
2795  *
2796  *  31  30      27  26    24    22  21   16   15    12    10    5     0
2797  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
2798  * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
2799  * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+
2800  *
2801  * Rt: the result register
2802  * Rn: base address or SP
2803  * Rs: the source register for the operation
2804  * V: vector flag (always 0 as of v8.3)
2805  * A: acquire flag
2806  * R: release flag
2807  */
2808 static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
2809                               int size, int rt, bool is_vector)
2810 {
2811     int rs = extract32(insn, 16, 5);
2812     int rn = extract32(insn, 5, 5);
2813     int o3_opc = extract32(insn, 12, 4);
2814     int feature = ARM_FEATURE_V8_ATOMICS;
2815     TCGv_i64 tcg_rn, tcg_rs;
2816     AtomicThreeOpFn *fn;
2817
2818     if (is_vector) {
2819         unallocated_encoding(s);
2820         return;
2821     }
2822     switch (o3_opc) {
2823     case 000: /* LDADD */
2824         fn = tcg_gen_atomic_fetch_add_i64;
2825         break;
2826     case 001: /* LDCLR */
2827         fn = tcg_gen_atomic_fetch_and_i64;
2828         break;
2829     case 002: /* LDEOR */
2830         fn = tcg_gen_atomic_fetch_xor_i64;
2831         break;
2832     case 003: /* LDSET */
2833         fn = tcg_gen_atomic_fetch_or_i64;
2834         break;
2835     case 004: /* LDSMAX */
2836         fn = tcg_gen_atomic_fetch_smax_i64;
2837         break;
2838     case 005: /* LDSMIN */
2839         fn = tcg_gen_atomic_fetch_smin_i64;
2840         break;
2841     case 006: /* LDUMAX */
2842         fn = tcg_gen_atomic_fetch_umax_i64;
2843         break;
2844     case 007: /* LDUMIN */
2845         fn = tcg_gen_atomic_fetch_umin_i64;
2846         break;
2847     case 010: /* SWP */
2848         fn = tcg_gen_atomic_xchg_i64;
2849         break;
2850     default:
2851         unallocated_encoding(s);
2852         return;
2853     }
2854     if (!arm_dc_feature(s, feature)) {
2855         unallocated_encoding(s);
2856         return;
2857     }
2858
2859     if (rn == 31) {
2860         gen_check_sp_alignment(s);
2861     }
2862     tcg_rn = cpu_reg_sp(s, rn);
2863     tcg_rs = read_cpu_reg(s, rs, true);
2864
2865     if (o3_opc == 1) { /* LDCLR */
2866         tcg_gen_not_i64(tcg_rs, tcg_rs);
2867     }
2868
2869     /* The tcg atomic primitives are all full barriers.  Therefore we
2870      * can ignore the Acquire and Release bits of this instruction.
2871      */
2872     fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
2873        s->be_data | size | MO_ALIGN);
2874 }
2875
2876 /* Load/store register (all forms) */
2877 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2878 {
2879     int rt = extract32(insn, 0, 5);
2880     int opc = extract32(insn, 22, 2);
2881     bool is_vector = extract32(insn, 26, 1);
2882     int size = extract32(insn, 30, 2);
2883
2884     switch (extract32(insn, 24, 2)) {
2885     case 0:
2886         if (extract32(insn, 21, 1) == 0) {
2887             /* Load/store register (unscaled immediate)
2888              * Load/store immediate pre/post-indexed
2889              * Load/store register unprivileged
2890              */
2891             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
2892             return;
2893         }
2894         switch (extract32(insn, 10, 2)) {
2895         case 0:
2896             disas_ldst_atomic(s, insn, size, rt, is_vector);
2897             return;
2898         case 2:
2899             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
2900             return;
2901         }
2902         break;
2903     case 1:
2904         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
2905         return;
2906     }
2907     unallocated_encoding(s);
2908 }
2909
2910 /* AdvSIMD load/store multiple structures
2911  *
2912  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2913  * +---+---+---------------+---+-------------+--------+------+------+------+
2914  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2915  * +---+---+---------------+---+-------------+--------+------+------+------+
2916  *
2917  * AdvSIMD load/store multiple structures (post-indexed)
2918  *
2919  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2920  * +---+---+---------------+---+---+---------+--------+------+------+------+
2921  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2922  * +---+---+---------------+---+---+---------+--------+------+------+------+
2923  *
2924  * Rt: first (or only) SIMD&FP register to be transferred
2925  * Rn: base address or SP
2926  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2927  */
2928 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2929 {
2930     int rt = extract32(insn, 0, 5);
2931     int rn = extract32(insn, 5, 5);
2932     int size = extract32(insn, 10, 2);
2933     int opcode = extract32(insn, 12, 4);
2934     bool is_store = !extract32(insn, 22, 1);
2935     bool is_postidx = extract32(insn, 23, 1);
2936     bool is_q = extract32(insn, 30, 1);
2937     TCGv_i64 tcg_addr, tcg_rn;
2938
2939     int ebytes = 1 << size;
2940     int elements = (is_q ? 128 : 64) / (8 << size);
2941     int rpt;    /* num iterations */
2942     int selem;  /* structure elements */
2943     int r;
2944
2945     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2946         unallocated_encoding(s);
2947         return;
2948     }
2949
2950     /* From the shared decode logic */
2951     switch (opcode) {
2952     case 0x0:
2953         rpt = 1;
2954         selem = 4;
2955         break;
2956     case 0x2:
2957         rpt = 4;
2958         selem = 1;
2959         break;
2960     case 0x4:
2961         rpt = 1;
2962         selem = 3;
2963         break;
2964     case 0x6:
2965         rpt = 3;
2966         selem = 1;
2967         break;
2968     case 0x7:
2969         rpt = 1;
2970         selem = 1;
2971         break;
2972     case 0x8:
2973         rpt = 1;
2974         selem = 2;
2975         break;
2976     case 0xa:
2977         rpt = 2;
2978         selem = 1;
2979         break;
2980     default:
2981         unallocated_encoding(s);
2982         return;
2983     }
2984
2985     if (size == 3 && !is_q && selem != 1) {
2986         /* reserved */
2987         unallocated_encoding(s);
2988         return;
2989     }
2990
2991     if (!fp_access_check(s)) {
2992         return;
2993     }
2994
2995     if (rn == 31) {
2996         gen_check_sp_alignment(s);
2997     }
2998
2999     tcg_rn = cpu_reg_sp(s, rn);
3000     tcg_addr = tcg_temp_new_i64();
3001     tcg_gen_mov_i64(tcg_addr, tcg_rn);
3002
3003     for (r = 0; r < rpt; r++) {
3004         int e;
3005         for (e = 0; e < elements; e++) {
3006             int tt = (rt + r) % 32;
3007             int xs;
3008             for (xs = 0; xs < selem; xs++) {
3009                 if (is_store) {
3010                     do_vec_st(s, tt, e, tcg_addr, size);
3011                 } else {
3012                     do_vec_ld(s, tt, e, tcg_addr, size);
3013
3014                     /* For non-quad operations, setting a slice of the low
3015                      * 64 bits of the register clears the high 64 bits (in
3016                      * the ARM ARM pseudocode this is implicit in the fact
3017                      * that 'rval' is a 64 bit wide variable).
3018                      * For quad operations, we might still need to zero the
3019                      * high bits of SVE.  We optimize by noticing that we only
3020                      * need to do this the first time we touch a register.
3021                      */
3022                     if (e == 0 && (r == 0 || xs == selem - 1)) {
3023                         clear_vec_high(s, is_q, tt);
3024                     }
3025                 }
3026                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
3027                 tt = (tt + 1) % 32;
3028             }
3029         }
3030     }
3031
3032     if (is_postidx) {
3033         int rm = extract32(insn, 16, 5);
3034         if (rm == 31) {
3035             tcg_gen_mov_i64(tcg_rn, tcg_addr);
3036         } else {
3037             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3038         }
3039     }
3040     tcg_temp_free_i64(tcg_addr);
3041 }
3042
3043 /* AdvSIMD load/store single structure
3044  *
3045  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3046  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3047  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3048  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3049  *
3050  * AdvSIMD load/store single structure (post-indexed)
3051  *
3052  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3053  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3054  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3055  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3056  *
3057  * Rt: first (or only) SIMD&FP register to be transferred
3058  * Rn: base address or SP
3059  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3060  * index = encoded in Q:S:size dependent on size
3061  *
3062  * lane_size = encoded in R, opc
3063  * transfer width = encoded in opc, S, size
3064  */
3065 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3066 {
3067     int rt = extract32(insn, 0, 5);
3068     int rn = extract32(insn, 5, 5);
3069     int size = extract32(insn, 10, 2);
3070     int S = extract32(insn, 12, 1);
3071     int opc = extract32(insn, 13, 3);
3072     int R = extract32(insn, 21, 1);
3073     int is_load = extract32(insn, 22, 1);
3074     int is_postidx = extract32(insn, 23, 1);
3075     int is_q = extract32(insn, 30, 1);
3076
3077     int scale = extract32(opc, 1, 2);
3078     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3079     bool replicate = false;
3080     int index = is_q << 3 | S << 2 | size;
3081     int ebytes, xs;
3082     TCGv_i64 tcg_addr, tcg_rn;
3083
3084     switch (scale) {
3085     case 3:
3086         if (!is_load || S) {
3087             unallocated_encoding(s);
3088             return;
3089         }
3090         scale = size;
3091         replicate = true;
3092         break;
3093     case 0:
3094         break;
3095     case 1:
3096         if (extract32(size, 0, 1)) {
3097             unallocated_encoding(s);
3098             return;
3099         }
3100         index >>= 1;
3101         break;
3102     case 2:
3103         if (extract32(size, 1, 1)) {
3104             unallocated_encoding(s);
3105             return;
3106         }
3107         if (!extract32(size, 0, 1)) {
3108             index >>= 2;
3109         } else {
3110             if (S) {
3111                 unallocated_encoding(s);
3112                 return;
3113             }
3114             index >>= 3;
3115             scale = 3;
3116         }
3117         break;
3118     default:
3119         g_assert_not_reached();
3120     }
3121
3122     if (!fp_access_check(s)) {
3123         return;
3124     }
3125
3126     ebytes = 1 << scale;
3127
3128     if (rn == 31) {
3129         gen_check_sp_alignment(s);
3130     }
3131
3132     tcg_rn = cpu_reg_sp(s, rn);
3133     tcg_addr = tcg_temp_new_i64();
3134     tcg_gen_mov_i64(tcg_addr, tcg_rn);
3135
3136     for (xs = 0; xs < selem; xs++) {
3137         if (replicate) {
3138             /* Load and replicate to all elements */
3139             uint64_t mulconst;
3140             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3141
3142             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
3143                                 get_mem_index(s), s->be_data + scale);
3144             switch (scale) {
3145             case 0:
3146                 mulconst = 0x0101010101010101ULL;
3147                 break;
3148             case 1:
3149                 mulconst = 0x0001000100010001ULL;
3150                 break;
3151             case 2:
3152                 mulconst = 0x0000000100000001ULL;
3153                 break;
3154             case 3:
3155                 mulconst = 0;
3156                 break;
3157             default:
3158                 g_assert_not_reached();
3159             }
3160             if (mulconst) {
3161                 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
3162             }
3163             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
3164             if (is_q) {
3165                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
3166             }
3167             tcg_temp_free_i64(tcg_tmp);
3168             clear_vec_high(s, is_q, rt);
3169         } else {
3170             /* Load/store one element per register */
3171             if (is_load) {
3172                 do_vec_ld(s, rt, index, tcg_addr, scale);
3173             } else {
3174                 do_vec_st(s, rt, index, tcg_addr, scale);
3175             }
3176         }
3177         tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
3178         rt = (rt + 1) % 32;
3179     }
3180
3181     if (is_postidx) {
3182         int rm = extract32(insn, 16, 5);
3183         if (rm == 31) {
3184             tcg_gen_mov_i64(tcg_rn, tcg_addr);
3185         } else {
3186             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3187         }
3188     }
3189     tcg_temp_free_i64(tcg_addr);
3190 }
3191
3192 /* Loads and stores */
3193 static void disas_ldst(DisasContext *s, uint32_t insn)
3194 {
3195     switch (extract32(insn, 24, 6)) {
3196     case 0x08: /* Load/store exclusive */
3197         disas_ldst_excl(s, insn);
3198         break;
3199     case 0x18: case 0x1c: /* Load register (literal) */
3200         disas_ld_lit(s, insn);
3201         break;
3202     case 0x28: case 0x29:
3203     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
3204         disas_ldst_pair(s, insn);
3205         break;
3206     case 0x38: case 0x39:
3207     case 0x3c: case 0x3d: /* Load/store register (all forms) */
3208         disas_ldst_reg(s, insn);
3209         break;
3210     case 0x0c: /* AdvSIMD load/store multiple structures */
3211         disas_ldst_multiple_struct(s, insn);
3212         break;
3213     case 0x0d: /* AdvSIMD load/store single structure */
3214         disas_ldst_single_struct(s, insn);
3215         break;
3216     default:
3217         unallocated_encoding(s);
3218         break;
3219     }
3220 }
3221
3222 /* PC-rel. addressing
3223  *   31  30   29 28       24 23                5 4    0
3224  * +----+-------+-----------+-------------------+------+
3225  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
3226  * +----+-------+-----------+-------------------+------+
3227  */
3228 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
3229 {
3230     unsigned int page, rd;
3231     uint64_t base;
3232     uint64_t offset;
3233
3234     page = extract32(insn, 31, 1);
3235     /* SignExtend(immhi:immlo) -> offset */
3236     offset = sextract64(insn, 5, 19);
3237     offset = offset << 2 | extract32(insn, 29, 2);
3238     rd = extract32(insn, 0, 5);
3239     base = s->pc - 4;
3240
3241     if (page) {
3242         /* ADRP (page based) */
3243         base &= ~0xfff;
3244         offset <<= 12;
3245     }
3246
3247     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
3248 }
3249
3250 /*
3251  * Add/subtract (immediate)
3252  *
3253  *  31 30 29 28       24 23 22 21         10 9   5 4   0
3254  * +--+--+--+-----------+-----+-------------+-----+-----+
3255  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
3256  * +--+--+--+-----------+-----+-------------+-----+-----+
3257  *
3258  *    sf: 0 -> 32bit, 1 -> 64bit
3259  *    op: 0 -> add  , 1 -> sub
3260  *     S: 1 -> set flags
3261  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
3262  */
3263 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
3264 {
3265     int rd = extract32(insn, 0, 5);
3266     int rn = extract32(insn, 5, 5);
3267     uint64_t imm = extract32(insn, 10, 12);
3268     int shift = extract32(insn, 22, 2);
3269     bool setflags = extract32(insn, 29, 1);
3270     bool sub_op = extract32(insn, 30, 1);
3271     bool is_64bit = extract32(insn, 31, 1);
3272
3273     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3274     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
3275     TCGv_i64 tcg_result;
3276
3277     switch (shift) {
3278     case 0x0:
3279         break;
3280     case 0x1:
3281         imm <<= 12;
3282         break;
3283     default:
3284         unallocated_encoding(s);
3285         return;
3286     }
3287
3288     tcg_result = tcg_temp_new_i64();
3289     if (!setflags) {
3290         if (sub_op) {
3291             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
3292         } else {
3293             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
3294         }
3295     } else {
3296         TCGv_i64 tcg_imm = tcg_const_i64(imm);
3297         if (sub_op) {
3298             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3299         } else {
3300             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3301         }
3302         tcg_temp_free_i64(tcg_imm);
3303     }
3304
3305     if (is_64bit) {
3306         tcg_gen_mov_i64(tcg_rd, tcg_result);
3307     } else {
3308         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3309     }
3310
3311     tcg_temp_free_i64(tcg_result);
3312 }
3313
/* Replicate an e-bit element across a 64 bit integer.
 *
 * mask must hold the element in its bottom e bits, with all higher
 * bits zero; e must be non-zero.  Each pass ORs in a copy shifted by
 * the current width and then doubles that width, until 64 bits are
 * covered.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }
    return mask;
}
3327
/* Return a value whose low 'length' bits are set and whose remaining
 * bits are clear.  length must satisfy 0 < length <= 64.
 */
static inline uint64_t bitmask64(unsigned int length)
{
    unsigned int clear_bits;

    assert(length > 0 && length <= 64);
    /* Shift the unwanted high bits out of an all-ones value */
    clear_bits = 64 - length;
    return ~0ULL >> clear_bits;
}
3334
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask.
 *
 * Returns false if immn/imms/immr form a reserved value (ie one that
 * should cause a guest UNDEF exception); *result is then left untouched.
 * Returns true if they are valid, in which case the decoded bit pattern
 * has been written to *result.
 */
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                            unsigned int imms, unsigned int immr)
{
    uint64_t pattern;
    unsigned esize, emask, run, rot;
    int log2_esize;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The bit patterns we create here are 64 bit patterns which
     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
     * 64 bits each. Each element contains the same value: a run
     * of between 1 and e-1 non-zero bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * The element size and run length are encoded into immn (1 bit)
     * and imms (6 bits) as follows:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * Notice that immn = 0, imms = 11111x is the only combination
     * not covered by one of the above options; this is reserved.
     * Further, <length of run - 1> all-ones is a reserved pattern.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /* The highest set bit of immn:NOT(imms) gives log2 of the element size */
    log2_esize = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (log2_esize < 1) {
        /* This is the immn == 0, imms == 11111x (binary) case */
        return false;
    }
    esize = 1 << log2_esize;
    emask = esize - 1;

    run = imms & emask;
    rot = immr & emask;

    if (run == emask) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* Build one element: run+1 set bits, rotated right by rot within
     * the esize-bit wide element...
     */
    pattern = bitmask64(run + 1);
    if (rot) {
        pattern = (pattern >> rot) | (pattern << (esize - rot));
        pattern &= bitmask64(esize);
    }

    /* ...then replicate the element over the whole 64 bit value */
    *result = bitfield_replicate(pattern, esize);
    return true;
}
3400
3401 /* Logical (immediate)
3402  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3403  * +----+-----+-------------+---+------+------+------+------+
3404  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
3405  * +----+-----+-------------+---+------+------+------+------+
3406  */
3407 static void disas_logic_imm(DisasContext *s, uint32_t insn)
3408 {
3409     unsigned int sf, opc, is_n, immr, imms, rn, rd;
3410     TCGv_i64 tcg_rd, tcg_rn;
3411     uint64_t wmask;
3412     bool is_and = false;
3413
3414     sf = extract32(insn, 31, 1);
3415     opc = extract32(insn, 29, 2);
3416     is_n = extract32(insn, 22, 1);
3417     immr = extract32(insn, 16, 6);
3418     imms = extract32(insn, 10, 6);
3419     rn = extract32(insn, 5, 5);
3420     rd = extract32(insn, 0, 5);
3421
3422     if (!sf && is_n) {
3423         unallocated_encoding(s);
3424         return;
3425     }
3426
3427     if (opc == 0x3) { /* ANDS */
3428         tcg_rd = cpu_reg(s, rd);
3429     } else {
3430         tcg_rd = cpu_reg_sp(s, rd);
3431     }
3432     tcg_rn = cpu_reg(s, rn);
3433
3434     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3435         /* some immediate field values are reserved */
3436         unallocated_encoding(s);
3437         return;
3438     }
3439
3440     if (!sf) {
3441         wmask &= 0xffffffff;
3442     }
3443
3444     switch (opc) {
3445     case 0x3: /* ANDS */
3446     case 0x0: /* AND */
3447         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3448         is_and = true;
3449         break;
3450     case 0x1: /* ORR */
3451         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3452         break;
3453     case 0x2: /* EOR */
3454         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3455         break;
3456     default:
3457         assert(FALSE); /* must handle all above */
3458         break;
3459     }
3460
3461     if (!sf && !is_and) {
3462         /* zero extend final result; we know we can skip this for AND
3463          * since the immediate had the high 32 bits clear.
3464          */
3465         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3466     }
3467
3468     if (opc == 3) { /* ANDS */
3469         gen_logic_CC(sf, tcg_rd);
3470     }
3471 }
3472
3473 /*
3474  * Move wide (immediate)
3475  *
3476  *  31 30 29 28         23 22 21 20             5 4    0
3477  * +--+-----+-------------+-----+----------------+------+
3478  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3479  * +--+-----+-------------+-----+----------------+------+
3480  *
3481  * sf: 0 -> 32 bit, 1 -> 64 bit
3482  * opc: 00 -> N, 10 -> Z, 11 -> K
3483  * hw: shift/16 (0,16, and sf only 32, 48)
3484  */
3485 static void disas_movw_imm(DisasContext *s, uint32_t insn)
3486 {
3487     int rd = extract32(insn, 0, 5);
3488     uint64_t imm = extract32(insn, 5, 16);
3489     int sf = extract32(insn, 31, 1);
3490     int opc = extract32(insn, 29, 2);
3491     int pos = extract32(insn, 21, 2) << 4;
3492     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3493     TCGv_i64 tcg_imm;
3494
3495     if (!sf && (pos >= 32)) {
3496         unallocated_encoding(s);
3497         return;
3498     }
3499
3500     switch (opc) {
3501     case 0: /* MOVN */
3502     case 2: /* MOVZ */
3503         imm <<= pos;
3504         if (opc == 0) {
3505             imm = ~imm;
3506         }
3507         if (!sf) {
3508             imm &= 0xffffffffu;
3509         }
3510         tcg_gen_movi_i64(tcg_rd, imm);
3511         break;
3512     case 3: /* MOVK */
3513         tcg_imm = tcg_const_i64(imm);
3514         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3515         tcg_temp_free_i64(tcg_imm);
3516         if (!sf) {
3517             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3518         }
3519         break;
3520     default:
3521         unallocated_encoding(s);
3522         break;
3523     }
3524 }
3525
3526 /* Bitfield
3527  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3528  * +----+-----+-------------+---+------+------+------+------+
3529  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
3530  * +----+-----+-------------+---+------+------+------+------+
3531  */
3532 static void disas_bitfield(DisasContext *s, uint32_t insn)
3533 {
3534     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3535     TCGv_i64 tcg_rd, tcg_tmp;
3536
3537     sf = extract32(insn, 31, 1);
3538     opc = extract32(insn, 29, 2);
3539     n = extract32(insn, 22, 1);
3540     ri = extract32(insn, 16, 6);
3541     si = extract32(insn, 10, 6);
3542     rn = extract32(insn, 5, 5);
3543     rd = extract32(insn, 0, 5);
3544     bitsize = sf ? 64 : 32;
3545
3546     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3547         unallocated_encoding(s);
3548         return;
3549     }
3550
3551     tcg_rd = cpu_reg(s, rd);
3552
3553     /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3554        to be smaller than bitsize, we'll never reference data outside the
3555        low 32-bits anyway.  */
3556     tcg_tmp = read_cpu_reg(s, rn, 1);
3557
3558     /* Recognize simple(r) extractions.  */
3559     if (si >= ri) {
3560         /* Wd<s-r:0> = Wn<s:r> */
3561         len = (si - ri) + 1;
3562         if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
3563             tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
3564             goto done;
3565         } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
3566             tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
3567             return;
3568         }
3569         /* opc == 1, BXFIL fall through to deposit */
3570         tcg_gen_extract_i64(tcg_tmp, tcg_tmp, ri, len);
3571         pos = 0;
3572     } else {
3573         /* Handle the ri > si case with a deposit
3574          * Wd<32+s-r,32-r> = Wn<s:0>
3575          */
3576         len = si + 1;
3577         pos = (bitsize - ri) & (bitsize - 1);
3578     }
3579
3580     if (opc == 0 && len < ri) {
3581         /* SBFM: sign extend the destination field from len to fill
3582            the balance of the word.  Let the deposit below insert all
3583            of those sign bits.  */
3584         tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
3585         len = ri;
3586     }
3587
3588     if (opc == 1) { /* BFM, BXFIL */
3589         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3590     } else {
3591         /* SBFM or UBFM: We start with zero, and we haven't modified
3592            any bits outside bitsize, therefore the zero-extension
3593            below is unneeded.  */
3594         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
3595         return;
3596     }
3597
3598  done:
3599     if (!sf) { /* zero extend final result */
3600         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3601     }
3602 }
3603
3604 /* Extract
3605  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3606  * +----+------+-------------+---+----+------+--------+------+------+
3607  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3608  * +----+------+-------------+---+----+------+--------+------+------+
3609  */
3610 static void disas_extract(DisasContext *s, uint32_t insn)
3611 {
3612     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3613
3614     sf = extract32(insn, 31, 1);
3615     n = extract32(insn, 22, 1);
3616     rm = extract32(insn, 16, 5);
3617     imm = extract32(insn, 10, 6);
3618     rn = extract32(insn, 5, 5);
3619     rd = extract32(insn, 0, 5);
3620     op21 = extract32(insn, 29, 2);
3621     op0 = extract32(insn, 21, 1);
3622     bitsize = sf ? 64 : 32;
3623
3624     if (sf != n || op21 || op0 || imm >= bitsize) {
3625         unallocated_encoding(s);
3626     } else {
3627         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3628
3629         tcg_rd = cpu_reg(s, rd);
3630
3631         if (unlikely(imm == 0)) {
3632             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3633              * so an extract from bit 0 is a special case.
3634              */
3635             if (sf) {
3636                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3637             } else {
3638                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3639             }
3640         } else if (rm == rn) { /* ROR */
3641             tcg_rm = cpu_reg(s, rm);
3642             if (sf) {
3643                 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3644             } else {
3645                 TCGv_i32 tmp = tcg_temp_new_i32();
3646                 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3647                 tcg_gen_rotri_i32(tmp, tmp, imm);
3648                 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3649                 tcg_temp_free_i32(tmp);
3650             }
3651         } else {
3652             tcg_rm = read_cpu_reg(s, rm, sf);
3653             tcg_rn = read_cpu_reg(s, rn, sf);
3654             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3655             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3656             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3657             if (!sf) {
3658                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3659             }
3660         }
3661     }
3662 }
3663
3664 /* Data processing - immediate */
3665 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3666 {
3667     switch (extract32(insn, 23, 6)) {
3668     case 0x20: case 0x21: /* PC-rel. addressing */
3669         disas_pc_rel_adr(s, insn);
3670         break;
3671     case 0x22: case 0x23: /* Add/subtract (immediate) */
3672         disas_add_sub_imm(s, insn);
3673         break;
3674     case 0x24: /* Logical (immediate) */
3675         disas_logic_imm(s, insn);
3676         break;
3677     case 0x25: /* Move wide (immediate) */
3678         disas_movw_imm(s, insn);
3679         break;
3680     case 0x26: /* Bitfield */
3681         disas_bitfield(s, insn);
3682         break;
3683     case 0x27: /* Extract */
3684         disas_extract(s, insn);
3685         break;
3686     default:
3687         unallocated_encoding(s);
3688         break;
3689     }
3690 }
3691
3692 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3693  * Note that it is the caller's responsibility to ensure that the
3694  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3695  * mandated semantics for out of range shifts.
3696  */
3697 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3698                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3699 {
3700     switch (shift_type) {
3701     case A64_SHIFT_TYPE_LSL:
3702         tcg_gen_shl_i64(dst, src, shift_amount);
3703         break;
3704     case A64_SHIFT_TYPE_LSR:
3705         tcg_gen_shr_i64(dst, src, shift_amount);
3706         break;
3707     case A64_SHIFT_TYPE_ASR:
3708         if (!sf) {
3709             tcg_gen_ext32s_i64(dst, src);
3710         }
3711         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3712         break;
3713     case A64_SHIFT_TYPE_ROR:
3714         if (sf) {
3715             tcg_gen_rotr_i64(dst, src, shift_amount);
3716         } else {
3717             TCGv_i32 t0, t1;
3718             t0 = tcg_temp_new_i32();
3719             t1 = tcg_temp_new_i32();
3720             tcg_gen_extrl_i64_i32(t0, src);
3721             tcg_gen_extrl_i64_i32(t1, shift_amount);
3722             tcg_gen_rotr_i32(t0, t0, t1);
3723             tcg_gen_extu_i32_i64(dst, t0);
3724             tcg_temp_free_i32(t0);
3725             tcg_temp_free_i32(t1);
3726         }
3727         break;
3728     default:
3729         assert(FALSE); /* all shift types should be handled */
3730         break;
3731     }
3732
3733     if (!sf) { /* zero extend final result */
3734         tcg_gen_ext32u_i64(dst, dst);
3735     }
3736 }
3737
3738 /* Shift a TCGv src by immediate, put result in dst.
3739  * The shift amount must be in range (this should always be true as the
3740  * relevant instructions will UNDEF on bad shift immediates).
3741  */
3742 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3743                           enum a64_shift_type shift_type, unsigned int shift_i)
3744 {
3745     assert(shift_i < (sf ? 64 : 32));
3746
3747     if (shift_i == 0) {
3748         tcg_gen_mov_i64(dst, src);
3749     } else {
3750         TCGv_i64 shift_const;
3751
3752         shift_const = tcg_const_i64(shift_i);
3753         shift_reg(dst, src, sf, shift_type, shift_const);
3754         tcg_temp_free_i64(shift_const);
3755     }
3756 }
3757
3758 /* Logical (shifted register)
3759  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3760  * +----+-----+-----------+-------+---+------+--------+------+------+
3761  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3762  * +----+-----+-----------+-------+---+------+--------+------+------+
3763  */
3764 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3765 {
3766     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3767     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3768
3769     sf = extract32(insn, 31, 1);
3770     opc = extract32(insn, 29, 2);
3771     shift_type = extract32(insn, 22, 2);
3772     invert = extract32(insn, 21, 1);
3773     rm = extract32(insn, 16, 5);
3774     shift_amount = extract32(insn, 10, 6);
3775     rn = extract32(insn, 5, 5);
3776     rd = extract32(insn, 0, 5);
3777
3778     if (!sf && (shift_amount & (1 << 5))) {
3779         unallocated_encoding(s);
3780         return;
3781     }
3782
3783     tcg_rd = cpu_reg(s, rd);
3784
3785     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3786         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3787          * register-register MOV and MVN, so it is worth special casing.
3788          */
3789         tcg_rm = cpu_reg(s, rm);
3790         if (invert) {
3791             tcg_gen_not_i64(tcg_rd, tcg_rm);
3792             if (!sf) {
3793                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3794             }
3795         } else {
3796             if (sf) {
3797                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3798             } else {
3799                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3800             }
3801         }
3802         return;
3803     }
3804
3805     tcg_rm = read_cpu_reg(s, rm, sf);
3806
3807     if (shift_amount) {
3808         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3809     }
3810
3811     tcg_rn = cpu_reg(s, rn);
3812
3813     switch (opc | (invert << 2)) {
3814     case 0: /* AND */
3815     case 3: /* ANDS */
3816         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3817         break;
3818     case 1: /* ORR */
3819         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3820         break;
3821     case 2: /* EOR */
3822         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3823         break;
3824     case 4: /* BIC */
3825     case 7: /* BICS */
3826         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3827         break;
3828     case 5: /* ORN */
3829         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3830         break;
3831     case 6: /* EON */
3832         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3833         break;
3834     default:
3835         assert(FALSE);
3836         break;
3837     }
3838
3839     if (!sf) {
3840         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3841     }
3842
3843     if (opc == 3) {
3844         gen_logic_CC(sf, tcg_rd);
3845     }
3846 }
3847
3848 /*
3849  * Add/subtract (extended register)
3850  *
3851  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3852  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3853  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3854  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3855  *
3856  *  sf: 0 -> 32bit, 1 -> 64bit
3857  *  op: 0 -> add  , 1 -> sub
3858  *   S: 1 -> set flags
3859  * opt: 00
3860  * option: extension type (see DecodeRegExtend)
3861  * imm3: optional shift to Rm
3862  *
3863  * Rd = Rn + LSL(extend(Rm), amount)
3864  */
3865 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3866 {
3867     int rd = extract32(insn, 0, 5);
3868     int rn = extract32(insn, 5, 5);
3869     int imm3 = extract32(insn, 10, 3);
3870     int option = extract32(insn, 13, 3);
3871     int rm = extract32(insn, 16, 5);
3872     bool setflags = extract32(insn, 29, 1);
3873     bool sub_op = extract32(insn, 30, 1);
3874     bool sf = extract32(insn, 31, 1);
3875
3876     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3877     TCGv_i64 tcg_rd;
3878     TCGv_i64 tcg_result;
3879
3880     if (imm3 > 4) {
3881         unallocated_encoding(s);
3882         return;
3883     }
3884
3885     /* non-flag setting ops may use SP */
3886     if (!setflags) {
3887         tcg_rd = cpu_reg_sp(s, rd);
3888     } else {
3889         tcg_rd = cpu_reg(s, rd);
3890     }
3891     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3892
3893     tcg_rm = read_cpu_reg(s, rm, sf);
3894     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3895
3896     tcg_result = tcg_temp_new_i64();
3897
3898     if (!setflags) {
3899         if (sub_op) {
3900             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3901         } else {
3902             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3903         }
3904     } else {
3905         if (sub_op) {
3906             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3907         } else {
3908             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3909         }
3910     }
3911
3912     if (sf) {
3913         tcg_gen_mov_i64(tcg_rd, tcg_result);
3914     } else {
3915         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3916     }
3917
3918     tcg_temp_free_i64(tcg_result);
3919 }
3920
3921 /*
3922  * Add/subtract (shifted register)
3923  *
3924  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3925  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3926  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3927  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3928  *
3929  *    sf: 0 -> 32bit, 1 -> 64bit
3930  *    op: 0 -> add  , 1 -> sub
3931  *     S: 1 -> set flags
3932  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3933  *  imm6: Shift amount to apply to Rm before the add/sub
3934  */
3935 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3936 {
3937     int rd = extract32(insn, 0, 5);
3938     int rn = extract32(insn, 5, 5);
3939     int imm6 = extract32(insn, 10, 6);
3940     int rm = extract32(insn, 16, 5);
3941     int shift_type = extract32(insn, 22, 2);
3942     bool setflags = extract32(insn, 29, 1);
3943     bool sub_op = extract32(insn, 30, 1);
3944     bool sf = extract32(insn, 31, 1);
3945
3946     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3947     TCGv_i64 tcg_rn, tcg_rm;
3948     TCGv_i64 tcg_result;
3949
3950     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3951         unallocated_encoding(s);
3952         return;
3953     }
3954
3955     tcg_rn = read_cpu_reg(s, rn, sf);
3956     tcg_rm = read_cpu_reg(s, rm, sf);
3957
3958     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3959
3960     tcg_result = tcg_temp_new_i64();
3961
3962     if (!setflags) {
3963         if (sub_op) {
3964             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3965         } else {
3966             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3967         }
3968     } else {
3969         if (sub_op) {
3970             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3971         } else {
3972             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3973         }
3974     }
3975
3976     if (sf) {
3977         tcg_gen_mov_i64(tcg_rd, tcg_result);
3978     } else {
3979         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3980     }
3981
3982     tcg_temp_free_i64(tcg_result);
3983 }
3984
3985 /* Data-processing (3 source)
3986  *
3987  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3988  *  +--+------+-----------+------+------+----+------+------+------+
3989  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3990  *  +--+------+-----------+------+------+----+------+------+------+
3991  */
3992 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3993 {
3994     int rd = extract32(insn, 0, 5);
3995     int rn = extract32(insn, 5, 5);
3996     int ra = extract32(insn, 10, 5);
3997     int rm = extract32(insn, 16, 5);
3998     int op_id = (extract32(insn, 29, 3) << 4) |
3999         (extract32(insn, 21, 3) << 1) |
4000         extract32(insn, 15, 1);
4001     bool sf = extract32(insn, 31, 1);
4002     bool is_sub = extract32(op_id, 0, 1);
4003     bool is_high = extract32(op_id, 2, 1);
4004     bool is_signed = false;
4005     TCGv_i64 tcg_op1;
4006     TCGv_i64 tcg_op2;
4007     TCGv_i64 tcg_tmp;
4008
4009     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4010     switch (op_id) {
4011     case 0x42: /* SMADDL */
4012     case 0x43: /* SMSUBL */
4013     case 0x44: /* SMULH */
4014         is_signed = true;
4015         break;
4016     case 0x0: /* MADD (32bit) */
4017     case 0x1: /* MSUB (32bit) */
4018     case 0x40: /* MADD (64bit) */
4019     case 0x41: /* MSUB (64bit) */
4020     case 0x4a: /* UMADDL */
4021     case 0x4b: /* UMSUBL */
4022     case 0x4c: /* UMULH */
4023         break;
4024     default:
4025         unallocated_encoding(s);
4026         return;
4027     }
4028
4029     if (is_high) {
4030         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4031         TCGv_i64 tcg_rd = cpu_reg(s, rd);
4032         TCGv_i64 tcg_rn = cpu_reg(s, rn);
4033         TCGv_i64 tcg_rm = cpu_reg(s, rm);
4034
4035         if (is_signed) {
4036             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4037         } else {
4038             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4039         }
4040
4041         tcg_temp_free_i64(low_bits);
4042         return;
4043     }
4044
4045     tcg_op1 = tcg_temp_new_i64();
4046     tcg_op2 = tcg_temp_new_i64();
4047     tcg_tmp = tcg_temp_new_i64();
4048
4049     if (op_id < 0x42) {
4050         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4051         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4052     } else {
4053         if (is_signed) {
4054             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4055             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4056         } else {
4057             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4058             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4059         }
4060     }
4061
4062     if (ra == 31 && !is_sub) {
4063         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4064         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4065     } else {
4066         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4067         if (is_sub) {
4068             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4069         } else {
4070             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4071         }
4072     }
4073
4074     if (!sf) {
4075         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4076     }
4077
4078     tcg_temp_free_i64(tcg_op1);
4079     tcg_temp_free_i64(tcg_op2);
4080     tcg_temp_free_i64(tcg_tmp);
4081 }
4082
4083 /* Add/subtract (with carry)
4084  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
4085  * +--+--+--+------------------------+------+---------+------+-----+
4086  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
4087  * +--+--+--+------------------------+------+---------+------+-----+
4088  *                                            [000000]
4089  */
4090
4091 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4092 {
4093     unsigned int sf, op, setflags, rm, rn, rd;
4094     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4095
4096     if (extract32(insn, 10, 6) != 0) {
4097         unallocated_encoding(s);
4098         return;
4099     }
4100
4101     sf = extract32(insn, 31, 1);
4102     op = extract32(insn, 30, 1);
4103     setflags = extract32(insn, 29, 1);
4104     rm = extract32(insn, 16, 5);
4105     rn = extract32(insn, 5, 5);
4106     rd = extract32(insn, 0, 5);
4107
4108     tcg_rd = cpu_reg(s, rd);
4109     tcg_rn = cpu_reg(s, rn);
4110
4111     if (op) {
4112         tcg_y = new_tmp_a64(s);
4113         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4114     } else {
4115         tcg_y = cpu_reg(s, rm);
4116     }
4117
4118     if (setflags) {
4119         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4120     } else {
4121         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4122     }
4123 }
4124
4125 /* Conditional compare (immediate / register)
4126  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
4127  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4128  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
4129  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4130  *        [1]                             y                [0]       [0]
4131  */
4132 static void disas_cc(DisasContext *s, uint32_t insn)
4133 {
4134     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
4135     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
4136     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
4137     DisasCompare c;
4138
4139     if (!extract32(insn, 29, 1)) {
4140         unallocated_encoding(s);
4141         return;
4142     }
4143     if (insn & (1 << 10 | 1 << 4)) {
4144         unallocated_encoding(s);
4145         return;
4146     }
4147     sf = extract32(insn, 31, 1);
4148     op = extract32(insn, 30, 1);
4149     is_imm = extract32(insn, 11, 1);
4150     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
4151     cond = extract32(insn, 12, 4);
4152     rn = extract32(insn, 5, 5);
4153     nzcv = extract32(insn, 0, 4);
4154
4155     /* Set T0 = !COND.  */
4156     tcg_t0 = tcg_temp_new_i32();
4157     arm_test_cc(&c, cond);
4158     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
4159     arm_free_cc(&c);
4160
4161     /* Load the arguments for the new comparison.  */
4162     if (is_imm) {
4163         tcg_y = new_tmp_a64(s);
4164         tcg_gen_movi_i64(tcg_y, y);
4165     } else {
4166         tcg_y = cpu_reg(s, y);
4167     }
4168     tcg_rn = cpu_reg(s, rn);
4169
4170     /* Set the flags for the new comparison.  */
4171     tcg_tmp = tcg_temp_new_i64();
4172     if (op) {
4173         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4174     } else {
4175         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4176     }
4177     tcg_temp_free_i64(tcg_tmp);
4178
4179     /* If COND was false, force the flags to #nzcv.  Compute two masks
4180      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
4181      * For tcg hosts that support ANDC, we can make do with just T1.
4182      * In either case, allow the tcg optimizer to delete any unused mask.
4183      */
4184     tcg_t1 = tcg_temp_new_i32();
4185     tcg_t2 = tcg_temp_new_i32();
4186     tcg_gen_neg_i32(tcg_t1, tcg_t0);
4187     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
4188
4189     if (nzcv & 8) { /* N */
4190         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
4191     } else {
4192         if (TCG_TARGET_HAS_andc_i32) {
4193             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
4194         } else {
4195             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
4196         }
4197     }
4198     if (nzcv & 4) { /* Z */
4199         if (TCG_TARGET_HAS_andc_i32) {
4200             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
4201         } else {
4202             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
4203         }
4204     } else {
4205         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
4206     }
4207     if (nzcv & 2) { /* C */
4208         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
4209     } else {
4210         if (TCG_TARGET_HAS_andc_i32) {
4211             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
4212         } else {
4213             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
4214         }
4215     }
4216     if (nzcv & 1) { /* V */
4217         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
4218     } else {
4219         if (TCG_TARGET_HAS_andc_i32) {
4220             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
4221         } else {
4222             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
4223         }
4224     }
4225     tcg_temp_free_i32(tcg_t0);
4226     tcg_temp_free_i32(tcg_t1);
4227     tcg_temp_free_i32(tcg_t2);
4228 }
4229
4230 /* Conditional select
4231  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
4232  * +----+----+---+-----------------+------+------+-----+------+------+
4233  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
4234  * +----+----+---+-----------------+------+------+-----+------+------+
4235  */
4236 static void disas_cond_select(DisasContext *s, uint32_t insn)
4237 {
4238     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
4239     TCGv_i64 tcg_rd, zero;
4240     DisasCompare64 c;
4241
4242     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
4243         /* S == 1 or op2<1> == 1 */
4244         unallocated_encoding(s);
4245         return;
4246     }
4247     sf = extract32(insn, 31, 1);
4248     else_inv = extract32(insn, 30, 1);
4249     rm = extract32(insn, 16, 5);
4250     cond = extract32(insn, 12, 4);
4251     else_inc = extract32(insn, 10, 1);
4252     rn = extract32(insn, 5, 5);
4253     rd = extract32(insn, 0, 5);
4254
4255     tcg_rd = cpu_reg(s, rd);
4256
4257     a64_test_cc(&c, cond);
4258     zero = tcg_const_i64(0);
4259
4260     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
4261         /* CSET & CSETM.  */
4262         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
4263         if (else_inv) {
4264             tcg_gen_neg_i64(tcg_rd, tcg_rd);
4265         }
4266     } else {
4267         TCGv_i64 t_true = cpu_reg(s, rn);
4268         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
4269         if (else_inv && else_inc) {
4270             tcg_gen_neg_i64(t_false, t_false);
4271         } else if (else_inv) {
4272             tcg_gen_not_i64(t_false, t_false);
4273         } else if (else_inc) {
4274             tcg_gen_addi_i64(t_false, t_false, 1);
4275         }
4276         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
4277     }
4278
4279     tcg_temp_free_i64(zero);
4280     a64_free_cc(&c);
4281
4282     if (!sf) {
4283         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4284     }
4285 }
4286
4287 static void handle_clz(DisasContext *s, unsigned int sf,
4288                        unsigned int rn, unsigned int rd)
4289 {
4290     TCGv_i64 tcg_rd, tcg_rn;
4291     tcg_rd = cpu_reg(s, rd);
4292     tcg_rn = cpu_reg(s, rn);
4293
4294     if (sf) {
4295         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
4296     } else {
4297         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4298         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4299         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
4300         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4301         tcg_temp_free_i32(tcg_tmp32);
4302     }
4303 }
4304
4305 static void handle_cls(DisasContext *s, unsigned int sf,
4306                        unsigned int rn, unsigned int rd)
4307 {
4308     TCGv_i64 tcg_rd, tcg_rn;
4309     tcg_rd = cpu_reg(s, rd);
4310     tcg_rn = cpu_reg(s, rn);
4311
4312     if (sf) {
4313         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
4314     } else {
4315         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4316         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4317         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
4318         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4319         tcg_temp_free_i32(tcg_tmp32);
4320     }
4321 }
4322
4323 static void handle_rbit(DisasContext *s, unsigned int sf,
4324                         unsigned int rn, unsigned int rd)
4325 {
4326     TCGv_i64 tcg_rd, tcg_rn;
4327     tcg_rd = cpu_reg(s, rd);
4328     tcg_rn = cpu_reg(s, rn);
4329
4330     if (sf) {
4331         gen_helper_rbit64(tcg_rd, tcg_rn);
4332     } else {
4333         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4334         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4335         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4336         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4337         tcg_temp_free_i32(tcg_tmp32);
4338     }
4339 }
4340
4341 /* REV with sf==1, opcode==3 ("REV64") */
4342 static void handle_rev64(DisasContext *s, unsigned int sf,
4343                          unsigned int rn, unsigned int rd)
4344 {
4345     if (!sf) {
4346         unallocated_encoding(s);
4347         return;
4348     }
4349     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4350 }
4351
4352 /* REV with sf==0, opcode==2
4353  * REV32 (sf==1, opcode==2)
4354  */
4355 static void handle_rev32(DisasContext *s, unsigned int sf,
4356                          unsigned int rn, unsigned int rd)
4357 {
4358     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4359
4360     if (sf) {
4361         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4362         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4363
4364         /* bswap32_i64 requires zero high word */
4365         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4366         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4367         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4368         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4369         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4370
4371         tcg_temp_free_i64(tcg_tmp);
4372     } else {
4373         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4374         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4375     }
4376 }
4377
4378 /* REV16 (opcode==1) */
4379 static void handle_rev16(DisasContext *s, unsigned int sf,
4380                          unsigned int rn, unsigned int rd)
4381 {
4382     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4383     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4384     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4385     TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
4386
4387     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
4388     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
4389     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
4390     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
4391     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
4392
4393     tcg_temp_free_i64(mask);
4394     tcg_temp_free_i64(tcg_tmp);
4395 }
4396
4397 /* Data-processing (1 source)
4398  *   31  30  29  28             21 20     16 15    10 9    5 4    0
4399  * +----+---+---+-----------------+---------+--------+------+------+
4400  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4401  * +----+---+---+-----------------+---------+--------+------+------+
4402  */
4403 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4404 {
4405     unsigned int sf, opcode, rn, rd;
4406
4407     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
4408         unallocated_encoding(s);
4409         return;
4410     }
4411
4412     sf = extract32(insn, 31, 1);
4413     opcode = extract32(insn, 10, 6);
4414     rn = extract32(insn, 5, 5);
4415     rd = extract32(insn, 0, 5);
4416
4417     switch (opcode) {
4418     case 0: /* RBIT */
4419         handle_rbit(s, sf, rn, rd);
4420         break;
4421     case 1: /* REV16 */
4422         handle_rev16(s, sf, rn, rd);
4423         break;
4424     case 2: /* REV32 */
4425         handle_rev32(s, sf, rn, rd);
4426         break;
4427     case 3: /* REV64 */
4428         handle_rev64(s, sf, rn, rd);
4429         break;
4430     case 4: /* CLZ */
4431         handle_clz(s, sf, rn, rd);
4432         break;
4433     case 5: /* CLS */
4434         handle_cls(s, sf, rn, rd);
4435         break;
4436     }
4437 }
4438
4439 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
4440                        unsigned int rm, unsigned int rn, unsigned int rd)
4441 {
4442     TCGv_i64 tcg_n, tcg_m, tcg_rd;
4443     tcg_rd = cpu_reg(s, rd);
4444
4445     if (!sf && is_signed) {
4446         tcg_n = new_tmp_a64(s);
4447         tcg_m = new_tmp_a64(s);
4448         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
4449         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
4450     } else {
4451         tcg_n = read_cpu_reg(s, rn, sf);
4452         tcg_m = read_cpu_reg(s, rm, sf);
4453     }
4454
4455     if (is_signed) {
4456         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
4457     } else {
4458         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
4459     }
4460
4461     if (!sf) { /* zero extend final result */
4462         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4463     }
4464 }
4465
4466 /* LSLV, LSRV, ASRV, RORV */
4467 static void handle_shift_reg(DisasContext *s,
4468                              enum a64_shift_type shift_type, unsigned int sf,
4469                              unsigned int rm, unsigned int rn, unsigned int rd)
4470 {
4471     TCGv_i64 tcg_shift = tcg_temp_new_i64();
4472     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4473     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4474
4475     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
4476     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
4477     tcg_temp_free_i64(tcg_shift);
4478 }
4479
4480 /* CRC32[BHWX], CRC32C[BHWX] */
4481 static void handle_crc32(DisasContext *s,
4482                          unsigned int sf, unsigned int sz, bool crc32c,
4483                          unsigned int rm, unsigned int rn, unsigned int rd)
4484 {
4485     TCGv_i64 tcg_acc, tcg_val;
4486     TCGv_i32 tcg_bytes;
4487
4488     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
4489         || (sf == 1 && sz != 3)
4490         || (sf == 0 && sz == 3)) {
4491         unallocated_encoding(s);
4492         return;
4493     }
4494
4495     if (sz == 3) {
4496         tcg_val = cpu_reg(s, rm);
4497     } else {
4498         uint64_t mask;
4499         switch (sz) {
4500         case 0:
4501             mask = 0xFF;
4502             break;
4503         case 1:
4504             mask = 0xFFFF;
4505             break;
4506         case 2:
4507             mask = 0xFFFFFFFF;
4508             break;
4509         default:
4510             g_assert_not_reached();
4511         }
4512         tcg_val = new_tmp_a64(s);
4513         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4514     }
4515
4516     tcg_acc = cpu_reg(s, rn);
4517     tcg_bytes = tcg_const_i32(1 << sz);
4518
4519     if (crc32c) {
4520         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4521     } else {
4522         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4523     }
4524
4525     tcg_temp_free_i32(tcg_bytes);
4526 }
4527
4528 /* Data-processing (2 source)
4529  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4530  * +----+---+---+-----------------+------+--------+------+------+
4531  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4532  * +----+---+---+-----------------+------+--------+------+------+
4533  */
4534 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4535 {
4536     unsigned int sf, rm, opcode, rn, rd;
4537     sf = extract32(insn, 31, 1);
4538     rm = extract32(insn, 16, 5);
4539     opcode = extract32(insn, 10, 6);
4540     rn = extract32(insn, 5, 5);
4541     rd = extract32(insn, 0, 5);
4542
4543     if (extract32(insn, 29, 1)) {
4544         unallocated_encoding(s);
4545         return;
4546     }
4547
4548     switch (opcode) {
4549     case 2: /* UDIV */
4550         handle_div(s, false, sf, rm, rn, rd);
4551         break;
4552     case 3: /* SDIV */
4553         handle_div(s, true, sf, rm, rn, rd);
4554         break;
4555     case 8: /* LSLV */
4556         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4557         break;
4558     case 9: /* LSRV */
4559         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4560         break;
4561     case 10: /* ASRV */
4562         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4563         break;
4564     case 11: /* RORV */
4565         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4566         break;
4567     case 16:
4568     case 17:
4569     case 18:
4570     case 19:
4571     case 20:
4572     case 21:
4573     case 22:
4574     case 23: /* CRC32 */
4575     {
4576         int sz = extract32(opcode, 0, 2);
4577         bool crc32c = extract32(opcode, 2, 1);
4578         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4579         break;
4580     }
4581     default:
4582         unallocated_encoding(s);
4583         break;
4584     }
4585 }
4586
4587 /* Data processing - register */
4588 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4589 {
4590     switch (extract32(insn, 24, 5)) {
4591     case 0x0a: /* Logical (shifted register) */
4592         disas_logic_reg(s, insn);
4593         break;
4594     case 0x0b: /* Add/subtract */
4595         if (insn & (1 << 21)) { /* (extended register) */
4596             disas_add_sub_ext_reg(s, insn);
4597         } else {
4598             disas_add_sub_reg(s, insn);
4599         }
4600         break;
4601     case 0x1b: /* Data-processing (3 source) */
4602         disas_data_proc_3src(s, insn);
4603         break;
4604     case 0x1a:
4605         switch (extract32(insn, 21, 3)) {
4606         case 0x0: /* Add/subtract (with carry) */
4607             disas_adc_sbc(s, insn);
4608             break;
4609         case 0x2: /* Conditional compare */
4610             disas_cc(s, insn); /* both imm and reg forms */
4611             break;
4612         case 0x4: /* Conditional select */
4613             disas_cond_select(s, insn);
4614             break;
4615         case 0x6: /* Data-processing */
4616             if (insn & (1 << 30)) { /* (1 source) */
4617                 disas_data_proc_1src(s, insn);
4618             } else {            /* (2 source) */
4619                 disas_data_proc_2src(s, insn);
4620             }
4621             break;
4622         default:
4623             unallocated_encoding(s);
4624             break;
4625         }
4626         break;
4627     default:
4628         unallocated_encoding(s);
4629         break;
4630     }
4631 }
4632
4633 static void handle_fp_compare(DisasContext *s, int size,
4634                               unsigned int rn, unsigned int rm,
4635                               bool cmp_with_zero, bool signal_all_nans)
4636 {
4637     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4638     TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
4639
4640     if (size == MO_64) {
4641         TCGv_i64 tcg_vn, tcg_vm;
4642
4643         tcg_vn = read_fp_dreg(s, rn);
4644         if (cmp_with_zero) {
4645             tcg_vm = tcg_const_i64(0);
4646         } else {
4647             tcg_vm = read_fp_dreg(s, rm);
4648         }
4649         if (signal_all_nans) {
4650             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4651         } else {
4652             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4653         }
4654         tcg_temp_free_i64(tcg_vn);
4655         tcg_temp_free_i64(tcg_vm);
4656     } else {
4657         TCGv_i32 tcg_vn = tcg_temp_new_i32();
4658         TCGv_i32 tcg_vm = tcg_temp_new_i32();
4659
4660         read_vec_element_i32(s, tcg_vn, rn, 0, size);
4661         if (cmp_with_zero) {
4662             tcg_gen_movi_i32(tcg_vm, 0);
4663         } else {
4664             read_vec_element_i32(s, tcg_vm, rm, 0, size);
4665         }
4666
4667         switch (size) {
4668         case MO_32:
4669             if (signal_all_nans) {
4670                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4671             } else {
4672                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4673             }
4674             break;
4675         case MO_16:
4676             if (signal_all_nans) {
4677                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4678             } else {
4679                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4680             }
4681             break;
4682         default:
4683             g_assert_not_reached();
4684         }
4685
4686         tcg_temp_free_i32(tcg_vn);
4687         tcg_temp_free_i32(tcg_vm);
4688     }
4689
4690     tcg_temp_free_ptr(fpst);
4691
4692     gen_set_nzcv(tcg_flags);
4693
4694     tcg_temp_free_i64(tcg_flags);
4695 }
4696
4697 /* Floating point compare
4698  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4699  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4700  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4701  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4702  */
4703 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4704 {
4705     unsigned int mos, type, rm, op, rn, opc, op2r;
4706     int size;
4707
4708     mos = extract32(insn, 29, 3);
4709     type = extract32(insn, 22, 2);
4710     rm = extract32(insn, 16, 5);
4711     op = extract32(insn, 14, 2);
4712     rn = extract32(insn, 5, 5);
4713     opc = extract32(insn, 3, 2);
4714     op2r = extract32(insn, 0, 3);
4715
4716     if (mos || op || op2r) {
4717         unallocated_encoding(s);
4718         return;
4719     }
4720
4721     switch (type) {
4722     case 0:
4723         size = MO_32;
4724         break;
4725     case 1:
4726         size = MO_64;
4727         break;
4728     case 3:
4729         size = MO_16;
4730         if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
4731             break;
4732         }
4733         /* fallthru */
4734     default:
4735         unallocated_encoding(s);
4736         return;
4737     }
4738
4739     if (!fp_access_check(s)) {
4740         return;
4741     }
4742
4743     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
4744 }
4745
4746 /* Floating point conditional compare
4747  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4748  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4749  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4750  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4751  */
4752 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4753 {
4754     unsigned int mos, type, rm, cond, rn, op, nzcv;
4755     TCGv_i64 tcg_flags;
4756     TCGLabel *label_continue = NULL;
4757     int size;
4758
4759     mos = extract32(insn, 29, 3);
4760     type = extract32(insn, 22, 2);
4761     rm = extract32(insn, 16, 5);
4762     cond = extract32(insn, 12, 4);
4763     rn = extract32(insn, 5, 5);
4764     op = extract32(insn, 4, 1);
4765     nzcv = extract32(insn, 0, 4);
4766
4767     if (mos) {
4768         unallocated_encoding(s);
4769         return;
4770     }
4771
4772     switch (type) {
4773     case 0:
4774         size = MO_32;
4775         break;
4776     case 1:
4777         size = MO_64;
4778         break;
4779     case 3:
4780         size = MO_16;
4781         if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
4782             break;
4783         }
4784         /* fallthru */
4785     default:
4786         unallocated_encoding(s);
4787         return;
4788     }
4789
4790     if (!fp_access_check(s)) {
4791         return;
4792     }
4793
4794     if (cond < 0x0e) { /* not always */
4795         TCGLabel *label_match = gen_new_label();
4796         label_continue = gen_new_label();
4797         arm_gen_test_cc(cond, label_match);
4798         /* nomatch: */
4799         tcg_flags = tcg_const_i64(nzcv << 28);
4800         gen_set_nzcv(tcg_flags);
4801         tcg_temp_free_i64(tcg_flags);
4802         tcg_gen_br(label_continue);
4803         gen_set_label(label_match);
4804     }
4805
4806     handle_fp_compare(s, size, rn, rm, false, op);
4807
4808     if (cond < 0x0e) {
4809         gen_set_label(label_continue);
4810     }
4811 }
4812
4813 /* Floating point conditional select
4814  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4815  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4816  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4817  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4818  */
4819 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4820 {
4821     unsigned int mos, type, rm, cond, rn, rd;
4822     TCGv_i64 t_true, t_false, t_zero;
4823     DisasCompare64 c;
4824     TCGMemOp sz;
4825
4826     mos = extract32(insn, 29, 3);
4827     type = extract32(insn, 22, 2);
4828     rm = extract32(insn, 16, 5);
4829     cond = extract32(insn, 12, 4);
4830     rn = extract32(insn, 5, 5);
4831     rd = extract32(insn, 0, 5);
4832
4833     if (mos) {
4834         unallocated_encoding(s);
4835         return;
4836     }
4837
4838     switch (type) {
4839     case 0:
4840         sz = MO_32;
4841         break;
4842     case 1:
4843         sz = MO_64;
4844         break;
4845     case 3:
4846         sz = MO_16;
4847         if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
4848             break;
4849         }
4850         /* fallthru */
4851     default:
4852         unallocated_encoding(s);
4853         return;
4854     }
4855
4856     if (!fp_access_check(s)) {
4857         return;
4858     }
4859
4860     /* Zero extend sreg & hreg inputs to 64 bits now.  */
4861     t_true = tcg_temp_new_i64();
4862     t_false = tcg_temp_new_i64();
4863     read_vec_element(s, t_true, rn, 0, sz);
4864     read_vec_element(s, t_false, rm, 0, sz);
4865
4866     a64_test_cc(&c, cond);
4867     t_zero = tcg_const_i64(0);
4868     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4869     tcg_temp_free_i64(t_zero);
4870     tcg_temp_free_i64(t_false);
4871     a64_free_cc(&c);
4872
4873     /* Note that sregs & hregs write back zeros to the high bits,
4874        and we've already done the zero-extension.  */
4875     write_fp_dreg(s, rd, t_true);
4876     tcg_temp_free_i64(t_true);
4877 }
4878
4879 /* Floating-point data-processing (1 source) - half precision */
4880 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
4881 {
4882     TCGv_ptr fpst = NULL;
4883     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
4884     TCGv_i32 tcg_res = tcg_temp_new_i32();
4885
4886     switch (opcode) {
4887     case 0x0: /* FMOV */
4888         tcg_gen_mov_i32(tcg_res, tcg_op);
4889         break;
4890     case 0x1: /* FABS */
4891         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
4892         break;
4893     case 0x2: /* FNEG */
4894         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
4895         break;
4896     case 0x3: /* FSQRT */
4897         fpst = get_fpstatus_ptr(true);
4898         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
4899         break;
4900     case 0x8: /* FRINTN */
4901     case 0x9: /* FRINTP */
4902     case 0xa: /* FRINTM */
4903     case 0xb: /* FRINTZ */
4904     case 0xc: /* FRINTA */
4905     {
4906         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4907         fpst = get_fpstatus_ptr(true);
4908
4909         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4910         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
4911
4912         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4913         tcg_temp_free_i32(tcg_rmode);
4914         break;
4915     }
4916     case 0xe: /* FRINTX */
4917         fpst = get_fpstatus_ptr(true);
4918         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
4919         break;
4920     case 0xf: /* FRINTI */
4921         fpst = get_fpstatus_ptr(true);
4922         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
4923         break;
4924     default:
4925         abort();
4926     }
4927
4928     write_fp_sreg(s, rd, tcg_res);
4929
4930     if (fpst) {
4931         tcg_temp_free_ptr(fpst);
4932     }
4933     tcg_temp_free_i32(tcg_op);
4934     tcg_temp_free_i32(tcg_res);
4935 }
4936
4937 /* Floating-point data-processing (1 source) - single precision */
4938 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4939 {
4940     TCGv_ptr fpst;
4941     TCGv_i32 tcg_op;
4942     TCGv_i32 tcg_res;
4943
4944     fpst = get_fpstatus_ptr(false);
4945     tcg_op = read_fp_sreg(s, rn);
4946     tcg_res = tcg_temp_new_i32();
4947
4948     switch (opcode) {
4949     case 0x0: /* FMOV */
4950         tcg_gen_mov_i32(tcg_res, tcg_op);
4951         break;
4952     case 0x1: /* FABS */
4953         gen_helper_vfp_abss(tcg_res, tcg_op);
4954         break;
4955     case 0x2: /* FNEG */
4956         gen_helper_vfp_negs(tcg_res, tcg_op);
4957         break;
4958     case 0x3: /* FSQRT */
4959         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4960         break;
4961     case 0x8: /* FRINTN */
4962     case 0x9: /* FRINTP */
4963     case 0xa: /* FRINTM */
4964     case 0xb: /* FRINTZ */
4965     case 0xc: /* FRINTA */
4966     {
4967         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4968
4969         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4970         gen_helper_rints(tcg_res, tcg_op, fpst);
4971
4972         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4973         tcg_temp_free_i32(tcg_rmode);
4974         break;
4975     }
4976     case 0xe: /* FRINTX */
4977         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4978         break;
4979     case 0xf: /* FRINTI */
4980         gen_helper_rints(tcg_res, tcg_op, fpst);
4981         break;
4982     default:
4983         abort();
4984     }
4985
4986     write_fp_sreg(s, rd, tcg_res);
4987
4988     tcg_temp_free_ptr(fpst);
4989     tcg_temp_free_i32(tcg_op);
4990     tcg_temp_free_i32(tcg_res);
4991 }
4992
4993 /* Floating-point data-processing (1 source) - double precision */
4994 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4995 {
4996     TCGv_ptr fpst;
4997     TCGv_i64 tcg_op;
4998     TCGv_i64 tcg_res;
4999
5000     switch (opcode) {
5001     case 0x0: /* FMOV */
5002         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
5003         return;
5004     }
5005
5006     fpst = get_fpstatus_ptr(false);
5007     tcg_op = read_fp_dreg(s, rn);
5008     tcg_res = tcg_temp_new_i64();
5009
5010     switch (opcode) {
5011     case 0x1: /* FABS */
5012         gen_helper_vfp_absd(tcg_res, tcg_op);
5013         break;
5014     case 0x2: /* FNEG */
5015         gen_helper_vfp_negd(tcg_res, tcg_op);
5016         break;
5017     case 0x3: /* FSQRT */
5018         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
5019         break;
5020     case 0x8: /* FRINTN */
5021     case 0x9: /* FRINTP */
5022     case 0xa: /* FRINTM */
5023     case 0xb: /* FRINTZ */
5024     case 0xc: /* FRINTA */
5025     {
5026         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
5027
5028         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5029         gen_helper_rintd(tcg_res, tcg_op, fpst);
5030
5031         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5032         tcg_temp_free_i32(tcg_rmode);
5033         break;
5034     }
5035     case 0xe: /* FRINTX */
5036         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
5037         break;
5038     case 0xf: /* FRINTI */
5039         gen_helper_rintd(tcg_res, tcg_op, fpst);
5040         break;
5041     default:
5042         abort();
5043     }
5044
5045     write_fp_dreg(s, rd, tcg_res);
5046
5047     tcg_temp_free_ptr(fpst);
5048     tcg_temp_free_i64(tcg_op);
5049     tcg_temp_free_i64(tcg_res);
5050 }
5051
5052 static void handle_fp_fcvt(DisasContext *s, int opcode,
5053                            int rd, int rn, int dtype, int ntype)
5054 {
5055     switch (ntype) {
5056     case 0x0:
5057     {
5058         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5059         if (dtype == 1) {
5060             /* Single to double */
5061             TCGv_i64 tcg_rd = tcg_temp_new_i64();
5062             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
5063             write_fp_dreg(s, rd, tcg_rd);
5064             tcg_temp_free_i64(tcg_rd);
5065         } else {
5066             /* Single to half */
5067             TCGv_i32 tcg_rd = tcg_temp_new_i32();
5068             TCGv_i32 ahp = get_ahp_flag();
5069             TCGv_ptr fpst = get_fpstatus_ptr(false);
5070
5071             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5072             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5073             write_fp_sreg(s, rd, tcg_rd);
5074             tcg_temp_free_i32(tcg_rd);
5075             tcg_temp_free_i32(ahp);
5076             tcg_temp_free_ptr(fpst);
5077         }
5078         tcg_temp_free_i32(tcg_rn);
5079         break;
5080     }
5081     case 0x1:
5082     {
5083         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
5084         TCGv_i32 tcg_rd = tcg_temp_new_i32();
5085         if (dtype == 0) {
5086             /* Double to single */
5087             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
5088         } else {
5089             TCGv_ptr fpst = get_fpstatus_ptr(false);
5090             TCGv_i32 ahp = get_ahp_flag();
5091             /* Double to half */
5092             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5093             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5094             tcg_temp_free_ptr(fpst);
5095             tcg_temp_free_i32(ahp);
5096         }
5097         write_fp_sreg(s, rd, tcg_rd);
5098         tcg_temp_free_i32(tcg_rd);
5099         tcg_temp_free_i64(tcg_rn);
5100         break;
5101     }
5102     case 0x3:
5103     {
5104         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5105         TCGv_ptr tcg_fpst = get_fpstatus_ptr(false);
5106         TCGv_i32 tcg_ahp = get_ahp_flag();
5107         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
5108         if (dtype == 0) {
5109             /* Half to single */
5110             TCGv_i32 tcg_rd = tcg_temp_new_i32();
5111             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5112             write_fp_sreg(s, rd, tcg_rd);
5113             tcg_temp_free_ptr(tcg_fpst);
5114             tcg_temp_free_i32(tcg_ahp);
5115             tcg_temp_free_i32(tcg_rd);
5116         } else {
5117             /* Half to double */
5118             TCGv_i64 tcg_rd = tcg_temp_new_i64();
5119             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5120             write_fp_dreg(s, rd, tcg_rd);
5121             tcg_temp_free_i64(tcg_rd);
5122         }
5123         tcg_temp_free_i32(tcg_rn);
5124         break;
5125     }
5126     default:
5127         abort();
5128     }
5129 }
5130
5131 /* Floating point data-processing (1 source)
5132  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
5133  * +---+---+---+-----------+------+---+--------+-----------+------+------+
5134  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
5135  * +---+---+---+-----------+------+---+--------+-----------+------+------+
5136  */
5137 static void disas_fp_1src(DisasContext *s, uint32_t insn)
5138 {
5139     int type = extract32(insn, 22, 2);
5140     int opcode = extract32(insn, 15, 6);
5141     int rn = extract32(insn, 5, 5);
5142     int rd = extract32(insn, 0, 5);
5143
5144     switch (opcode) {
5145     case 0x4: case 0x5: case 0x7:
5146     {
5147         /* FCVT between half, single and double precision */
5148         int dtype = extract32(opcode, 0, 2);
5149         if (type == 2 || dtype == type) {
5150             unallocated_encoding(s);
5151             return;
5152         }
5153         if (!fp_access_check(s)) {
5154             return;
5155         }
5156
5157         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
5158         break;
5159     }
5160     case 0x0 ... 0x3:
5161     case 0x8 ... 0xc:
5162     case 0xe ... 0xf:
5163         /* 32-to-32 and 64-to-64 ops */
5164         switch (type) {
5165         case 0:
5166             if (!fp_access_check(s)) {
5167                 return;
5168             }
5169
5170             handle_fp_1src_single(s, opcode, rd, rn);
5171             break;
5172         case 1:
5173             if (!fp_access_check(s)) {
5174                 return;
5175             }
5176
5177             handle_fp_1src_double(s, opcode, rd, rn);
5178             break;
5179         case 3:
5180             if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
5181                 unallocated_encoding(s);
5182                 return;
5183             }
5184
5185             if (!fp_access_check(s)) {
5186                 return;
5187             }
5188
5189             handle_fp_1src_half(s, opcode, rd, rn);
5190             break;
5191         default:
5192             unallocated_encoding(s);
5193         }
5194         break;
5195     default:
5196         unallocated_encoding(s);
5197         break;
5198     }
5199 }
5200
5201 /* Floating-point data-processing (2 source) - single precision */
5202 static void handle_fp_2src_single(DisasContext *s, int opcode,
5203                                   int rd, int rn, int rm)
5204 {
5205     TCGv_i32 tcg_op1;
5206     TCGv_i32 tcg_op2;
5207     TCGv_i32 tcg_res;
5208     TCGv_ptr fpst;
5209
5210     tcg_res = tcg_temp_new_i32();
5211     fpst = get_fpstatus_ptr(false);
5212     tcg_op1 = read_fp_sreg(s, rn);
5213     tcg_op2 = read_fp_sreg(s, rm);
5214
5215     switch (opcode) {
5216     case 0x0: /* FMUL */
5217         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
5218         break;
5219     case 0x1: /* FDIV */
5220         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
5221         break;
5222     case 0x2: /* FADD */
5223         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
5224         break;
5225     case 0x3: /* FSUB */
5226         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
5227         break;
5228     case 0x4: /* FMAX */
5229         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
5230         break;
5231     case 0x5: /* FMIN */
5232         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
5233         break;
5234     case 0x6: /* FMAXNM */
5235         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
5236         break;
5237     case 0x7: /* FMINNM */
5238         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
5239         break;
5240     case 0x8: /* FNMUL */
5241         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
5242         gen_helper_vfp_negs(tcg_res, tcg_res);
5243         break;
5244     }
5245
5246     write_fp_sreg(s, rd, tcg_res);
5247
5248     tcg_temp_free_ptr(fpst);
5249     tcg_temp_free_i32(tcg_op1);
5250     tcg_temp_free_i32(tcg_op2);
5251     tcg_temp_free_i32(tcg_res);
5252 }
5253
5254 /* Floating-point data-processing (2 source) - double precision */
5255 static void handle_fp_2src_double(DisasContext *s, int opcode,
5256                                   int rd, int rn, int rm)
5257 {
5258     TCGv_i64 tcg_op1;
5259     TCGv_i64 tcg_op2;
5260     TCGv_i64 tcg_res;
5261     TCGv_ptr fpst;
5262
5263     tcg_res = tcg_temp_new_i64();
5264     fpst = get_fpstatus_ptr(false);
5265     tcg_op1 = read_fp_dreg(s, rn);
5266     tcg_op2 = read_fp_dreg(s, rm);
5267
5268     switch (opcode) {
5269     case 0x0: /* FMUL */
5270         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5271         break;
5272     case 0x1: /* FDIV */
5273         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
5274         break;
5275     case 0x2: /* FADD */
5276         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
5277         break;
5278     case 0x3: /* FSUB */
5279         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
5280         break;
5281     case 0x4: /* FMAX */
5282         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
5283         break;
5284     case 0x5: /* FMIN */
5285         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
5286         break;
5287     case 0x6: /* FMAXNM */
5288         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5289         break;
5290     case 0x7: /* FMINNM */
5291         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5292         break;
5293     case 0x8: /* FNMUL */
5294         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5295         gen_helper_vfp_negd(tcg_res, tcg_res);
5296         break;
5297     }
5298
5299     write_fp_dreg(s, rd, tcg_res);
5300
5301     tcg_temp_free_ptr(fpst);
5302     tcg_temp_free_i64(tcg_op1);
5303     tcg_temp_free_i64(tcg_op2);
5304     tcg_temp_free_i64(tcg_res);
5305 }
5306
5307 /* Floating-point data-processing (2 source) - half precision */
5308 static void handle_fp_2src_half(DisasContext *s, int opcode,
5309                                 int rd, int rn, int rm)
5310 {
5311     TCGv_i32 tcg_op1;
5312     TCGv_i32 tcg_op2;
5313     TCGv_i32 tcg_res;
5314     TCGv_ptr fpst;
5315
5316     tcg_res = tcg_temp_new_i32();
5317     fpst = get_fpstatus_ptr(true);
5318     tcg_op1 = read_fp_hreg(s, rn);
5319     tcg_op2 = read_fp_hreg(s, rm);
5320
5321     switch (opcode) {
5322     case 0x0: /* FMUL */
5323         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
5324         break;
5325     case 0x1: /* FDIV */
5326         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
5327         break;
5328     case 0x2: /* FADD */
5329         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
5330         break;
5331     case 0x3: /* FSUB */
5332         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
5333         break;
5334     case 0x4: /* FMAX */
5335         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
5336         break;
5337     case 0x5: /* FMIN */
5338         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
5339         break;
5340     case 0x6: /* FMAXNM */
5341         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
5342         break;
5343     case 0x7: /* FMINNM */
5344         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
5345         break;
5346     case 0x8: /* FNMUL */
5347         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
5348         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
5349         break;
5350     default:
5351         g_assert_not_reached();
5352     }
5353
5354     write_fp_sreg(s, rd, tcg_res);
5355
5356     tcg_temp_free_ptr(fpst);
5357     tcg_temp_free_i32(tcg_op1);
5358     tcg_temp_free_i32(tcg_op2);
5359     tcg_temp_free_i32(tcg_res);
5360 }
5361
5362 /* Floating point data-processing (2 source)
5363  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
5364  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
5365  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
5366  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
5367  */
5368 static void disas_fp_2src(DisasContext *s, uint32_t insn)
5369 {
5370     int type = extract32(insn, 22, 2);
5371     int rd = extract32(insn, 0, 5);
5372     int rn = extract32(insn, 5, 5);
5373     int rm = extract32(insn, 16, 5);
5374     int opcode = extract32(insn, 12, 4);
5375
5376     if (opcode > 8) {
5377         unallocated_encoding(s);
5378         return;
5379     }
5380
5381     switch (type) {
5382     case 0:
5383         if (!fp_access_check(s)) {
5384             return;
5385         }
5386         handle_fp_2src_single(s, opcode, rd, rn, rm);
5387         break;
5388     case 1:
5389         if (!fp_access_check(s)) {
5390             return;
5391         }
5392         handle_fp_2src_double(s, opcode, rd, rn, rm);
5393         break;
5394     case 3:
5395         if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
5396             unallocated_encoding(s);
5397             return;
5398         }
5399         if (!fp_access_check(s)) {
5400             return;
5401         }
5402         handle_fp_2src_half(s, opcode, rd, rn, rm);
5403         break;
5404     default:
5405         unallocated_encoding(s);
5406     }
5407 }
5408
5409 /* Floating-point data-processing (3 source) - single precision */
5410 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
5411                                   int rd, int rn, int rm, int ra)
5412 {
5413     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
5414     TCGv_i32 tcg_res = tcg_temp_new_i32();
5415     TCGv_ptr fpst = get_fpstatus_ptr(false);
5416
5417     tcg_op1 = read_fp_sreg(s, rn);
5418     tcg_op2 = read_fp_sreg(s, rm);
5419     tcg_op3 = read_fp_sreg(s, ra);
5420
5421     /* These are fused multiply-add, and must be done as one
5422      * floating point operation with no rounding between the
5423      * multiplication and addition steps.
5424      * NB that doing the negations here as separate steps is
5425      * correct : an input NaN should come out with its sign bit
5426      * flipped if it is a negated-input.
5427      */
5428     if (o1 == true) {
5429         gen_helper_vfp_negs(tcg_op3, tcg_op3);
5430     }
5431
5432     if (o0 != o1) {
5433         gen_helper_vfp_negs(tcg_op1, tcg_op1);
5434     }
5435
5436     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
5437
5438     write_fp_sreg(s, rd, tcg_res);
5439
5440     tcg_temp_free_ptr(fpst);
5441     tcg_temp_free_i32(tcg_op1);
5442     tcg_temp_free_i32(tcg_op2);
5443     tcg_temp_free_i32(tcg_op3);
5444     tcg_temp_free_i32(tcg_res);
5445 }
5446
5447 /* Floating-point data-processing (3 source) - double precision */
5448 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
5449                                   int rd, int rn, int rm, int ra)
5450 {
5451     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
5452     TCGv_i64 tcg_res = tcg_temp_new_i64();
5453     TCGv_ptr fpst = get_fpstatus_ptr(false);
5454
5455     tcg_op1 = read_fp_dreg(s, rn);
5456     tcg_op2 = read_fp_dreg(s, rm);
5457     tcg_op3 = read_fp_dreg(s, ra);
5458
5459     /* These are fused multiply-add, and must be done as one
5460      * floating point operation with no rounding between the
5461      * multiplication and addition steps.
5462      * NB that doing the negations here as separate steps is
5463      * correct : an input NaN should come out with its sign bit
5464      * flipped if it is a negated-input.
5465      */
5466     if (o1 == true) {
5467         gen_helper_vfp_negd(tcg_op3, tcg_op3);
5468     }
5469
5470     if (o0 != o1) {
5471         gen_helper_vfp_negd(tcg_op1, tcg_op1);
5472     }
5473
5474     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
5475
5476     write_fp_dreg(s, rd, tcg_res);
5477
5478     tcg_temp_free_ptr(fpst);
5479     tcg_temp_free_i64(tcg_op1);
5480     tcg_temp_free_i64(tcg_op2);
5481     tcg_temp_free_i64(tcg_op3);
5482     tcg_temp_free_i64(tcg_res);
5483 }
5484
5485 /* Floating-point data-processing (3 source) - half precision */
5486 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
5487                                 int rd, int rn, int rm, int ra)
5488 {
5489     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
5490     TCGv_i32 tcg_res = tcg_temp_new_i32();
5491     TCGv_ptr fpst = get_fpstatus_ptr(true);
5492
5493     tcg_op1 = read_fp_hreg(s, rn);
5494     tcg_op2 = read_fp_hreg(s, rm);
5495     tcg_op3 = read_fp_hreg(s, ra);
5496
5497     /* These are fused multiply-add, and must be done as one
5498      * floating point operation with no rounding between the
5499      * multiplication and addition steps.
5500      * NB that doing the negations here as separate steps is
5501      * correct : an input NaN should come out with its sign bit
5502      * flipped if it is a negated-input.
5503      */
5504     if (o1 == true) {
5505         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
5506     }
5507
5508     if (o0 != o1) {
5509         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
5510     }
5511
5512     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
5513
5514     write_fp_sreg(s, rd, tcg_res);
5515
5516     tcg_temp_free_ptr(fpst);
5517     tcg_temp_free_i32(tcg_op1);
5518     tcg_temp_free_i32(tcg_op2);
5519     tcg_temp_free_i32(tcg_op3);
5520     tcg_temp_free_i32(tcg_res);
5521 }
5522
5523 /* Floating point data-processing (3 source)
5524  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
5525  * +---+---+---+-----------+------+----+------+----+------+------+------+
5526  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
5527  * +---+---+---+-----------+------+----+------+----+------+------+------+
5528  */
5529 static void disas_fp_3src(DisasContext *s, uint32_t insn)
5530 {
5531     int type = extract32(insn, 22, 2);
5532     int rd = extract32(insn, 0, 5);
5533     int rn = extract32(insn, 5, 5);
5534     int ra = extract32(insn, 10, 5);
5535     int rm = extract32(insn, 16, 5);
5536     bool o0 = extract32(insn, 15, 1);
5537     bool o1 = extract32(insn, 21, 1);
5538
5539     switch (type) {
5540     case 0:
5541         if (!fp_access_check(s)) {
5542             return;
5543         }
5544         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
5545         break;
5546     case 1:
5547         if (!fp_access_check(s)) {
5548             return;
5549         }
5550         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
5551         break;
5552     case 3:
5553         if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
5554             unallocated_encoding(s);
5555             return;
5556         }
5557         if (!fp_access_check(s)) {
5558             return;
5559         }
5560         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
5561         break;
5562     default:
5563         unallocated_encoding(s);
5564     }
5565 }
5566
5567 /* The imm8 encodes the sign bit, enough bits to represent an exponent in
5568  * the range 01....1xx to 10....0xx, and the most significant 4 bits of
5569  * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
5570  */
5571 uint64_t vfp_expand_imm(int size, uint8_t imm8)
5572 {
5573     uint64_t imm;
5574
5575     switch (size) {
5576     case MO_64:
5577         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5578             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
5579             extract32(imm8, 0, 6);
5580         imm <<= 48;
5581         break;
5582     case MO_32:
5583         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5584             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
5585             (extract32(imm8, 0, 6) << 3);
5586         imm <<= 16;
5587         break;
5588     case MO_16:
5589         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5590             (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
5591             (extract32(imm8, 0, 6) << 6);
5592         break;
5593     default:
5594         g_assert_not_reached();
5595     }
5596     return imm;
5597 }
5598
5599 /* Floating point immediate
5600  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
5601  * +---+---+---+-----------+------+---+------------+-------+------+------+
5602  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
5603  * +---+---+---+-----------+------+---+------------+-------+------+------+
5604  */
5605 static void disas_fp_imm(DisasContext *s, uint32_t insn)
5606 {
5607     int rd = extract32(insn, 0, 5);
5608     int imm8 = extract32(insn, 13, 8);
5609     int type = extract32(insn, 22, 2);
5610     uint64_t imm;
5611     TCGv_i64 tcg_res;
5612     TCGMemOp sz;
5613
5614     switch (type) {
5615     case 0:
5616         sz = MO_32;
5617         break;
5618     case 1:
5619         sz = MO_64;
5620         break;
5621     case 3:
5622         sz = MO_16;
5623         if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
5624             break;
5625         }
5626         /* fallthru */
5627     default:
5628         unallocated_encoding(s);
5629         return;
5630     }
5631
5632     if (!fp_access_check(s)) {
5633         return;
5634     }
5635
5636     imm = vfp_expand_imm(sz, imm8);
5637
5638     tcg_res = tcg_const_i64(imm);
5639     write_fp_dreg(s, rd, tcg_res);
5640     tcg_temp_free_i64(tcg_res);
5641 }
5642
5643 /* Handle floating point <=> fixed point conversions. Note that we can
5644  * also deal with fp <=> integer conversions as a special case (scale == 64)
5645  * OPTME: consider handling that special case specially or at least skipping
5646  * the call to scalbn in the helpers for zero shifts.
5647  */
5648 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
5649                            bool itof, int rmode, int scale, int sf, int type)
5650 {
5651     bool is_signed = !(opcode & 1);
5652     TCGv_ptr tcg_fpstatus;
5653     TCGv_i32 tcg_shift, tcg_single;
5654     TCGv_i64 tcg_double;
5655
5656     tcg_fpstatus = get_fpstatus_ptr(type == 3);
5657
5658     tcg_shift = tcg_const_i32(64 - scale);
5659
5660     if (itof) {
5661         TCGv_i64 tcg_int = cpu_reg(s, rn);
5662         if (!sf) {
5663             TCGv_i64 tcg_extend = new_tmp_a64(s);
5664
5665             if (is_signed) {
5666                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
5667             } else {
5668                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
5669             }
5670
5671             tcg_int = tcg_extend;
5672         }
5673
5674         switch (type) {
5675         case 1: /* float64 */
5676             tcg_double = tcg_temp_new_i64();
5677             if (is_signed) {
5678                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
5679                                      tcg_shift, tcg_fpstatus);
5680             } else {
5681                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
5682                                      tcg_shift, tcg_fpstatus);
5683             }
5684             write_fp_dreg(s, rd, tcg_double);
5685             tcg_temp_free_i64(tcg_double);
5686             break;
5687
5688         case 0: /* float32 */
5689             tcg_single = tcg_temp_new_i32();
5690             if (is_signed) {
5691                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
5692                                      tcg_shift, tcg_fpstatus);
5693             } else {
5694                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
5695                                      tcg_shift, tcg_fpstatus);
5696             }
5697             write_fp_sreg(s, rd, tcg_single);
5698             tcg_temp_free_i32(tcg_single);
5699             break;
5700
5701         case 3: /* float16 */
5702             tcg_single = tcg_temp_new_i32();
5703             if (is_signed) {
5704                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
5705                                      tcg_shift, tcg_fpstatus);
5706             } else {
5707                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
5708                                      tcg_shift, tcg_fpstatus);
5709             }
5710             write_fp_sreg(s, rd, tcg_single);
5711             tcg_temp_free_i32(tcg_single);
5712             break;
5713
5714         default:
5715             g_assert_not_reached();
5716         }
5717     } else {
5718         TCGv_i64 tcg_int = cpu_reg(s, rd);
5719         TCGv_i32 tcg_rmode;
5720
5721         if (extract32(opcode, 2, 1)) {
5722             /* There are too many rounding modes to all fit into rmode,
5723              * so FCVTA[US] is a special case.
5724              */
5725             rmode = FPROUNDING_TIEAWAY;
5726         }
5727
5728         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5729
5730         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
5731
5732         switch (type) {
5733         case 1: /* float64 */
5734             tcg_double = read_fp_dreg(s, rn);
5735             if (is_signed) {
5736                 if (!sf) {
5737                     gen_helper_vfp_tosld(tcg_int, tcg_double,
5738                                          tcg_shift, tcg_fpstatus);
5739                 } else {
5740                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
5741                                          tcg_shift, tcg_fpstatus);
5742                 }
5743             } else {
5744                 if (!sf) {
5745                     gen_helper_vfp_tould(tcg_int, tcg_double,
5746                                          tcg_shift, tcg_fpstatus);
5747                 } else {
5748                     gen_helper_vfp_touqd(tcg_int, tcg_double,
5749                                          tcg_shift, tcg_fpstatus);
5750                 }
5751             }
5752             if (!sf) {
5753                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
5754             }
5755             tcg_temp_free_i64(tcg_double);
5756             break;
5757
5758         case 0: /* float32 */
5759             tcg_single = read_fp_sreg(s, rn);
5760             if (sf) {
5761                 if (is_signed) {
5762                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
5763                                          tcg_shift, tcg_fpstatus);
5764                 } else {
5765                     gen_helper_vfp_touqs(tcg_int, tcg_single,
5766                                          tcg_shift, tcg_fpstatus);
5767                 }
5768             } else {
5769                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
5770                 if (is_signed) {
5771                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
5772                                          tcg_shift, tcg_fpstatus);
5773                 } else {
5774                     gen_helper_vfp_touls(tcg_dest, tcg_single,
5775                                          tcg_shift, tcg_fpstatus);
5776                 }
5777                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
5778                 tcg_temp_free_i32(tcg_dest);
5779             }
5780             tcg_temp_free_i32(tcg_single);
5781             break;
5782
5783         case 3: /* float16 */
5784             tcg_single = read_fp_sreg(s, rn);
5785             if (sf) {
5786                 if (is_signed) {
5787                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
5788                                          tcg_shift, tcg_fpstatus);
5789                 } else {
5790                     gen_helper_vfp_touqh(tcg_int, tcg_single,
5791                                          tcg_shift, tcg_fpstatus);
5792                 }
5793             } else {
5794                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
5795                 if (is_signed) {
5796                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
5797                                          tcg_shift, tcg_fpstatus);
5798                 } else {
5799                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
5800                                          tcg_shift, tcg_fpstatus);
5801                 }
5802                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
5803                 tcg_temp_free_i32(tcg_dest);
5804             }
5805             tcg_temp_free_i32(tcg_single);
5806             break;
5807
5808         default:
5809             g_assert_not_reached();
5810         }
5811
5812         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
5813         tcg_temp_free_i32(tcg_rmode);
5814     }
5815
5816     tcg_temp_free_ptr(tcg_fpstatus);
5817     tcg_temp_free_i32(tcg_shift);
5818 }
5819
5820 /* Floating point <-> fixed point conversions
5821  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
5822  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5823  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
5824  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5825  */
5826 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5827 {
5828     int rd = extract32(insn, 0, 5);
5829     int rn = extract32(insn, 5, 5);
5830     int scale = extract32(insn, 10, 6);
5831     int opcode = extract32(insn, 16, 3);
5832     int rmode = extract32(insn, 19, 2);
5833     int type = extract32(insn, 22, 2);
5834     bool sbit = extract32(insn, 29, 1);
5835     bool sf = extract32(insn, 31, 1);
5836     bool itof;
5837
5838     if (sbit || (!sf && scale < 32)) {
5839         unallocated_encoding(s);
5840         return;
5841     }
5842
5843     switch (type) {
5844     case 0: /* float32 */
5845     case 1: /* float64 */
5846         break;
5847     case 3: /* float16 */
5848         if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
5849             break;
5850         }
5851         /* fallthru */
5852     default:
5853         unallocated_encoding(s);
5854         return;
5855     }
5856
5857     switch ((rmode << 3) | opcode) {
5858     case 0x2: /* SCVTF */
5859     case 0x3: /* UCVTF */
5860         itof = true;
5861         break;
5862     case 0x18: /* FCVTZS */
5863     case 0x19: /* FCVTZU */
5864         itof = false;
5865         break;
5866     default:
5867         unallocated_encoding(s);
5868         return;
5869     }
5870
5871     if (!fp_access_check(s)) {
5872         return;
5873     }
5874
5875     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5876 }
5877
5878 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5879 {
5880     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5881      * without conversion.
5882      */
5883
5884     if (itof) {
5885         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5886         TCGv_i64 tmp;
5887
5888         switch (type) {
5889         case 0:
5890             /* 32 bit */
5891             tmp = tcg_temp_new_i64();
5892             tcg_gen_ext32u_i64(tmp, tcg_rn);
5893             write_fp_dreg(s, rd, tmp);
5894             tcg_temp_free_i64(tmp);
5895             break;
5896         case 1:
5897             /* 64 bit */
5898             write_fp_dreg(s, rd, tcg_rn);
5899             break;
5900         case 2:
5901             /* 64 bit to top half. */
5902             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5903             clear_vec_high(s, true, rd);
5904             break;
5905         case 3:
5906             /* 16 bit */
5907             tmp = tcg_temp_new_i64();
5908             tcg_gen_ext16u_i64(tmp, tcg_rn);
5909             write_fp_dreg(s, rd, tmp);
5910             tcg_temp_free_i64(tmp);
5911             break;
5912         default:
5913             g_assert_not_reached();
5914         }
5915     } else {
5916         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5917
5918         switch (type) {
5919         case 0:
5920             /* 32 bit */
5921             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5922             break;
5923         case 1:
5924             /* 64 bit */
5925             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5926             break;
5927         case 2:
5928             /* 64 bits from top half */
5929             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5930             break;
5931         case 3:
5932             /* 16 bit */
5933             tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
5934             break;
5935         default:
5936             g_assert_not_reached();
5937         }
5938     }
5939 }
5940
5941 /* Floating point <-> integer conversions
5942  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5943  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5944  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5945  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5946  */
5947 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5948 {
5949     int rd = extract32(insn, 0, 5);
5950     int rn = extract32(insn, 5, 5);
5951     int opcode = extract32(insn, 16, 3);
5952     int rmode = extract32(insn, 19, 2);
5953     int type = extract32(insn, 22, 2);
5954     bool sbit = extract32(insn, 29, 1);
5955     bool sf = extract32(insn, 31, 1);
5956
5957     if (sbit) {
5958         unallocated_encoding(s);
5959         return;
5960     }
5961
5962     if (opcode > 5) {
5963         /* FMOV */
5964         bool itof = opcode & 1;
5965
5966         if (rmode >= 2) {
5967             unallocated_encoding(s);
5968             return;
5969         }
5970
5971         switch (sf << 3 | type << 1 | rmode) {
5972         case 0x0: /* 32 bit */
5973         case 0xa: /* 64 bit */
5974         case 0xd: /* 64 bit to top half of quad */
5975             break;
5976         case 0x6: /* 16-bit float, 32-bit int */
5977         case 0xe: /* 16-bit float, 64-bit int */
5978             if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
5979                 break;
5980             }
5981             /* fallthru */
5982         default:
5983             /* all other sf/type/rmode combinations are invalid */
5984             unallocated_encoding(s);
5985             return;
5986         }
5987
5988         if (!fp_access_check(s)) {
5989             return;
5990         }
5991         handle_fmov(s, rd, rn, type, itof);
5992     } else {
5993         /* actual FP conversions */
5994         bool itof = extract32(opcode, 1, 1);
5995
5996         if (rmode != 0 && opcode > 1) {
5997             unallocated_encoding(s);
5998             return;
5999         }
6000         switch (type) {
6001         case 0: /* float32 */
6002         case 1: /* float64 */
6003             break;
6004         case 3: /* float16 */
6005             if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
6006                 break;
6007             }
6008             /* fallthru */
6009         default:
6010             unallocated_encoding(s);
6011             return;
6012         }
6013
6014         if (!fp_access_check(s)) {
6015             return;
6016         }
6017         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
6018     }
6019 }
6020
6021 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
6022  *   31  30  29 28     25 24                          0
6023  * +---+---+---+---------+-----------------------------+
6024  * |   | 0 |   | 1 1 1 1 |                             |
6025  * +---+---+---+---------+-----------------------------+
6026  */
6027 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
6028 {
6029     if (extract32(insn, 24, 1)) {
6030         /* Floating point data-processing (3 source) */
6031         disas_fp_3src(s, insn);
6032     } else if (extract32(insn, 21, 1) == 0) {
6033         /* Floating point to fixed point conversions */
6034         disas_fp_fixed_conv(s, insn);
6035     } else {
6036         switch (extract32(insn, 10, 2)) {
6037         case 1:
6038             /* Floating point conditional compare */
6039             disas_fp_ccomp(s, insn);
6040             break;
6041         case 2:
6042             /* Floating point data-processing (2 source) */
6043             disas_fp_2src(s, insn);
6044             break;
6045         case 3:
6046             /* Floating point conditional select */
6047             disas_fp_csel(s, insn);
6048             break;
6049         case 0:
6050             switch (ctz32(extract32(insn, 12, 4))) {
6051             case 0: /* [15:12] == xxx1 */
6052                 /* Floating point immediate */
6053                 disas_fp_imm(s, insn);
6054                 break;
6055             case 1: /* [15:12] == xx10 */
6056                 /* Floating point compare */
6057                 disas_fp_compare(s, insn);
6058                 break;
6059             case 2: /* [15:12] == x100 */
6060                 /* Floating point data-processing (1 source) */
6061                 disas_fp_1src(s, insn);
6062                 break;
6063             case 3: /* [15:12] == 1000 */
6064                 unallocated_encoding(s);
6065                 break;
6066             default: /* [15:12] == 0000 */
6067                 /* Floating point <-> integer conversions */
6068                 disas_fp_int_conv(s, insn);
6069                 break;
6070             }
6071             break;
6072         }
6073     }
6074 }
6075
6076 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
6077                      int pos)
6078 {
6079     /* Extract 64 bits from the middle of two concatenated 64 bit
6080      * vector register slices left:right. The extracted bits start
6081      * at 'pos' bits into the right (least significant) side.
6082      * We return the result in tcg_right, and guarantee not to
6083      * trash tcg_left.
6084      */
6085     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
6086     assert(pos > 0 && pos < 64);
6087
6088     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
6089     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
6090     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
6091
6092     tcg_temp_free_i64(tcg_tmp);
6093 }
6094
6095 /* EXT
6096  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
6097  * +---+---+-------------+-----+---+------+---+------+---+------+------+
6098  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
6099  * +---+---+-------------+-----+---+------+---+------+---+------+------+
6100  */
6101 static void disas_simd_ext(DisasContext *s, uint32_t insn)
6102 {
6103     int is_q = extract32(insn, 30, 1);
6104     int op2 = extract32(insn, 22, 2);
6105     int imm4 = extract32(insn, 11, 4);
6106     int rm = extract32(insn, 16, 5);
6107     int rn = extract32(insn, 5, 5);
6108     int rd = extract32(insn, 0, 5);
6109     int pos = imm4 << 3;
6110     TCGv_i64 tcg_resl, tcg_resh;
6111
6112     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
6113         unallocated_encoding(s);
6114         return;
6115     }
6116
6117     if (!fp_access_check(s)) {
6118         return;
6119     }
6120
6121     tcg_resh = tcg_temp_new_i64();
6122     tcg_resl = tcg_temp_new_i64();
6123
6124     /* Vd gets bits starting at pos bits into Vm:Vn. This is
6125      * either extracting 128 bits from a 128:128 concatenation, or
6126      * extracting 64 bits from a 64:64 concatenation.
6127      */
6128     if (!is_q) {
6129         read_vec_element(s, tcg_resl, rn, 0, MO_64);
6130         if (pos != 0) {
6131             read_vec_element(s, tcg_resh, rm, 0, MO_64);
6132             do_ext64(s, tcg_resh, tcg_resl, pos);
6133         }
6134         tcg_gen_movi_i64(tcg_resh, 0);
6135     } else {
6136         TCGv_i64 tcg_hh;
6137         typedef struct {
6138             int reg;
6139             int elt;
6140         } EltPosns;
6141         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
6142         EltPosns *elt = eltposns;
6143
6144         if (pos >= 64) {
6145             elt++;
6146             pos -= 64;
6147         }
6148
6149         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
6150         elt++;
6151         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
6152         elt++;
6153         if (pos != 0) {
6154             do_ext64(s, tcg_resh, tcg_resl, pos);
6155             tcg_hh = tcg_temp_new_i64();
6156             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
6157             do_ext64(s, tcg_hh, tcg_resh, pos);
6158             tcg_temp_free_i64(tcg_hh);
6159         }
6160     }
6161
6162     write_vec_element(s, tcg_resl, rd, 0, MO_64);
6163     tcg_temp_free_i64(tcg_resl);
6164     write_vec_element(s, tcg_resh, rd, 1, MO_64);
6165     tcg_temp_free_i64(tcg_resh);
6166 }
6167
6168 /* TBL/TBX
6169  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
6170  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6171  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
6172  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6173  */
6174 static void disas_simd_tb(DisasContext *s, uint32_t insn)
6175 {
6176     int op2 = extract32(insn, 22, 2);
6177     int is_q = extract32(insn, 30, 1);
6178     int rm = extract32(insn, 16, 5);
6179     int rn = extract32(insn, 5, 5);
6180     int rd = extract32(insn, 0, 5);
6181     int is_tblx = extract32(insn, 12, 1);
6182     int len = extract32(insn, 13, 2);
6183     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
6184     TCGv_i32 tcg_regno, tcg_numregs;
6185
6186     if (op2 != 0) {
6187         unallocated_encoding(s);
6188         return;
6189     }
6190
6191     if (!fp_access_check(s)) {
6192         return;
6193     }
6194
6195     /* This does a table lookup: for every byte element in the input
6196      * we index into a table formed from up to four vector registers,
6197      * and then the output is the result of the lookups. Our helper
6198      * function does the lookup operation for a single 64 bit part of
6199      * the input.
6200      */
6201     tcg_resl = tcg_temp_new_i64();
6202     tcg_resh = tcg_temp_new_i64();
6203
6204     if (is_tblx) {
6205         read_vec_element(s, tcg_resl, rd, 0, MO_64);
6206     } else {
6207         tcg_gen_movi_i64(tcg_resl, 0);
6208     }
6209     if (is_tblx && is_q) {
6210         read_vec_element(s, tcg_resh, rd, 1, MO_64);
6211     } else {
6212         tcg_gen_movi_i64(tcg_resh, 0);
6213     }
6214
6215     tcg_idx = tcg_temp_new_i64();
6216     tcg_regno = tcg_const_i32(rn);
6217     tcg_numregs = tcg_const_i32(len + 1);
6218     read_vec_element(s, tcg_idx, rm, 0, MO_64);
6219     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
6220                         tcg_regno, tcg_numregs);
6221     if (is_q) {
6222         read_vec_element(s, tcg_idx, rm, 1, MO_64);
6223         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
6224                             tcg_regno, tcg_numregs);
6225     }
6226     tcg_temp_free_i64(tcg_idx);
6227     tcg_temp_free_i32(tcg_regno);
6228     tcg_temp_free_i32(tcg_numregs);
6229
6230     write_vec_element(s, tcg_resl, rd, 0, MO_64);
6231     tcg_temp_free_i64(tcg_resl);
6232     write_vec_element(s, tcg_resh, rd, 1, MO_64);
6233     tcg_temp_free_i64(tcg_resh);
6234 }
6235
6236 /* ZIP/UZP/TRN
6237  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
6238  * +---+---+-------------+------+---+------+---+------------------+------+
6239  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
6240  * +---+---+-------------+------+---+------+---+------------------+------+
6241  */
6242 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
6243 {
6244     int rd = extract32(insn, 0, 5);
6245     int rn = extract32(insn, 5, 5);
6246     int rm = extract32(insn, 16, 5);
6247     int size = extract32(insn, 22, 2);
6248     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
6249      * bit 2 indicates 1 vs 2 variant of the insn.
6250      */
6251     int opcode = extract32(insn, 12, 2);
6252     bool part = extract32(insn, 14, 1);
6253     bool is_q = extract32(insn, 30, 1);
6254     int esize = 8 << size;
6255     int i, ofs;
6256     int datasize = is_q ? 128 : 64;
6257     int elements = datasize / esize;
6258     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
6259
6260     if (opcode == 0 || (size == 3 && !is_q)) {
6261         unallocated_encoding(s);
6262         return;
6263     }
6264
6265     if (!fp_access_check(s)) {
6266         return;
6267     }
6268
6269     tcg_resl = tcg_const_i64(0);
6270     tcg_resh = tcg_const_i64(0);
6271     tcg_res = tcg_temp_new_i64();
6272
6273     for (i = 0; i < elements; i++) {
6274         switch (opcode) {
6275         case 1: /* UZP1/2 */
6276         {
6277             int midpoint = elements / 2;
6278             if (i < midpoint) {
6279                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
6280             } else {
6281                 read_vec_element(s, tcg_res, rm,
6282                                  2 * (i - midpoint) + part, size);
6283             }
6284             break;
6285         }
6286         case 2: /* TRN1/2 */
6287             if (i & 1) {
6288                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
6289             } else {
6290                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
6291             }
6292             break;
6293         case 3: /* ZIP1/2 */
6294         {
6295             int base = part * elements / 2;
6296             if (i & 1) {
6297                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
6298             } else {
6299                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
6300             }
6301             break;
6302         }
6303         default:
6304             g_assert_not_reached();
6305         }
6306
6307         ofs = i * esize;
6308         if (ofs < 64) {
6309             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
6310             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
6311         } else {
6312             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
6313             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
6314         }
6315     }
6316
6317     tcg_temp_free_i64(tcg_res);
6318
6319     write_vec_element(s, tcg_resl, rd, 0, MO_64);
6320     tcg_temp_free_i64(tcg_resl);
6321     write_vec_element(s, tcg_resh, rd, 1, MO_64);
6322     tcg_temp_free_i64(tcg_resh);
6323 }
6324
6325 /*
6326  * do_reduction_op helper
6327  *
6328  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
6329  * important for correct NaN propagation that we do these
6330  * operations in exactly the order specified by the pseudocode.
6331  *
6332  * This is a recursive function, TCG temps should be freed by the
6333  * calling function once it is done with the values.
6334  */
6335 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
6336                                 int esize, int size, int vmap, TCGv_ptr fpst)
6337 {
6338     if (esize == size) {
6339         int element;
6340         TCGMemOp msize = esize == 16 ? MO_16 : MO_32;
6341         TCGv_i32 tcg_elem;
6342
6343         /* We should have one register left here */
6344         assert(ctpop8(vmap) == 1);
6345         element = ctz32(vmap);
6346         assert(element < 8);
6347
6348         tcg_elem = tcg_temp_new_i32();
6349         read_vec_element_i32(s, tcg_elem, rn, element, msize);
6350         return tcg_elem;
6351     } else {
6352         int bits = size / 2;
6353         int shift = ctpop8(vmap) / 2;
6354         int vmap_lo = (vmap >> shift) & vmap;
6355         int vmap_hi = (vmap & ~vmap_lo);
6356         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
6357
6358         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
6359         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
6360         tcg_res = tcg_temp_new_i32();
6361
6362         switch (fpopcode) {
6363         case 0x0c: /* fmaxnmv half-precision */
6364             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
6365             break;
6366         case 0x0f: /* fmaxv half-precision */
6367             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
6368             break;
6369         case 0x1c: /* fminnmv half-precision */
6370             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
6371             break;
6372         case 0x1f: /* fminv half-precision */
6373             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
6374             break;
6375         case 0x2c: /* fmaxnmv */
6376             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
6377             break;
6378         case 0x2f: /* fmaxv */
6379             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
6380             break;
6381         case 0x3c: /* fminnmv */
6382             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
6383             break;
6384         case 0x3f: /* fminv */
6385             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
6386             break;
6387         default:
6388             g_assert_not_reached();
6389         }
6390
6391         tcg_temp_free_i32(tcg_hi);
6392         tcg_temp_free_i32(tcg_lo);
6393         return tcg_res;
6394     }
6395 }
6396
6397 /* AdvSIMD across lanes
6398  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6399  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
6400  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6401  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
6402  */
6403 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
6404 {
6405     int rd = extract32(insn, 0, 5);
6406     int rn = extract32(insn, 5, 5);
6407     int size = extract32(insn, 22, 2);
6408     int opcode = extract32(insn, 12, 5);
6409     bool is_q = extract32(insn, 30, 1);
6410     bool is_u = extract32(insn, 29, 1);
6411     bool is_fp = false;
6412     bool is_min = false;
6413     int esize;
6414     int elements;
6415     int i;
6416     TCGv_i64 tcg_res, tcg_elt;
6417
6418     switch (opcode) {
6419     case 0x1b: /* ADDV */
6420         if (is_u) {
6421             unallocated_encoding(s);
6422             return;
6423         }
6424         /* fall through */
6425     case 0x3: /* SADDLV, UADDLV */
6426     case 0xa: /* SMAXV, UMAXV */
6427     case 0x1a: /* SMINV, UMINV */
6428         if (size == 3 || (size == 2 && !is_q)) {
6429             unallocated_encoding(s);
6430             return;
6431         }
6432         break;
6433     case 0xc: /* FMAXNMV, FMINNMV */
6434     case 0xf: /* FMAXV, FMINV */
6435         /* Bit 1 of size field encodes min vs max and the actual size
6436          * depends on the encoding of the U bit. If not set (and FP16
6437          * enabled) then we do half-precision float instead of single
6438          * precision.
6439          */
6440         is_min = extract32(size, 1, 1);
6441         is_fp = true;
6442         if (!is_u && arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
6443             size = 1;
6444         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
6445             unallocated_encoding(s);
6446             return;
6447         } else {
6448             size = 2;
6449         }
6450         break;
6451     default:
6452         unallocated_encoding(s);
6453         return;
6454     }
6455
6456     if (!fp_access_check(s)) {
6457         return;
6458     }
6459
6460     esize = 8 << size;
6461     elements = (is_q ? 128 : 64) / esize;
6462
6463     tcg_res = tcg_temp_new_i64();
6464     tcg_elt = tcg_temp_new_i64();
6465
6466     /* These instructions operate across all lanes of a vector
6467      * to produce a single result. We can guarantee that a 64
6468      * bit intermediate is sufficient:
6469      *  + for [US]ADDLV the maximum element size is 32 bits, and
6470      *    the result type is 64 bits
6471      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
6472      *    same as the element size, which is 32 bits at most
6473      * For the integer operations we can choose to work at 64
6474      * or 32 bits and truncate at the end; for simplicity
6475      * we use 64 bits always. The floating point
6476      * ops do require 32 bit intermediates, though.
6477      */
6478     if (!is_fp) {
6479         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
6480
6481         for (i = 1; i < elements; i++) {
6482             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
6483
6484             switch (opcode) {
6485             case 0x03: /* SADDLV / UADDLV */
6486             case 0x1b: /* ADDV */
6487                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
6488                 break;
6489             case 0x0a: /* SMAXV / UMAXV */
6490                 if (is_u) {
6491                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
6492                 } else {
6493                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
6494                 }
6495                 break;
6496             case 0x1a: /* SMINV / UMINV */
6497                 if (is_u) {
6498                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
6499                 } else {
6500                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
6501                 }
6502                 break;
6503             default:
6504                 g_assert_not_reached();
6505             }
6506
6507         }
6508     } else {
6509         /* Floating point vector reduction ops which work across 32
6510          * bit (single) or 16 bit (half-precision) intermediates.
6511          * Note that correct NaN propagation requires that we do these
6512          * operations in exactly the order specified by the pseudocode.
6513          */
6514         TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
6515         int fpopcode = opcode | is_min << 4 | is_u << 5;
6516         int vmap = (1 << elements) - 1;
6517         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
6518                                              (is_q ? 128 : 64), vmap, fpst);
6519         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
6520         tcg_temp_free_i32(tcg_res32);
6521         tcg_temp_free_ptr(fpst);
6522     }
6523
6524     tcg_temp_free_i64(tcg_elt);
6525
6526     /* Now truncate the result to the width required for the final output */
6527     if (opcode == 0x03) {
6528         /* SADDLV, UADDLV: result is 2*esize */
6529         size++;
6530     }
6531
6532     switch (size) {
6533     case 0:
6534         tcg_gen_ext8u_i64(tcg_res, tcg_res);
6535         break;
6536     case 1:
6537         tcg_gen_ext16u_i64(tcg_res, tcg_res);
6538         break;
6539     case 2:
6540         tcg_gen_ext32u_i64(tcg_res, tcg_res);
6541         break;
6542     case 3:
6543         break;
6544     default:
6545         g_assert_not_reached();
6546     }
6547
6548     write_fp_dreg(s, rd, tcg_res);
6549     tcg_temp_free_i64(tcg_res);
6550 }
6551
6552 /* DUP (Element, Vector)
6553  *
6554  *  31  30   29              21 20    16 15        10  9    5 4    0
6555  * +---+---+-------------------+--------+-------------+------+------+
6556  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
6557  * +---+---+-------------------+--------+-------------+------+------+
6558  *
6559  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6560  */
6561 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
6562                              int imm5)
6563 {
6564     int size = ctz32(imm5);
6565     int index = imm5 >> (size + 1);
6566
6567     if (size > 3 || (size == 3 && !is_q)) {
6568         unallocated_encoding(s);
6569         return;
6570     }
6571
6572     if (!fp_access_check(s)) {
6573         return;
6574     }
6575
6576     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
6577                          vec_reg_offset(s, rn, index, size),
6578                          is_q ? 16 : 8, vec_full_reg_size(s));
6579 }
6580
6581 /* DUP (element, scalar)
6582  *  31                   21 20    16 15        10  9    5 4    0
6583  * +-----------------------+--------+-------------+------+------+
6584  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
6585  * +-----------------------+--------+-------------+------+------+
6586  */
6587 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
6588                               int imm5)
6589 {
6590     int size = ctz32(imm5);
6591     int index;
6592     TCGv_i64 tmp;
6593
6594     if (size > 3) {
6595         unallocated_encoding(s);
6596         return;
6597     }
6598
6599     if (!fp_access_check(s)) {
6600         return;
6601     }
6602
6603     index = imm5 >> (size + 1);
6604
6605     /* This instruction just extracts the specified element and
6606      * zero-extends it into the bottom of the destination register.
6607      */
6608     tmp = tcg_temp_new_i64();
6609     read_vec_element(s, tmp, rn, index, size);
6610     write_fp_dreg(s, rd, tmp);
6611     tcg_temp_free_i64(tmp);
6612 }
6613
6614 /* DUP (General)
6615  *
6616  *  31  30   29              21 20    16 15        10  9    5 4    0
6617  * +---+---+-------------------+--------+-------------+------+------+
6618  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
6619  * +---+---+-------------------+--------+-------------+------+------+
6620  *
6621  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6622  */
6623 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
6624                              int imm5)
6625 {
6626     int size = ctz32(imm5);
6627     uint32_t dofs, oprsz, maxsz;
6628
6629     if (size > 3 || ((size == 3) && !is_q)) {
6630         unallocated_encoding(s);
6631         return;
6632     }
6633
6634     if (!fp_access_check(s)) {
6635         return;
6636     }
6637
6638     dofs = vec_full_reg_offset(s, rd);
6639     oprsz = is_q ? 16 : 8;
6640     maxsz = vec_full_reg_size(s);
6641
6642     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
6643 }
6644
6645 /* INS (Element)
6646  *
6647  *  31                   21 20    16 15  14    11  10 9    5 4    0
6648  * +-----------------------+--------+------------+---+------+------+
6649  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
6650  * +-----------------------+--------+------------+---+------+------+
6651  *
6652  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6653  * index: encoded in imm5<4:size+1>
6654  */
6655 static void handle_simd_inse(DisasContext *s, int rd, int rn,
6656                              int imm4, int imm5)
6657 {
6658     int size = ctz32(imm5);
6659     int src_index, dst_index;
6660     TCGv_i64 tmp;
6661
6662     if (size > 3) {
6663         unallocated_encoding(s);
6664         return;
6665     }
6666
6667     if (!fp_access_check(s)) {
6668         return;
6669     }
6670
6671     dst_index = extract32(imm5, 1+size, 5);
6672     src_index = extract32(imm4, size, 4);
6673
6674     tmp = tcg_temp_new_i64();
6675
6676     read_vec_element(s, tmp, rn, src_index, size);
6677     write_vec_element(s, tmp, rd, dst_index, size);
6678
6679     tcg_temp_free_i64(tmp);
6680 }
6681
6682
6683 /* INS (General)
6684  *
6685  *  31                   21 20    16 15        10  9    5 4    0
6686  * +-----------------------+--------+-------------+------+------+
6687  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
6688  * +-----------------------+--------+-------------+------+------+
6689  *
6690  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6691  * index: encoded in imm5<4:size+1>
6692  */
6693 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
6694 {
6695     int size = ctz32(imm5);
6696     int idx;
6697
6698     if (size > 3) {
6699         unallocated_encoding(s);
6700         return;
6701     }
6702
6703     if (!fp_access_check(s)) {
6704         return;
6705     }
6706
6707     idx = extract32(imm5, 1 + size, 4 - size);
6708     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
6709 }
6710
6711 /*
6712  * UMOV (General)
6713  * SMOV (General)
6714  *
6715  *  31  30   29              21 20    16 15    12   10 9    5 4    0
6716  * +---+---+-------------------+--------+-------------+------+------+
6717  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
6718  * +---+---+-------------------+--------+-------------+------+------+
6719  *
6720  * U: unsigned when set
6721  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6722  */
6723 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
6724                                   int rn, int rd, int imm5)
6725 {
6726     int size = ctz32(imm5);
6727     int element;
6728     TCGv_i64 tcg_rd;
6729
6730     /* Check for UnallocatedEncodings */
6731     if (is_signed) {
6732         if (size > 2 || (size == 2 && !is_q)) {
6733             unallocated_encoding(s);
6734             return;
6735         }
6736     } else {
6737         if (size > 3
6738             || (size < 3 && is_q)
6739             || (size == 3 && !is_q)) {
6740             unallocated_encoding(s);
6741             return;
6742         }
6743     }
6744
6745     if (!fp_access_check(s)) {
6746         return;
6747     }
6748
6749     element = extract32(imm5, 1+size, 4);
6750
6751     tcg_rd = cpu_reg(s, rd);
6752     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
6753     if (is_signed && !is_q) {
6754         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6755     }
6756 }
6757
6758 /* AdvSIMD copy
6759  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
6760  * +---+---+----+-----------------+------+---+------+---+------+------+
6761  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6762  * +---+---+----+-----------------+------+---+------+---+------+------+
6763  */
6764 static void disas_simd_copy(DisasContext *s, uint32_t insn)
6765 {
6766     int rd = extract32(insn, 0, 5);
6767     int rn = extract32(insn, 5, 5);
6768     int imm4 = extract32(insn, 11, 4);
6769     int op = extract32(insn, 29, 1);
6770     int is_q = extract32(insn, 30, 1);
6771     int imm5 = extract32(insn, 16, 5);
6772
6773     if (op) {
6774         if (is_q) {
6775             /* INS (element) */
6776             handle_simd_inse(s, rd, rn, imm4, imm5);
6777         } else {
6778             unallocated_encoding(s);
6779         }
6780     } else {
6781         switch (imm4) {
6782         case 0:
6783             /* DUP (element - vector) */
6784             handle_simd_dupe(s, is_q, rd, rn, imm5);
6785             break;
6786         case 1:
6787             /* DUP (general) */
6788             handle_simd_dupg(s, is_q, rd, rn, imm5);
6789             break;
6790         case 3:
6791             if (is_q) {
6792                 /* INS (general) */
6793                 handle_simd_insg(s, rd, rn, imm5);
6794             } else {
6795                 unallocated_encoding(s);
6796             }
6797             break;
6798         case 5:
6799         case 7:
6800             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
6801             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
6802             break;
6803         default:
6804             unallocated_encoding(s);
6805             break;
6806         }
6807     }
6808 }
6809
6810 /* AdvSIMD modified immediate
6811  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
6812  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6813  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
6814  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6815  *
6816  * There are a number of operations that can be carried out here:
6817  *   MOVI - move (shifted) imm into register
6818  *   MVNI - move inverted (shifted) imm into register
6819  *   ORR  - bitwise OR of (shifted) imm with register
6820  *   BIC  - bitwise clear of (shifted) imm with register
6821  * With ARMv8.2 we also have:
6822  *   FMOV half-precision
6823  */
6824 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
6825 {
6826     int rd = extract32(insn, 0, 5);
6827     int cmode = extract32(insn, 12, 4);
6828     int cmode_3_1 = extract32(cmode, 1, 3);
6829     int cmode_0 = extract32(cmode, 0, 1);
6830     int o2 = extract32(insn, 11, 1);
6831     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
6832     bool is_neg = extract32(insn, 29, 1);
6833     bool is_q = extract32(insn, 30, 1);
6834     uint64_t imm = 0;
6835
6836     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
6837         /* Check for FMOV (vector, immediate) - half-precision */
6838         if (!(arm_dc_feature(s, ARM_FEATURE_V8_FP16) && o2 && cmode == 0xf)) {
6839             unallocated_encoding(s);
6840             return;
6841         }
6842     }
6843
6844     if (!fp_access_check(s)) {
6845         return;
6846     }
6847
6848     /* See AdvSIMDExpandImm() in ARM ARM */
6849     switch (cmode_3_1) {
6850     case 0: /* Replicate(Zeros(24):imm8, 2) */
6851     case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
6852     case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
6853     case 3: /* Replicate(imm8:Zeros(24), 2) */
6854     {
6855         int shift = cmode_3_1 * 8;
6856         imm = bitfield_replicate(abcdefgh << shift, 32);
6857         break;
6858     }
6859     case 4: /* Replicate(Zeros(8):imm8, 4) */
6860     case 5: /* Replicate(imm8:Zeros(8), 4) */
6861     {
6862         int shift = (cmode_3_1 & 0x1) * 8;
6863         imm = bitfield_replicate(abcdefgh << shift, 16);
6864         break;
6865     }
6866     case 6:
6867         if (cmode_0) {
6868             /* Replicate(Zeros(8):imm8:Ones(16), 2) */
6869             imm = (abcdefgh << 16) | 0xffff;
6870         } else {
6871             /* Replicate(Zeros(16):imm8:Ones(8), 2) */
6872             imm = (abcdefgh << 8) | 0xff;
6873         }
6874         imm = bitfield_replicate(imm, 32);
6875         break;
6876     case 7:
6877         if (!cmode_0 && !is_neg) {
6878             imm = bitfield_replicate(abcdefgh, 8);
6879         } else if (!cmode_0 && is_neg) {
6880             int i;
6881             imm = 0;
6882             for (i = 0; i < 8; i++) {
6883                 if ((abcdefgh) & (1 << i)) {
6884                     imm |= 0xffULL << (i * 8);
6885                 }
6886             }
6887         } else if (cmode_0) {
6888             if (is_neg) {
6889                 imm = (abcdefgh & 0x3f) << 48;
6890                 if (abcdefgh & 0x80) {
6891                     imm |= 0x8000000000000000ULL;
6892                 }
6893                 if (abcdefgh & 0x40) {
6894                     imm |= 0x3fc0000000000000ULL;
6895                 } else {
6896                     imm |= 0x4000000000000000ULL;
6897                 }
6898             } else {
6899                 if (o2) {
6900                     /* FMOV (vector, immediate) - half-precision */
6901                     imm = vfp_expand_imm(MO_16, abcdefgh);
6902                     /* now duplicate across the lanes */
6903                     imm = bitfield_replicate(imm, 16);
6904                 } else {
6905                     imm = (abcdefgh & 0x3f) << 19;
6906                     if (abcdefgh & 0x80) {
6907                         imm |= 0x80000000;
6908                     }
6909                     if (abcdefgh & 0x40) {
6910                         imm |= 0x3e000000;
6911                     } else {
6912                         imm |= 0x40000000;
6913                     }
6914                     imm |= (imm << 32);
6915                 }
6916             }
6917         }
6918         break;
6919     default:
6920         fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
6921         g_assert_not_reached();
6922     }
6923
6924     if (cmode_3_1 != 7 && is_neg) {
6925         imm = ~imm;
6926     }
6927
6928     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
6929         /* MOVI or MVNI, with MVNI negation handled above.  */
6930         tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
6931                             vec_full_reg_size(s), imm);
6932     } else {
6933         /* ORR or BIC, with BIC negation to AND handled above.  */
6934         if (is_neg) {
6935             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
6936         } else {
6937             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
6938         }
6939     }
6940 }
6941
6942 /* AdvSIMD scalar copy
6943  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6944  * +-----+----+-----------------+------+---+------+---+------+------+
6945  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6946  * +-----+----+-----------------+------+---+------+---+------+------+
6947  */
6948 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6949 {
6950     int rd = extract32(insn, 0, 5);
6951     int rn = extract32(insn, 5, 5);
6952     int imm4 = extract32(insn, 11, 4);
6953     int imm5 = extract32(insn, 16, 5);
6954     int op = extract32(insn, 29, 1);
6955
6956     if (op != 0 || imm4 != 0) {
6957         unallocated_encoding(s);
6958         return;
6959     }
6960
6961     /* DUP (element, scalar) */
6962     handle_simd_dupes(s, rd, rn, imm5);
6963 }
6964
6965 /* AdvSIMD scalar pairwise
6966  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6967  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6968  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6969  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6970  */
6971 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6972 {
6973     int u = extract32(insn, 29, 1);
6974     int size = extract32(insn, 22, 2);
6975     int opcode = extract32(insn, 12, 5);
6976     int rn = extract32(insn, 5, 5);
6977     int rd = extract32(insn, 0, 5);
6978     TCGv_ptr fpst;
6979
6980     /* For some ops (the FP ones), size[1] is part of the encoding.
6981      * For ADDP strictly it is not but size[1] is always 1 for valid
6982      * encodings.
6983      */
6984     opcode |= (extract32(size, 1, 1) << 5);
6985
6986     switch (opcode) {
6987     case 0x3b: /* ADDP */
6988         if (u || size != 3) {
6989             unallocated_encoding(s);
6990             return;
6991         }
6992         if (!fp_access_check(s)) {
6993             return;
6994         }
6995
6996         fpst = NULL;
6997         break;
6998     case 0xc: /* FMAXNMP */
6999     case 0xd: /* FADDP */
7000     case 0xf: /* FMAXP */
7001     case 0x2c: /* FMINNMP */
7002     case 0x2f: /* FMINP */
7003         /* FP op, size[0] is 32 or 64 bit*/
7004         if (!u) {
7005             if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
7006                 unallocated_encoding(s);
7007                 return;
7008             } else {
7009                 size = MO_16;
7010             }
7011         } else {
7012             size = extract32(size, 0, 1) ? MO_64 : MO_32;
7013         }
7014
7015         if (!fp_access_check(s)) {
7016             return;
7017         }
7018
7019         fpst = get_fpstatus_ptr(size == MO_16);
7020         break;
7021     default:
7022         unallocated_encoding(s);
7023         return;
7024     }
7025
7026     if (size == MO_64) {
7027         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7028         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7029         TCGv_i64 tcg_res = tcg_temp_new_i64();
7030
7031         read_vec_element(s, tcg_op1, rn, 0, MO_64);
7032         read_vec_element(s, tcg_op2, rn, 1, MO_64);
7033
7034         switch (opcode) {
7035         case 0x3b: /* ADDP */
7036             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
7037             break;
7038         case 0xc: /* FMAXNMP */
7039             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7040             break;
7041         case 0xd: /* FADDP */
7042             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7043             break;
7044         case 0xf: /* FMAXP */
7045             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7046             break;
7047         case 0x2c: /* FMINNMP */
7048             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7049             break;
7050         case 0x2f: /* FMINP */
7051             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7052             break;
7053         default:
7054             g_assert_not_reached();
7055         }
7056
7057         write_fp_dreg(s, rd, tcg_res);
7058
7059         tcg_temp_free_i64(tcg_op1);
7060         tcg_temp_free_i64(tcg_op2);
7061         tcg_temp_free_i64(tcg_res);
7062     } else {
7063         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7064         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7065         TCGv_i32 tcg_res = tcg_temp_new_i32();
7066
7067         read_vec_element_i32(s, tcg_op1, rn, 0, size);
7068         read_vec_element_i32(s, tcg_op2, rn, 1, size);
7069
7070         if (size == MO_16) {
7071             switch (opcode) {
7072             case 0xc: /* FMAXNMP */
7073                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7074                 break;
7075             case 0xd: /* FADDP */
7076                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
7077                 break;
7078             case 0xf: /* FMAXP */
7079                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
7080                 break;
7081             case 0x2c: /* FMINNMP */
7082                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7083                 break;
7084             case 0x2f: /* FMINP */
7085                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
7086                 break;
7087             default:
7088                 g_assert_not_reached();
7089             }
7090         } else {
7091             switch (opcode) {
7092             case 0xc: /* FMAXNMP */
7093                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7094                 break;
7095             case 0xd: /* FADDP */
7096                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7097                 break;
7098             case 0xf: /* FMAXP */
7099                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7100                 break;
7101             case 0x2c: /* FMINNMP */
7102                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7103                 break;
7104             case 0x2f: /* FMINP */
7105                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7106                 break;
7107             default:
7108                 g_assert_not_reached();
7109             }
7110         }
7111
7112         write_fp_sreg(s, rd, tcg_res);
7113
7114         tcg_temp_free_i32(tcg_op1);
7115         tcg_temp_free_i32(tcg_op2);
7116         tcg_temp_free_i32(tcg_res);
7117     }
7118
7119     if (fpst) {
7120         tcg_temp_free_ptr(fpst);
7121     }
7122 }
7123
7124 /*
7125  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
7126  *
7127  * This code is handles the common shifting code and is used by both
7128  * the vector and scalar code.
7129  */
7130 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
7131                                     TCGv_i64 tcg_rnd, bool accumulate,
7132                                     bool is_u, int size, int shift)
7133 {
7134     bool extended_result = false;
7135     bool round = tcg_rnd != NULL;
7136     int ext_lshift = 0;
7137     TCGv_i64 tcg_src_hi;
7138
7139     if (round && size == 3) {
7140         extended_result = true;
7141         ext_lshift = 64 - shift;
7142         tcg_src_hi = tcg_temp_new_i64();
7143     } else if (shift == 64) {
7144         if (!accumulate && is_u) {
7145             /* result is zero */
7146             tcg_gen_movi_i64(tcg_res, 0);
7147             return;
7148         }
7149     }
7150
7151     /* Deal with the rounding step */
7152     if (round) {
7153         if (extended_result) {
7154             TCGv_i64 tcg_zero = tcg_const_i64(0);
7155             if (!is_u) {
7156                 /* take care of sign extending tcg_res */
7157                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
7158                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7159                                  tcg_src, tcg_src_hi,
7160                                  tcg_rnd, tcg_zero);
7161             } else {
7162                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7163                                  tcg_src, tcg_zero,
7164                                  tcg_rnd, tcg_zero);
7165             }
7166             tcg_temp_free_i64(tcg_zero);
7167         } else {
7168             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
7169         }
7170     }
7171
7172     /* Now do the shift right */
7173     if (round && extended_result) {
7174         /* extended case, >64 bit precision required */
7175         if (ext_lshift == 0) {
7176             /* special case, only high bits matter */
7177             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
7178         } else {
7179             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7180             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
7181             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
7182         }
7183     } else {
7184         if (is_u) {
7185             if (shift == 64) {
7186                 /* essentially shifting in 64 zeros */
7187                 tcg_gen_movi_i64(tcg_src, 0);
7188             } else {
7189                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7190             }
7191         } else {
7192             if (shift == 64) {
7193                 /* effectively extending the sign-bit */
7194                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
7195             } else {
7196                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
7197             }
7198         }
7199     }
7200
7201     if (accumulate) {
7202         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
7203     } else {
7204         tcg_gen_mov_i64(tcg_res, tcg_src);
7205     }
7206
7207     if (extended_result) {
7208         tcg_temp_free_i64(tcg_src_hi);
7209     }
7210 }
7211
7212 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
7213 static void handle_scalar_simd_shri(DisasContext *s,
7214                                     bool is_u, int immh, int immb,
7215                                     int opcode, int rn, int rd)
7216 {
7217     const int size = 3;
7218     int immhb = immh << 3 | immb;
7219     int shift = 2 * (8 << size) - immhb;
7220     bool accumulate = false;
7221     bool round = false;
7222     bool insert = false;
7223     TCGv_i64 tcg_rn;
7224     TCGv_i64 tcg_rd;
7225     TCGv_i64 tcg_round;
7226
7227     if (!extract32(immh, 3, 1)) {
7228         unallocated_encoding(s);
7229         return;
7230     }
7231
7232     if (!fp_access_check(s)) {
7233         return;
7234     }
7235
7236     switch (opcode) {
7237     case 0x02: /* SSRA / USRA (accumulate) */
7238         accumulate = true;
7239         break;
7240     case 0x04: /* SRSHR / URSHR (rounding) */
7241         round = true;
7242         break;
7243     case 0x06: /* SRSRA / URSRA (accum + rounding) */
7244         accumulate = round = true;
7245         break;
7246     case 0x08: /* SRI */
7247         insert = true;
7248         break;
7249     }
7250
7251     if (round) {
7252         uint64_t round_const = 1ULL << (shift - 1);
7253         tcg_round = tcg_const_i64(round_const);
7254     } else {
7255         tcg_round = NULL;
7256     }
7257
7258     tcg_rn = read_fp_dreg(s, rn);
7259     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7260
7261     if (insert) {
7262         /* shift count same as element size is valid but does nothing;
7263          * special case to avoid potential shift by 64.
7264          */
7265         int esize = 8 << size;
7266         if (shift != esize) {
7267             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
7268             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
7269         }
7270     } else {
7271         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
7272                                 accumulate, is_u, size, shift);
7273     }
7274
7275     write_fp_dreg(s, rd, tcg_rd);
7276
7277     tcg_temp_free_i64(tcg_rn);
7278     tcg_temp_free_i64(tcg_rd);
7279     if (round) {
7280         tcg_temp_free_i64(tcg_round);
7281     }
7282 }
7283
7284 /* SHL/SLI - Scalar shift left */
7285 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
7286                                     int immh, int immb, int opcode,
7287                                     int rn, int rd)
7288 {
7289     int size = 32 - clz32(immh) - 1;
7290     int immhb = immh << 3 | immb;
7291     int shift = immhb - (8 << size);
7292     TCGv_i64 tcg_rn = new_tmp_a64(s);
7293     TCGv_i64 tcg_rd = new_tmp_a64(s);
7294
7295     if (!extract32(immh, 3, 1)) {
7296         unallocated_encoding(s);
7297         return;
7298     }
7299
7300     if (!fp_access_check(s)) {
7301         return;
7302     }
7303
7304     tcg_rn = read_fp_dreg(s, rn);
7305     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7306
7307     if (insert) {
7308         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
7309     } else {
7310         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
7311     }
7312
7313     write_fp_dreg(s, rd, tcg_rd);
7314
7315     tcg_temp_free_i64(tcg_rn);
7316     tcg_temp_free_i64(tcg_rd);
7317 }
7318
7319 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
7320  * (signed/unsigned) narrowing */
7321 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
7322                                    bool is_u_shift, bool is_u_narrow,
7323                                    int immh, int immb, int opcode,
7324                                    int rn, int rd)
7325 {
7326     int immhb = immh << 3 | immb;
7327     int size = 32 - clz32(immh) - 1;
7328     int esize = 8 << size;
7329     int shift = (2 * esize) - immhb;
7330     int elements = is_scalar ? 1 : (64 / esize);
7331     bool round = extract32(opcode, 0, 1);
7332     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
7333     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
7334     TCGv_i32 tcg_rd_narrowed;
7335     TCGv_i64 tcg_final;
7336
7337     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
7338         { gen_helper_neon_narrow_sat_s8,
7339           gen_helper_neon_unarrow_sat8 },
7340         { gen_helper_neon_narrow_sat_s16,
7341           gen_helper_neon_unarrow_sat16 },
7342         { gen_helper_neon_narrow_sat_s32,
7343           gen_helper_neon_unarrow_sat32 },
7344         { NULL, NULL },
7345     };
7346     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
7347         gen_helper_neon_narrow_sat_u8,
7348         gen_helper_neon_narrow_sat_u16,
7349         gen_helper_neon_narrow_sat_u32,
7350         NULL
7351     };
7352     NeonGenNarrowEnvFn *narrowfn;
7353
7354     int i;
7355
7356     assert(size < 4);
7357
7358     if (extract32(immh, 3, 1)) {
7359         unallocated_encoding(s);
7360         return;
7361     }
7362
7363     if (!fp_access_check(s)) {
7364         return;
7365     }
7366
7367     if (is_u_shift) {
7368         narrowfn = unsigned_narrow_fns[size];
7369     } else {
7370         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
7371     }
7372
7373     tcg_rn = tcg_temp_new_i64();
7374     tcg_rd = tcg_temp_new_i64();
7375     tcg_rd_narrowed = tcg_temp_new_i32();
7376     tcg_final = tcg_const_i64(0);
7377
7378     if (round) {
7379         uint64_t round_const = 1ULL << (shift - 1);
7380         tcg_round = tcg_const_i64(round_const);
7381     } else {
7382         tcg_round = NULL;
7383     }
7384
7385     for (i = 0; i < elements; i++) {
7386         read_vec_element(s, tcg_rn, rn, i, ldop);
7387         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
7388                                 false, is_u_shift, size+1, shift);
7389         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
7390         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
7391         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
7392     }
7393
7394     if (!is_q) {
7395         write_vec_element(s, tcg_final, rd, 0, MO_64);
7396     } else {
7397         write_vec_element(s, tcg_final, rd, 1, MO_64);
7398     }
7399
7400     if (round) {
7401         tcg_temp_free_i64(tcg_round);
7402     }
7403     tcg_temp_free_i64(tcg_rn);
7404     tcg_temp_free_i64(tcg_rd);
7405     tcg_temp_free_i32(tcg_rd_narrowed);
7406     tcg_temp_free_i64(tcg_final);
7407
7408     clear_vec_high(s, is_q, rd);
7409 }
7410
7411 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
7412 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
7413                              bool src_unsigned, bool dst_unsigned,
7414                              int immh, int immb, int rn, int rd)
7415 {
7416     int immhb = immh << 3 | immb;
7417     int size = 32 - clz32(immh) - 1;
7418     int shift = immhb - (8 << size);
7419     int pass;
7420
7421     assert(immh != 0);
7422     assert(!(scalar && is_q));
7423
7424     if (!scalar) {
7425         if (!is_q && extract32(immh, 3, 1)) {
7426             unallocated_encoding(s);
7427             return;
7428         }
7429
7430         /* Since we use the variable-shift helpers we must
7431          * replicate the shift count into each element of
7432          * the tcg_shift value.
7433          */
7434         switch (size) {
7435         case 0:
7436             shift |= shift << 8;
7437             /* fall through */
7438         case 1:
7439             shift |= shift << 16;
7440             break;
7441         case 2:
7442         case 3:
7443             break;
7444         default:
7445             g_assert_not_reached();
7446         }
7447     }
7448
7449     if (!fp_access_check(s)) {
7450         return;
7451     }
7452
7453     if (size == 3) {
7454         TCGv_i64 tcg_shift = tcg_const_i64(shift);
7455         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
7456             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
7457             { NULL, gen_helper_neon_qshl_u64 },
7458         };
7459         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
7460         int maxpass = is_q ? 2 : 1;
7461
7462         for (pass = 0; pass < maxpass; pass++) {
7463             TCGv_i64 tcg_op = tcg_temp_new_i64();
7464
7465             read_vec_element(s, tcg_op, rn, pass, MO_64);
7466             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
7467             write_vec_element(s, tcg_op, rd, pass, MO_64);
7468
7469             tcg_temp_free_i64(tcg_op);
7470         }
7471         tcg_temp_free_i64(tcg_shift);
7472         clear_vec_high(s, is_q, rd);
7473     } else {
7474         TCGv_i32 tcg_shift = tcg_const_i32(shift);
7475         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
7476             {
7477                 { gen_helper_neon_qshl_s8,
7478                   gen_helper_neon_qshl_s16,
7479                   gen_helper_neon_qshl_s32 },
7480                 { gen_helper_neon_qshlu_s8,
7481                   gen_helper_neon_qshlu_s16,
7482                   gen_helper_neon_qshlu_s32 }
7483             }, {
7484                 { NULL, NULL, NULL },
7485                 { gen_helper_neon_qshl_u8,
7486                   gen_helper_neon_qshl_u16,
7487                   gen_helper_neon_qshl_u32 }
7488             }
7489         };
7490         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
7491         TCGMemOp memop = scalar ? size : MO_32;
7492         int maxpass = scalar ? 1 : is_q ? 4 : 2;
7493
7494         for (pass = 0; pass < maxpass; pass++) {
7495             TCGv_i32 tcg_op = tcg_temp_new_i32();
7496
7497             read_vec_element_i32(s, tcg_op, rn, pass, memop);
7498             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
7499             if (scalar) {
7500                 switch (size) {
7501                 case 0:
7502                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
7503                     break;
7504                 case 1:
7505                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
7506                     break;
7507                 case 2:
7508                     break;
7509                 default:
7510                     g_assert_not_reached();
7511                 }
7512                 write_fp_sreg(s, rd, tcg_op);
7513             } else {
7514                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
7515             }
7516
7517             tcg_temp_free_i32(tcg_op);
7518         }
7519         tcg_temp_free_i32(tcg_shift);
7520
7521         if (!scalar) {
7522             clear_vec_high(s, is_q, rd);
7523         }
7524     }
7525 }
7526
7527 /* Common vector code for handling integer to FP conversion */
7528 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
7529                                    int elements, int is_signed,
7530                                    int fracbits, int size)
7531 {
7532     TCGv_ptr tcg_fpst = get_fpstatus_ptr(size == MO_16);
7533     TCGv_i32 tcg_shift = NULL;
7534
7535     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
7536     int pass;
7537
7538     if (fracbits || size == MO_64) {
7539         tcg_shift = tcg_const_i32(fracbits);
7540     }
7541
7542     if (size == MO_64) {
7543         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
7544         TCGv_i64 tcg_double = tcg_temp_new_i64();
7545
7546         for (pass = 0; pass < elements; pass++) {
7547             read_vec_element(s, tcg_int64, rn, pass, mop);
7548
7549             if (is_signed) {
7550                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
7551                                      tcg_shift, tcg_fpst);
7552             } else {
7553                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
7554                                      tcg_shift, tcg_fpst);
7555             }
7556             if (elements == 1) {
7557                 write_fp_dreg(s, rd, tcg_double);
7558             } else {
7559                 write_vec_element(s, tcg_double, rd, pass, MO_64);
7560             }
7561         }
7562
7563         tcg_temp_free_i64(tcg_int64);
7564         tcg_temp_free_i64(tcg_double);
7565
7566     } else {
7567         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
7568         TCGv_i32 tcg_float = tcg_temp_new_i32();
7569
7570         for (pass = 0; pass < elements; pass++) {
7571             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
7572
7573             switch (size) {
7574             case MO_32:
7575                 if (fracbits) {
7576                     if (is_signed) {
7577                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
7578                                              tcg_shift, tcg_fpst);
7579                     } else {
7580                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
7581                                              tcg_shift, tcg_fpst);
7582                     }
7583                 } else {
7584                     if (is_signed) {
7585                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
7586                     } else {
7587                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
7588                     }
7589                 }
7590                 break;
7591             case MO_16:
7592                 if (fracbits) {
7593                     if (is_signed) {
7594                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
7595                                              tcg_shift, tcg_fpst);
7596                     } else {
7597                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
7598                                              tcg_shift, tcg_fpst);
7599                     }
7600                 } else {
7601                     if (is_signed) {
7602                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
7603                     } else {
7604                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
7605                     }
7606                 }
7607                 break;
7608             default:
7609                 g_assert_not_reached();
7610             }
7611
7612             if (elements == 1) {
7613                 write_fp_sreg(s, rd, tcg_float);
7614             } else {
7615                 write_vec_element_i32(s, tcg_float, rd, pass, size);
7616             }
7617         }
7618
7619         tcg_temp_free_i32(tcg_int32);
7620         tcg_temp_free_i32(tcg_float);
7621     }
7622
7623     tcg_temp_free_ptr(tcg_fpst);
7624     if (tcg_shift) {
7625         tcg_temp_free_i32(tcg_shift);
7626     }
7627
7628     clear_vec_high(s, elements << size == 16, rd);
7629 }
7630
7631 /* UCVTF/SCVTF - Integer to FP conversion */
7632 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
7633                                          bool is_q, bool is_u,
7634                                          int immh, int immb, int opcode,
7635                                          int rn, int rd)
7636 {
7637     int size, elements, fracbits;
7638     int immhb = immh << 3 | immb;
7639
7640     if (immh & 8) {
7641         size = MO_64;
7642         if (!is_scalar && !is_q) {
7643             unallocated_encoding(s);
7644             return;
7645         }
7646     } else if (immh & 4) {
7647         size = MO_32;
7648     } else if (immh & 2) {
7649         size = MO_16;
7650         if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
7651             unallocated_encoding(s);
7652             return;
7653         }
7654     } else {
7655         /* immh == 0 would be a failure of the decode logic */
7656         g_assert(immh == 1);
7657         unallocated_encoding(s);
7658         return;
7659     }
7660
7661     if (is_scalar) {
7662         elements = 1;
7663     } else {
7664         elements = (8 << is_q) >> size;
7665     }
7666     fracbits = (16 << size) - immhb;
7667
7668     if (!fp_access_check(s)) {
7669         return;
7670     }
7671
7672     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
7673 }
7674
7675 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
7676 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
7677                                          bool is_q, bool is_u,
7678                                          int immh, int immb, int rn, int rd)
7679 {
7680     int immhb = immh << 3 | immb;
7681     int pass, size, fracbits;
7682     TCGv_ptr tcg_fpstatus;
7683     TCGv_i32 tcg_rmode, tcg_shift;
7684
7685     if (immh & 0x8) {
7686         size = MO_64;
7687         if (!is_scalar && !is_q) {
7688             unallocated_encoding(s);
7689             return;
7690         }
7691     } else if (immh & 0x4) {
7692         size = MO_32;
7693     } else if (immh & 0x2) {
7694         size = MO_16;
7695         if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
7696             unallocated_encoding(s);
7697             return;
7698         }
7699     } else {
7700         /* Should have split out AdvSIMD modified immediate earlier.  */
7701         assert(immh == 1);
7702         unallocated_encoding(s);
7703         return;
7704     }
7705
7706     if (!fp_access_check(s)) {
7707         return;
7708     }
7709
7710     assert(!(is_scalar && is_q));
7711
7712     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
7713     tcg_fpstatus = get_fpstatus_ptr(size == MO_16);
7714     gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
7715     fracbits = (16 << size) - immhb;
7716     tcg_shift = tcg_const_i32(fracbits);
7717
7718     if (size == MO_64) {
7719         int maxpass = is_scalar ? 1 : 2;
7720
7721         for (pass = 0; pass < maxpass; pass++) {
7722             TCGv_i64 tcg_op = tcg_temp_new_i64();
7723
7724             read_vec_element(s, tcg_op, rn, pass, MO_64);
7725             if (is_u) {
7726                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7727             } else {
7728                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7729             }
7730             write_vec_element(s, tcg_op, rd, pass, MO_64);
7731             tcg_temp_free_i64(tcg_op);
7732         }
7733         clear_vec_high(s, is_q, rd);
7734     } else {
7735         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
7736         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
7737
7738         switch (size) {
7739         case MO_16:
7740             if (is_u) {
7741                 fn = gen_helper_vfp_touhh;
7742             } else {
7743                 fn = gen_helper_vfp_toshh;
7744             }
7745             break;
7746         case MO_32:
7747             if (is_u) {
7748                 fn = gen_helper_vfp_touls;
7749             } else {
7750                 fn = gen_helper_vfp_tosls;
7751             }
7752             break;
7753         default:
7754             g_assert_not_reached();
7755         }
7756
7757         for (pass = 0; pass < maxpass; pass++) {
7758             TCGv_i32 tcg_op = tcg_temp_new_i32();
7759
7760             read_vec_element_i32(s, tcg_op, rn, pass, size);
7761             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7762             if (is_scalar) {
7763                 write_fp_sreg(s, rd, tcg_op);
7764             } else {
7765                 write_vec_element_i32(s, tcg_op, rd, pass, size);
7766             }
7767             tcg_temp_free_i32(tcg_op);
7768         }
7769         if (!is_scalar) {
7770             clear_vec_high(s, is_q, rd);
7771         }
7772     }
7773
7774     tcg_temp_free_ptr(tcg_fpstatus);
7775     tcg_temp_free_i32(tcg_shift);
7776     gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
7777     tcg_temp_free_i32(tcg_rmode);
7778 }
7779
7780 /* AdvSIMD scalar shift by immediate
7781  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
7782  * +-----+---+-------------+------+------+--------+---+------+------+
7783  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
7784  * +-----+---+-------------+------+------+--------+---+------+------+
7785  *
7786  * This is the scalar version so it works on a fixed sized registers
7787  */
7788 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
7789 {
7790     int rd = extract32(insn, 0, 5);
7791     int rn = extract32(insn, 5, 5);
7792     int opcode = extract32(insn, 11, 5);
7793     int immb = extract32(insn, 16, 3);
7794     int immh = extract32(insn, 19, 4);
7795     bool is_u = extract32(insn, 29, 1);
7796
7797     if (immh == 0) {
7798         unallocated_encoding(s);
7799         return;
7800     }
7801
7802     switch (opcode) {
7803     case 0x08: /* SRI */
7804         if (!is_u) {
7805             unallocated_encoding(s);
7806             return;
7807         }
7808         /* fall through */
7809     case 0x00: /* SSHR / USHR */
7810     case 0x02: /* SSRA / USRA */
7811     case 0x04: /* SRSHR / URSHR */
7812     case 0x06: /* SRSRA / URSRA */
7813         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
7814         break;
7815     case 0x0a: /* SHL / SLI */
7816         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
7817         break;
7818     case 0x1c: /* SCVTF, UCVTF */
7819         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
7820                                      opcode, rn, rd);
7821         break;
7822     case 0x10: /* SQSHRUN, SQSHRUN2 */
7823     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
7824         if (!is_u) {
7825             unallocated_encoding(s);
7826             return;
7827         }
7828         handle_vec_simd_sqshrn(s, true, false, false, true,
7829                                immh, immb, opcode, rn, rd);
7830         break;
7831     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
7832     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
7833         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
7834                                immh, immb, opcode, rn, rd);
7835         break;
7836     case 0xc: /* SQSHLU */
7837         if (!is_u) {
7838             unallocated_encoding(s);
7839             return;
7840         }
7841         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
7842         break;
7843     case 0xe: /* SQSHL, UQSHL */
7844         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
7845         break;
7846     case 0x1f: /* FCVTZS, FCVTZU */
7847         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
7848         break;
7849     default:
7850         unallocated_encoding(s);
7851         break;
7852     }
7853 }
7854
7855 /* AdvSIMD scalar three different
7856  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
7857  * +-----+---+-----------+------+---+------+--------+-----+------+------+
7858  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
7859  * +-----+---+-----------+------+---+------+--------+-----+------+------+
7860  */
7861 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
7862 {
7863     bool is_u = extract32(insn, 29, 1);
7864     int size = extract32(insn, 22, 2);
7865     int opcode = extract32(insn, 12, 4);
7866     int rm = extract32(insn, 16, 5);
7867     int rn = extract32(insn, 5, 5);
7868     int rd = extract32(insn, 0, 5);
7869
7870     if (is_u) {
7871         unallocated_encoding(s);
7872         return;
7873     }
7874
7875     switch (opcode) {
7876     case 0x9: /* SQDMLAL, SQDMLAL2 */
7877     case 0xb: /* SQDMLSL, SQDMLSL2 */
7878     case 0xd: /* SQDMULL, SQDMULL2 */
7879         if (size == 0 || size == 3) {
7880             unallocated_encoding(s);
7881             return;
7882         }
7883         break;
7884     default:
7885         unallocated_encoding(s);
7886         return;
7887     }
7888
7889     if (!fp_access_check(s)) {
7890         return;
7891     }
7892
7893     if (size == 2) {
7894         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7895         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7896         TCGv_i64 tcg_res = tcg_temp_new_i64();
7897
7898         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
7899         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
7900
7901         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
7902         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
7903
7904         switch (opcode) {
7905         case 0xd: /* SQDMULL, SQDMULL2 */
7906             break;
7907         case 0xb: /* SQDMLSL, SQDMLSL2 */
7908             tcg_gen_neg_i64(tcg_res, tcg_res);
7909             /* fall through */
7910         case 0x9: /* SQDMLAL, SQDMLAL2 */
7911             read_vec_element(s, tcg_op1, rd, 0, MO_64);
7912             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
7913                                               tcg_res, tcg_op1);
7914             break;
7915         default:
7916             g_assert_not_reached();
7917         }
7918
7919         write_fp_dreg(s, rd, tcg_res);
7920
7921         tcg_temp_free_i64(tcg_op1);
7922         tcg_temp_free_i64(tcg_op2);
7923         tcg_temp_free_i64(tcg_res);
7924     } else {
7925         TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
7926         TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
7927         TCGv_i64 tcg_res = tcg_temp_new_i64();
7928
7929         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
7930         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
7931
7932         switch (opcode) {
7933         case 0xd: /* SQDMULL, SQDMULL2 */
7934             break;
7935         case 0xb: /* SQDMLSL, SQDMLSL2 */
7936             gen_helper_neon_negl_u32(tcg_res, tcg_res);
7937             /* fall through */
7938         case 0x9: /* SQDMLAL, SQDMLAL2 */
7939         {
7940             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
7941             read_vec_element(s, tcg_op3, rd, 0, MO_32);
7942             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
7943                                               tcg_res, tcg_op3);
7944             tcg_temp_free_i64(tcg_op3);
7945             break;
7946         }
7947         default:
7948             g_assert_not_reached();
7949         }
7950
7951         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7952         write_fp_dreg(s, rd, tcg_res);
7953
7954         tcg_temp_free_i32(tcg_op1);
7955         tcg_temp_free_i32(tcg_op2);
7956         tcg_temp_free_i64(tcg_res);
7957     }
7958 }
7959
7960 /* CMTST : test is "if (X & Y != 0)". */
7961 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
7962 {
7963     tcg_gen_and_i32(d, a, b);
7964     tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
7965     tcg_gen_neg_i32(d, d);
7966 }
7967
7968 static void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
7969 {
7970     tcg_gen_and_i64(d, a, b);
7971     tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
7972     tcg_gen_neg_i64(d, d);
7973 }
7974
7975 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
7976 {
7977     tcg_gen_and_vec(vece, d, a, b);
7978     tcg_gen_dupi_vec(vece, a, 0);
7979     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
7980 }
7981
7982 static void handle_3same_64(DisasContext *s, int opcode, bool u,
7983                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
7984 {
7985     /* Handle 64x64->64 opcodes which are shared between the scalar
7986      * and vector 3-same groups. We cover every opcode where size == 3
7987      * is valid in either the three-reg-same (integer, not pairwise)
7988      * or scalar-three-reg-same groups.
7989      */
7990     TCGCond cond;
7991
7992     switch (opcode) {
7993     case 0x1: /* SQADD */
7994         if (u) {
7995             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7996         } else {
7997             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7998         }
7999         break;
8000     case 0x5: /* SQSUB */
8001         if (u) {
8002             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8003         } else {
8004             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8005         }
8006         break;
8007     case 0x6: /* CMGT, CMHI */
8008         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
8009          * We implement this using setcond (test) and then negating.
8010          */
8011         cond = u ? TCG_COND_GTU : TCG_COND_GT;
8012     do_cmop:
8013         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8014         tcg_gen_neg_i64(tcg_rd, tcg_rd);
8015         break;
8016     case 0x7: /* CMGE, CMHS */
8017         cond = u ? TCG_COND_GEU : TCG_COND_GE;
8018         goto do_cmop;
8019     case 0x11: /* CMTST, CMEQ */
8020         if (u) {
8021             cond = TCG_COND_EQ;
8022             goto do_cmop;
8023         }
8024         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8025         break;
8026     case 0x8: /* SSHL, USHL */
8027         if (u) {
8028             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
8029         } else {
8030             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
8031         }
8032         break;
8033     case 0x9: /* SQSHL, UQSHL */
8034         if (u) {
8035             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8036         } else {
8037             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8038         }
8039         break;
8040     case 0xa: /* SRSHL, URSHL */
8041         if (u) {
8042             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8043         } else {
8044             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8045         }
8046         break;
8047     case 0xb: /* SQRSHL, UQRSHL */
8048         if (u) {
8049             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8050         } else {
8051             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8052         }
8053         break;
8054     case 0x10: /* ADD, SUB */
8055         if (u) {
8056             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8057         } else {
8058             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8059         }
8060         break;
8061     default:
8062         g_assert_not_reached();
8063     }
8064 }
8065
/* Handle the 3-same-operands float operations; shared by the scalar
 * and vector encodings. The caller must filter out any encodings
 * not allocated for the encoding it is dealing with.
 *
 * size:     zero selects the 32-bit (single) path, non-zero the
 *           64-bit (double) path.
 * elements: number of elements to process, one per loop pass; element
 *           'pass' of rn and rm is combined into element 'pass' of rd.
 * fpopcode: operation selector matched by the switch statements below;
 *           a value with no case label hits g_assert_not_reached().
 * rd/rn/rm: destination and the two source vector register numbers.
 */
static void handle_3same_float(DisasContext *s, int size, int elements,
                               int fpopcode, int rd, int rn, int rm)
{
    int pass;
    /* NOTE(review): 'false' presumably selects the non-fp16 float status
     * (the fp16 decoder in this file passes 'true') - confirm against
     * get_fpstatus_ptr().
     */
    TCGv_ptr fpst = get_fpstatus_ptr(false);

    for (pass = 0; pass < elements; pass++) {
        if (size) {
            /* Double */
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negd(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                /* The current destination element is loaded into tcg_res
                 * and handed to the muladd helper as its last data operand.
                 */
                read_vec_element(s, tcg_res, rd, pass, MO_64);
                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                /* Difference first, then absolute value of the result */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_absd(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        } else {
            /* Single */
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negs(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                /* Same accumulate pattern as the double path: the old
                 * destination element is the last data operand.
                 */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                /* Difference first, then absolute value of the result */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_abss(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            if (elements == 1) {
                /* scalar single so clear high part: the 32-bit result is
                 * zero-extended and stored as a whole 64-bit element.
                 */
                TCGv_i64 tcg_tmp = tcg_temp_new_i64();

                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
                tcg_temp_free_i64(tcg_tmp);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }

    tcg_temp_free_ptr(fpst);

    /* The flag passed here is true only when the elements written above
     * span more than 8 bytes in total.
     */
    clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
}
8253
/* AdvSIMD scalar three same
 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 *
 * Decode the fields above, reject encodings this group does not
 * allocate, and emit the scalar operation. Opcodes >= 0x18 are passed
 * to handle_3same_float() as a one-element operation; size == 3 integer
 * ops go through handle_3same_64(); narrower integer ops use the Neon
 * helper tables below on element 0 only.
 */
static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    TCGv_i64 tcg_rd;

    if (opcode >= 0x18) {
        /* Floating point: U, size[1] and opcode indicate operation */
        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
        switch (fpopcode) {
        case 0x1b: /* FMULX */
        case 0x1f: /* FRECPS */
        case 0x3f: /* FRSQRTS */
        case 0x5d: /* FACGE */
        case 0x7d: /* FACGT */
        case 0x1c: /* FCMEQ */
        case 0x5c: /* FCMGE */
        case 0x7c: /* FCMGT */
        case 0x7a: /* FABD */
            break;
        default:
            unallocated_encoding(s);
            return;
        }

        if (!fp_access_check(s)) {
            return;
        }

        /* size[0] picks single vs double; exactly one element is processed */
        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
        return;
    }

    switch (opcode) {
    case 0x1: /* SQADD, UQADD */
    case 0x5: /* SQSUB, UQSUB */
    case 0x9: /* SQSHL, UQSHL */
    case 0xb: /* SQRSHL, UQRSHL */
        /* Accepted for every size */
        break;
    case 0x8: /* SSHL, USHL */
    case 0xa: /* SRSHL, URSHL */
    case 0x6: /* CMGT, CMHI */
    case 0x7: /* CMGE, CMHS */
    case 0x11: /* CMTST, CMEQ */
    case 0x10: /* ADD, SUB (vector) */
        /* Accepted only for size == 3 */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH (vector) */
        /* Accepted only for size 1 or 2 */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_rd = tcg_temp_new_i64();

    if (size == 3) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);

        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
        tcg_temp_free_i64(tcg_rn);
        tcg_temp_free_i64(tcg_rm);
    } else {
        /* Do a single operation on the lowest element in the vector.
         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
         * no side effects for all these operations.
         * OPTME: special-purpose helpers would avoid doing some
         * unnecessary work in the helper for the 8 and 16 bit cases.
         */
        NeonGenTwoOpEnvFn *genenvfn;
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rm = tcg_temp_new_i32();
        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);
        read_vec_element_i32(s, tcg_rm, rm, 0, size);

        /* Each table below is indexed [size][u]: the row is the element
         * size, the column is the U bit of the instruction.
         */
        switch (opcode) {
        case 0x1: /* SQADD, UQADD */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x5: /* SQSUB, UQSUB */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x9: /* SQSHL, UQSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0xb: /* SQRSHL, UQRSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x16: /* SQDMULH, SQRDMULH */
        {
            /* Only sizes 1 and 2 reach here (checked during decode), so
             * the table has two rows and is indexed by size - 1.
             */
            static NeonGenTwoOpEnvFn * const fns[2][2] = {
                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
            };
            assert(size == 1 || size == 2);
            genenvfn = fns[size - 1][u];
            break;
        }
        default:
            g_assert_not_reached();
        }

        /* Run the helper on the 32-bit temporaries, then zero-extend its
         * result into the 64-bit value written to the destination.
         */
        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
        tcg_temp_free_i32(tcg_rd32);
        tcg_temp_free_i32(tcg_rn);
        tcg_temp_free_i32(tcg_rm);
    }

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_rd);
}
8419
8420 /* AdvSIMD scalar three same FP16
8421  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
8422  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
8423  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
8424  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
8425  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
8426  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
8427  */
8428 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
8429                                                   uint32_t insn)
8430 {
8431     int rd = extract32(insn, 0, 5);
8432     int rn = extract32(insn, 5, 5);
8433     int opcode = extract32(insn, 11, 3);
8434     int rm = extract32(insn, 16, 5);
8435     bool u = extract32(insn, 29, 1);
8436     bool a = extract32(insn, 23, 1);
8437     int fpopcode = opcode | (a << 3) |  (u << 4);
8438     TCGv_ptr fpst;
8439     TCGv_i32 tcg_op1;
8440     TCGv_i32 tcg_op2;
8441     TCGv_i32 tcg_res;
8442
8443     switch (fpopcode) {
8444     case 0x03: /* FMULX */
8445     case 0x04: /* FCMEQ (reg) */
8446     case 0x07: /* FRECPS */
8447     case 0x0f: /* FRSQRTS */
8448     case 0x14: /* FCMGE (reg) */
8449     case 0x15: /* FACGE */
8450     case 0x1a: /* FABD */
8451     case 0x1c: /* FCMGT (reg) */
8452     case 0x1d: /* FACGT */
8453         break;
8454     default:
8455         unallocated_encoding(s);
8456         return;
8457     }
8458
8459     if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
8460         unallocated_encoding(s);
8461     }
8462
8463     if (!fp_access_check(s)) {
8464         return;
8465     }
8466
8467     fpst = get_fpstatus_ptr(true);
8468
8469     tcg_op1 = read_fp_hreg(s, rn);
8470     tcg_op2 = read_fp_hreg(s, rm);
8471     tcg_res = tcg_temp_new_i32();
8472
8473     switch (fpopcode) {
8474     case 0x03: /* FMULX */
8475         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
8476         break;
8477     case 0x04: /* FCMEQ (reg) */
8478         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8479         break;
8480     case 0x07: /* FRECPS */
8481         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8482         break;
8483     case 0x0f: /* FRSQRTS */
8484         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8485         break;
8486     case 0x14: /* FCMGE (reg) */
8487         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8488         break;
8489     case 0x15: /* FACGE */
8490         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8491         break;
8492     case 0x1a: /* FABD */
8493         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
8494         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
8495         break;
8496     case 0x1c: /* FCMGT (reg) */
8497         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8498         break;
8499     case 0x1d: /* FACGT */
8500         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8501         break;
8502     default:
8503         g_assert_not_reached();
8504     }
8505
8506     write_fp_sreg(s, rd, tcg_res);
8507
8508
8509     tcg_temp_free_i32(tcg_res);
8510     tcg_temp_free_i32(tcg_op1);
8511     tcg_temp_free_i32(tcg_op2);
8512     tcg_temp_free_ptr(fpst);
8513 }
8514
8515 /* AdvSIMD scalar three same extra
8516  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
8517  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
8518  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
8519  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
8520  */
8521 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
8522                                                    uint32_t insn)
8523 {
8524     int rd = extract32(insn, 0, 5);
8525     int rn = extract32(insn, 5, 5);
8526     int opcode = extract32(insn, 11, 4);
8527     int rm = extract32(insn, 16, 5);
8528     int size = extract32(insn, 22, 2);
8529     bool u = extract32(insn, 29, 1);
8530     TCGv_i32 ele1, ele2, ele3;
8531     TCGv_i64 res;
8532     int feature;
8533
8534     switch (u * 16 + opcode) {
8535     case 0x10: /* SQRDMLAH (vector) */
8536     case 0x11: /* SQRDMLSH (vector) */
8537         if (size != 1 && size != 2) {
8538             unallocated_encoding(s);
8539             return;
8540         }
8541         feature = ARM_FEATURE_V8_RDM;
8542         break;
8543     default:
8544         unallocated_encoding(s);
8545         return;
8546     }
8547     if (!arm_dc_feature(s, feature)) {
8548         unallocated_encoding(s);
8549         return;
8550     }
8551     if (!fp_access_check(s)) {
8552         return;
8553     }
8554
8555     /* Do a single operation on the lowest element in the vector.
8556      * We use the standard Neon helpers and rely on 0 OP 0 == 0
8557      * with no side effects for all these operations.
8558      * OPTME: special-purpose helpers would avoid doing some
8559      * unnecessary work in the helper for the 16 bit cases.
8560      */
8561     ele1 = tcg_temp_new_i32();
8562     ele2 = tcg_temp_new_i32();
8563     ele3 = tcg_temp_new_i32();
8564
8565     read_vec_element_i32(s, ele1, rn, 0, size);
8566     read_vec_element_i32(s, ele2, rm, 0, size);
8567     read_vec_element_i32(s, ele3, rd, 0, size);
8568
8569     switch (opcode) {
8570     case 0x0: /* SQRDMLAH */
8571         if (size == 1) {
8572             gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
8573         } else {
8574             gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
8575         }
8576         break;
8577     case 0x1: /* SQRDMLSH */
8578         if (size == 1) {
8579             gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
8580         } else {
8581             gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
8582         }
8583         break;
8584     default:
8585         g_assert_not_reached();
8586     }
8587     tcg_temp_free_i32(ele1);
8588     tcg_temp_free_i32(ele2);
8589
8590     res = tcg_temp_new_i64();
8591     tcg_gen_extu_i32_i64(res, ele3);
8592     tcg_temp_free_i32(ele3);
8593
8594     write_fp_dreg(s, rd, res);
8595     tcg_temp_free_i64(res);
8596 }
8597
/* Emit one 64-bit two-register-misc operation: tcg_rd = OP(tcg_rn).
 *
 * opcode selects the operation (see the case labels); u distinguishes
 * the two instructions sharing an integer opcode. An opcode with no
 * case label hits g_assert_not_reached(). Of the two optional
 * arguments only tcg_fpstatus is referenced in this body (by the FCVT*
 * and FRINT* cases); tcg_rmode is not referenced here.
 */
static void handle_2misc_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
{
    /* Handle 64->64 opcodes which are shared between the scalar and
     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
     * is valid in either group and also the double-precision fp ops.
     * The caller only need provide tcg_rmode and tcg_fpstatus if the op
     * requires them.
     */
    TCGCond cond;

    switch (opcode) {
    case 0x4: /* CLS, CLZ */
        if (u) {
            /* CLZ; the immediate 64 is the value passed for a zero input */
            tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
        } else {
            tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
        }
        break;
    case 0x5: /* NOT */
        /* This opcode is shared with CNT and RBIT but we have earlier
         * enforced that size == 3 if and only if this is the NOT insn.
         */
        tcg_gen_not_i64(tcg_rd, tcg_rn);
        break;
    case 0x7: /* SQABS, SQNEG */
        if (u) {
            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
        } else {
            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
        }
        break;
    case 0xa: /* CMLT */
        /* 64 bit integer comparison against zero, result is
         * test ? (2^64 - 1) : 0. We implement via setcond(test), which
         * leaves 0 or 1 in tcg_rd, and then negate that value so that
         * 1 becomes all-ones.
         */
        cond = TCG_COND_LT;
    do_cmop:
        /* Shared tail: cases 0x8 and 0x9 jump here once cond is chosen */
        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x8: /* CMGT, CMGE */
        cond = u ? TCG_COND_GE : TCG_COND_GT;
        goto do_cmop;
    case 0x9: /* CMEQ, CMLE */
        cond = u ? TCG_COND_LE : TCG_COND_EQ;
        goto do_cmop;
    case 0xb: /* ABS, NEG */
        if (u) {
            tcg_gen_neg_i64(tcg_rd, tcg_rn);
        } else {
            /* ABS: compute -rn, then keep rn itself when rn > 0 and the
             * negated value otherwise.
             */
            TCGv_i64 tcg_zero = tcg_const_i64(0);
            tcg_gen_neg_i64(tcg_rd, tcg_rn);
            tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
                                tcg_rn, tcg_rd);
            tcg_temp_free_i64(tcg_zero);
        }
        break;
    case 0x2f: /* FABS */
        gen_helper_vfp_absd(tcg_rd, tcg_rn);
        break;
    case 0x6f: /* FNEG */
        gen_helper_vfp_negd(tcg_rd, tcg_rn);
        break;
    case 0x7f: /* FSQRT */
        gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
        break;
    case 0x1a: /* FCVTNS */
    case 0x1b: /* FCVTMS */
    case 0x1c: /* FCVTAS */
    case 0x3a: /* FCVTPS */
    case 0x3b: /* FCVTZS */
    {
        /* All signed conversions share one helper, called with a shift
         * operand of 0. NOTE(review): the rounding variant is presumably
         * selected by the caller before this point - confirm.
         */
        TCGv_i32 tcg_shift = tcg_const_i32(0);
        gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
        tcg_temp_free_i32(tcg_shift);
        break;
    }
    case 0x5a: /* FCVTNU */
    case 0x5b: /* FCVTMU */
    case 0x5c: /* FCVTAU */
    case 0x7a: /* FCVTPU */
    case 0x7b: /* FCVTZU */
    {
        /* Unsigned counterpart of the block above */
        TCGv_i32 tcg_shift = tcg_const_i32(0);
        gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
        tcg_temp_free_i32(tcg_shift);
        break;
    }
    case 0x18: /* FRINTN */
    case 0x19: /* FRINTM */
    case 0x38: /* FRINTP */
    case 0x39: /* FRINTZ */
    case 0x58: /* FRINTA */
    case 0x79: /* FRINTI */
        gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    case 0x59: /* FRINTX */
        gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    default:
        g_assert_not_reached();
    }
}
8704
8705 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
8706                                    bool is_scalar, bool is_u, bool is_q,
8707                                    int size, int rn, int rd)
8708 {
8709     bool is_double = (size == MO_64);
8710     TCGv_ptr fpst;
8711
8712     if (!fp_access_check(s)) {
8713         return;
8714     }
8715
8716     fpst = get_fpstatus_ptr(size == MO_16);
8717
8718     if (is_double) {
8719         TCGv_i64 tcg_op = tcg_temp_new_i64();
8720         TCGv_i64 tcg_zero = tcg_const_i64(0);
8721         TCGv_i64 tcg_res = tcg_temp_new_i64();
8722         NeonGenTwoDoubleOPFn *genfn;
8723         bool swap = false;
8724         int pass;
8725
8726         switch (opcode) {
8727         case 0x2e: /* FCMLT (zero) */
8728             swap = true;
8729             /* fallthrough */
8730         case 0x2c: /* FCMGT (zero) */
8731             genfn = gen_helper_neon_cgt_f64;
8732             break;
8733         case 0x2d: /* FCMEQ (zero) */
8734             genfn = gen_helper_neon_ceq_f64;
8735             break;
8736         case 0x6d: /* FCMLE (zero) */
8737             swap = true;
8738             /* fall through */
8739         case 0x6c: /* FCMGE (zero) */
8740             genfn = gen_helper_neon_cge_f64;
8741             break;
8742         default:
8743             g_assert_not_reached();
8744         }
8745
8746         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8747             read_vec_element(s, tcg_op, rn, pass, MO_64);
8748             if (swap) {
8749                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
8750             } else {
8751                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
8752             }
8753             write_vec_element(s, tcg_res, rd, pass, MO_64);
8754         }
8755         tcg_temp_free_i64(tcg_res);
8756         tcg_temp_free_i64(tcg_zero);
8757         tcg_temp_free_i64(tcg_op);
8758
8759         clear_vec_high(s, !is_scalar, rd);
8760     } else {
8761         TCGv_i32 tcg_op = tcg_temp_new_i32();
8762         TCGv_i32 tcg_zero = tcg_const_i32(0);
8763         TCGv_i32 tcg_res = tcg_temp_new_i32();
8764         NeonGenTwoSingleOPFn *genfn;
8765         bool swap = false;
8766         int pass, maxpasses;
8767
8768         if (size == MO_16) {
8769             switch (opcode) {
8770             case 0x2e: /* FCMLT (zero) */
8771                 swap = true;
8772                 /* fall through */
8773             case 0x2c: /* FCMGT (zero) */
8774                 genfn = gen_helper_advsimd_cgt_f16;
8775                 break;
8776             case 0x2d: /* FCMEQ (zero) */
8777                 genfn = gen_helper_advsimd_ceq_f16;
8778                 break;
8779             case 0x6d: /* FCMLE (zero) */
8780                 swap = true;
8781                 /* fall through */
8782             case 0x6c: /* FCMGE (zero) */
8783                 genfn = gen_helper_advsimd_cge_f16;
8784                 break;
8785             default:
8786                 g_assert_not_reached();
8787             }
8788         } else {
8789             switch (opcode) {
8790             case 0x2e: /* FCMLT (zero) */
8791                 swap = true;
8792                 /* fall through */
8793             case 0x2c: /* FCMGT (zero) */
8794                 genfn = gen_helper_neon_cgt_f32;
8795                 break;
8796             case 0x2d: /* FCMEQ (zero) */
8797                 genfn = gen_helper_neon_ceq_f32;
8798                 break;
8799             case 0x6d: /* FCMLE (zero) */
8800                 swap = true;
8801                 /* fall through */
8802             case 0x6c: /* FCMGE (zero) */
8803                 genfn = gen_helper_neon_cge_f32;
8804                 break;
8805             default:
8806                 g_assert_not_reached();
8807             }
8808         }
8809
8810         if (is_scalar) {
8811             maxpasses = 1;
8812         } else {
8813             int vector_size = 8 << is_q;
8814             maxpasses = vector_size >> size;
8815         }
8816
8817         for (pass = 0; pass < maxpasses; pass++) {
8818             read_vec_element_i32(s, tcg_op, rn, pass, size);
8819             if (swap) {
8820                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
8821             } else {
8822                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
8823             }
8824             if (is_scalar) {
8825                 write_fp_sreg(s, rd, tcg_res);
8826             } else {
8827                 write_vec_element_i32(s, tcg_res, rd, pass, size);
8828             }
8829         }
8830         tcg_temp_free_i32(tcg_res);
8831         tcg_temp_free_i32(tcg_zero);
8832         tcg_temp_free_i32(tcg_op);
8833         if (!is_scalar) {
8834             clear_vec_high(s, is_q, rd);
8835         }
8836     }
8837
8838     tcg_temp_free_ptr(fpst);
8839 }
8840
8841 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
8842                                     bool is_scalar, bool is_u, bool is_q,
8843                                     int size, int rn, int rd)
8844 {
8845     bool is_double = (size == 3);
8846     TCGv_ptr fpst = get_fpstatus_ptr(false);
8847
8848     if (is_double) {
8849         TCGv_i64 tcg_op = tcg_temp_new_i64();
8850         TCGv_i64 tcg_res = tcg_temp_new_i64();
8851         int pass;
8852
8853         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8854             read_vec_element(s, tcg_op, rn, pass, MO_64);
8855             switch (opcode) {
8856             case 0x3d: /* FRECPE */
8857                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
8858                 break;
8859             case 0x3f: /* FRECPX */
8860                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
8861                 break;
8862             case 0x7d: /* FRSQRTE */
8863                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
8864                 break;
8865             default:
8866                 g_assert_not_reached();
8867             }
8868             write_vec_element(s, tcg_res, rd, pass, MO_64);
8869         }
8870         tcg_temp_free_i64(tcg_res);
8871         tcg_temp_free_i64(tcg_op);
8872         clear_vec_high(s, !is_scalar, rd);
8873     } else {
8874         TCGv_i32 tcg_op = tcg_temp_new_i32();
8875         TCGv_i32 tcg_res = tcg_temp_new_i32();
8876         int pass, maxpasses;
8877
8878         if (is_scalar) {
8879             maxpasses = 1;
8880         } else {
8881             maxpasses = is_q ? 4 : 2;
8882         }
8883
8884         for (pass = 0; pass < maxpasses; pass++) {
8885             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
8886
8887             switch (opcode) {
8888             case 0x3c: /* URECPE */
8889                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
8890                 break;
8891             case 0x3d: /* FRECPE */
8892                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
8893                 break;
8894             case 0x3f: /* FRECPX */
8895                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
8896                 break;
8897             case 0x7d: /* FRSQRTE */
8898                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
8899                 break;
8900             default:
8901                 g_assert_not_reached();
8902             }
8903
8904             if (is_scalar) {
8905                 write_fp_sreg(s, rd, tcg_res);
8906             } else {
8907                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8908             }
8909         }
8910         tcg_temp_free_i32(tcg_res);
8911         tcg_temp_free_i32(tcg_op);
8912         if (!is_scalar) {
8913             clear_vec_high(s, is_q, rd);
8914         }
8915     }
8916     tcg_temp_free_ptr(fpst);
8917 }
8918
8919 static void handle_2misc_narrow(DisasContext *s, bool scalar,
8920                                 int opcode, bool u, bool is_q,
8921                                 int size, int rn, int rd)
8922 {
8923     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
8924      * in the source becomes a size element in the destination).
8925      */
8926     int pass;
8927     TCGv_i32 tcg_res[2];
8928     int destelt = is_q ? 2 : 0;
8929     int passes = scalar ? 1 : 2;
8930
8931     if (scalar) {
8932         tcg_res[1] = tcg_const_i32(0);
8933     }
8934
8935     for (pass = 0; pass < passes; pass++) {
8936         TCGv_i64 tcg_op = tcg_temp_new_i64();
8937         NeonGenNarrowFn *genfn = NULL;
8938         NeonGenNarrowEnvFn *genenvfn = NULL;
8939
8940         if (scalar) {
8941             read_vec_element(s, tcg_op, rn, pass, size + 1);
8942         } else {
8943             read_vec_element(s, tcg_op, rn, pass, MO_64);
8944         }
8945         tcg_res[pass] = tcg_temp_new_i32();
8946
8947         switch (opcode) {
8948         case 0x12: /* XTN, SQXTUN */
8949         {
8950             static NeonGenNarrowFn * const xtnfns[3] = {
8951                 gen_helper_neon_narrow_u8,
8952                 gen_helper_neon_narrow_u16,
8953                 tcg_gen_extrl_i64_i32,
8954             };
8955             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
8956                 gen_helper_neon_unarrow_sat8,
8957                 gen_helper_neon_unarrow_sat16,
8958                 gen_helper_neon_unarrow_sat32,
8959             };
8960             if (u) {
8961                 genenvfn = sqxtunfns[size];
8962             } else {
8963                 genfn = xtnfns[size];
8964             }
8965             break;
8966         }
8967         case 0x14: /* SQXTN, UQXTN */
8968         {
8969             static NeonGenNarrowEnvFn * const fns[3][2] = {
8970                 { gen_helper_neon_narrow_sat_s8,
8971                   gen_helper_neon_narrow_sat_u8 },
8972                 { gen_helper_neon_narrow_sat_s16,
8973                   gen_helper_neon_narrow_sat_u16 },
8974                 { gen_helper_neon_narrow_sat_s32,
8975                   gen_helper_neon_narrow_sat_u32 },
8976             };
8977             genenvfn = fns[size][u];
8978             break;
8979         }
8980         case 0x16: /* FCVTN, FCVTN2 */
8981             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
8982             if (size == 2) {
8983                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
8984             } else {
8985                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
8986                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
8987                 TCGv_ptr fpst = get_fpstatus_ptr(false);
8988                 TCGv_i32 ahp = get_ahp_flag();
8989
8990                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
8991                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
8992                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
8993                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
8994                 tcg_temp_free_i32(tcg_lo);
8995                 tcg_temp_free_i32(tcg_hi);
8996                 tcg_temp_free_ptr(fpst);
8997                 tcg_temp_free_i32(ahp);
8998             }
8999             break;
9000         case 0x56:  /* FCVTXN, FCVTXN2 */
9001             /* 64 bit to 32 bit float conversion
9002              * with von Neumann rounding (round to odd)
9003              */
9004             assert(size == 2);
9005             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9006             break;
9007         default:
9008             g_assert_not_reached();
9009         }
9010
9011         if (genfn) {
9012             genfn(tcg_res[pass], tcg_op);
9013         } else if (genenvfn) {
9014             genenvfn(tcg_res[pass], cpu_env, tcg_op);
9015         }
9016
9017         tcg_temp_free_i64(tcg_op);
9018     }
9019
9020     for (pass = 0; pass < 2; pass++) {
9021         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9022         tcg_temp_free_i32(tcg_res[pass]);
9023     }
9024     clear_vec_high(s, is_q, rd);
9025 }
9026
9027 /* Remaining saturating accumulating ops */
9028 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9029                                 bool is_q, int size, int rn, int rd)
9030 {
9031     bool is_double = (size == 3);
9032
9033     if (is_double) {
9034         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9035         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9036         int pass;
9037
9038         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9039             read_vec_element(s, tcg_rn, rn, pass, MO_64);
9040             read_vec_element(s, tcg_rd, rd, pass, MO_64);
9041
9042             if (is_u) { /* USQADD */
9043                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9044             } else { /* SUQADD */
9045                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9046             }
9047             write_vec_element(s, tcg_rd, rd, pass, MO_64);
9048         }
9049         tcg_temp_free_i64(tcg_rd);
9050         tcg_temp_free_i64(tcg_rn);
9051         clear_vec_high(s, !is_scalar, rd);
9052     } else {
9053         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9054         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9055         int pass, maxpasses;
9056
9057         if (is_scalar) {
9058             maxpasses = 1;
9059         } else {
9060             maxpasses = is_q ? 4 : 2;
9061         }
9062
9063         for (pass = 0; pass < maxpasses; pass++) {
9064             if (is_scalar) {
9065                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9066                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9067             } else {
9068                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9069                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9070             }
9071
9072             if (is_u) { /* USQADD */
9073                 switch (size) {
9074                 case 0:
9075                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9076                     break;
9077                 case 1:
9078                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9079                     break;
9080                 case 2:
9081                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9082                     break;
9083                 default:
9084                     g_assert_not_reached();
9085                 }
9086             } else { /* SUQADD */
9087                 switch (size) {
9088                 case 0:
9089                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9090                     break;
9091                 case 1:
9092                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9093                     break;
9094                 case 2:
9095                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9096                     break;
9097                 default:
9098                     g_assert_not_reached();
9099                 }
9100             }
9101
9102             if (is_scalar) {
9103                 TCGv_i64 tcg_zero = tcg_const_i64(0);
9104                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
9105                 tcg_temp_free_i64(tcg_zero);
9106             }
9107             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9108         }
9109         tcg_temp_free_i32(tcg_rd);
9110         tcg_temp_free_i32(tcg_rn);
9111         clear_vec_high(s, is_q, rd);
9112     }
9113 }
9114
9115 /* AdvSIMD scalar two reg misc
9116  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9117  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9118  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9119  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9120  */
9121 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
9122 {
9123     int rd = extract32(insn, 0, 5);
9124     int rn = extract32(insn, 5, 5);
9125     int opcode = extract32(insn, 12, 5);
9126     int size = extract32(insn, 22, 2);
9127     bool u = extract32(insn, 29, 1);
9128     bool is_fcvt = false;
9129     int rmode;
9130     TCGv_i32 tcg_rmode;
9131     TCGv_ptr tcg_fpstatus;
9132
9133     switch (opcode) {
9134     case 0x3: /* USQADD / SUQADD*/
9135         if (!fp_access_check(s)) {
9136             return;
9137         }
9138         handle_2misc_satacc(s, true, u, false, size, rn, rd);
9139         return;
9140     case 0x7: /* SQABS / SQNEG */
9141         break;
9142     case 0xa: /* CMLT */
9143         if (u) {
9144             unallocated_encoding(s);
9145             return;
9146         }
9147         /* fall through */
9148     case 0x8: /* CMGT, CMGE */
9149     case 0x9: /* CMEQ, CMLE */
9150     case 0xb: /* ABS, NEG */
9151         if (size != 3) {
9152             unallocated_encoding(s);
9153             return;
9154         }
9155         break;
9156     case 0x12: /* SQXTUN */
9157         if (!u) {
9158             unallocated_encoding(s);
9159             return;
9160         }
9161         /* fall through */
9162     case 0x14: /* SQXTN, UQXTN */
9163         if (size == 3) {
9164             unallocated_encoding(s);
9165             return;
9166         }
9167         if (!fp_access_check(s)) {
9168             return;
9169         }
9170         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
9171         return;
9172     case 0xc ... 0xf:
9173     case 0x16 ... 0x1d:
9174     case 0x1f:
9175         /* Floating point: U, size[1] and opcode indicate operation;
9176          * size[0] indicates single or double precision.
9177          */
9178         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9179         size = extract32(size, 0, 1) ? 3 : 2;
9180         switch (opcode) {
9181         case 0x2c: /* FCMGT (zero) */
9182         case 0x2d: /* FCMEQ (zero) */
9183         case 0x2e: /* FCMLT (zero) */
9184         case 0x6c: /* FCMGE (zero) */
9185         case 0x6d: /* FCMLE (zero) */
9186             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
9187             return;
9188         case 0x1d: /* SCVTF */
9189         case 0x5d: /* UCVTF */
9190         {
9191             bool is_signed = (opcode == 0x1d);
9192             if (!fp_access_check(s)) {
9193                 return;
9194             }
9195             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
9196             return;
9197         }
9198         case 0x3d: /* FRECPE */
9199         case 0x3f: /* FRECPX */
9200         case 0x7d: /* FRSQRTE */
9201             if (!fp_access_check(s)) {
9202                 return;
9203             }
9204             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
9205             return;
9206         case 0x1a: /* FCVTNS */
9207         case 0x1b: /* FCVTMS */
9208         case 0x3a: /* FCVTPS */
9209         case 0x3b: /* FCVTZS */
9210         case 0x5a: /* FCVTNU */
9211         case 0x5b: /* FCVTMU */
9212         case 0x7a: /* FCVTPU */
9213         case 0x7b: /* FCVTZU */
9214             is_fcvt = true;
9215             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9216             break;
9217         case 0x1c: /* FCVTAS */
9218         case 0x5c: /* FCVTAU */
9219             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
9220             is_fcvt = true;
9221             rmode = FPROUNDING_TIEAWAY;
9222             break;
9223         case 0x56: /* FCVTXN, FCVTXN2 */
9224             if (size == 2) {
9225                 unallocated_encoding(s);
9226                 return;
9227             }
9228             if (!fp_access_check(s)) {
9229                 return;
9230             }
9231             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
9232             return;
9233         default:
9234             unallocated_encoding(s);
9235             return;
9236         }
9237         break;
9238     default:
9239         unallocated_encoding(s);
9240         return;
9241     }
9242
9243     if (!fp_access_check(s)) {
9244         return;
9245     }
9246
9247     if (is_fcvt) {
9248         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
9249         tcg_fpstatus = get_fpstatus_ptr(false);
9250         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
9251     } else {
9252         tcg_rmode = NULL;
9253         tcg_fpstatus = NULL;
9254     }
9255
9256     if (size == 3) {
9257         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9258         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9259
9260         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
9261         write_fp_dreg(s, rd, tcg_rd);
9262         tcg_temp_free_i64(tcg_rd);
9263         tcg_temp_free_i64(tcg_rn);
9264     } else {
9265         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9266         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9267
9268         read_vec_element_i32(s, tcg_rn, rn, 0, size);
9269
9270         switch (opcode) {
9271         case 0x7: /* SQABS, SQNEG */
9272         {
9273             NeonGenOneOpEnvFn *genfn;
9274             static NeonGenOneOpEnvFn * const fns[3][2] = {
9275                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
9276                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
9277                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
9278             };
9279             genfn = fns[size][u];
9280             genfn(tcg_rd, cpu_env, tcg_rn);
9281             break;
9282         }
9283         case 0x1a: /* FCVTNS */
9284         case 0x1b: /* FCVTMS */
9285         case 0x1c: /* FCVTAS */
9286         case 0x3a: /* FCVTPS */
9287         case 0x3b: /* FCVTZS */
9288         {
9289             TCGv_i32 tcg_shift = tcg_const_i32(0);
9290             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9291             tcg_temp_free_i32(tcg_shift);
9292             break;
9293         }
9294         case 0x5a: /* FCVTNU */
9295         case 0x5b: /* FCVTMU */
9296         case 0x5c: /* FCVTAU */
9297         case 0x7a: /* FCVTPU */
9298         case 0x7b: /* FCVTZU */
9299         {
9300             TCGv_i32 tcg_shift = tcg_const_i32(0);
9301             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9302             tcg_temp_free_i32(tcg_shift);
9303             break;
9304         }
9305         default:
9306             g_assert_not_reached();
9307         }
9308
9309         write_fp_sreg(s, rd, tcg_rd);
9310         tcg_temp_free_i32(tcg_rd);
9311         tcg_temp_free_i32(tcg_rn);
9312     }
9313
9314     if (is_fcvt) {
9315         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
9316         tcg_temp_free_i32(tcg_rmode);
9317         tcg_temp_free_ptr(tcg_fpstatus);
9318     }
9319 }
9320
9321 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9322 {
9323     tcg_gen_vec_sar8i_i64(a, a, shift);
9324     tcg_gen_vec_add8_i64(d, d, a);
9325 }
9326
9327 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9328 {
9329     tcg_gen_vec_sar16i_i64(a, a, shift);
9330     tcg_gen_vec_add16_i64(d, d, a);
9331 }
9332
9333 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
9334 {
9335     tcg_gen_sari_i32(a, a, shift);
9336     tcg_gen_add_i32(d, d, a);
9337 }
9338
9339 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9340 {
9341     tcg_gen_sari_i64(a, a, shift);
9342     tcg_gen_add_i64(d, d, a);
9343 }
9344
9345 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
9346 {
9347     tcg_gen_sari_vec(vece, a, a, sh);
9348     tcg_gen_add_vec(vece, d, d, a);
9349 }
9350
9351 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9352 {
9353     tcg_gen_vec_shr8i_i64(a, a, shift);
9354     tcg_gen_vec_add8_i64(d, d, a);
9355 }
9356
9357 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9358 {
9359     tcg_gen_vec_shr16i_i64(a, a, shift);
9360     tcg_gen_vec_add16_i64(d, d, a);
9361 }
9362
9363 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
9364 {
9365     tcg_gen_shri_i32(a, a, shift);
9366     tcg_gen_add_i32(d, d, a);
9367 }
9368
9369 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9370 {
9371     tcg_gen_shri_i64(a, a, shift);
9372     tcg_gen_add_i64(d, d, a);
9373 }
9374
9375 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
9376 {
9377     tcg_gen_shri_vec(vece, a, a, sh);
9378     tcg_gen_add_vec(vece, d, d, a);
9379 }
9380
9381 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9382 {
9383     uint64_t mask = dup_const(MO_8, 0xff >> shift);
9384     TCGv_i64 t = tcg_temp_new_i64();
9385
9386     tcg_gen_shri_i64(t, a, shift);
9387     tcg_gen_andi_i64(t, t, mask);
9388     tcg_gen_andi_i64(d, d, ~mask);
9389     tcg_gen_or_i64(d, d, t);
9390     tcg_temp_free_i64(t);
9391 }
9392
9393 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9394 {
9395     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
9396     TCGv_i64 t = tcg_temp_new_i64();
9397
9398     tcg_gen_shri_i64(t, a, shift);
9399     tcg_gen_andi_i64(t, t, mask);
9400     tcg_gen_andi_i64(d, d, ~mask);
9401     tcg_gen_or_i64(d, d, t);
9402     tcg_temp_free_i64(t);
9403 }
9404
9405 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
9406 {
9407     tcg_gen_shri_i32(a, a, shift);
9408     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
9409 }
9410
9411 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9412 {
9413     tcg_gen_shri_i64(a, a, shift);
9414     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
9415 }
9416
9417 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
9418 {
9419     uint64_t mask = (2ull << ((8 << vece) - 1)) - 1;
9420     TCGv_vec t = tcg_temp_new_vec_matching(d);
9421     TCGv_vec m = tcg_temp_new_vec_matching(d);
9422
9423     tcg_gen_dupi_vec(vece, m, mask ^ (mask >> sh));
9424     tcg_gen_shri_vec(vece, t, a, sh);
9425     tcg_gen_and_vec(vece, d, d, m);
9426     tcg_gen_or_vec(vece, d, d, t);
9427
9428     tcg_temp_free_vec(t);
9429     tcg_temp_free_vec(m);
9430 }
9431
9432 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
9433 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
9434                                  int immh, int immb, int opcode, int rn, int rd)
9435 {
9436     static const GVecGen2i ssra_op[4] = {
9437         { .fni8 = gen_ssra8_i64,
9438           .fniv = gen_ssra_vec,
9439           .load_dest = true,
9440           .opc = INDEX_op_sari_vec,
9441           .vece = MO_8 },
9442         { .fni8 = gen_ssra16_i64,
9443           .fniv = gen_ssra_vec,
9444           .load_dest = true,
9445           .opc = INDEX_op_sari_vec,
9446           .vece = MO_16 },
9447         { .fni4 = gen_ssra32_i32,
9448           .fniv = gen_ssra_vec,
9449           .load_dest = true,
9450           .opc = INDEX_op_sari_vec,
9451           .vece = MO_32 },
9452         { .fni8 = gen_ssra64_i64,
9453           .fniv = gen_ssra_vec,
9454           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9455           .load_dest = true,
9456           .opc = INDEX_op_sari_vec,
9457           .vece = MO_64 },
9458     };
9459     static const GVecGen2i usra_op[4] = {
9460         { .fni8 = gen_usra8_i64,
9461           .fniv = gen_usra_vec,
9462           .load_dest = true,
9463           .opc = INDEX_op_shri_vec,
9464           .vece = MO_8, },
9465         { .fni8 = gen_usra16_i64,
9466           .fniv = gen_usra_vec,
9467           .load_dest = true,
9468           .opc = INDEX_op_shri_vec,
9469           .vece = MO_16, },
9470         { .fni4 = gen_usra32_i32,
9471           .fniv = gen_usra_vec,
9472           .load_dest = true,
9473           .opc = INDEX_op_shri_vec,
9474           .vece = MO_32, },
9475         { .fni8 = gen_usra64_i64,
9476           .fniv = gen_usra_vec,
9477           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9478           .load_dest = true,
9479           .opc = INDEX_op_shri_vec,
9480           .vece = MO_64, },
9481     };
9482     static const GVecGen2i sri_op[4] = {
9483         { .fni8 = gen_shr8_ins_i64,
9484           .fniv = gen_shr_ins_vec,
9485           .load_dest = true,
9486           .opc = INDEX_op_shri_vec,
9487           .vece = MO_8 },
9488         { .fni8 = gen_shr16_ins_i64,
9489           .fniv = gen_shr_ins_vec,
9490           .load_dest = true,
9491           .opc = INDEX_op_shri_vec,
9492           .vece = MO_16 },
9493         { .fni4 = gen_shr32_ins_i32,
9494           .fniv = gen_shr_ins_vec,
9495           .load_dest = true,
9496           .opc = INDEX_op_shri_vec,
9497           .vece = MO_32 },
9498         { .fni8 = gen_shr64_ins_i64,
9499           .fniv = gen_shr_ins_vec,
9500           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9501           .load_dest = true,
9502           .opc = INDEX_op_shri_vec,
9503           .vece = MO_64 },
9504     };
9505
9506     int size = 32 - clz32(immh) - 1;
9507     int immhb = immh << 3 | immb;
9508     int shift = 2 * (8 << size) - immhb;
9509     bool accumulate = false;
9510     int dsize = is_q ? 128 : 64;
9511     int esize = 8 << size;
9512     int elements = dsize/esize;
9513     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
9514     TCGv_i64 tcg_rn = new_tmp_a64(s);
9515     TCGv_i64 tcg_rd = new_tmp_a64(s);
9516     TCGv_i64 tcg_round;
9517     uint64_t round_const;
9518     int i;
9519
9520     if (extract32(immh, 3, 1) && !is_q) {
9521         unallocated_encoding(s);
9522         return;
9523     }
9524     tcg_debug_assert(size <= 3);
9525
9526     if (!fp_access_check(s)) {
9527         return;
9528     }
9529
9530     switch (opcode) {
9531     case 0x02: /* SSRA / USRA (accumulate) */
9532         if (is_u) {
9533             /* Shift count same as element size produces zero to add.  */
9534             if (shift == 8 << size) {
9535                 goto done;
9536             }
9537             gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
9538         } else {
9539             /* Shift count same as element size produces all sign to add.  */
9540             if (shift == 8 << size) {
9541                 shift -= 1;
9542             }
9543             gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
9544         }
9545         return;
9546     case 0x08: /* SRI */
9547         /* Shift count same as element size is valid but does nothing.  */
9548         if (shift == 8 << size) {
9549             goto done;
9550         }
9551         gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
9552         return;
9553
9554     case 0x00: /* SSHR / USHR */
9555         if (is_u) {
9556             if (shift == 8 << size) {
9557                 /* Shift count the same size as element size produces zero.  */
9558                 tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd),
9559                                    is_q ? 16 : 8, vec_full_reg_size(s), 0);
9560             } else {
9561                 gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
9562             }
9563         } else {
9564             /* Shift count the same size as element size produces all sign.  */
9565             if (shift == 8 << size) {
9566                 shift -= 1;
9567             }
9568             gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size);
9569         }
9570         return;
9571
9572     case 0x04: /* SRSHR / URSHR (rounding) */
9573         break;
9574     case 0x06: /* SRSRA / URSRA (accum + rounding) */
9575         accumulate = true;
9576         break;
9577     default:
9578         g_assert_not_reached();
9579     }
9580
9581     round_const = 1ULL << (shift - 1);
9582     tcg_round = tcg_const_i64(round_const);
9583
9584     for (i = 0; i < elements; i++) {
9585         read_vec_element(s, tcg_rn, rn, i, memop);
9586         if (accumulate) {
9587             read_vec_element(s, tcg_rd, rd, i, memop);
9588         }
9589
9590         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
9591                                 accumulate, is_u, size, shift);
9592
9593         write_vec_element(s, tcg_rd, rd, i, size);
9594     }
9595     tcg_temp_free_i64(tcg_round);
9596
9597  done:
9598     clear_vec_high(s, is_q, rd);
9599 }
9600
9601 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9602 {
9603     uint64_t mask = dup_const(MO_8, 0xff << shift);
9604     TCGv_i64 t = tcg_temp_new_i64();
9605
9606     tcg_gen_shli_i64(t, a, shift);
9607     tcg_gen_andi_i64(t, t, mask);
9608     tcg_gen_andi_i64(d, d, ~mask);
9609     tcg_gen_or_i64(d, d, t);
9610     tcg_temp_free_i64(t);
9611 }
9612
9613 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9614 {
9615     uint64_t mask = dup_const(MO_16, 0xffff << shift);
9616     TCGv_i64 t = tcg_temp_new_i64();
9617
9618     tcg_gen_shli_i64(t, a, shift);
9619     tcg_gen_andi_i64(t, t, mask);
9620     tcg_gen_andi_i64(d, d, ~mask);
9621     tcg_gen_or_i64(d, d, t);
9622     tcg_temp_free_i64(t);
9623 }
9624
9625 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
9626 {
9627     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
9628 }
9629
9630 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9631 {
9632     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
9633 }
9634
9635 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
9636 {
9637     uint64_t mask = (1ull << sh) - 1;
9638     TCGv_vec t = tcg_temp_new_vec_matching(d);
9639     TCGv_vec m = tcg_temp_new_vec_matching(d);
9640
9641     tcg_gen_dupi_vec(vece, m, mask);
9642     tcg_gen_shli_vec(vece, t, a, sh);
9643     tcg_gen_and_vec(vece, d, d, m);
9644     tcg_gen_or_vec(vece, d, d, t);
9645
9646     tcg_temp_free_vec(t);
9647     tcg_temp_free_vec(m);
9648 }
9649
9650 /* SHL/SLI - Vector shift left */
9651 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
9652                                  int immh, int immb, int opcode, int rn, int rd)
9653 {
9654     static const GVecGen2i shi_op[4] = {
9655         { .fni8 = gen_shl8_ins_i64,
9656           .fniv = gen_shl_ins_vec,
9657           .opc = INDEX_op_shli_vec,
9658           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9659           .load_dest = true,
9660           .vece = MO_8 },
9661         { .fni8 = gen_shl16_ins_i64,
9662           .fniv = gen_shl_ins_vec,
9663           .opc = INDEX_op_shli_vec,
9664           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9665           .load_dest = true,
9666           .vece = MO_16 },
9667         { .fni4 = gen_shl32_ins_i32,
9668           .fniv = gen_shl_ins_vec,
9669           .opc = INDEX_op_shli_vec,
9670           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9671           .load_dest = true,
9672           .vece = MO_32 },
9673         { .fni8 = gen_shl64_ins_i64,
9674           .fniv = gen_shl_ins_vec,
9675           .opc = INDEX_op_shli_vec,
9676           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9677           .load_dest = true,
9678           .vece = MO_64 },
9679     };
9680     int size = 32 - clz32(immh) - 1;
9681     int immhb = immh << 3 | immb;
9682     int shift = immhb - (8 << size);
9683
9684     if (extract32(immh, 3, 1) && !is_q) {
9685         unallocated_encoding(s);
9686         return;
9687     }
9688
9689     if (size > 3 && !is_q) {
9690         unallocated_encoding(s);
9691         return;
9692     }
9693
9694     if (!fp_access_check(s)) {
9695         return;
9696     }
9697
9698     if (insert) {
9699         gen_gvec_op2i(s, is_q, rd, rn, shift, &shi_op[size]);
9700     } else {
9701         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
9702     }
9703 }
9704
9705 /* USHLL/SHLL - Vector shift left with widening */
9706 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
9707                                  int immh, int immb, int opcode, int rn, int rd)
9708 {
9709     int size = 32 - clz32(immh) - 1;
9710     int immhb = immh << 3 | immb;
9711     int shift = immhb - (8 << size);
9712     int dsize = 64;
9713     int esize = 8 << size;
9714     int elements = dsize/esize;
9715     TCGv_i64 tcg_rn = new_tmp_a64(s);
9716     TCGv_i64 tcg_rd = new_tmp_a64(s);
9717     int i;
9718
9719     if (size >= 3) {
9720         unallocated_encoding(s);
9721         return;
9722     }
9723
9724     if (!fp_access_check(s)) {
9725         return;
9726     }
9727
9728     /* For the LL variants the store is larger than the load,
9729      * so if rd == rn we would overwrite parts of our input.
9730      * So load everything right now and use shifts in the main loop.
9731      */
9732     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
9733
9734     for (i = 0; i < elements; i++) {
9735         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
9736         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
9737         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
9738         write_vec_element(s, tcg_rd, rd, i, size + 1);
9739     }
9740 }
9741
9742 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
9743 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
9744                                  int immh, int immb, int opcode, int rn, int rd)
9745 {
9746     int immhb = immh << 3 | immb;
9747     int size = 32 - clz32(immh) - 1;
9748     int dsize = 64;
9749     int esize = 8 << size;
9750     int elements = dsize/esize;
9751     int shift = (2 * esize) - immhb;
9752     bool round = extract32(opcode, 0, 1);
9753     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
9754     TCGv_i64 tcg_round;
9755     int i;
9756
9757     if (extract32(immh, 3, 1)) {
9758         unallocated_encoding(s);
9759         return;
9760     }
9761
9762     if (!fp_access_check(s)) {
9763         return;
9764     }
9765
9766     tcg_rn = tcg_temp_new_i64();
9767     tcg_rd = tcg_temp_new_i64();
9768     tcg_final = tcg_temp_new_i64();
9769     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
9770
9771     if (round) {
9772         uint64_t round_const = 1ULL << (shift - 1);
9773         tcg_round = tcg_const_i64(round_const);
9774     } else {
9775         tcg_round = NULL;
9776     }
9777
9778     for (i = 0; i < elements; i++) {
9779         read_vec_element(s, tcg_rn, rn, i, size+1);
9780         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
9781                                 false, true, size+1, shift);
9782
9783         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
9784     }
9785
9786     if (!is_q) {
9787         write_vec_element(s, tcg_final, rd, 0, MO_64);
9788     } else {
9789         write_vec_element(s, tcg_final, rd, 1, MO_64);
9790     }
9791     if (round) {
9792         tcg_temp_free_i64(tcg_round);
9793     }
9794     tcg_temp_free_i64(tcg_rn);
9795     tcg_temp_free_i64(tcg_rd);
9796     tcg_temp_free_i64(tcg_final);
9797
9798     clear_vec_high(s, is_q, rd);
9799 }
9800
9801
9802 /* AdvSIMD shift by immediate
9803  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
9804  * +---+---+---+-------------+------+------+--------+---+------+------+
9805  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
9806  * +---+---+---+-------------+------+------+--------+---+------+------+
9807  */
9808 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
9809 {
9810     int rd = extract32(insn, 0, 5);
9811     int rn = extract32(insn, 5, 5);
9812     int opcode = extract32(insn, 11, 5);
9813     int immb = extract32(insn, 16, 3);
9814     int immh = extract32(insn, 19, 4);
9815     bool is_u = extract32(insn, 29, 1);
9816     bool is_q = extract32(insn, 30, 1);
9817
9818     switch (opcode) {
9819     case 0x08: /* SRI */
9820         if (!is_u) {
9821             unallocated_encoding(s);
9822             return;
9823         }
9824         /* fall through */
9825     case 0x00: /* SSHR / USHR */
9826     case 0x02: /* SSRA / USRA (accumulate) */
9827     case 0x04: /* SRSHR / URSHR (rounding) */
9828     case 0x06: /* SRSRA / URSRA (accum + rounding) */
9829         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
9830         break;
9831     case 0x0a: /* SHL / SLI */
9832         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
9833         break;
9834     case 0x10: /* SHRN */
9835     case 0x11: /* RSHRN / SQRSHRUN */
9836         if (is_u) {
9837             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
9838                                    opcode, rn, rd);
9839         } else {
9840             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
9841         }
9842         break;
9843     case 0x12: /* SQSHRN / UQSHRN */
9844     case 0x13: /* SQRSHRN / UQRSHRN */
9845         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
9846                                opcode, rn, rd);
9847         break;
9848     case 0x14: /* SSHLL / USHLL */
9849         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
9850         break;
9851     case 0x1c: /* SCVTF / UCVTF */
9852         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
9853                                      opcode, rn, rd);
9854         break;
9855     case 0xc: /* SQSHLU */
9856         if (!is_u) {
9857             unallocated_encoding(s);
9858             return;
9859         }
9860         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
9861         break;
9862     case 0xe: /* SQSHL, UQSHL */
9863         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
9864         break;
9865     case 0x1f: /* FCVTZS/ FCVTZU */
9866         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
9867         return;
9868     default:
9869         unallocated_encoding(s);
9870         return;
9871     }
9872 }
9873
9874 /* Generate code to do a "long" addition or subtraction, ie one done in
9875  * TCGv_i64 on vector lanes twice the width specified by size.
9876  */
9877 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
9878                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
9879 {
9880     static NeonGenTwo64OpFn * const fns[3][2] = {
9881         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
9882         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
9883         { tcg_gen_add_i64, tcg_gen_sub_i64 },
9884     };
9885     NeonGenTwo64OpFn *genfn;
9886     assert(size < 3);
9887
9888     genfn = fns[size][is_sub];
9889     genfn(tcg_res, tcg_op1, tcg_op2);
9890 }
9891
/* Emit code for the widening members of the three-reg-different group.
 *
 * Two passes are made, each producing one 64-bit half of the result.
 * Each pass reads 32 bits from Rn and Rm (elements 0/1 when is_q is
 * clear, elements 2/3 when it is set), combines them according to
 * @opcode into a 64-bit value, and optionally accumulates that value
 * into the corresponding half of Rd. Both halves of Rd are written at
 * the end.
 *
 * @size == 2 is handled inline with 64-bit TCG ops; sizes 0 and 1 go
 * through the neon helper functions. The caller is expected to have
 * rejected invalid opcode/size combinations; anything unexpected hits
 * an assertion here.
 */
static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
                                int opcode, int rd, int rn, int rm)
{
    /* 3-reg-different widening insns: 64 x 64 -> 128 */
    TCGv_i64 tcg_res[2];
    int pass, accop;

    tcg_res[0] = tcg_temp_new_i64();
    tcg_res[1] = tcg_temp_new_i64();

    /* Does this op do an adding accumulate, a subtracting accumulate,
     * or no accumulate at all?
     */
    switch (opcode) {
    case 5:
    case 8:
    case 9:
        accop = 1;
        break;
    case 10:
    case 11:
        accop = -1;
        break;
    default:
        accop = 0;
        break;
    }

    /* Accumulating forms start from the current contents of Rd */
    if (accop != 0) {
        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
    }

    /* size == 2 means two 32x32->64 operations; this is worth special
     * casing because we can generally handle it inline.
     */
    if (size == 2) {
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_passres;
            /* Sign- or zero-extend the 32-bit inputs as they are loaded */
            TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);

            /* The "2" variants (is_q set) use source elements 2 and 3 */
            int elt = pass + is_q * 2;

            read_vec_element(s, tcg_op1, rn, elt, memop);
            read_vec_element(s, tcg_op2, rm, elt, memop);

            /* Without accumulation the pass result can be produced
             * directly in the output temp; otherwise use a scratch temp
             * so that the accumulator value is not overwritten.
             */
            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
            {
                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();

                /* Absolute difference: compute both op1 - op2 and
                 * op2 - op1, then select op1 - op2 when op1 >= op2.
                 */
                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
                                    tcg_passres,
                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
                tcg_temp_free_i64(tcg_tmp1);
                tcg_temp_free_i64(tcg_tmp2);
                break;
            }
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                /* Doubling multiply: the product is added to itself
                 * with saturation.
                 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            default:
                g_assert_not_reached();
            }

            if (opcode == 9 || opcode == 11) {
                /* saturating accumulate ops */
                if (accop < 0) {
                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
                }
                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
                                                  tcg_res[pass], tcg_passres);
            } else if (accop > 0) {
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            } else if (accop < 0) {
                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            }

            /* tcg_passres is only a separate temp for accumulating ops */
            if (accop != 0) {
                tcg_temp_free_i64(tcg_passres);
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        /* size 0 or 1, generally helper functions */
        for (pass = 0; pass < 2; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i64 tcg_passres;
            /* The "2" variants (is_q set) use source elements 2 and 3 */
            int elt = pass + is_q * 2;

            /* Each 32-bit chunk holds several narrow elements; the
             * helpers below operate on all of them at once.
             */
            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);

            /* As above: write straight to the output unless accumulating */
            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
            {
                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
                /* Indexed as [size][is_u] */
                static NeonGenWidenFn * const widenfns[2][2] = {
                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
                };
                NeonGenWidenFn *widenfn = widenfns[size][is_u];

                /* Widen both inputs, then do the long add/subtract */
                widenfn(tcg_op2_64, tcg_op2);
                widenfn(tcg_passres, tcg_op1);
                gen_neon_addl(size, (opcode == 2), tcg_passres,
                              tcg_passres, tcg_op2_64);
                tcg_temp_free_i64(tcg_op2_64);
                break;
            }
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                /* Only the 16-bit form reaches this branch (size 2 was
                 * handled above). Doubling is done by a saturating add
                 * of the product to itself.
                 */
                assert(size == 1);
                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            case 14: /* PMULL */
                assert(size == 0);
                gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
                break;
            default:
                g_assert_not_reached();
            }
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);

            if (accop != 0) {
                if (opcode == 9 || opcode == 11) {
                    /* saturating accumulate ops */
                    if (accop < 0) {
                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
                    }
                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                } else {
                    /* Plain accumulate: long add or subtract into Rd */
                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
                                  tcg_res[pass], tcg_passres);
                }
                tcg_temp_free_i64(tcg_passres);
            }
        }
    }

    /* The result is always a full 128 bits */
    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
    tcg_temp_free_i64(tcg_res[0]);
    tcg_temp_free_i64(tcg_res[1]);
}
10113
10114 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10115                             int opcode, int rd, int rn, int rm)
10116 {
10117     TCGv_i64 tcg_res[2];
10118     int part = is_q ? 2 : 0;
10119     int pass;
10120
10121     for (pass = 0; pass < 2; pass++) {
10122         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10123         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10124         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10125         static NeonGenWidenFn * const widenfns[3][2] = {
10126             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10127             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10128             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10129         };
10130         NeonGenWidenFn *widenfn = widenfns[size][is_u];
10131
10132         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10133         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10134         widenfn(tcg_op2_wide, tcg_op2);
10135         tcg_temp_free_i32(tcg_op2);
10136         tcg_res[pass] = tcg_temp_new_i64();
10137         gen_neon_addl(size, (opcode == 3),
10138                       tcg_res[pass], tcg_op1, tcg_op2_wide);
10139         tcg_temp_free_i64(tcg_op1);
10140         tcg_temp_free_i64(tcg_op2_wide);
10141     }
10142
10143     for (pass = 0; pass < 2; pass++) {
10144         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10145         tcg_temp_free_i64(tcg_res[pass]);
10146     }
10147 }
10148
/* Rounding narrow of a 64-bit value to its high 32 bits: add the
 * rounding constant 1 << 31 and then take the upper half.
 * Note that @in is modified in place by the addition.
 */
static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
{
    tcg_gen_addi_i64(in, in, 1U << 31);
    tcg_gen_extrh_i64_i32(res, in);
}
10154
10155 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10156                                  int opcode, int rd, int rn, int rm)
10157 {
10158     TCGv_i32 tcg_res[2];
10159     int part = is_q ? 2 : 0;
10160     int pass;
10161
10162     for (pass = 0; pass < 2; pass++) {
10163         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10164         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10165         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10166         static NeonGenNarrowFn * const narrowfns[3][2] = {
10167             { gen_helper_neon_narrow_high_u8,
10168               gen_helper_neon_narrow_round_high_u8 },
10169             { gen_helper_neon_narrow_high_u16,
10170               gen_helper_neon_narrow_round_high_u16 },
10171             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10172         };
10173         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10174
10175         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10176         read_vec_element(s, tcg_op2, rm, pass, MO_64);
10177
10178         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10179
10180         tcg_temp_free_i64(tcg_op1);
10181         tcg_temp_free_i64(tcg_op2);
10182
10183         tcg_res[pass] = tcg_temp_new_i32();
10184         gennarrow(tcg_res[pass], tcg_wideres);
10185         tcg_temp_free_i64(tcg_wideres);
10186     }
10187
10188     for (pass = 0; pass < 2; pass++) {
10189         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10190         tcg_temp_free_i32(tcg_res[pass]);
10191     }
10192     clear_vec_high(s, is_q, rd);
10193 }
10194
10195 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
10196 {
10197     /* PMULL of 64 x 64 -> 128 is an odd special case because it
10198      * is the only three-reg-diff instruction which produces a
10199      * 128-bit wide result from a single operation. However since
10200      * it's possible to calculate the two halves more or less
10201      * separately we just use two helper calls.
10202      */
10203     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10204     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10205     TCGv_i64 tcg_res = tcg_temp_new_i64();
10206
10207     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
10208     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
10209     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
10210     write_vec_element(s, tcg_res, rd, 0, MO_64);
10211     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
10212     write_vec_element(s, tcg_res, rd, 1, MO_64);
10213
10214     tcg_temp_free_i64(tcg_op1);
10215     tcg_temp_free_i64(tcg_op2);
10216     tcg_temp_free_i64(tcg_res);
10217 }
10218
10219 /* AdvSIMD three different
10220  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10221  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10222  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10223  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10224  */
10225 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10226 {
10227     /* Instructions in this group fall into three basic classes
10228      * (in each case with the operation working on each element in
10229      * the input vectors):
10230      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10231      *     128 bit input)
10232      * (2) wide 64 x 128 -> 128
10233      * (3) narrowing 128 x 128 -> 64
10234      * Here we do initial decode, catch unallocated cases and
10235      * dispatch to separate functions for each class.
10236      */
10237     int is_q = extract32(insn, 30, 1);
10238     int is_u = extract32(insn, 29, 1);
10239     int size = extract32(insn, 22, 2);
10240     int opcode = extract32(insn, 12, 4);
10241     int rm = extract32(insn, 16, 5);
10242     int rn = extract32(insn, 5, 5);
10243     int rd = extract32(insn, 0, 5);
10244
10245     switch (opcode) {
10246     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10247     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10248         /* 64 x 128 -> 128 */
10249         if (size == 3) {
10250             unallocated_encoding(s);
10251             return;
10252         }
10253         if (!fp_access_check(s)) {
10254             return;
10255         }
10256         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10257         break;
10258     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10259     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10260         /* 128 x 128 -> 64 */
10261         if (size == 3) {
10262             unallocated_encoding(s);
10263             return;
10264         }
10265         if (!fp_access_check(s)) {
10266             return;
10267         }
10268         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10269         break;
10270     case 14: /* PMULL, PMULL2 */
10271         if (is_u || size == 1 || size == 2) {
10272             unallocated_encoding(s);
10273             return;
10274         }
10275         if (size == 3) {
10276             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
10277                 unallocated_encoding(s);
10278                 return;
10279             }
10280             if (!fp_access_check(s)) {
10281                 return;
10282             }
10283             handle_pmull_64(s, is_q, rd, rn, rm);
10284             return;
10285         }
10286         goto is_widening;
10287     case 9: /* SQDMLAL, SQDMLAL2 */
10288     case 11: /* SQDMLSL, SQDMLSL2 */
10289     case 13: /* SQDMULL, SQDMULL2 */
10290         if (is_u || size == 0) {
10291             unallocated_encoding(s);
10292             return;
10293         }
10294         /* fall through */
10295     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10296     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10297     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10298     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10299     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10300     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10301     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10302         /* 64 x 64 -> 128 */
10303         if (size == 3) {
10304             unallocated_encoding(s);
10305             return;
10306         }
10307     is_widening:
10308         if (!fp_access_check(s)) {
10309             return;
10310         }
10311
10312         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10313         break;
10314     default:
10315         /* opcode 15 not allocated */
10316         unallocated_encoding(s);
10317         break;
10318     }
10319 }
10320
/* Bitwise select on 64-bit values: the incoming rd is the mask, so
 * rd = (rn & rd) | (rm & ~rd), computed as rm ^ ((rn ^ rm) & rd).
 * rn is used as scratch and is overwritten.
 */
static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    tcg_gen_xor_i64(rn, rn, rm);
    tcg_gen_and_i64(rn, rn, rd);
    tcg_gen_xor_i64(rd, rm, rn);
}
10327
/* Bitwise insert if true on 64-bit values: bits of rn replace bits of
 * rd wherever rm is set, i.e. rd = (rn & rm) | (rd & ~rm), computed as
 * rd ^ ((rn ^ rd) & rm). rn is used as scratch and is overwritten.
 */
static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    tcg_gen_xor_i64(rn, rn, rd);
    tcg_gen_and_i64(rn, rn, rm);
    tcg_gen_xor_i64(rd, rd, rn);
}
10334
/* Bitwise insert if false on 64-bit values: bits of rn replace bits of
 * rd wherever rm is clear, i.e. rd = (rn & ~rm) | (rd & rm), computed
 * as rd ^ ((rn ^ rd) & ~rm) using the and-with-complement op.
 * rn is used as scratch and is overwritten.
 */
static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    tcg_gen_xor_i64(rn, rn, rd);
    tcg_gen_andc_i64(rn, rn, rm);
    tcg_gen_xor_i64(rd, rd, rn);
}
10341
/* Host-vector form of bitwise select: the incoming rd is the mask, so
 * rd = (rn & rd) | (rm & ~rd), computed as rm ^ ((rn ^ rm) & rd).
 * rn is used as scratch and is overwritten.
 */
static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
{
    tcg_gen_xor_vec(vece, rn, rn, rm);
    tcg_gen_and_vec(vece, rn, rn, rd);
    tcg_gen_xor_vec(vece, rd, rm, rn);
}
10348
/* Host-vector form of bitwise insert if true:
 * rd = (rn & rm) | (rd & ~rm), computed as rd ^ ((rn ^ rd) & rm).
 * rn is used as scratch and is overwritten.
 */
static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
{
    tcg_gen_xor_vec(vece, rn, rn, rd);
    tcg_gen_and_vec(vece, rn, rn, rm);
    tcg_gen_xor_vec(vece, rd, rd, rn);
}
10355
/* Host-vector form of bitwise insert if false:
 * rd = (rn & ~rm) | (rd & rm), computed as rd ^ ((rn ^ rd) & ~rm)
 * using the and-with-complement op.
 * rn is used as scratch and is overwritten.
 */
static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
{
    tcg_gen_xor_vec(vece, rn, rn, rd);
    tcg_gen_andc_vec(vece, rn, rn, rm);
    tcg_gen_xor_vec(vece, rd, rd, rn);
}
10362
10363 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10364 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10365 {
10366     static const GVecGen3 bsl_op = {
10367         .fni8 = gen_bsl_i64,
10368         .fniv = gen_bsl_vec,
10369         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10370         .load_dest = true
10371     };
10372     static const GVecGen3 bit_op = {
10373         .fni8 = gen_bit_i64,
10374         .fniv = gen_bit_vec,
10375         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10376         .load_dest = true
10377     };
10378     static const GVecGen3 bif_op = {
10379         .fni8 = gen_bif_i64,
10380         .fniv = gen_bif_vec,
10381         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10382         .load_dest = true
10383     };
10384
10385     int rd = extract32(insn, 0, 5);
10386     int rn = extract32(insn, 5, 5);
10387     int rm = extract32(insn, 16, 5);
10388     int size = extract32(insn, 22, 2);
10389     bool is_u = extract32(insn, 29, 1);
10390     bool is_q = extract32(insn, 30, 1);
10391
10392     if (!fp_access_check(s)) {
10393         return;
10394     }
10395
10396     switch (size + 4 * is_u) {
10397     case 0: /* AND */
10398         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10399         return;
10400     case 1: /* BIC */
10401         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10402         return;
10403     case 2: /* ORR */
10404         if (rn == rm) { /* MOV */
10405             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_mov, 0);
10406         } else {
10407             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10408         }
10409         return;
10410     case 3: /* ORN */
10411         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10412         return;
10413     case 4: /* EOR */
10414         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10415         return;
10416
10417     case 5: /* BSL bitwise select */
10418         gen_gvec_op3(s, is_q, rd, rn, rm, &bsl_op);
10419         return;
10420     case 6: /* BIT, bitwise insert if true */
10421         gen_gvec_op3(s, is_q, rd, rn, rm, &bit_op);
10422         return;
10423     case 7: /* BIF, bitwise insert if false */
10424         gen_gvec_op3(s, is_q, rd, rn, rm, &bif_op);
10425         return;
10426
10427     default:
10428         g_assert_not_reached();
10429     }
10430 }
10431
10432 /* Pairwise op subgroup of C3.6.16.
10433  *
10434  * This is called directly or via the handle_3same_float for float pairwise
10435  * operations where the opcode and size are calculated differently.
10436  */
10437 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10438                                    int size, int rn, int rm, int rd)
10439 {
10440     TCGv_ptr fpst;
10441     int pass;
10442
10443     /* Floating point operations need fpst */
10444     if (opcode >= 0x58) {
10445         fpst = get_fpstatus_ptr(false);
10446     } else {
10447         fpst = NULL;
10448     }
10449
10450     if (!fp_access_check(s)) {
10451         return;
10452     }
10453
10454     /* These operations work on the concatenated rm:rn, with each pair of
10455      * adjacent elements being operated on to produce an element in the result.
10456      */
10457     if (size == 3) {
10458         TCGv_i64 tcg_res[2];
10459
10460         for (pass = 0; pass < 2; pass++) {
10461             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10462             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10463             int passreg = (pass == 0) ? rn : rm;
10464
10465             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10466             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10467             tcg_res[pass] = tcg_temp_new_i64();
10468
10469             switch (opcode) {
10470             case 0x17: /* ADDP */
10471                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10472                 break;
10473             case 0x58: /* FMAXNMP */
10474                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10475                 break;
10476             case 0x5a: /* FADDP */
10477                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10478                 break;
10479             case 0x5e: /* FMAXP */
10480                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10481                 break;
10482             case 0x78: /* FMINNMP */
10483                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10484                 break;
10485             case 0x7e: /* FMINP */
10486                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10487                 break;
10488             default:
10489                 g_assert_not_reached();
10490             }
10491
10492             tcg_temp_free_i64(tcg_op1);
10493             tcg_temp_free_i64(tcg_op2);
10494         }
10495
10496         for (pass = 0; pass < 2; pass++) {
10497             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10498             tcg_temp_free_i64(tcg_res[pass]);
10499         }
10500     } else {
10501         int maxpass = is_q ? 4 : 2;
10502         TCGv_i32 tcg_res[4];
10503
10504         for (pass = 0; pass < maxpass; pass++) {
10505             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10506             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10507             NeonGenTwoOpFn *genfn = NULL;
10508             int passreg = pass < (maxpass / 2) ? rn : rm;
10509             int passelt = (is_q && (pass & 1)) ? 2 : 0;
10510
10511             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10512             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10513             tcg_res[pass] = tcg_temp_new_i32();
10514
10515             switch (opcode) {
10516             case 0x17: /* ADDP */
10517             {
10518                 static NeonGenTwoOpFn * const fns[3] = {
10519                     gen_helper_neon_padd_u8,
10520                     gen_helper_neon_padd_u16,
10521                     tcg_gen_add_i32,
10522                 };
10523                 genfn = fns[size];
10524                 break;
10525             }
10526             case 0x14: /* SMAXP, UMAXP */
10527             {
10528                 static NeonGenTwoOpFn * const fns[3][2] = {
10529                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10530                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10531                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10532                 };
10533                 genfn = fns[size][u];
10534                 break;
10535             }
10536             case 0x15: /* SMINP, UMINP */
10537             {
10538                 static NeonGenTwoOpFn * const fns[3][2] = {
10539                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10540                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10541                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10542                 };
10543                 genfn = fns[size][u];
10544                 break;
10545             }
10546             /* The FP operations are all on single floats (32 bit) */
10547             case 0x58: /* FMAXNMP */
10548                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10549                 break;
10550             case 0x5a: /* FADDP */
10551                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10552                 break;
10553             case 0x5e: /* FMAXP */
10554                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10555                 break;
10556             case 0x78: /* FMINNMP */
10557                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10558                 break;
10559             case 0x7e: /* FMINP */
10560                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10561                 break;
10562             default:
10563                 g_assert_not_reached();
10564             }
10565
10566             /* FP ops called directly, otherwise call now */
10567             if (genfn) {
10568                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
10569             }
10570
10571             tcg_temp_free_i32(tcg_op1);
10572             tcg_temp_free_i32(tcg_op2);
10573         }
10574
10575         for (pass = 0; pass < maxpass; pass++) {
10576             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10577             tcg_temp_free_i32(tcg_res[pass]);
10578         }
10579         clear_vec_high(s, is_q, rd);
10580     }
10581
10582     if (fpst) {
10583         tcg_temp_free_ptr(fpst);
10584     }
10585 }
10586
10587 /* Floating point op subgroup of C3.6.16. */
10588 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
10589 {
10590     /* For floating point ops, the U, size[1] and opcode bits
10591      * together indicate the operation. size[0] indicates single
10592      * or double.
10593      */
10594     int fpopcode = extract32(insn, 11, 5)
10595         | (extract32(insn, 23, 1) << 5)
10596         | (extract32(insn, 29, 1) << 6);
10597     int is_q = extract32(insn, 30, 1);
10598     int size = extract32(insn, 22, 1);
10599     int rm = extract32(insn, 16, 5);
10600     int rn = extract32(insn, 5, 5);
10601     int rd = extract32(insn, 0, 5);
10602
10603     int datasize = is_q ? 128 : 64;
10604     int esize = 32 << size;
10605     int elements = datasize / esize;
10606
10607     if (size == 1 && !is_q) {
10608         unallocated_encoding(s);
10609         return;
10610     }
10611
10612     switch (fpopcode) {
10613     case 0x58: /* FMAXNMP */
10614     case 0x5a: /* FADDP */
10615     case 0x5e: /* FMAXP */
10616     case 0x78: /* FMINNMP */
10617     case 0x7e: /* FMINP */
10618         if (size && !is_q) {
10619             unallocated_encoding(s);
10620             return;
10621         }
10622         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
10623                                rn, rm, rd);
10624         return;
10625     case 0x1b: /* FMULX */
10626     case 0x1f: /* FRECPS */
10627     case 0x3f: /* FRSQRTS */
10628     case 0x5d: /* FACGE */
10629     case 0x7d: /* FACGT */
10630     case 0x19: /* FMLA */
10631     case 0x39: /* FMLS */
10632     case 0x18: /* FMAXNM */
10633     case 0x1a: /* FADD */
10634     case 0x1c: /* FCMEQ */
10635     case 0x1e: /* FMAX */
10636     case 0x38: /* FMINNM */
10637     case 0x3a: /* FSUB */
10638     case 0x3e: /* FMIN */
10639     case 0x5b: /* FMUL */
10640     case 0x5c: /* FCMGE */
10641     case 0x5f: /* FDIV */
10642     case 0x7a: /* FABD */
10643     case 0x7c: /* FCMGT */
10644         if (!fp_access_check(s)) {
10645             return;
10646         }
10647
10648         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
10649         return;
10650     default:
10651         unallocated_encoding(s);
10652         return;
10653     }
10654 }
10655
10656 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10657 {
10658     gen_helper_neon_mul_u8(a, a, b);
10659     gen_helper_neon_add_u8(d, d, a);
10660 }
10661
10662 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10663 {
10664     gen_helper_neon_mul_u16(a, a, b);
10665     gen_helper_neon_add_u16(d, d, a);
10666 }
10667
10668 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10669 {
10670     tcg_gen_mul_i32(a, a, b);
10671     tcg_gen_add_i32(d, d, a);
10672 }
10673
10674 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
10675 {
10676     tcg_gen_mul_i64(a, a, b);
10677     tcg_gen_add_i64(d, d, a);
10678 }
10679
10680 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
10681 {
10682     tcg_gen_mul_vec(vece, a, a, b);
10683     tcg_gen_add_vec(vece, d, d, a);
10684 }
10685
10686 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10687 {
10688     gen_helper_neon_mul_u8(a, a, b);
10689     gen_helper_neon_sub_u8(d, d, a);
10690 }
10691
10692 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10693 {
10694     gen_helper_neon_mul_u16(a, a, b);
10695     gen_helper_neon_sub_u16(d, d, a);
10696 }
10697
10698 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10699 {
10700     tcg_gen_mul_i32(a, a, b);
10701     tcg_gen_sub_i32(d, d, a);
10702 }
10703
10704 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
10705 {
10706     tcg_gen_mul_i64(a, a, b);
10707     tcg_gen_sub_i64(d, d, a);
10708 }
10709
10710 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
10711 {
10712     tcg_gen_mul_vec(vece, a, a, b);
10713     tcg_gen_sub_vec(vece, d, d, a);
10714 }
10715
10716 /* Integer op subgroup of C3.6.16. */
10717 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
10718 {
10719     static const GVecGen3 cmtst_op[4] = {
10720         { .fni4 = gen_helper_neon_tst_u8,
10721           .fniv = gen_cmtst_vec,
10722           .vece = MO_8 },
10723         { .fni4 = gen_helper_neon_tst_u16,
10724           .fniv = gen_cmtst_vec,
10725           .vece = MO_16 },
10726         { .fni4 = gen_cmtst_i32,
10727           .fniv = gen_cmtst_vec,
10728           .vece = MO_32 },
10729         { .fni8 = gen_cmtst_i64,
10730           .fniv = gen_cmtst_vec,
10731           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10732           .vece = MO_64 },
10733     };
10734     static const GVecGen3 mla_op[4] = {
10735         { .fni4 = gen_mla8_i32,
10736           .fniv = gen_mla_vec,
10737           .opc = INDEX_op_mul_vec,
10738           .load_dest = true,
10739           .vece = MO_8 },
10740         { .fni4 = gen_mla16_i32,
10741           .fniv = gen_mla_vec,
10742           .opc = INDEX_op_mul_vec,
10743           .load_dest = true,
10744           .vece = MO_16 },
10745         { .fni4 = gen_mla32_i32,
10746           .fniv = gen_mla_vec,
10747           .opc = INDEX_op_mul_vec,
10748           .load_dest = true,
10749           .vece = MO_32 },
10750         { .fni8 = gen_mla64_i64,
10751           .fniv = gen_mla_vec,
10752           .opc = INDEX_op_mul_vec,
10753           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10754           .load_dest = true,
10755           .vece = MO_64 },
10756     };
10757     static const GVecGen3 mls_op[4] = {
10758         { .fni4 = gen_mls8_i32,
10759           .fniv = gen_mls_vec,
10760           .opc = INDEX_op_mul_vec,
10761           .load_dest = true,
10762           .vece = MO_8 },
10763         { .fni4 = gen_mls16_i32,
10764           .fniv = gen_mls_vec,
10765           .opc = INDEX_op_mul_vec,
10766           .load_dest = true,
10767           .vece = MO_16 },
10768         { .fni4 = gen_mls32_i32,
10769           .fniv = gen_mls_vec,
10770           .opc = INDEX_op_mul_vec,
10771           .load_dest = true,
10772           .vece = MO_32 },
10773         { .fni8 = gen_mls64_i64,
10774           .fniv = gen_mls_vec,
10775           .opc = INDEX_op_mul_vec,
10776           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10777           .load_dest = true,
10778           .vece = MO_64 },
10779     };
10780
10781     int is_q = extract32(insn, 30, 1);
10782     int u = extract32(insn, 29, 1);
10783     int size = extract32(insn, 22, 2);
10784     int opcode = extract32(insn, 11, 5);
10785     int rm = extract32(insn, 16, 5);
10786     int rn = extract32(insn, 5, 5);
10787     int rd = extract32(insn, 0, 5);
10788     int pass;
10789     TCGCond cond;
10790
10791     switch (opcode) {
10792     case 0x13: /* MUL, PMUL */
10793         if (u && size != 0) {
10794             unallocated_encoding(s);
10795             return;
10796         }
10797         /* fall through */
10798     case 0x0: /* SHADD, UHADD */
10799     case 0x2: /* SRHADD, URHADD */
10800     case 0x4: /* SHSUB, UHSUB */
10801     case 0xc: /* SMAX, UMAX */
10802     case 0xd: /* SMIN, UMIN */
10803     case 0xe: /* SABD, UABD */
10804     case 0xf: /* SABA, UABA */
10805     case 0x12: /* MLA, MLS */
10806         if (size == 3) {
10807             unallocated_encoding(s);
10808             return;
10809         }
10810         break;
10811     case 0x16: /* SQDMULH, SQRDMULH */
10812         if (size == 0 || size == 3) {
10813             unallocated_encoding(s);
10814             return;
10815         }
10816         break;
10817     default:
10818         if (size == 3 && !is_q) {
10819             unallocated_encoding(s);
10820             return;
10821         }
10822         break;
10823     }
10824
10825     if (!fp_access_check(s)) {
10826         return;
10827     }
10828
10829     switch (opcode) {
10830     case 0x10: /* ADD, SUB */
10831         if (u) {
10832             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
10833         } else {
10834             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
10835         }
10836         return;
10837     case 0x13: /* MUL, PMUL */
10838         if (!u) { /* MUL */
10839             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
10840             return;
10841         }
10842         break;
10843     case 0x12: /* MLA, MLS */
10844         if (u) {
10845             gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
10846         } else {
10847             gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
10848         }
10849         return;
10850     case 0x11:
10851         if (!u) { /* CMTST */
10852             gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
10853             return;
10854         }
10855         /* else CMEQ */
10856         cond = TCG_COND_EQ;
10857         goto do_gvec_cmp;
10858     case 0x06: /* CMGT, CMHI */
10859         cond = u ? TCG_COND_GTU : TCG_COND_GT;
10860         goto do_gvec_cmp;
10861     case 0x07: /* CMGE, CMHS */
10862         cond = u ? TCG_COND_GEU : TCG_COND_GE;
10863     do_gvec_cmp:
10864         tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
10865                          vec_full_reg_offset(s, rn),
10866                          vec_full_reg_offset(s, rm),
10867                          is_q ? 16 : 8, vec_full_reg_size(s));
10868         return;
10869     }
10870
10871     if (size == 3) {
10872         assert(is_q);
10873         for (pass = 0; pass < 2; pass++) {
10874             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10875             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10876             TCGv_i64 tcg_res = tcg_temp_new_i64();
10877
10878             read_vec_element(s, tcg_op1, rn, pass, MO_64);
10879             read_vec_element(s, tcg_op2, rm, pass, MO_64);
10880
10881             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
10882
10883             write_vec_element(s, tcg_res, rd, pass, MO_64);
10884
10885             tcg_temp_free_i64(tcg_res);
10886             tcg_temp_free_i64(tcg_op1);
10887             tcg_temp_free_i64(tcg_op2);
10888         }
10889     } else {
10890         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10891             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10892             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10893             TCGv_i32 tcg_res = tcg_temp_new_i32();
10894             NeonGenTwoOpFn *genfn = NULL;
10895             NeonGenTwoOpEnvFn *genenvfn = NULL;
10896
10897             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
10898             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
10899
10900             switch (opcode) {
10901             case 0x0: /* SHADD, UHADD */
10902             {
10903                 static NeonGenTwoOpFn * const fns[3][2] = {
10904                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
10905                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
10906                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
10907                 };
10908                 genfn = fns[size][u];
10909                 break;
10910             }
10911             case 0x1: /* SQADD, UQADD */
10912             {
10913                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
10914                     { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
10915                     { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
10916                     { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
10917                 };
10918                 genenvfn = fns[size][u];
10919                 break;
10920             }
10921             case 0x2: /* SRHADD, URHADD */
10922             {
10923                 static NeonGenTwoOpFn * const fns[3][2] = {
10924                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
10925                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
10926                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
10927                 };
10928                 genfn = fns[size][u];
10929                 break;
10930             }
10931             case 0x4: /* SHSUB, UHSUB */
10932             {
10933                 static NeonGenTwoOpFn * const fns[3][2] = {
10934                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
10935                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
10936                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
10937                 };
10938                 genfn = fns[size][u];
10939                 break;
10940             }
10941             case 0x5: /* SQSUB, UQSUB */
10942             {
10943                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
10944                     { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
10945                     { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
10946                     { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
10947                 };
10948                 genenvfn = fns[size][u];
10949                 break;
10950             }
10951             case 0x8: /* SSHL, USHL */
10952             {
10953                 static NeonGenTwoOpFn * const fns[3][2] = {
10954                     { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
10955                     { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
10956                     { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
10957                 };
10958                 genfn = fns[size][u];
10959                 break;
10960             }
10961             case 0x9: /* SQSHL, UQSHL */
10962             {
10963                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
10964                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
10965                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
10966                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
10967                 };
10968                 genenvfn = fns[size][u];
10969                 break;
10970             }
10971             case 0xa: /* SRSHL, URSHL */
10972             {
10973                 static NeonGenTwoOpFn * const fns[3][2] = {
10974                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
10975                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
10976                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
10977                 };
10978                 genfn = fns[size][u];
10979                 break;
10980             }
10981             case 0xb: /* SQRSHL, UQRSHL */
10982             {
10983                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
10984                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
10985                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
10986                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
10987                 };
10988                 genenvfn = fns[size][u];
10989                 break;
10990             }
10991             case 0xc: /* SMAX, UMAX */
10992             {
10993                 static NeonGenTwoOpFn * const fns[3][2] = {
10994                     { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
10995                     { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
10996                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10997                 };
10998                 genfn = fns[size][u];
10999                 break;
11000             }
11001
11002             case 0xd: /* SMIN, UMIN */
11003             {
11004                 static NeonGenTwoOpFn * const fns[3][2] = {
11005                     { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
11006                     { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
11007                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
11008                 };
11009                 genfn = fns[size][u];
11010                 break;
11011             }
11012             case 0xe: /* SABD, UABD */
11013             case 0xf: /* SABA, UABA */
11014             {
11015                 static NeonGenTwoOpFn * const fns[3][2] = {
11016                     { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
11017                     { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
11018                     { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
11019                 };
11020                 genfn = fns[size][u];
11021                 break;
11022             }
11023             case 0x13: /* MUL, PMUL */
11024                 assert(u); /* PMUL */
11025                 assert(size == 0);
11026                 genfn = gen_helper_neon_mul_p8;
11027                 break;
11028             case 0x16: /* SQDMULH, SQRDMULH */
11029             {
11030                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
11031                     { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
11032                     { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
11033                 };
11034                 assert(size == 1 || size == 2);
11035                 genenvfn = fns[size - 1][u];
11036                 break;
11037             }
11038             default:
11039                 g_assert_not_reached();
11040             }
11041
11042             if (genenvfn) {
11043                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
11044             } else {
11045                 genfn(tcg_res, tcg_op1, tcg_op2);
11046             }
11047
11048             if (opcode == 0xf) {
11049                 /* SABA, UABA: accumulating ops */
11050                 static NeonGenTwoOpFn * const fns[3] = {
11051                     gen_helper_neon_add_u8,
11052                     gen_helper_neon_add_u16,
11053                     tcg_gen_add_i32,
11054                 };
11055
11056                 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
11057                 fns[size](tcg_res, tcg_op1, tcg_res);
11058             }
11059
11060             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11061
11062             tcg_temp_free_i32(tcg_res);
11063             tcg_temp_free_i32(tcg_op1);
11064             tcg_temp_free_i32(tcg_op2);
11065         }
11066     }
11067     clear_vec_high(s, is_q, rd);
11068 }
11069
11070 /* AdvSIMD three same
11071  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11072  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11073  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11074  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11075  */
11076 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11077 {
11078     int opcode = extract32(insn, 11, 5);
11079
11080     switch (opcode) {
11081     case 0x3: /* logic ops */
11082         disas_simd_3same_logic(s, insn);
11083         break;
11084     case 0x17: /* ADDP */
11085     case 0x14: /* SMAXP, UMAXP */
11086     case 0x15: /* SMINP, UMINP */
11087     {
11088         /* Pairwise operations */
11089         int is_q = extract32(insn, 30, 1);
11090         int u = extract32(insn, 29, 1);
11091         int size = extract32(insn, 22, 2);
11092         int rm = extract32(insn, 16, 5);
11093         int rn = extract32(insn, 5, 5);
11094         int rd = extract32(insn, 0, 5);
11095         if (opcode == 0x17) {
11096             if (u || (size == 3 && !is_q)) {
11097                 unallocated_encoding(s);
11098                 return;
11099             }
11100         } else {
11101             if (size == 3) {
11102                 unallocated_encoding(s);
11103                 return;
11104             }
11105         }
11106         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11107         break;
11108     }
11109     case 0x18 ... 0x31:
11110         /* floating point ops, sz[1] and U are part of opcode */
11111         disas_simd_3same_float(s, insn);
11112         break;
11113     default:
11114         disas_simd_3same_int(s, insn);
11115         break;
11116     }
11117 }
11118
11119 /*
11120  * Advanced SIMD three same (ARMv8.2 FP16 variants)
11121  *
11122  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11123  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11124  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11125  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11126  *
11127  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11128  * (register), FACGE, FABD, FCMGT (register) and FACGT.
11129  *
11130  */
11131 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11132 {
11133     int opcode, fpopcode;
11134     int is_q, u, a, rm, rn, rd;
11135     int datasize, elements;
11136     int pass;
11137     TCGv_ptr fpst;
11138     bool pairwise = false;
11139
11140     if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
11141         unallocated_encoding(s);
11142         return;
11143     }
11144
11145     if (!fp_access_check(s)) {
11146         return;
11147     }
11148
11149     /* For these floating point ops, the U, a and opcode bits
11150      * together indicate the operation.
11151      */
11152     opcode = extract32(insn, 11, 3);
11153     u = extract32(insn, 29, 1);
11154     a = extract32(insn, 23, 1);
11155     is_q = extract32(insn, 30, 1);
11156     rm = extract32(insn, 16, 5);
11157     rn = extract32(insn, 5, 5);
11158     rd = extract32(insn, 0, 5);
11159
11160     fpopcode = opcode | (a << 3) |  (u << 4);
11161     datasize = is_q ? 128 : 64;
11162     elements = datasize / 16;
11163
11164     switch (fpopcode) {
11165     case 0x10: /* FMAXNMP */
11166     case 0x12: /* FADDP */
11167     case 0x16: /* FMAXP */
11168     case 0x18: /* FMINNMP */
11169     case 0x1e: /* FMINP */
11170         pairwise = true;
11171         break;
11172     }
11173
11174     fpst = get_fpstatus_ptr(true);
11175
11176     if (pairwise) {
11177         int maxpass = is_q ? 8 : 4;
11178         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11179         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11180         TCGv_i32 tcg_res[8];
11181
11182         for (pass = 0; pass < maxpass; pass++) {
11183             int passreg = pass < (maxpass / 2) ? rn : rm;
11184             int passelt = (pass << 1) & (maxpass - 1);
11185
11186             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11187             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11188             tcg_res[pass] = tcg_temp_new_i32();
11189
11190             switch (fpopcode) {
11191             case 0x10: /* FMAXNMP */
11192                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11193                                            fpst);
11194                 break;
11195             case 0x12: /* FADDP */
11196                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11197                 break;
11198             case 0x16: /* FMAXP */
11199                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11200                 break;
11201             case 0x18: /* FMINNMP */
11202                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11203                                            fpst);
11204                 break;
11205             case 0x1e: /* FMINP */
11206                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11207                 break;
11208             default:
11209                 g_assert_not_reached();
11210             }
11211         }
11212
11213         for (pass = 0; pass < maxpass; pass++) {
11214             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11215             tcg_temp_free_i32(tcg_res[pass]);
11216         }
11217
11218         tcg_temp_free_i32(tcg_op1);
11219         tcg_temp_free_i32(tcg_op2);
11220
11221     } else {
11222         for (pass = 0; pass < elements; pass++) {
11223             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11224             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11225             TCGv_i32 tcg_res = tcg_temp_new_i32();
11226
11227             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11228             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11229
11230             switch (fpopcode) {
11231             case 0x0: /* FMAXNM */
11232                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11233                 break;
11234             case 0x1: /* FMLA */
11235                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11236                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11237                                            fpst);
11238                 break;
11239             case 0x2: /* FADD */
11240                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11241                 break;
11242             case 0x3: /* FMULX */
11243                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11244                 break;
11245             case 0x4: /* FCMEQ */
11246                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11247                 break;
11248             case 0x6: /* FMAX */
11249                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11250                 break;
11251             case 0x7: /* FRECPS */
11252                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11253                 break;
11254             case 0x8: /* FMINNM */
11255                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11256                 break;
11257             case 0x9: /* FMLS */
11258                 /* As usual for ARM, separate negation for fused multiply-add */
11259                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11260                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11261                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11262                                            fpst);
11263                 break;
11264             case 0xa: /* FSUB */
11265                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11266                 break;
11267             case 0xe: /* FMIN */
11268                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11269                 break;
11270             case 0xf: /* FRSQRTS */
11271                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11272                 break;
11273             case 0x13: /* FMUL */
11274                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11275                 break;
11276             case 0x14: /* FCMGE */
11277                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11278                 break;
11279             case 0x15: /* FACGE */
11280                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11281                 break;
11282             case 0x17: /* FDIV */
11283                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11284                 break;
11285             case 0x1a: /* FABD */
11286                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11287                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11288                 break;
11289             case 0x1c: /* FCMGT */
11290                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11291                 break;
11292             case 0x1d: /* FACGT */
11293                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11294                 break;
11295             default:
11296                 fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
11297                         __func__, insn, fpopcode, s->pc);
11298                 g_assert_not_reached();
11299             }
11300
11301             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11302             tcg_temp_free_i32(tcg_res);
11303             tcg_temp_free_i32(tcg_op1);
11304             tcg_temp_free_i32(tcg_op2);
11305         }
11306     }
11307
11308     tcg_temp_free_ptr(fpst);
11309
11310     clear_vec_high(s, is_q, rd);
11311 }
11312
11313 /* AdvSIMD three same extra
11314  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11315  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11316  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11317  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11318  */
11319 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11320 {
11321     int rd = extract32(insn, 0, 5);
11322     int rn = extract32(insn, 5, 5);
11323     int opcode = extract32(insn, 11, 4);
11324     int rm = extract32(insn, 16, 5);
11325     int size = extract32(insn, 22, 2);
11326     bool u = extract32(insn, 29, 1);
11327     bool is_q = extract32(insn, 30, 1);
11328     int feature, rot;
11329
11330     switch (u * 16 + opcode) {
11331     case 0x10: /* SQRDMLAH (vector) */
11332     case 0x11: /* SQRDMLSH (vector) */
11333         if (size != 1 && size != 2) {
11334             unallocated_encoding(s);
11335             return;
11336         }
11337         feature = ARM_FEATURE_V8_RDM;
11338         break;
11339     case 0x8: /* FCMLA, #0 */
11340     case 0x9: /* FCMLA, #90 */
11341     case 0xa: /* FCMLA, #180 */
11342     case 0xb: /* FCMLA, #270 */
11343     case 0xc: /* FCADD, #90 */
11344     case 0xe: /* FCADD, #270 */
11345         if (size == 0
11346             || (size == 1 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))
11347             || (size == 3 && !is_q)) {
11348             unallocated_encoding(s);
11349             return;
11350         }
11351         feature = ARM_FEATURE_V8_FCMA;
11352         break;
11353     default:
11354         unallocated_encoding(s);
11355         return;
11356     }
11357     if (!arm_dc_feature(s, feature)) {
11358         unallocated_encoding(s);
11359         return;
11360     }
11361     if (!fp_access_check(s)) {
11362         return;
11363     }
11364
11365     switch (opcode) {
11366     case 0x0: /* SQRDMLAH (vector) */
11367         switch (size) {
11368         case 1:
11369             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16);
11370             break;
11371         case 2:
11372             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32);
11373             break;
11374         default:
11375             g_assert_not_reached();
11376         }
11377         return;
11378
11379     case 0x1: /* SQRDMLSH (vector) */
11380         switch (size) {
11381         case 1:
11382             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16);
11383             break;
11384         case 2:
11385             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32);
11386             break;
11387         default:
11388             g_assert_not_reached();
11389         }
11390         return;
11391
11392     case 0x8: /* FCMLA, #0 */
11393     case 0x9: /* FCMLA, #90 */
11394     case 0xa: /* FCMLA, #180 */
11395     case 0xb: /* FCMLA, #270 */
11396         rot = extract32(opcode, 0, 2);
11397         switch (size) {
11398         case 1:
11399             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, true, rot,
11400                               gen_helper_gvec_fcmlah);
11401             break;
11402         case 2:
11403             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
11404                               gen_helper_gvec_fcmlas);
11405             break;
11406         case 3:
11407             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
11408                               gen_helper_gvec_fcmlad);
11409             break;
11410         default:
11411             g_assert_not_reached();
11412         }
11413         return;
11414
11415     case 0xc: /* FCADD, #90 */
11416     case 0xe: /* FCADD, #270 */
11417         rot = extract32(opcode, 1, 1);
11418         switch (size) {
11419         case 1:
11420             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11421                               gen_helper_gvec_fcaddh);
11422             break;
11423         case 2:
11424             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11425                               gen_helper_gvec_fcadds);
11426             break;
11427         case 3:
11428             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11429                               gen_helper_gvec_fcaddd);
11430             break;
11431         default:
11432             g_assert_not_reached();
11433         }
11434         return;
11435
11436     default:
11437         g_assert_not_reached();
11438     }
11439 }
11440
11441 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11442                                   int size, int rn, int rd)
11443 {
11444     /* Handle 2-reg-misc ops which are widening (so each size element
11445      * in the source becomes a 2*size element in the destination.
11446      * The only instruction like this is FCVTL.
11447      */
11448     int pass;
11449
11450     if (size == 3) {
11451         /* 32 -> 64 bit fp conversion */
11452         TCGv_i64 tcg_res[2];
11453         int srcelt = is_q ? 2 : 0;
11454
11455         for (pass = 0; pass < 2; pass++) {
11456             TCGv_i32 tcg_op = tcg_temp_new_i32();
11457             tcg_res[pass] = tcg_temp_new_i64();
11458
11459             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11460             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11461             tcg_temp_free_i32(tcg_op);
11462         }
11463         for (pass = 0; pass < 2; pass++) {
11464             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11465             tcg_temp_free_i64(tcg_res[pass]);
11466         }
11467     } else {
11468         /* 16 -> 32 bit fp conversion */
11469         int srcelt = is_q ? 4 : 0;
11470         TCGv_i32 tcg_res[4];
11471         TCGv_ptr fpst = get_fpstatus_ptr(false);
11472         TCGv_i32 ahp = get_ahp_flag();
11473
11474         for (pass = 0; pass < 4; pass++) {
11475             tcg_res[pass] = tcg_temp_new_i32();
11476
11477             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11478             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11479                                            fpst, ahp);
11480         }
11481         for (pass = 0; pass < 4; pass++) {
11482             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11483             tcg_temp_free_i32(tcg_res[pass]);
11484         }
11485
11486         tcg_temp_free_ptr(fpst);
11487         tcg_temp_free_i32(ahp);
11488     }
11489 }
11490
11491 static void handle_rev(DisasContext *s, int opcode, bool u,
11492                        bool is_q, int size, int rn, int rd)
11493 {
11494     int op = (opcode << 1) | u;
11495     int opsz = op + size;
11496     int grp_size = 3 - opsz;
11497     int dsize = is_q ? 128 : 64;
11498     int i;
11499
11500     if (opsz >= 3) {
11501         unallocated_encoding(s);
11502         return;
11503     }
11504
11505     if (!fp_access_check(s)) {
11506         return;
11507     }
11508
11509     if (size == 0) {
11510         /* Special case bytes, use bswap op on each group of elements */
11511         int groups = dsize / (8 << grp_size);
11512
11513         for (i = 0; i < groups; i++) {
11514             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11515
11516             read_vec_element(s, tcg_tmp, rn, i, grp_size);
11517             switch (grp_size) {
11518             case MO_16:
11519                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
11520                 break;
11521             case MO_32:
11522                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
11523                 break;
11524             case MO_64:
11525                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11526                 break;
11527             default:
11528                 g_assert_not_reached();
11529             }
11530             write_vec_element(s, tcg_tmp, rd, i, grp_size);
11531             tcg_temp_free_i64(tcg_tmp);
11532         }
11533         clear_vec_high(s, is_q, rd);
11534     } else {
11535         int revmask = (1 << grp_size) - 1;
11536         int esize = 8 << size;
11537         int elements = dsize / esize;
11538         TCGv_i64 tcg_rn = tcg_temp_new_i64();
11539         TCGv_i64 tcg_rd = tcg_const_i64(0);
11540         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
11541
11542         for (i = 0; i < elements; i++) {
11543             int e_rev = (i & 0xf) ^ revmask;
11544             int off = e_rev * esize;
11545             read_vec_element(s, tcg_rn, rn, i, size);
11546             if (off >= 64) {
11547                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
11548                                     tcg_rn, off - 64, esize);
11549             } else {
11550                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
11551             }
11552         }
11553         write_vec_element(s, tcg_rd, rd, 0, MO_64);
11554         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
11555
11556         tcg_temp_free_i64(tcg_rd_hi);
11557         tcg_temp_free_i64(tcg_rd);
11558         tcg_temp_free_i64(tcg_rn);
11559     }
11560 }
11561
11562 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11563                                   bool is_q, int size, int rn, int rd)
11564 {
11565     /* Implement the pairwise operations from 2-misc:
11566      * SADDLP, UADDLP, SADALP, UADALP.
11567      * These all add pairs of elements in the input to produce a
11568      * double-width result element in the output (possibly accumulating).
11569      */
11570     bool accum = (opcode == 0x6);
11571     int maxpass = is_q ? 2 : 1;
11572     int pass;
11573     TCGv_i64 tcg_res[2];
11574
11575     if (size == 2) {
11576         /* 32 + 32 -> 64 op */
11577         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
11578
11579         for (pass = 0; pass < maxpass; pass++) {
11580             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11581             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11582
11583             tcg_res[pass] = tcg_temp_new_i64();
11584
11585             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11586             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11587             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11588             if (accum) {
11589                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11590                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11591             }
11592
11593             tcg_temp_free_i64(tcg_op1);
11594             tcg_temp_free_i64(tcg_op2);
11595         }
11596     } else {
11597         for (pass = 0; pass < maxpass; pass++) {
11598             TCGv_i64 tcg_op = tcg_temp_new_i64();
11599             NeonGenOneOpFn *genfn;
11600             static NeonGenOneOpFn * const fns[2][2] = {
11601                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11602                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
11603             };
11604
11605             genfn = fns[size][u];
11606
11607             tcg_res[pass] = tcg_temp_new_i64();
11608
11609             read_vec_element(s, tcg_op, rn, pass, MO_64);
11610             genfn(tcg_res[pass], tcg_op);
11611
11612             if (accum) {
11613                 read_vec_element(s, tcg_op, rd, pass, MO_64);
11614                 if (size == 0) {
11615                     gen_helper_neon_addl_u16(tcg_res[pass],
11616                                              tcg_res[pass], tcg_op);
11617                 } else {
11618                     gen_helper_neon_addl_u32(tcg_res[pass],
11619                                              tcg_res[pass], tcg_op);
11620                 }
11621             }
11622             tcg_temp_free_i64(tcg_op);
11623         }
11624     }
11625     if (!is_q) {
11626         tcg_res[1] = tcg_const_i64(0);
11627     }
11628     for (pass = 0; pass < 2; pass++) {
11629         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11630         tcg_temp_free_i64(tcg_res[pass]);
11631     }
11632 }
11633
11634 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11635 {
11636     /* Implement SHLL and SHLL2 */
11637     int pass;
11638     int part = is_q ? 2 : 0;
11639     TCGv_i64 tcg_res[2];
11640
11641     for (pass = 0; pass < 2; pass++) {
11642         static NeonGenWidenFn * const widenfns[3] = {
11643             gen_helper_neon_widen_u8,
11644             gen_helper_neon_widen_u16,
11645             tcg_gen_extu_i32_i64,
11646         };
11647         NeonGenWidenFn *widenfn = widenfns[size];
11648         TCGv_i32 tcg_op = tcg_temp_new_i32();
11649
11650         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11651         tcg_res[pass] = tcg_temp_new_i64();
11652         widenfn(tcg_res[pass], tcg_op);
11653         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11654
11655         tcg_temp_free_i32(tcg_op);
11656     }
11657
11658     for (pass = 0; pass < 2; pass++) {
11659         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11660         tcg_temp_free_i64(tcg_res[pass]);
11661     }
11662 }
11663
11664 /* AdvSIMD two reg misc
11665  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11666  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11667  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11668  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11669  */
11670 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11671 {
11672     int size = extract32(insn, 22, 2);
11673     int opcode = extract32(insn, 12, 5);
11674     bool u = extract32(insn, 29, 1);
11675     bool is_q = extract32(insn, 30, 1);
11676     int rn = extract32(insn, 5, 5);
11677     int rd = extract32(insn, 0, 5);
11678     bool need_fpstatus = false;
11679     bool need_rmode = false;
11680     int rmode = -1;
11681     TCGv_i32 tcg_rmode;
11682     TCGv_ptr tcg_fpstatus;
11683
11684     switch (opcode) {
11685     case 0x0: /* REV64, REV32 */
11686     case 0x1: /* REV16 */
11687         handle_rev(s, opcode, u, is_q, size, rn, rd);
11688         return;
11689     case 0x5: /* CNT, NOT, RBIT */
11690         if (u && size == 0) {
11691             /* NOT */
11692             break;
11693         } else if (u && size == 1) {
11694             /* RBIT */
11695             break;
11696         } else if (!u && size == 0) {
11697             /* CNT */
11698             break;
11699         }
11700         unallocated_encoding(s);
11701         return;
11702     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11703     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11704         if (size == 3) {
11705             unallocated_encoding(s);
11706             return;
11707         }
11708         if (!fp_access_check(s)) {
11709             return;
11710         }
11711
11712         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11713         return;
11714     case 0x4: /* CLS, CLZ */
11715         if (size == 3) {
11716             unallocated_encoding(s);
11717             return;
11718         }
11719         break;
11720     case 0x2: /* SADDLP, UADDLP */
11721     case 0x6: /* SADALP, UADALP */
11722         if (size == 3) {
11723             unallocated_encoding(s);
11724             return;
11725         }
11726         if (!fp_access_check(s)) {
11727             return;
11728         }
11729         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
11730         return;
11731     case 0x13: /* SHLL, SHLL2 */
11732         if (u == 0 || size == 3) {
11733             unallocated_encoding(s);
11734             return;
11735         }
11736         if (!fp_access_check(s)) {
11737             return;
11738         }
11739         handle_shll(s, is_q, size, rn, rd);
11740         return;
11741     case 0xa: /* CMLT */
11742         if (u == 1) {
11743             unallocated_encoding(s);
11744             return;
11745         }
11746         /* fall through */
11747     case 0x8: /* CMGT, CMGE */
11748     case 0x9: /* CMEQ, CMLE */
11749     case 0xb: /* ABS, NEG */
11750         if (size == 3 && !is_q) {
11751             unallocated_encoding(s);
11752             return;
11753         }
11754         break;
11755     case 0x3: /* SUQADD, USQADD */
11756         if (size == 3 && !is_q) {
11757             unallocated_encoding(s);
11758             return;
11759         }
11760         if (!fp_access_check(s)) {
11761             return;
11762         }
11763         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
11764         return;
11765     case 0x7: /* SQABS, SQNEG */
11766         if (size == 3 && !is_q) {
11767             unallocated_encoding(s);
11768             return;
11769         }
11770         break;
11771     case 0xc ... 0xf:
11772     case 0x16 ... 0x1d:
11773     case 0x1f:
11774     {
11775         /* Floating point: U, size[1] and opcode indicate operation;
11776          * size[0] indicates single or double precision.
11777          */
11778         int is_double = extract32(size, 0, 1);
11779         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
11780         size = is_double ? 3 : 2;
11781         switch (opcode) {
11782         case 0x2f: /* FABS */
11783         case 0x6f: /* FNEG */
11784             if (size == 3 && !is_q) {
11785                 unallocated_encoding(s);
11786                 return;
11787             }
11788             break;
11789         case 0x1d: /* SCVTF */
11790         case 0x5d: /* UCVTF */
11791         {
11792             bool is_signed = (opcode == 0x1d) ? true : false;
11793             int elements = is_double ? 2 : is_q ? 4 : 2;
11794             if (is_double && !is_q) {
11795                 unallocated_encoding(s);
11796                 return;
11797             }
11798             if (!fp_access_check(s)) {
11799                 return;
11800             }
11801             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
11802             return;
11803         }
11804         case 0x2c: /* FCMGT (zero) */
11805         case 0x2d: /* FCMEQ (zero) */
11806         case 0x2e: /* FCMLT (zero) */
11807         case 0x6c: /* FCMGE (zero) */
11808         case 0x6d: /* FCMLE (zero) */
11809             if (size == 3 && !is_q) {
11810                 unallocated_encoding(s);
11811                 return;
11812             }
11813             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
11814             return;
11815         case 0x7f: /* FSQRT */
11816             if (size == 3 && !is_q) {
11817                 unallocated_encoding(s);
11818                 return;
11819             }
11820             break;
11821         case 0x1a: /* FCVTNS */
11822         case 0x1b: /* FCVTMS */
11823         case 0x3a: /* FCVTPS */
11824         case 0x3b: /* FCVTZS */
11825         case 0x5a: /* FCVTNU */
11826         case 0x5b: /* FCVTMU */
11827         case 0x7a: /* FCVTPU */
11828         case 0x7b: /* FCVTZU */
11829             need_fpstatus = true;
11830             need_rmode = true;
11831             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11832             if (size == 3 && !is_q) {
11833                 unallocated_encoding(s);
11834                 return;
11835             }
11836             break;
11837         case 0x5c: /* FCVTAU */
11838         case 0x1c: /* FCVTAS */
11839             need_fpstatus = true;
11840             need_rmode = true;
11841             rmode = FPROUNDING_TIEAWAY;
11842             if (size == 3 && !is_q) {
11843                 unallocated_encoding(s);
11844                 return;
11845             }
11846             break;
11847         case 0x3c: /* URECPE */
11848             if (size == 3) {
11849                 unallocated_encoding(s);
11850                 return;
11851             }
11852             /* fall through */
11853         case 0x3d: /* FRECPE */
11854         case 0x7d: /* FRSQRTE */
11855             if (size == 3 && !is_q) {
11856                 unallocated_encoding(s);
11857                 return;
11858             }
11859             if (!fp_access_check(s)) {
11860                 return;
11861             }
11862             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
11863             return;
11864         case 0x56: /* FCVTXN, FCVTXN2 */
11865             if (size == 2) {
11866                 unallocated_encoding(s);
11867                 return;
11868             }
11869             /* fall through */
11870         case 0x16: /* FCVTN, FCVTN2 */
11871             /* handle_2misc_narrow does a 2*size -> size operation, but these
11872              * instructions encode the source size rather than dest size.
11873              */
11874             if (!fp_access_check(s)) {
11875                 return;
11876             }
11877             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
11878             return;
11879         case 0x17: /* FCVTL, FCVTL2 */
11880             if (!fp_access_check(s)) {
11881                 return;
11882             }
11883             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
11884             return;
11885         case 0x18: /* FRINTN */
11886         case 0x19: /* FRINTM */
11887         case 0x38: /* FRINTP */
11888         case 0x39: /* FRINTZ */
11889             need_rmode = true;
11890             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11891             /* fall through */
11892         case 0x59: /* FRINTX */
11893         case 0x79: /* FRINTI */
11894             need_fpstatus = true;
11895             if (size == 3 && !is_q) {
11896                 unallocated_encoding(s);
11897                 return;
11898             }
11899             break;
11900         case 0x58: /* FRINTA */
11901             need_rmode = true;
11902             rmode = FPROUNDING_TIEAWAY;
11903             need_fpstatus = true;
11904             if (size == 3 && !is_q) {
11905                 unallocated_encoding(s);
11906                 return;
11907             }
11908             break;
11909         case 0x7c: /* URSQRTE */
11910             if (size == 3) {
11911                 unallocated_encoding(s);
11912                 return;
11913             }
11914             need_fpstatus = true;
11915             break;
11916         default:
11917             unallocated_encoding(s);
11918             return;
11919         }
11920         break;
11921     }
11922     default:
11923         unallocated_encoding(s);
11924         return;
11925     }
11926
11927     if (!fp_access_check(s)) {
11928         return;
11929     }
11930
11931     if (need_fpstatus || need_rmode) {
11932         tcg_fpstatus = get_fpstatus_ptr(false);
11933     } else {
11934         tcg_fpstatus = NULL;
11935     }
11936     if (need_rmode) {
11937         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
11938         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
11939     } else {
11940         tcg_rmode = NULL;
11941     }
11942
11943     switch (opcode) {
11944     case 0x5:
11945         if (u && size == 0) { /* NOT */
11946             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
11947             return;
11948         }
11949         break;
11950     case 0xb:
11951         if (u) { /* NEG */
11952             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
11953             return;
11954         }
11955         break;
11956     }
11957
11958     if (size == 3) {
11959         /* All 64-bit element operations can be shared with scalar 2misc */
11960         int pass;
11961
11962         /* Coverity claims (size == 3 && !is_q) has been eliminated
11963          * from all paths leading to here.
11964          */
11965         tcg_debug_assert(is_q);
11966         for (pass = 0; pass < 2; pass++) {
11967             TCGv_i64 tcg_op = tcg_temp_new_i64();
11968             TCGv_i64 tcg_res = tcg_temp_new_i64();
11969
11970             read_vec_element(s, tcg_op, rn, pass, MO_64);
11971
11972             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
11973                             tcg_rmode, tcg_fpstatus);
11974
11975             write_vec_element(s, tcg_res, rd, pass, MO_64);
11976
11977             tcg_temp_free_i64(tcg_res);
11978             tcg_temp_free_i64(tcg_op);
11979         }
11980     } else {
11981         int pass;
11982
11983         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11984             TCGv_i32 tcg_op = tcg_temp_new_i32();
11985             TCGv_i32 tcg_res = tcg_temp_new_i32();
11986             TCGCond cond;
11987
11988             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
11989
11990             if (size == 2) {
11991                 /* Special cases for 32 bit elements */
11992                 switch (opcode) {
11993                 case 0xa: /* CMLT */
11994                     /* 32 bit integer comparison against zero, result is
11995                      * test ? (2^32 - 1) : 0. We implement via setcond(test)
11996                      * and inverting.
11997                      */
11998                     cond = TCG_COND_LT;
11999                 do_cmop:
12000                     tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
12001                     tcg_gen_neg_i32(tcg_res, tcg_res);
12002                     break;
12003                 case 0x8: /* CMGT, CMGE */
12004                     cond = u ? TCG_COND_GE : TCG_COND_GT;
12005                     goto do_cmop;
12006                 case 0x9: /* CMEQ, CMLE */
12007                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
12008                     goto do_cmop;
12009                 case 0x4: /* CLS */
12010                     if (u) {
12011                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12012                     } else {
12013                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
12014                     }
12015                     break;
12016                 case 0x7: /* SQABS, SQNEG */
12017                     if (u) {
12018                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12019                     } else {
12020                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12021                     }
12022                     break;
12023                 case 0xb: /* ABS, NEG */
12024                     if (u) {
12025                         tcg_gen_neg_i32(tcg_res, tcg_op);
12026                     } else {
12027                         TCGv_i32 tcg_zero = tcg_const_i32(0);
12028                         tcg_gen_neg_i32(tcg_res, tcg_op);
12029                         tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
12030                                             tcg_zero, tcg_op, tcg_res);
12031                         tcg_temp_free_i32(tcg_zero);
12032                     }
12033                     break;
12034                 case 0x2f: /* FABS */
12035                     gen_helper_vfp_abss(tcg_res, tcg_op);
12036                     break;
12037                 case 0x6f: /* FNEG */
12038                     gen_helper_vfp_negs(tcg_res, tcg_op);
12039                     break;
12040                 case 0x7f: /* FSQRT */
12041                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12042                     break;
12043                 case 0x1a: /* FCVTNS */
12044                 case 0x1b: /* FCVTMS */
12045                 case 0x1c: /* FCVTAS */
12046                 case 0x3a: /* FCVTPS */
12047                 case 0x3b: /* FCVTZS */
12048                 {
12049                     TCGv_i32 tcg_shift = tcg_const_i32(0);
12050                     gen_helper_vfp_tosls(tcg_res, tcg_op,
12051                                          tcg_shift, tcg_fpstatus);
12052                     tcg_temp_free_i32(tcg_shift);
12053                     break;
12054                 }
12055                 case 0x5a: /* FCVTNU */
12056                 case 0x5b: /* FCVTMU */
12057                 case 0x5c: /* FCVTAU */
12058                 case 0x7a: /* FCVTPU */
12059                 case 0x7b: /* FCVTZU */
12060                 {
12061                     TCGv_i32 tcg_shift = tcg_const_i32(0);
12062                     gen_helper_vfp_touls(tcg_res, tcg_op,
12063                                          tcg_shift, tcg_fpstatus);
12064                     tcg_temp_free_i32(tcg_shift);
12065                     break;
12066                 }
12067                 case 0x18: /* FRINTN */
12068                 case 0x19: /* FRINTM */
12069                 case 0x38: /* FRINTP */
12070                 case 0x39: /* FRINTZ */
12071                 case 0x58: /* FRINTA */
12072                 case 0x79: /* FRINTI */
12073                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12074                     break;
12075                 case 0x59: /* FRINTX */
12076                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12077                     break;
12078                 case 0x7c: /* URSQRTE */
12079                     gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
12080                     break;
12081                 default:
12082                     g_assert_not_reached();
12083                 }
12084             } else {
12085                 /* Use helpers for 8 and 16 bit elements */
12086                 switch (opcode) {
12087                 case 0x5: /* CNT, RBIT */
12088                     /* For these two insns size is part of the opcode specifier
12089                      * (handled earlier); they always operate on byte elements.
12090                      */
12091                     if (u) {
12092                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12093                     } else {
12094                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12095                     }
12096                     break;
12097                 case 0x7: /* SQABS, SQNEG */
12098                 {
12099                     NeonGenOneOpEnvFn *genfn;
12100                     static NeonGenOneOpEnvFn * const fns[2][2] = {
12101                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12102                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12103                     };
12104                     genfn = fns[size][u];
12105                     genfn(tcg_res, cpu_env, tcg_op);
12106                     break;
12107                 }
12108                 case 0x8: /* CMGT, CMGE */
12109                 case 0x9: /* CMEQ, CMLE */
12110                 case 0xa: /* CMLT */
12111                 {
12112                     static NeonGenTwoOpFn * const fns[3][2] = {
12113                         { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
12114                         { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
12115                         { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
12116                     };
12117                     NeonGenTwoOpFn *genfn;
12118                     int comp;
12119                     bool reverse;
12120                     TCGv_i32 tcg_zero = tcg_const_i32(0);
12121
12122                     /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
12123                     comp = (opcode - 0x8) * 2 + u;
12124                     /* ...but LE, LT are implemented as reverse GE, GT */
12125                     reverse = (comp > 2);
12126                     if (reverse) {
12127                         comp = 4 - comp;
12128                     }
12129                     genfn = fns[comp][size];
12130                     if (reverse) {
12131                         genfn(tcg_res, tcg_zero, tcg_op);
12132                     } else {
12133                         genfn(tcg_res, tcg_op, tcg_zero);
12134                     }
12135                     tcg_temp_free_i32(tcg_zero);
12136                     break;
12137                 }
12138                 case 0xb: /* ABS, NEG */
12139                     if (u) {
12140                         TCGv_i32 tcg_zero = tcg_const_i32(0);
12141                         if (size) {
12142                             gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
12143                         } else {
12144                             gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
12145                         }
12146                         tcg_temp_free_i32(tcg_zero);
12147                     } else {
12148                         if (size) {
12149                             gen_helper_neon_abs_s16(tcg_res, tcg_op);
12150                         } else {
12151                             gen_helper_neon_abs_s8(tcg_res, tcg_op);
12152                         }
12153                     }
12154                     break;
12155                 case 0x4: /* CLS, CLZ */
12156                     if (u) {
12157                         if (size == 0) {
12158                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
12159                         } else {
12160                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
12161                         }
12162                     } else {
12163                         if (size == 0) {
12164                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
12165                         } else {
12166                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
12167                         }
12168                     }
12169                     break;
12170                 default:
12171                     g_assert_not_reached();
12172                 }
12173             }
12174
12175             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12176
12177             tcg_temp_free_i32(tcg_res);
12178             tcg_temp_free_i32(tcg_op);
12179         }
12180     }
12181     clear_vec_high(s, is_q, rd);
12182
12183     if (need_rmode) {
12184         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12185         tcg_temp_free_i32(tcg_rmode);
12186     }
12187     if (need_fpstatus) {
12188         tcg_temp_free_ptr(tcg_fpstatus);
12189     }
12190 }
12191
12192 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12193  *
12194  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12195  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12196  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12197  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12198  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12199  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12200  *
12201  * This actually covers two groups where scalar access is governed by
12202  * bit 28. A bunch of the instructions (float to integral) only exist
12203  * in the vector form and are un-allocated for the scalar decode. Also
12204  * in the scalar decode Q is always 1.
12205  */
12206 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12207 {
12208     int fpop, opcode, a, u;
12209     int rn, rd;
12210     bool is_q;
12211     bool is_scalar;
12212     bool only_in_vector = false;
12213
12214     int pass;
12215     TCGv_i32 tcg_rmode = NULL;
12216     TCGv_ptr tcg_fpstatus = NULL;
12217     bool need_rmode = false;
12218     bool need_fpst = true;
12219     int rmode;
12220
12221     if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
12222         unallocated_encoding(s);
12223         return;
12224     }
12225
12226     rd = extract32(insn, 0, 5);
12227     rn = extract32(insn, 5, 5);
12228
12229     a = extract32(insn, 23, 1);
12230     u = extract32(insn, 29, 1);
12231     is_scalar = extract32(insn, 28, 1);
12232     is_q = extract32(insn, 30, 1);
12233
12234     opcode = extract32(insn, 12, 5);
12235     fpop = deposit32(opcode, 5, 1, a);
12236     fpop = deposit32(fpop, 6, 1, u);
12237
12238     rd = extract32(insn, 0, 5);
12239     rn = extract32(insn, 5, 5);
12240
12241     switch (fpop) {
12242     case 0x1d: /* SCVTF */
12243     case 0x5d: /* UCVTF */
12244     {
12245         int elements;
12246
12247         if (is_scalar) {
12248             elements = 1;
12249         } else {
12250             elements = (is_q ? 8 : 4);
12251         }
12252
12253         if (!fp_access_check(s)) {
12254             return;
12255         }
12256         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12257         return;
12258     }
12259     break;
12260     case 0x2c: /* FCMGT (zero) */
12261     case 0x2d: /* FCMEQ (zero) */
12262     case 0x2e: /* FCMLT (zero) */
12263     case 0x6c: /* FCMGE (zero) */
12264     case 0x6d: /* FCMLE (zero) */
12265         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12266         return;
12267     case 0x3d: /* FRECPE */
12268     case 0x3f: /* FRECPX */
12269         break;
12270     case 0x18: /* FRINTN */
12271         need_rmode = true;
12272         only_in_vector = true;
12273         rmode = FPROUNDING_TIEEVEN;
12274         break;
12275     case 0x19: /* FRINTM */
12276         need_rmode = true;
12277         only_in_vector = true;
12278         rmode = FPROUNDING_NEGINF;
12279         break;
12280     case 0x38: /* FRINTP */
12281         need_rmode = true;
12282         only_in_vector = true;
12283         rmode = FPROUNDING_POSINF;
12284         break;
12285     case 0x39: /* FRINTZ */
12286         need_rmode = true;
12287         only_in_vector = true;
12288         rmode = FPROUNDING_ZERO;
12289         break;
12290     case 0x58: /* FRINTA */
12291         need_rmode = true;
12292         only_in_vector = true;
12293         rmode = FPROUNDING_TIEAWAY;
12294         break;
12295     case 0x59: /* FRINTX */
12296     case 0x79: /* FRINTI */
12297         only_in_vector = true;
12298         /* current rounding mode */
12299         break;
12300     case 0x1a: /* FCVTNS */
12301         need_rmode = true;
12302         rmode = FPROUNDING_TIEEVEN;
12303         break;
12304     case 0x1b: /* FCVTMS */
12305         need_rmode = true;
12306         rmode = FPROUNDING_NEGINF;
12307         break;
12308     case 0x1c: /* FCVTAS */
12309         need_rmode = true;
12310         rmode = FPROUNDING_TIEAWAY;
12311         break;
12312     case 0x3a: /* FCVTPS */
12313         need_rmode = true;
12314         rmode = FPROUNDING_POSINF;
12315         break;
12316     case 0x3b: /* FCVTZS */
12317         need_rmode = true;
12318         rmode = FPROUNDING_ZERO;
12319         break;
12320     case 0x5a: /* FCVTNU */
12321         need_rmode = true;
12322         rmode = FPROUNDING_TIEEVEN;
12323         break;
12324     case 0x5b: /* FCVTMU */
12325         need_rmode = true;
12326         rmode = FPROUNDING_NEGINF;
12327         break;
12328     case 0x5c: /* FCVTAU */
12329         need_rmode = true;
12330         rmode = FPROUNDING_TIEAWAY;
12331         break;
12332     case 0x7a: /* FCVTPU */
12333         need_rmode = true;
12334         rmode = FPROUNDING_POSINF;
12335         break;
12336     case 0x7b: /* FCVTZU */
12337         need_rmode = true;
12338         rmode = FPROUNDING_ZERO;
12339         break;
12340     case 0x2f: /* FABS */
12341     case 0x6f: /* FNEG */
12342         need_fpst = false;
12343         break;
12344     case 0x7d: /* FRSQRTE */
12345     case 0x7f: /* FSQRT (vector) */
12346         break;
12347     default:
12348         fprintf(stderr, "%s: insn %#04x fpop %#2x\n", __func__, insn, fpop);
12349         g_assert_not_reached();
12350     }
12351
12352
12353     /* Check additional constraints for the scalar encoding */
12354     if (is_scalar) {
12355         if (!is_q) {
12356             unallocated_encoding(s);
12357             return;
12358         }
12359         /* FRINTxx is only in the vector form */
12360         if (only_in_vector) {
12361             unallocated_encoding(s);
12362             return;
12363         }
12364     }
12365
12366     if (!fp_access_check(s)) {
12367         return;
12368     }
12369
12370     if (need_rmode || need_fpst) {
12371         tcg_fpstatus = get_fpstatus_ptr(true);
12372     }
12373
12374     if (need_rmode) {
12375         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12376         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12377     }
12378
12379     if (is_scalar) {
12380         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12381         TCGv_i32 tcg_res = tcg_temp_new_i32();
12382
12383         switch (fpop) {
12384         case 0x1a: /* FCVTNS */
12385         case 0x1b: /* FCVTMS */
12386         case 0x1c: /* FCVTAS */
12387         case 0x3a: /* FCVTPS */
12388         case 0x3b: /* FCVTZS */
12389             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12390             break;
12391         case 0x3d: /* FRECPE */
12392             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12393             break;
12394         case 0x3f: /* FRECPX */
12395             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12396             break;
12397         case 0x5a: /* FCVTNU */
12398         case 0x5b: /* FCVTMU */
12399         case 0x5c: /* FCVTAU */
12400         case 0x7a: /* FCVTPU */
12401         case 0x7b: /* FCVTZU */
12402             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12403             break;
12404         case 0x6f: /* FNEG */
12405             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12406             break;
12407         case 0x7d: /* FRSQRTE */
12408             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12409             break;
12410         default:
12411             g_assert_not_reached();
12412         }
12413
12414         /* limit any sign extension going on */
12415         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12416         write_fp_sreg(s, rd, tcg_res);
12417
12418         tcg_temp_free_i32(tcg_res);
12419         tcg_temp_free_i32(tcg_op);
12420     } else {
12421         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12422             TCGv_i32 tcg_op = tcg_temp_new_i32();
12423             TCGv_i32 tcg_res = tcg_temp_new_i32();
12424
12425             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12426
12427             switch (fpop) {
12428             case 0x1a: /* FCVTNS */
12429             case 0x1b: /* FCVTMS */
12430             case 0x1c: /* FCVTAS */
12431             case 0x3a: /* FCVTPS */
12432             case 0x3b: /* FCVTZS */
12433                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12434                 break;
12435             case 0x3d: /* FRECPE */
12436                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12437                 break;
12438             case 0x5a: /* FCVTNU */
12439             case 0x5b: /* FCVTMU */
12440             case 0x5c: /* FCVTAU */
12441             case 0x7a: /* FCVTPU */
12442             case 0x7b: /* FCVTZU */
12443                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12444                 break;
12445             case 0x18: /* FRINTN */
12446             case 0x19: /* FRINTM */
12447             case 0x38: /* FRINTP */
12448             case 0x39: /* FRINTZ */
12449             case 0x58: /* FRINTA */
12450             case 0x79: /* FRINTI */
12451                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12452                 break;
12453             case 0x59: /* FRINTX */
12454                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12455                 break;
12456             case 0x2f: /* FABS */
12457                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12458                 break;
12459             case 0x6f: /* FNEG */
12460                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12461                 break;
12462             case 0x7d: /* FRSQRTE */
12463                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12464                 break;
12465             case 0x7f: /* FSQRT */
12466                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12467                 break;
12468             default:
12469                 g_assert_not_reached();
12470             }
12471
12472             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12473
12474             tcg_temp_free_i32(tcg_res);
12475             tcg_temp_free_i32(tcg_op);
12476         }
12477
12478         clear_vec_high(s, is_q, rd);
12479     }
12480
12481     if (tcg_rmode) {
12482         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12483         tcg_temp_free_i32(tcg_rmode);
12484     }
12485
12486     if (tcg_fpstatus) {
12487         tcg_temp_free_ptr(tcg_fpstatus);
12488     }
12489 }
12490
12491 /* AdvSIMD scalar x indexed element
12492  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12493  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12494  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12495  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12496  * AdvSIMD vector x indexed element
12497  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12498  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12499  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12500  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12501  */
12502 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12503 {
12504     /* This encoding has two kinds of instruction:
12505      *  normal, where we perform elt x idxelt => elt for each
12506      *     element in the vector
12507      *  long, where we perform elt x idxelt and generate a result of
12508      *     double the width of the input element
12509      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12510      */
12511     bool is_scalar = extract32(insn, 28, 1);
12512     bool is_q = extract32(insn, 30, 1);
12513     bool u = extract32(insn, 29, 1);
12514     int size = extract32(insn, 22, 2);
12515     int l = extract32(insn, 21, 1);
12516     int m = extract32(insn, 20, 1);
12517     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12518     int rm = extract32(insn, 16, 4);
12519     int opcode = extract32(insn, 12, 4);
12520     int h = extract32(insn, 11, 1);
12521     int rn = extract32(insn, 5, 5);
12522     int rd = extract32(insn, 0, 5);
12523     bool is_long = false;
12524     int is_fp = 0;
12525     bool is_fp16 = false;
12526     int index;
12527     TCGv_ptr fpst;
12528
12529     switch (16 * u + opcode) {
12530     case 0x08: /* MUL */
12531     case 0x10: /* MLA */
12532     case 0x14: /* MLS */
12533         if (is_scalar) {
12534             unallocated_encoding(s);
12535             return;
12536         }
12537         break;
12538     case 0x02: /* SMLAL, SMLAL2 */
12539     case 0x12: /* UMLAL, UMLAL2 */
12540     case 0x06: /* SMLSL, SMLSL2 */
12541     case 0x16: /* UMLSL, UMLSL2 */
12542     case 0x0a: /* SMULL, SMULL2 */
12543     case 0x1a: /* UMULL, UMULL2 */
12544         if (is_scalar) {
12545             unallocated_encoding(s);
12546             return;
12547         }
12548         is_long = true;
12549         break;
12550     case 0x03: /* SQDMLAL, SQDMLAL2 */
12551     case 0x07: /* SQDMLSL, SQDMLSL2 */
12552     case 0x0b: /* SQDMULL, SQDMULL2 */
12553         is_long = true;
12554         break;
12555     case 0x0c: /* SQDMULH */
12556     case 0x0d: /* SQRDMULH */
12557         break;
12558     case 0x01: /* FMLA */
12559     case 0x05: /* FMLS */
12560     case 0x09: /* FMUL */
12561     case 0x19: /* FMULX */
12562         is_fp = 1;
12563         break;
12564     case 0x1d: /* SQRDMLAH */
12565     case 0x1f: /* SQRDMLSH */
12566         if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
12567             unallocated_encoding(s);
12568             return;
12569         }
12570         break;
12571     case 0x11: /* FCMLA #0 */
12572     case 0x13: /* FCMLA #90 */
12573     case 0x15: /* FCMLA #180 */
12574     case 0x17: /* FCMLA #270 */
12575         if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) {
12576             unallocated_encoding(s);
12577             return;
12578         }
12579         is_fp = 2;
12580         break;
12581     default:
12582         unallocated_encoding(s);
12583         return;
12584     }
12585
12586     switch (is_fp) {
12587     case 1: /* normal fp */
12588         /* convert insn encoded size to TCGMemOp size */
12589         switch (size) {
12590         case 0: /* half-precision */
12591             size = MO_16;
12592             is_fp16 = true;
12593             break;
12594         case MO_32: /* single precision */
12595         case MO_64: /* double precision */
12596             break;
12597         default:
12598             unallocated_encoding(s);
12599             return;
12600         }
12601         break;
12602
12603     case 2: /* complex fp */
12604         /* Each indexable element is a complex pair.  */
12605         size <<= 1;
12606         switch (size) {
12607         case MO_32:
12608             if (h && !is_q) {
12609                 unallocated_encoding(s);
12610                 return;
12611             }
12612             is_fp16 = true;
12613             break;
12614         case MO_64:
12615             break;
12616         default:
12617             unallocated_encoding(s);
12618             return;
12619         }
12620         break;
12621
12622     default: /* integer */
12623         switch (size) {
12624         case MO_8:
12625         case MO_64:
12626             unallocated_encoding(s);
12627             return;
12628         }
12629         break;
12630     }
12631     if (is_fp16 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
12632         unallocated_encoding(s);
12633         return;
12634     }
12635
12636     /* Given TCGMemOp size, adjust register and indexing.  */
12637     switch (size) {
12638     case MO_16:
12639         index = h << 2 | l << 1 | m;
12640         break;
12641     case MO_32:
12642         index = h << 1 | l;
12643         rm |= m << 4;
12644         break;
12645     case MO_64:
12646         if (l || !is_q) {
12647             unallocated_encoding(s);
12648             return;
12649         }
12650         index = h;
12651         rm |= m << 4;
12652         break;
12653     default:
12654         g_assert_not_reached();
12655     }
12656
12657     if (!fp_access_check(s)) {
12658         return;
12659     }
12660
12661     if (is_fp) {
12662         fpst = get_fpstatus_ptr(is_fp16);
12663     } else {
12664         fpst = NULL;
12665     }
12666
12667     switch (16 * u + opcode) {
12668     case 0x11: /* FCMLA #0 */
12669     case 0x13: /* FCMLA #90 */
12670     case 0x15: /* FCMLA #180 */
12671     case 0x17: /* FCMLA #270 */
12672         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12673                            vec_full_reg_offset(s, rn),
12674                            vec_reg_offset(s, rm, index, size), fpst,
12675                            is_q ? 16 : 8, vec_full_reg_size(s),
12676                            extract32(insn, 13, 2), /* rot */
12677                            size == MO_64
12678                            ? gen_helper_gvec_fcmlas_idx
12679                            : gen_helper_gvec_fcmlah_idx);
12680         tcg_temp_free_ptr(fpst);
12681         return;
12682     }
12683
12684     if (size == 3) {
12685         TCGv_i64 tcg_idx = tcg_temp_new_i64();
12686         int pass;
12687
12688         assert(is_fp && is_q && !is_long);
12689
12690         read_vec_element(s, tcg_idx, rm, index, MO_64);
12691
12692         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12693             TCGv_i64 tcg_op = tcg_temp_new_i64();
12694             TCGv_i64 tcg_res = tcg_temp_new_i64();
12695
12696             read_vec_element(s, tcg_op, rn, pass, MO_64);
12697
12698             switch (16 * u + opcode) {
12699             case 0x05: /* FMLS */
12700                 /* As usual for ARM, separate negation for fused multiply-add */
12701                 gen_helper_vfp_negd(tcg_op, tcg_op);
12702                 /* fall through */
12703             case 0x01: /* FMLA */
12704                 read_vec_element(s, tcg_res, rd, pass, MO_64);
12705                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
12706                 break;
12707             case 0x09: /* FMUL */
12708                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
12709                 break;
12710             case 0x19: /* FMULX */
12711                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
12712                 break;
12713             default:
12714                 g_assert_not_reached();
12715             }
12716
12717             write_vec_element(s, tcg_res, rd, pass, MO_64);
12718             tcg_temp_free_i64(tcg_op);
12719             tcg_temp_free_i64(tcg_res);
12720         }
12721
12722         tcg_temp_free_i64(tcg_idx);
12723         clear_vec_high(s, !is_scalar, rd);
12724     } else if (!is_long) {
12725         /* 32 bit floating point, or 16 or 32 bit integer.
12726          * For the 16 bit scalar case we use the usual Neon helpers and
12727          * rely on the fact that 0 op 0 == 0 with no side effects.
12728          */
12729         TCGv_i32 tcg_idx = tcg_temp_new_i32();
12730         int pass, maxpasses;
12731
12732         if (is_scalar) {
12733             maxpasses = 1;
12734         } else {
12735             maxpasses = is_q ? 4 : 2;
12736         }
12737
12738         read_vec_element_i32(s, tcg_idx, rm, index, size);
12739
12740         if (size == 1 && !is_scalar) {
12741             /* The simplest way to handle the 16x16 indexed ops is to duplicate
12742              * the index into both halves of the 32 bit tcg_idx and then use
12743              * the usual Neon helpers.
12744              */
12745             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
12746         }
12747
12748         for (pass = 0; pass < maxpasses; pass++) {
12749             TCGv_i32 tcg_op = tcg_temp_new_i32();
12750             TCGv_i32 tcg_res = tcg_temp_new_i32();
12751
12752             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
12753
12754             switch (16 * u + opcode) {
12755             case 0x08: /* MUL */
12756             case 0x10: /* MLA */
12757             case 0x14: /* MLS */
12758             {
12759                 static NeonGenTwoOpFn * const fns[2][2] = {
12760                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
12761                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
12762                 };
12763                 NeonGenTwoOpFn *genfn;
12764                 bool is_sub = opcode == 0x4;
12765
12766                 if (size == 1) {
12767                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
12768                 } else {
12769                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
12770                 }
12771                 if (opcode == 0x8) {
12772                     break;
12773                 }
12774                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
12775                 genfn = fns[size - 1][is_sub];
12776                 genfn(tcg_res, tcg_op, tcg_res);
12777                 break;
12778             }
12779             case 0x05: /* FMLS */
12780             case 0x01: /* FMLA */
12781                 read_vec_element_i32(s, tcg_res, rd, pass,
12782                                      is_scalar ? size : MO_32);
12783                 switch (size) {
12784                 case 1:
12785                     if (opcode == 0x5) {
12786                         /* As usual for ARM, separate negation for fused
12787                          * multiply-add */
12788                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
12789                     }
12790                     if (is_scalar) {
12791                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
12792                                                    tcg_res, fpst);
12793                     } else {
12794                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
12795                                                     tcg_res, fpst);
12796                     }
12797                     break;
12798                 case 2:
12799                     if (opcode == 0x5) {
12800                         /* As usual for ARM, separate negation for
12801                          * fused multiply-add */
12802                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
12803                     }
12804                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
12805                                            tcg_res, fpst);
12806                     break;
12807                 default:
12808                     g_assert_not_reached();
12809                 }
12810                 break;
12811             case 0x09: /* FMUL */
12812                 switch (size) {
12813                 case 1:
12814                     if (is_scalar) {
12815                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
12816                                                 tcg_idx, fpst);
12817                     } else {
12818                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
12819                                                  tcg_idx, fpst);
12820                     }
12821                     break;
12822                 case 2:
12823                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
12824                     break;
12825                 default:
12826                     g_assert_not_reached();
12827                 }
12828                 break;
12829             case 0x19: /* FMULX */
12830                 switch (size) {
12831                 case 1:
12832                     if (is_scalar) {
12833                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
12834                                                  tcg_idx, fpst);
12835                     } else {
12836                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
12837                                                   tcg_idx, fpst);
12838                     }
12839                     break;
12840                 case 2:
12841                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
12842                     break;
12843                 default:
12844                     g_assert_not_reached();
12845                 }
12846                 break;
12847             case 0x0c: /* SQDMULH */
12848                 if (size == 1) {
12849                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
12850                                                tcg_op, tcg_idx);
12851                 } else {
12852                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
12853                                                tcg_op, tcg_idx);
12854                 }
12855                 break;
12856             case 0x0d: /* SQRDMULH */
12857                 if (size == 1) {
12858                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
12859                                                 tcg_op, tcg_idx);
12860                 } else {
12861                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
12862                                                 tcg_op, tcg_idx);
12863                 }
12864                 break;
12865             case 0x1d: /* SQRDMLAH */
12866                 read_vec_element_i32(s, tcg_res, rd, pass,
12867                                      is_scalar ? size : MO_32);
12868                 if (size == 1) {
12869                     gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
12870                                                 tcg_op, tcg_idx, tcg_res);
12871                 } else {
12872                     gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
12873                                                 tcg_op, tcg_idx, tcg_res);
12874                 }
12875                 break;
12876             case 0x1f: /* SQRDMLSH */
12877                 read_vec_element_i32(s, tcg_res, rd, pass,
12878                                      is_scalar ? size : MO_32);
12879                 if (size == 1) {
12880                     gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
12881                                                 tcg_op, tcg_idx, tcg_res);
12882                 } else {
12883                     gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
12884                                                 tcg_op, tcg_idx, tcg_res);
12885                 }
12886                 break;
12887             default:
12888                 g_assert_not_reached();
12889             }
12890
12891             if (is_scalar) {
12892                 write_fp_sreg(s, rd, tcg_res);
12893             } else {
12894                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12895             }
12896
12897             tcg_temp_free_i32(tcg_op);
12898             tcg_temp_free_i32(tcg_res);
12899         }
12900
12901         tcg_temp_free_i32(tcg_idx);
12902         clear_vec_high(s, is_q, rd);
12903     } else {
12904         /* long ops: 16x16->32 or 32x32->64 */
12905         TCGv_i64 tcg_res[2];
12906         int pass;
12907         bool satop = extract32(opcode, 0, 1);
12908         TCGMemOp memop = MO_32;
12909
12910         if (satop || !u) {
12911             memop |= MO_SIGN;
12912         }
12913
12914         if (size == 2) {
12915             TCGv_i64 tcg_idx = tcg_temp_new_i64();
12916
12917             read_vec_element(s, tcg_idx, rm, index, memop);
12918
12919             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12920                 TCGv_i64 tcg_op = tcg_temp_new_i64();
12921                 TCGv_i64 tcg_passres;
12922                 int passelt;
12923
12924                 if (is_scalar) {
12925                     passelt = 0;
12926                 } else {
12927                     passelt = pass + (is_q * 2);
12928                 }
12929
12930                 read_vec_element(s, tcg_op, rn, passelt, memop);
12931
12932                 tcg_res[pass] = tcg_temp_new_i64();
12933
12934                 if (opcode == 0xa || opcode == 0xb) {
12935                     /* Non-accumulating ops */
12936                     tcg_passres = tcg_res[pass];
12937                 } else {
12938                     tcg_passres = tcg_temp_new_i64();
12939                 }
12940
12941                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
12942                 tcg_temp_free_i64(tcg_op);
12943
12944                 if (satop) {
12945                     /* saturating, doubling */
12946                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
12947                                                       tcg_passres, tcg_passres);
12948                 }
12949
12950                 if (opcode == 0xa || opcode == 0xb) {
12951                     continue;
12952                 }
12953
12954                 /* Accumulating op: handle accumulate step */
12955                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12956
12957                 switch (opcode) {
12958                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
12959                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
12960                     break;
12961                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
12962                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
12963                     break;
12964                 case 0x7: /* SQDMLSL, SQDMLSL2 */
12965                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
12966                     /* fall through */
12967                 case 0x3: /* SQDMLAL, SQDMLAL2 */
12968                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
12969                                                       tcg_res[pass],
12970                                                       tcg_passres);
12971                     break;
12972                 default:
12973                     g_assert_not_reached();
12974                 }
12975                 tcg_temp_free_i64(tcg_passres);
12976             }
12977             tcg_temp_free_i64(tcg_idx);
12978
12979             clear_vec_high(s, !is_scalar, rd);
12980         } else {
12981             TCGv_i32 tcg_idx = tcg_temp_new_i32();
12982
12983             assert(size == 1);
12984             read_vec_element_i32(s, tcg_idx, rm, index, size);
12985
12986             if (!is_scalar) {
12987                 /* The simplest way to handle the 16x16 indexed ops is to
12988                  * duplicate the index into both halves of the 32 bit tcg_idx
12989                  * and then use the usual Neon helpers.
12990                  */
12991                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
12992             }
12993
12994             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12995                 TCGv_i32 tcg_op = tcg_temp_new_i32();
12996                 TCGv_i64 tcg_passres;
12997
12998                 if (is_scalar) {
12999                     read_vec_element_i32(s, tcg_op, rn, pass, size);
13000                 } else {
13001                     read_vec_element_i32(s, tcg_op, rn,
13002                                          pass + (is_q * 2), MO_32);
13003                 }
13004
13005                 tcg_res[pass] = tcg_temp_new_i64();
13006
13007                 if (opcode == 0xa || opcode == 0xb) {
13008                     /* Non-accumulating ops */
13009                     tcg_passres = tcg_res[pass];
13010                 } else {
13011                     tcg_passres = tcg_temp_new_i64();
13012                 }
13013
13014                 if (memop & MO_SIGN) {
13015                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13016                 } else {
13017                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13018                 }
13019                 if (satop) {
13020                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13021                                                       tcg_passres, tcg_passres);
13022                 }
13023                 tcg_temp_free_i32(tcg_op);
13024
13025                 if (opcode == 0xa || opcode == 0xb) {
13026                     continue;
13027                 }
13028
13029                 /* Accumulating op: handle accumulate step */
13030                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13031
13032                 switch (opcode) {
13033                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13034                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13035                                              tcg_passres);
13036                     break;
13037                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13038                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13039                                              tcg_passres);
13040                     break;
13041                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13042                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13043                     /* fall through */
13044                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13045                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13046                                                       tcg_res[pass],
13047                                                       tcg_passres);
13048                     break;
13049                 default:
13050                     g_assert_not_reached();
13051                 }
13052                 tcg_temp_free_i64(tcg_passres);
13053             }
13054             tcg_temp_free_i32(tcg_idx);
13055
13056             if (is_scalar) {
13057                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13058             }
13059         }
13060
13061         if (is_scalar) {
13062             tcg_res[1] = tcg_const_i64(0);
13063         }
13064
13065         for (pass = 0; pass < 2; pass++) {
13066             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13067             tcg_temp_free_i64(tcg_res[pass]);
13068         }
13069     }
13070
13071     if (fpst) {
13072         tcg_temp_free_ptr(fpst);
13073     }
13074 }
13075
13076 /* Crypto AES
13077  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13078  * +-----------------+------+-----------+--------+-----+------+------+
13079  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13080  * +-----------------+------+-----------+--------+-----+------+------+
13081  */
13082 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13083 {
13084     int size = extract32(insn, 22, 2);
13085     int opcode = extract32(insn, 12, 5);
13086     int rn = extract32(insn, 5, 5);
13087     int rd = extract32(insn, 0, 5);
13088     int decrypt;
13089     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13090     TCGv_i32 tcg_decrypt;
13091     CryptoThreeOpIntFn *genfn;
13092
13093     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
13094         || size != 0) {
13095         unallocated_encoding(s);
13096         return;
13097     }
13098
13099     switch (opcode) {
13100     case 0x4: /* AESE */
13101         decrypt = 0;
13102         genfn = gen_helper_crypto_aese;
13103         break;
13104     case 0x6: /* AESMC */
13105         decrypt = 0;
13106         genfn = gen_helper_crypto_aesmc;
13107         break;
13108     case 0x5: /* AESD */
13109         decrypt = 1;
13110         genfn = gen_helper_crypto_aese;
13111         break;
13112     case 0x7: /* AESIMC */
13113         decrypt = 1;
13114         genfn = gen_helper_crypto_aesmc;
13115         break;
13116     default:
13117         unallocated_encoding(s);
13118         return;
13119     }
13120
13121     if (!fp_access_check(s)) {
13122         return;
13123     }
13124
13125     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13126     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13127     tcg_decrypt = tcg_const_i32(decrypt);
13128
13129     genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt);
13130
13131     tcg_temp_free_ptr(tcg_rd_ptr);
13132     tcg_temp_free_ptr(tcg_rn_ptr);
13133     tcg_temp_free_i32(tcg_decrypt);
13134 }
13135
13136 /* Crypto three-reg SHA
13137  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13138  * +-----------------+------+---+------+---+--------+-----+------+------+
13139  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13140  * +-----------------+------+---+------+---+--------+-----+------+------+
13141  */
13142 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13143 {
13144     int size = extract32(insn, 22, 2);
13145     int opcode = extract32(insn, 12, 3);
13146     int rm = extract32(insn, 16, 5);
13147     int rn = extract32(insn, 5, 5);
13148     int rd = extract32(insn, 0, 5);
13149     CryptoThreeOpFn *genfn;
13150     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13151     int feature = ARM_FEATURE_V8_SHA256;
13152
13153     if (size != 0) {
13154         unallocated_encoding(s);
13155         return;
13156     }
13157
13158     switch (opcode) {
13159     case 0: /* SHA1C */
13160     case 1: /* SHA1P */
13161     case 2: /* SHA1M */
13162     case 3: /* SHA1SU0 */
13163         genfn = NULL;
13164         feature = ARM_FEATURE_V8_SHA1;
13165         break;
13166     case 4: /* SHA256H */
13167         genfn = gen_helper_crypto_sha256h;
13168         break;
13169     case 5: /* SHA256H2 */
13170         genfn = gen_helper_crypto_sha256h2;
13171         break;
13172     case 6: /* SHA256SU1 */
13173         genfn = gen_helper_crypto_sha256su1;
13174         break;
13175     default:
13176         unallocated_encoding(s);
13177         return;
13178     }
13179
13180     if (!arm_dc_feature(s, feature)) {
13181         unallocated_encoding(s);
13182         return;
13183     }
13184
13185     if (!fp_access_check(s)) {
13186         return;
13187     }
13188
13189     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13190     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13191     tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13192
13193     if (genfn) {
13194         genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
13195     } else {
13196         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
13197
13198         gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
13199                                     tcg_rm_ptr, tcg_opcode);
13200         tcg_temp_free_i32(tcg_opcode);
13201     }
13202
13203     tcg_temp_free_ptr(tcg_rd_ptr);
13204     tcg_temp_free_ptr(tcg_rn_ptr);
13205     tcg_temp_free_ptr(tcg_rm_ptr);
13206 }
13207
13208 /* Crypto two-reg SHA
13209  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13210  * +-----------------+------+-----------+--------+-----+------+------+
13211  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13212  * +-----------------+------+-----------+--------+-----+------+------+
13213  */
13214 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13215 {
13216     int size = extract32(insn, 22, 2);
13217     int opcode = extract32(insn, 12, 5);
13218     int rn = extract32(insn, 5, 5);
13219     int rd = extract32(insn, 0, 5);
13220     CryptoTwoOpFn *genfn;
13221     int feature;
13222     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13223
13224     if (size != 0) {
13225         unallocated_encoding(s);
13226         return;
13227     }
13228
13229     switch (opcode) {
13230     case 0: /* SHA1H */
13231         feature = ARM_FEATURE_V8_SHA1;
13232         genfn = gen_helper_crypto_sha1h;
13233         break;
13234     case 1: /* SHA1SU1 */
13235         feature = ARM_FEATURE_V8_SHA1;
13236         genfn = gen_helper_crypto_sha1su1;
13237         break;
13238     case 2: /* SHA256SU0 */
13239         feature = ARM_FEATURE_V8_SHA256;
13240         genfn = gen_helper_crypto_sha256su0;
13241         break;
13242     default:
13243         unallocated_encoding(s);
13244         return;
13245     }
13246
13247     if (!arm_dc_feature(s, feature)) {
13248         unallocated_encoding(s);
13249         return;
13250     }
13251
13252     if (!fp_access_check(s)) {
13253         return;
13254     }
13255
13256     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13257     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13258
13259     genfn(tcg_rd_ptr, tcg_rn_ptr);
13260
13261     tcg_temp_free_ptr(tcg_rd_ptr);
13262     tcg_temp_free_ptr(tcg_rn_ptr);
13263 }
13264
13265 /* Crypto three-reg SHA512
13266  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13267  * +-----------------------+------+---+---+-----+--------+------+------+
13268  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13269  * +-----------------------+------+---+---+-----+--------+------+------+
13270  */
13271 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13272 {
13273     int opcode = extract32(insn, 10, 2);
13274     int o =  extract32(insn, 14, 1);
13275     int rm = extract32(insn, 16, 5);
13276     int rn = extract32(insn, 5, 5);
13277     int rd = extract32(insn, 0, 5);
13278     int feature;
13279     CryptoThreeOpFn *genfn;
13280
13281     if (o == 0) {
13282         switch (opcode) {
13283         case 0: /* SHA512H */
13284             feature = ARM_FEATURE_V8_SHA512;
13285             genfn = gen_helper_crypto_sha512h;
13286             break;
13287         case 1: /* SHA512H2 */
13288             feature = ARM_FEATURE_V8_SHA512;
13289             genfn = gen_helper_crypto_sha512h2;
13290             break;
13291         case 2: /* SHA512SU1 */
13292             feature = ARM_FEATURE_V8_SHA512;
13293             genfn = gen_helper_crypto_sha512su1;
13294             break;
13295         case 3: /* RAX1 */
13296             feature = ARM_FEATURE_V8_SHA3;
13297             genfn = NULL;
13298             break;
13299         }
13300     } else {
13301         switch (opcode) {
13302         case 0: /* SM3PARTW1 */
13303             feature = ARM_FEATURE_V8_SM3;
13304             genfn = gen_helper_crypto_sm3partw1;
13305             break;
13306         case 1: /* SM3PARTW2 */
13307             feature = ARM_FEATURE_V8_SM3;
13308             genfn = gen_helper_crypto_sm3partw2;
13309             break;
13310         case 2: /* SM4EKEY */
13311             feature = ARM_FEATURE_V8_SM4;
13312             genfn = gen_helper_crypto_sm4ekey;
13313             break;
13314         default:
13315             unallocated_encoding(s);
13316             return;
13317         }
13318     }
13319
13320     if (!arm_dc_feature(s, feature)) {
13321         unallocated_encoding(s);
13322         return;
13323     }
13324
13325     if (!fp_access_check(s)) {
13326         return;
13327     }
13328
13329     if (genfn) {
13330         TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13331
13332         tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13333         tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13334         tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13335
13336         genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
13337
13338         tcg_temp_free_ptr(tcg_rd_ptr);
13339         tcg_temp_free_ptr(tcg_rn_ptr);
13340         tcg_temp_free_ptr(tcg_rm_ptr);
13341     } else {
13342         TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13343         int pass;
13344
13345         tcg_op1 = tcg_temp_new_i64();
13346         tcg_op2 = tcg_temp_new_i64();
13347         tcg_res[0] = tcg_temp_new_i64();
13348         tcg_res[1] = tcg_temp_new_i64();
13349
13350         for (pass = 0; pass < 2; pass++) {
13351             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13352             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13353
13354             tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1);
13355             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13356         }
13357         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13358         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13359
13360         tcg_temp_free_i64(tcg_op1);
13361         tcg_temp_free_i64(tcg_op2);
13362         tcg_temp_free_i64(tcg_res[0]);
13363         tcg_temp_free_i64(tcg_res[1]);
13364     }
13365 }
13366
13367 /* Crypto two-reg SHA512
13368  *  31                                     12  11  10  9    5 4    0
13369  * +-----------------------------------------+--------+------+------+
13370  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13371  * +-----------------------------------------+--------+------+------+
13372  */
13373 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13374 {
13375     int opcode = extract32(insn, 10, 2);
13376     int rn = extract32(insn, 5, 5);
13377     int rd = extract32(insn, 0, 5);
13378     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13379     int feature;
13380     CryptoTwoOpFn *genfn;
13381
13382     switch (opcode) {
13383     case 0: /* SHA512SU0 */
13384         feature = ARM_FEATURE_V8_SHA512;
13385         genfn = gen_helper_crypto_sha512su0;
13386         break;
13387     case 1: /* SM4E */
13388         feature = ARM_FEATURE_V8_SM4;
13389         genfn = gen_helper_crypto_sm4e;
13390         break;
13391     default:
13392         unallocated_encoding(s);
13393         return;
13394     }
13395
13396     if (!arm_dc_feature(s, feature)) {
13397         unallocated_encoding(s);
13398         return;
13399     }
13400
13401     if (!fp_access_check(s)) {
13402         return;
13403     }
13404
13405     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13406     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13407
13408     genfn(tcg_rd_ptr, tcg_rn_ptr);
13409
13410     tcg_temp_free_ptr(tcg_rd_ptr);
13411     tcg_temp_free_ptr(tcg_rn_ptr);
13412 }
13413
13414 /* Crypto four-register
13415  *  31               23 22 21 20  16 15  14  10 9    5 4    0
13416  * +-------------------+-----+------+---+------+------+------+
13417  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13418  * +-------------------+-----+------+---+------+------+------+
13419  */
13420 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13421 {
13422     int op0 = extract32(insn, 21, 2);
13423     int rm = extract32(insn, 16, 5);
13424     int ra = extract32(insn, 10, 5);
13425     int rn = extract32(insn, 5, 5);
13426     int rd = extract32(insn, 0, 5);
13427     int feature;
13428
13429     switch (op0) {
13430     case 0: /* EOR3 */
13431     case 1: /* BCAX */
13432         feature = ARM_FEATURE_V8_SHA3;
13433         break;
13434     case 2: /* SM3SS1 */
13435         feature = ARM_FEATURE_V8_SM3;
13436         break;
13437     default:
13438         unallocated_encoding(s);
13439         return;
13440     }
13441
13442     if (!arm_dc_feature(s, feature)) {
13443         unallocated_encoding(s);
13444         return;
13445     }
13446
13447     if (!fp_access_check(s)) {
13448         return;
13449     }
13450
13451     if (op0 < 2) {
13452         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13453         int pass;
13454
13455         tcg_op1 = tcg_temp_new_i64();
13456         tcg_op2 = tcg_temp_new_i64();
13457         tcg_op3 = tcg_temp_new_i64();
13458         tcg_res[0] = tcg_temp_new_i64();
13459         tcg_res[1] = tcg_temp_new_i64();
13460
13461         for (pass = 0; pass < 2; pass++) {
13462             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13463             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13464             read_vec_element(s, tcg_op3, ra, pass, MO_64);
13465
13466             if (op0 == 0) {
13467                 /* EOR3 */
13468                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13469             } else {
13470                 /* BCAX */
13471                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13472             }
13473             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13474         }
13475         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13476         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13477
13478         tcg_temp_free_i64(tcg_op1);
13479         tcg_temp_free_i64(tcg_op2);
13480         tcg_temp_free_i64(tcg_op3);
13481         tcg_temp_free_i64(tcg_res[0]);
13482         tcg_temp_free_i64(tcg_res[1]);
13483     } else {
13484         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13485
13486         tcg_op1 = tcg_temp_new_i32();
13487         tcg_op2 = tcg_temp_new_i32();
13488         tcg_op3 = tcg_temp_new_i32();
13489         tcg_res = tcg_temp_new_i32();
13490         tcg_zero = tcg_const_i32(0);
13491
13492         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13493         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13494         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13495
13496         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13497         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13498         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13499         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13500
13501         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13502         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13503         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13504         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13505
13506         tcg_temp_free_i32(tcg_op1);
13507         tcg_temp_free_i32(tcg_op2);
13508         tcg_temp_free_i32(tcg_op3);
13509         tcg_temp_free_i32(tcg_res);
13510         tcg_temp_free_i32(tcg_zero);
13511     }
13512 }
13513
13514 /* Crypto XAR
13515  *  31                   21 20  16 15    10 9    5 4    0
13516  * +-----------------------+------+--------+------+------+
13517  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13518  * +-----------------------+------+--------+------+------+
13519  */
13520 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13521 {
13522     int rm = extract32(insn, 16, 5);
13523     int imm6 = extract32(insn, 10, 6);
13524     int rn = extract32(insn, 5, 5);
13525     int rd = extract32(insn, 0, 5);
13526     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13527     int pass;
13528
13529     if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA3)) {
13530         unallocated_encoding(s);
13531         return;
13532     }
13533
13534     if (!fp_access_check(s)) {
13535         return;
13536     }
13537
13538     tcg_op1 = tcg_temp_new_i64();
13539     tcg_op2 = tcg_temp_new_i64();
13540     tcg_res[0] = tcg_temp_new_i64();
13541     tcg_res[1] = tcg_temp_new_i64();
13542
13543     for (pass = 0; pass < 2; pass++) {
13544         read_vec_element(s, tcg_op1, rn, pass, MO_64);
13545         read_vec_element(s, tcg_op2, rm, pass, MO_64);
13546
13547         tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
13548         tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6);
13549     }
13550     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13551     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13552
13553     tcg_temp_free_i64(tcg_op1);
13554     tcg_temp_free_i64(tcg_op2);
13555     tcg_temp_free_i64(tcg_res[0]);
13556     tcg_temp_free_i64(tcg_res[1]);
13557 }
13558
13559 /* Crypto three-reg imm2
13560  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13561  * +-----------------------+------+-----+------+--------+------+------+
13562  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13563  * +-----------------------+------+-----+------+--------+------+------+
13564  */
13565 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13566 {
13567     int opcode = extract32(insn, 10, 2);
13568     int imm2 = extract32(insn, 12, 2);
13569     int rm = extract32(insn, 16, 5);
13570     int rn = extract32(insn, 5, 5);
13571     int rd = extract32(insn, 0, 5);
13572     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13573     TCGv_i32 tcg_imm2, tcg_opcode;
13574
13575     if (!arm_dc_feature(s, ARM_FEATURE_V8_SM3)) {
13576         unallocated_encoding(s);
13577         return;
13578     }
13579
13580     if (!fp_access_check(s)) {
13581         return;
13582     }
13583
13584     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13585     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13586     tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13587     tcg_imm2   = tcg_const_i32(imm2);
13588     tcg_opcode = tcg_const_i32(opcode);
13589
13590     gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2,
13591                             tcg_opcode);
13592
13593     tcg_temp_free_ptr(tcg_rd_ptr);
13594     tcg_temp_free_ptr(tcg_rn_ptr);
13595     tcg_temp_free_ptr(tcg_rm_ptr);
13596     tcg_temp_free_i32(tcg_imm2);
13597     tcg_temp_free_i32(tcg_opcode);
13598 }
13599
/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 *
 * Each entry is { pattern, mask, fn }.
 * NOTE(review): presumably an entry matches when
 * (insn & mask) == pattern and lookup_disas_fn() returns the first
 * matching entry; the ordering note on simd_mod_imm below relies on
 * that -- confirm against lookup_disas_fn().
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    /* Vector forms */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    /* Scalar forms */
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    /* Crypto */
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
    { 0xce000000, 0xff808000, disas_crypto_four_reg },
    { 0xce800000, 0xffe00000, disas_crypto_xar },
    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
    /* Half-precision (fp16) forms */
    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
    /* NOTE(review): all-zero entry with NULL fn looks like the end marker */
    { 0x00000000, 0x00000000, NULL }
};
13641
13642 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13643 {
13644     /* Note that this is called with all non-FP cases from
13645      * table C3-6 so it must UNDEF for entries not specifically
13646      * allocated to instructions in that table.
13647      */
13648     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13649     if (fn) {
13650         fn(s, insn);
13651     } else {
13652         unallocated_encoding(s);
13653     }
13654 }
13655
13656 /* C3.6 Data processing - SIMD and floating point */
13657 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13658 {
13659     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13660         disas_data_proc_fp(s, insn);
13661     } else {
13662         /* SIMD, including crypto */
13663         disas_data_proc_simd(s, insn);
13664     }
13665 }
13666
13667 /* C3.1 A64 instruction index by encoding */
13668 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
13669 {
13670     uint32_t insn;
13671
13672     insn = arm_ldl_code(env, s->pc, s->sctlr_b);
13673     s->insn = insn;
13674     s->pc += 4;
13675
13676     s->fp_access_checked = false;
13677
13678     switch (extract32(insn, 25, 4)) {
13679     case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
13680         unallocated_encoding(s);
13681         break;
13682     case 0x2:
13683         if (!arm_dc_feature(s, ARM_FEATURE_SVE) || !disas_sve(s, insn)) {
13684             unallocated_encoding(s);
13685         }
13686         break;
13687     case 0x8: case 0x9: /* Data processing - immediate */
13688         disas_data_proc_imm(s, insn);
13689         break;
13690     case 0xa: case 0xb: /* Branch, exception generation and system insns */
13691         disas_b_exc_sys(s, insn);
13692         break;
13693     case 0x4:
13694     case 0x6:
13695     case 0xc:
13696     case 0xe:      /* Loads and stores */
13697         disas_ldst(s, insn);
13698         break;
13699     case 0x5:
13700     case 0xd:      /* Data processing - register */
13701         disas_data_proc_reg(s, insn);
13702         break;
13703     case 0x7:
13704     case 0xf:      /* Data processing - SIMD and floating point */
13705         disas_data_proc_simd_fp(s, insn);
13706         break;
13707     default:
13708         assert(FALSE); /* all 15 cases should be handled above */
13709         break;
13710     }
13711
13712     /* if we allocated any temporaries, free them here */
13713     free_tmp_a64(s);
13714 }
13715
13716 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
13717                                           CPUState *cpu)
13718 {
13719     DisasContext *dc = container_of(dcbase, DisasContext, base);
13720     CPUARMState *env = cpu->env_ptr;
13721     ARMCPU *arm_cpu = arm_env_get_cpu(env);
13722     int bound;
13723
13724     dc->pc = dc->base.pc_first;
13725     dc->condjmp = 0;
13726
13727     dc->aarch64 = 1;
13728     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
13729      * there is no secure EL1, so we route exceptions to EL3.
13730      */
13731     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
13732                                !arm_el_is_aa64(env, 3);
13733     dc->thumb = 0;
13734     dc->sctlr_b = 0;
13735     dc->be_data = ARM_TBFLAG_BE_DATA(dc->base.tb->flags) ? MO_BE : MO_LE;
13736     dc->condexec_mask = 0;
13737     dc->condexec_cond = 0;
13738     dc->mmu_idx = core_to_arm_mmu_idx(env, ARM_TBFLAG_MMUIDX(dc->base.tb->flags));
13739     dc->tbi0 = ARM_TBFLAG_TBI0(dc->base.tb->flags);
13740     dc->tbi1 = ARM_TBFLAG_TBI1(dc->base.tb->flags);
13741     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
13742 #if !defined(CONFIG_USER_ONLY)
13743     dc->user = (dc->current_el == 0);
13744 #endif
13745     dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(dc->base.tb->flags);
13746     dc->sve_excp_el = ARM_TBFLAG_SVEEXC_EL(dc->base.tb->flags);
13747     dc->sve_len = (ARM_TBFLAG_ZCR_LEN(dc->base.tb->flags) + 1) * 16;
13748     dc->vec_len = 0;
13749     dc->vec_stride = 0;
13750     dc->cp_regs = arm_cpu->cp_regs;
13751     dc->features = env->features;
13752
13753     /* Single step state. The code-generation logic here is:
13754      *  SS_ACTIVE == 0:
13755      *   generate code with no special handling for single-stepping (except
13756      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
13757      *   this happens anyway because those changes are all system register or
13758      *   PSTATE writes).
13759      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
13760      *   emit code for one insn
13761      *   emit code to clear PSTATE.SS
13762      *   emit code to generate software step exception for completed step
13763      *   end TB (as usual for having generated an exception)
13764      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
13765      *   emit code to generate a software step exception
13766      *   end the TB
13767      */
13768     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(dc->base.tb->flags);
13769     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(dc->base.tb->flags);
13770     dc->is_ldex = false;
13771     dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
13772
13773     /* Bound the number of insns to execute to those left on the page.  */
13774     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
13775
13776     /* If architectural single step active, limit to 1.  */
13777     if (dc->ss_active) {
13778         bound = 1;
13779     }
13780     dc->base.max_insns = MIN(dc->base.max_insns, bound);
13781
13782     init_tmp_a64_array(dc);
13783 }
13784
/* Translator hook run at the start of a TB.  The only work done here
 * is resetting the TCG temporary count; neither argument is used.
 */
static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
    tcg_clear_temp_count();
}
13789
13790 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
13791 {
13792     DisasContext *dc = container_of(dcbase, DisasContext, base);
13793
13794     tcg_gen_insn_start(dc->pc, 0, 0);
13795     dc->insn_start = tcg_last_op();
13796 }
13797
13798 static bool aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
13799                                         const CPUBreakpoint *bp)
13800 {
13801     DisasContext *dc = container_of(dcbase, DisasContext, base);
13802
13803     if (bp->flags & BP_CPU) {
13804         gen_a64_set_pc_im(dc->pc);
13805         gen_helper_check_breakpoints(cpu_env);
13806         /* End the TB early; it likely won't be executed */
13807         dc->base.is_jmp = DISAS_TOO_MANY;
13808     } else {
13809         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
13810         /* The address covered by the breakpoint must be
13811            included in [tb->pc, tb->pc + tb->size) in order
13812            to for it to be properly cleared -- thus we
13813            increment the PC here so that the logic setting
13814            tb->size below does the right thing.  */
13815         dc->pc += 4;
13816         dc->base.is_jmp = DISAS_NORETURN;
13817     }
13818
13819     return true;
13820 }
13821
13822 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
13823 {
13824     DisasContext *dc = container_of(dcbase, DisasContext, base);
13825     CPUARMState *env = cpu->env_ptr;
13826
13827     if (dc->ss_active && !dc->pstate_ss) {
13828         /* Singlestep state is Active-pending.
13829          * If we're in this state at the start of a TB then either
13830          *  a) we just took an exception to an EL which is being debugged
13831          *     and this is the first insn in the exception handler
13832          *  b) debug exceptions were masked and we just unmasked them
13833          *     without changing EL (eg by clearing PSTATE.D)
13834          * In either case we're going to take a swstep exception in the
13835          * "did not step an insn" case, and so the syndrome ISV and EX
13836          * bits should be zero.
13837          */
13838         assert(dc->base.num_insns == 1);
13839         gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
13840                       default_exception_el(dc));
13841         dc->base.is_jmp = DISAS_NORETURN;
13842     } else {
13843         disas_a64_insn(env, dc);
13844     }
13845
13846     dc->base.pc_next = dc->pc;
13847     translator_loop_temp_check(&dc->base);
13848 }
13849
13850 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
13851 {
13852     DisasContext *dc = container_of(dcbase, DisasContext, base);
13853
13854     if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
13855         /* Note that this means single stepping WFI doesn't halt the CPU.
13856          * For conditional branch insns this is harmless unreachable code as
13857          * gen_goto_tb() has already handled emitting the debug exception
13858          * (and thus a tb-jump is not possible when singlestepping).
13859          */
13860         switch (dc->base.is_jmp) {
13861         default:
13862             gen_a64_set_pc_im(dc->pc);
13863             /* fall through */
13864         case DISAS_EXIT:
13865         case DISAS_JUMP:
13866             if (dc->base.singlestep_enabled) {
13867                 gen_exception_internal(EXCP_DEBUG);
13868             } else {
13869                 gen_step_complete_exception(dc);
13870             }
13871             break;
13872         case DISAS_NORETURN:
13873             break;
13874         }
13875     } else {
13876         switch (dc->base.is_jmp) {
13877         case DISAS_NEXT:
13878         case DISAS_TOO_MANY:
13879             gen_goto_tb(dc, 1, dc->pc);
13880             break;
13881         default:
13882         case DISAS_UPDATE:
13883             gen_a64_set_pc_im(dc->pc);
13884             /* fall through */
13885         case DISAS_EXIT:
13886             tcg_gen_exit_tb(NULL, 0);
13887             break;
13888         case DISAS_JUMP:
13889             tcg_gen_lookup_and_goto_ptr();
13890             break;
13891         case DISAS_NORETURN:
13892         case DISAS_SWI:
13893             break;
13894         case DISAS_WFE:
13895             gen_a64_set_pc_im(dc->pc);
13896             gen_helper_wfe(cpu_env);
13897             break;
13898         case DISAS_YIELD:
13899             gen_a64_set_pc_im(dc->pc);
13900             gen_helper_yield(cpu_env);
13901             break;
13902         case DISAS_WFI:
13903         {
13904             /* This is a special case because we don't want to just halt the CPU
13905              * if trying to debug across a WFI.
13906              */
13907             TCGv_i32 tmp = tcg_const_i32(4);
13908
13909             gen_a64_set_pc_im(dc->pc);
13910             gen_helper_wfi(cpu_env, tmp);
13911             tcg_temp_free_i32(tmp);
13912             /* The helper doesn't necessarily throw an exception, but we
13913              * must go back to the main loop to check for interrupts anyway.
13914              */
13915             tcg_gen_exit_tb(NULL, 0);
13916             break;
13917         }
13918         }
13919     }
13920
13921     /* Functions above can change dc->pc, so re-align db->pc_next */
13922     dc->base.pc_next = dc->pc;
13923 }
13924
13925 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
13926                                       CPUState *cpu)
13927 {
13928     DisasContext *dc = container_of(dcbase, DisasContext, base);
13929
13930     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
13931     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
13932 }
13933
/* Hook table for the AArch64 translator: binds each TranslatorOps
 * callback slot to the corresponding aarch64_tr_* function in this file.
 */
const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .breakpoint_check   = aarch64_tr_breakpoint_check,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
    .disas_log          = aarch64_tr_disas_log,
};