target/arm/translate.c
1 /*
2 * ARM translation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "qemu/osdep.h"
22
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "hw/semihosting/semihost.h"
33
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
36
37 #include "trace-tcg.h"
38 #include "exec/log.h"
39
40
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
51
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
53
54 #include "translate.h"
55
56 #if defined(CONFIG_USER_ONLY)
57 #define IS_USER(s) 1
58 #else
59 #define IS_USER(s) (s->user)
60 #endif
61
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
68
69 #include "exec/gen-icount.h"
70
71 static const char * const regnames[] =
72 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
73 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
74
75 /* Function prototypes for gen_ functions calling Neon helpers. */
76 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
77 TCGv_i32, TCGv_i32);
78 /* Function prototypes for gen_ functions for fix point conversions */
79 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
80
81 /* initialize TCG globals. */
82 void arm_translate_init(void)
83 {
84 int i;
85
86 for (i = 0; i < 16; i++) {
87 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
88 offsetof(CPUARMState, regs[i]),
89 regnames[i]);
90 }
91 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
92 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
93 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
94 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
95
96 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
97 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
98 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_val), "exclusive_val");
100
101 a64_translate_init();
102 }
103
104 /* Flags for the disas_set_da_iss info argument:
105 * lower bits hold the Rt register number, higher bits are flags.
106 */
107 typedef enum ISSInfo {
108 ISSNone = 0,
109 ISSRegMask = 0x1f,
110 ISSInvalid = (1 << 5),
111 ISSIsAcqRel = (1 << 6),
112 ISSIsWrite = (1 << 7),
113 ISSIs16Bit = (1 << 8),
114 } ISSInfo;
115
116 /* Save the syndrome information for a Data Abort */
117 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
118 {
119 uint32_t syn;
120 int sas = memop & MO_SIZE;
121 bool sse = memop & MO_SIGN;
122 bool is_acqrel = issinfo & ISSIsAcqRel;
123 bool is_write = issinfo & ISSIsWrite;
124 bool is_16bit = issinfo & ISSIs16Bit;
125 int srt = issinfo & ISSRegMask;
126
127 if (issinfo & ISSInvalid) {
128 /* Some callsites want to conditionally provide ISS info,
129 * eg "only if this was not a writeback"
130 */
131 return;
132 }
133
134 if (srt == 15) {
135 /* For AArch32, insns where the src/dest is R15 never generate
136 * ISS information. Catching that here saves checking at all
137 * the call sites.
138 */
139 return;
140 }
141
142 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
143 0, 0, 0, is_write, 0, is_16bit);
144 disas_set_insn_syndrome(s, syn);
145 }
146
147 static inline int get_a32_user_mem_index(DisasContext *s)
148 {
149 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
150 * insns:
151 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
152 * otherwise, access as if at PL0.
153 */
154 switch (s->mmu_idx) {
155 case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
156 case ARMMMUIdx_E10_0:
157 case ARMMMUIdx_E10_1:
158 case ARMMMUIdx_E10_1_PAN:
159 return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
160 case ARMMMUIdx_SE3:
161 case ARMMMUIdx_SE10_0:
162 case ARMMMUIdx_SE10_1:
163 case ARMMMUIdx_SE10_1_PAN:
164 return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
177 default:
178 g_assert_not_reached();
179 }
180 }
181
182 static inline TCGv_i32 load_cpu_offset(int offset)
183 {
184 TCGv_i32 tmp = tcg_temp_new_i32();
185 tcg_gen_ld_i32(tmp, cpu_env, offset);
186 return tmp;
187 }
188
189 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
190
191 static inline void store_cpu_offset(TCGv_i32 var, int offset)
192 {
193 tcg_gen_st_i32(var, cpu_env, offset);
194 tcg_temp_free_i32(var);
195 }
196
197 #define store_cpu_field(var, name) \
198 store_cpu_offset(var, offsetof(CPUARMState, name))
199
200 /* The architectural value of PC. */
201 static uint32_t read_pc(DisasContext *s)
202 {
203 return s->pc_curr + (s->thumb ? 4 : 8);
204 }
205
206 /* Set a variable to the value of a CPU register. */
207 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
208 {
209 if (reg == 15) {
210 tcg_gen_movi_i32(var, read_pc(s));
211 } else {
212 tcg_gen_mov_i32(var, cpu_R[reg]);
213 }
214 }
215
216 /* Create a new temporary and set it to the value of a CPU register. */
217 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
218 {
219 TCGv_i32 tmp = tcg_temp_new_i32();
220 load_reg_var(s, tmp, reg);
221 return tmp;
222 }
223
224 /*
225 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
226 * This is used for load/store for which use of PC implies (literal),
227 * or ADD that implies ADR.
228 */
229 static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
230 {
231 TCGv_i32 tmp = tcg_temp_new_i32();
232
233 if (reg == 15) {
234 tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
235 } else {
236 tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
237 }
238 return tmp;
239 }
240
241 /* Set a CPU register. The source must be a temporary and will be
242 marked as dead. */
243 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
244 {
245 if (reg == 15) {
246 /* In Thumb mode, we must ignore bit 0.
247 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
248 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
249 * We choose to ignore [1:0] in ARM mode for all architecture versions.
250 */
251 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
252 s->base.is_jmp = DISAS_JUMP;
253 }
254 tcg_gen_mov_i32(cpu_R[reg], var);
255 tcg_temp_free_i32(var);
256 }
257
258 /*
259 * Variant of store_reg which applies v8M stack-limit checks before updating
260 * SP. If the check fails this will result in an exception being taken.
261 * We disable the stack checks for CONFIG_USER_ONLY because we have
262 * no idea what the stack limits should be in that case.
263 * If stack checking is not being done this just acts like store_reg().
264 */
265 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
266 {
267 #ifndef CONFIG_USER_ONLY
268 if (s->v8m_stackcheck) {
269 gen_helper_v8m_stackcheck(cpu_env, var);
270 }
271 #endif
272 store_reg(s, 13, var);
273 }
274
275 /* Value extensions. */
276 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
277 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
278 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
279 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
280
281 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
282 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
283
284
285 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
286 {
287 TCGv_i32 tmp_mask = tcg_const_i32(mask);
288 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
289 tcg_temp_free_i32(tmp_mask);
290 }
291 /* Set NZCV flags from the high 4 bits of var. */
292 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
293
294 static void gen_exception_internal(int excp)
295 {
296 TCGv_i32 tcg_excp = tcg_const_i32(excp);
297
298 assert(excp_is_internal(excp));
299 gen_helper_exception_internal(cpu_env, tcg_excp);
300 tcg_temp_free_i32(tcg_excp);
301 }
302
303 static void gen_step_complete_exception(DisasContext *s)
304 {
305 /* We just completed a step of an insn. Move from Active-not-pending
306 * to Active-pending, and then also take the swstep exception.
307 * This corresponds to making the (IMPDEF) choice to prioritize
308 * swstep exceptions over asynchronous exceptions taken to an exception
309 * level where debug is disabled. This choice has the advantage that
310 * we do not need to maintain internal state corresponding to the
311 * ISV/EX syndrome bits between completion of the step and generation
312 * of the exception, and our syndrome information is always correct.
313 */
314 gen_ss_advance(s);
315 gen_swstep_exception(s, 1, s->is_ldex);
316 s->base.is_jmp = DISAS_NORETURN;
317 }
318
319 static void gen_singlestep_exception(DisasContext *s)
320 {
321 /* Generate the right kind of exception for singlestep, which is
322 * either the architectural singlestep or EXCP_DEBUG for QEMU's
323 * gdb singlestepping.
324 */
325 if (s->ss_active) {
326 gen_step_complete_exception(s);
327 } else {
328 gen_exception_internal(EXCP_DEBUG);
329 }
330 }
331
332 static inline bool is_singlestepping(DisasContext *s)
333 {
334 /* Return true if we are singlestepping either because of
335 * architectural singlestep or QEMU gdbstub singlestep. This does
336 * not include the command line '-singlestep' mode which is rather
337 * misnamed as it only means "one instruction per TB" and doesn't
338 * affect the code we generate.
339 */
340 return s->base.singlestep_enabled || s->ss_active;
341 }
342
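/* Dual signed 16x16->32 multiply: on return a = lo(a)*lo(b) and
 * b = hi(a)*hi(b) (used by the SMUAD/SMUSD family of insns).
 */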
343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
344 {
345 TCGv_i32 tmp1 = tcg_temp_new_i32();
346 TCGv_i32 tmp2 = tcg_temp_new_i32();
347 tcg_gen_ext16s_i32(tmp1, a);
348 tcg_gen_ext16s_i32(tmp2, b);
349 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
350 tcg_temp_free_i32(tmp2);
351 tcg_gen_sari_i32(a, a, 16);
352 tcg_gen_sari_i32(b, b, 16);
353 tcg_gen_mul_i32(b, b, a);
354 tcg_gen_mov_i32(a, tmp1);
355 tcg_temp_free_i32(tmp1);
356 }
357
358 /* Byteswap each halfword. */
359 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
360 {
361 TCGv_i32 tmp = tcg_temp_new_i32();
362 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
363 tcg_gen_shri_i32(tmp, var, 8);
364 tcg_gen_and_i32(tmp, tmp, mask);
365 tcg_gen_and_i32(var, var, mask);
366 tcg_gen_shli_i32(var, var, 8);
367 tcg_gen_or_i32(dest, var, tmp);
368 tcg_temp_free_i32(mask);
369 tcg_temp_free_i32(tmp);
370 }
371
372 /* Byteswap low halfword and sign extend. */
373 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
374 {
375 tcg_gen_ext16u_i32(var, var);
376 tcg_gen_bswap16_i32(var, var);
377 tcg_gen_ext16s_i32(dest, var);
378 }
379
380 /* 32x32->64 multiply. Marks inputs as dead. */
381 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
382 {
383 TCGv_i32 lo = tcg_temp_new_i32();
384 TCGv_i32 hi = tcg_temp_new_i32();
385 TCGv_i64 ret;
386
387 tcg_gen_mulu2_i32(lo, hi, a, b);
388 tcg_temp_free_i32(a);
389 tcg_temp_free_i32(b);
390
391 ret = tcg_temp_new_i64();
392 tcg_gen_concat_i32_i64(ret, lo, hi);
393 tcg_temp_free_i32(lo);
394 tcg_temp_free_i32(hi);
395
396 return ret;
397 }
398
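/* Signed 32x32->64 multiply. Marks inputs as dead. */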
399 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
400 {
401 TCGv_i32 lo = tcg_temp_new_i32();
402 TCGv_i32 hi = tcg_temp_new_i32();
403 TCGv_i64 ret;
404
405 tcg_gen_muls2_i32(lo, hi, a, b);
406 tcg_temp_free_i32(a);
407 tcg_temp_free_i32(b);
408
409 ret = tcg_temp_new_i64();
410 tcg_gen_concat_i32_i64(ret, lo, hi);
411 tcg_temp_free_i32(lo);
412 tcg_temp_free_i32(hi);
413
414 return ret;
415 }
416
417 /* Swap low and high halfwords. */
418 static void gen_swap_half(TCGv_i32 var)
419 {
420 tcg_gen_rotri_i32(var, var, 16);
421 }
422
423 /* Dual 16-bit add. Result placed in t0 and t1 is marked as dead.
424 tmp = (t0 ^ t1) & 0x8000;
425 t0 &= ~0x8000;
426 t1 &= ~0x8000;
427 t0 = (t0 + t1) ^ tmp;
428 */
429
430 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
431 {
432 TCGv_i32 tmp = tcg_temp_new_i32();
433 tcg_gen_xor_i32(tmp, t0, t1);
434 tcg_gen_andi_i32(tmp, tmp, 0x8000);
435 tcg_gen_andi_i32(t0, t0, ~0x8000);
436 tcg_gen_andi_i32(t1, t1, ~0x8000);
437 tcg_gen_add_i32(t0, t0, t1);
438 tcg_gen_xor_i32(dest, t0, tmp);
439 tcg_temp_free_i32(tmp);
440 }
441
442 /* Set N and Z flags from var. */
443 static inline void gen_logic_CC(TCGv_i32 var)
444 {
445 tcg_gen_mov_i32(cpu_NF, var);
446 tcg_gen_mov_i32(cpu_ZF, var);
447 }
448
449 /* dest = T0 + T1 + CF. */
450 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
451 {
452 tcg_gen_add_i32(dest, t0, t1);
453 tcg_gen_add_i32(dest, dest, cpu_CF);
454 }
455
456 /* dest = T0 - T1 + CF - 1. */
457 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
458 {
459 tcg_gen_sub_i32(dest, t0, t1);
460 tcg_gen_add_i32(dest, dest, cpu_CF);
461 tcg_gen_subi_i32(dest, dest, 1);
462 }
463
464 /* dest = T0 + T1. Compute C, N, V and Z flags */
465 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
466 {
467 TCGv_i32 tmp = tcg_temp_new_i32();
468 tcg_gen_movi_i32(tmp, 0);
469 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
470 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
471 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
472 tcg_gen_xor_i32(tmp, t0, t1);
473 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
474 tcg_temp_free_i32(tmp);
475 tcg_gen_mov_i32(dest, cpu_NF);
476 }
477
478 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
479 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
480 {
481 TCGv_i32 tmp = tcg_temp_new_i32();
482 if (TCG_TARGET_HAS_add2_i32) {
483 tcg_gen_movi_i32(tmp, 0);
484 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
485 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
486 } else {
487 TCGv_i64 q0 = tcg_temp_new_i64();
488 TCGv_i64 q1 = tcg_temp_new_i64();
489 tcg_gen_extu_i32_i64(q0, t0);
490 tcg_gen_extu_i32_i64(q1, t1);
491 tcg_gen_add_i64(q0, q0, q1);
492 tcg_gen_extu_i32_i64(q1, cpu_CF);
493 tcg_gen_add_i64(q0, q0, q1);
494 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
495 tcg_temp_free_i64(q0);
496 tcg_temp_free_i64(q1);
497 }
498 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
499 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
500 tcg_gen_xor_i32(tmp, t0, t1);
501 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
502 tcg_temp_free_i32(tmp);
503 tcg_gen_mov_i32(dest, cpu_NF);
504 }
505
506 /* dest = T0 - T1. Compute C, N, V and Z flags */
507 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
508 {
509 TCGv_i32 tmp;
510 tcg_gen_sub_i32(cpu_NF, t0, t1);
511 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
512 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
513 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
514 tmp = tcg_temp_new_i32();
515 tcg_gen_xor_i32(tmp, t0, t1);
516 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
517 tcg_temp_free_i32(tmp);
518 tcg_gen_mov_i32(dest, cpu_NF);
519 }
520
521 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
522 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
523 {
524 TCGv_i32 tmp = tcg_temp_new_i32();
525 tcg_gen_not_i32(tmp, t1);
526 gen_adc_CC(dest, t0, tmp);
527 tcg_temp_free_i32(tmp);
528 }
529
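/* Variable shift by register: Arm LSL/LSR by a count of 32..255 yields 0,
 * so use a movcond to substitute a zero source operand for large counts,
 * i.e. tmp2 = (count > 31) ? 0 : t0, before applying the TCG shift,
 * which is only defined for counts < 32.
 */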
530 #define GEN_SHIFT(name) \
531 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
532 { \
533 TCGv_i32 tmp1, tmp2, tmp3; \
534 tmp1 = tcg_temp_new_i32(); \
535 tcg_gen_andi_i32(tmp1, t1, 0xff); \
536 tmp2 = tcg_const_i32(0); \
537 tmp3 = tcg_const_i32(0x1f); \
538 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
539 tcg_temp_free_i32(tmp3); \
540 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
541 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
542 tcg_temp_free_i32(tmp2); \
543 tcg_temp_free_i32(tmp1); \
544 }
545 GEN_SHIFT(shl)
546 GEN_SHIFT(shr)
547 #undef GEN_SHIFT
548
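/* ASR by register: counts of 32..255 behave like ASR #31 (all sign bits),
 * so clamp the count to 31 before shifting.
 */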
549 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
550 {
551 TCGv_i32 tmp1, tmp2;
552 tmp1 = tcg_temp_new_i32();
553 tcg_gen_andi_i32(tmp1, t1, 0xff);
554 tmp2 = tcg_const_i32(0x1f);
555 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
556 tcg_temp_free_i32(tmp2);
557 tcg_gen_sar_i32(dest, t0, tmp1);
558 tcg_temp_free_i32(tmp1);
559 }
560
561 static void shifter_out_im(TCGv_i32 var, int shift)
562 {
563 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
564 }
565
566 /* Shift by immediate. Includes special handling for shift == 0. */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568 int shift, int flags)
569 {
570 switch (shiftop) {
571 case 0: /* LSL */
572 if (shift != 0) {
573 if (flags)
574 shifter_out_im(var, 32 - shift);
575 tcg_gen_shli_i32(var, var, shift);
576 }
577 break;
578 case 1: /* LSR */
579 if (shift == 0) {
580 if (flags) {
581 tcg_gen_shri_i32(cpu_CF, var, 31);
582 }
583 tcg_gen_movi_i32(var, 0);
584 } else {
585 if (flags)
586 shifter_out_im(var, shift - 1);
587 tcg_gen_shri_i32(var, var, shift);
588 }
589 break;
590 case 2: /* ASR */
591 if (shift == 0)
592 shift = 32;
593 if (flags)
594 shifter_out_im(var, shift - 1);
595 if (shift == 32)
596 shift = 31;
597 tcg_gen_sari_i32(var, var, shift);
598 break;
599 case 3: /* ROR/RRX */
600 if (shift != 0) {
601 if (flags)
602 shifter_out_im(var, shift - 1);
603 tcg_gen_rotri_i32(var, var, shift); break;
604 } else {
605 TCGv_i32 tmp = tcg_temp_new_i32();
606 tcg_gen_shli_i32(tmp, cpu_CF, 31);
607 if (flags)
608 shifter_out_im(var, 0);
609 tcg_gen_shri_i32(var, var, 1);
610 tcg_gen_or_i32(var, var, tmp);
611 tcg_temp_free_i32(tmp);
612 }
613 }
614 }
615
616 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
617 TCGv_i32 shift, int flags)
618 {
619 if (flags) {
620 switch (shiftop) {
621 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
622 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
623 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
624 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
625 }
626 } else {
627 switch (shiftop) {
628 case 0:
629 gen_shl(var, var, shift);
630 break;
631 case 1:
632 gen_shr(var, var, shift);
633 break;
634 case 2:
635 gen_sar(var, var, shift);
636 break;
637 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
638 tcg_gen_rotr_i32(var, var, shift); break;
639 }
640 }
641 tcg_temp_free_i32(shift);
642 }
643
644 /*
645 * Generate a conditional based on ARM condition code cc.
646 * This is common between ARM and Aarch64 targets.
647 */
648 void arm_test_cc(DisasCompare *cmp, int cc)
649 {
650 TCGv_i32 value;
651 TCGCond cond;
652 bool global = true;
653
654 switch (cc) {
655 case 0: /* eq: Z */
656 case 1: /* ne: !Z */
657 cond = TCG_COND_EQ;
658 value = cpu_ZF;
659 break;
660
661 case 2: /* cs: C */
662 case 3: /* cc: !C */
663 cond = TCG_COND_NE;
664 value = cpu_CF;
665 break;
666
667 case 4: /* mi: N */
668 case 5: /* pl: !N */
669 cond = TCG_COND_LT;
670 value = cpu_NF;
671 break;
672
673 case 6: /* vs: V */
674 case 7: /* vc: !V */
675 cond = TCG_COND_LT;
676 value = cpu_VF;
677 break;
678
679 case 8: /* hi: C && !Z */
680 case 9: /* ls: !C || Z -> !(C && !Z) */
681 cond = TCG_COND_NE;
682 value = tcg_temp_new_i32();
683 global = false;
684 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
685 ZF is non-zero for !Z; so AND the two subexpressions. */
686 tcg_gen_neg_i32(value, cpu_CF);
687 tcg_gen_and_i32(value, value, cpu_ZF);
688 break;
689
690 case 10: /* ge: N == V -> N ^ V == 0 */
691 case 11: /* lt: N != V -> N ^ V != 0 */
692 /* Since we're only interested in the sign bit, == 0 is >= 0. */
693 cond = TCG_COND_GE;
694 value = tcg_temp_new_i32();
695 global = false;
696 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
697 break;
698
699 case 12: /* gt: !Z && N == V */
700 case 13: /* le: Z || N != V */
701 cond = TCG_COND_NE;
702 value = tcg_temp_new_i32();
703 global = false;
704 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
705 * the sign bit then AND with ZF to yield the result. */
706 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
707 tcg_gen_sari_i32(value, value, 31);
708 tcg_gen_andc_i32(value, cpu_ZF, value);
709 break;
710
711 case 14: /* always */
712 case 15: /* always */
713 /* Use the ALWAYS condition, which will fold early.
714 * It doesn't matter what we use for the value. */
715 cond = TCG_COND_ALWAYS;
716 value = cpu_ZF;
717 goto no_invert;
718
719 default:
720 fprintf(stderr, "Bad condition code 0x%x\n", cc);
721 abort();
722 }
723
724 if (cc & 1) {
725 cond = tcg_invert_cond(cond);
726 }
727
728 no_invert:
729 cmp->cond = cond;
730 cmp->value = value;
731 cmp->value_global = global;
732 }
733
734 void arm_free_cc(DisasCompare *cmp)
735 {
736 if (!cmp->value_global) {
737 tcg_temp_free_i32(cmp->value);
738 }
739 }
740
741 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
742 {
743 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
744 }
745
746 void arm_gen_test_cc(int cc, TCGLabel *label)
747 {
748 DisasCompare cmp;
749 arm_test_cc(&cmp, cc);
750 arm_jump_cc(&cmp, label);
751 arm_free_cc(&cmp);
752 }
753
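/* Sync the IT-block state for the current insn into env->condexec_bits,
 * e.g. before generating an exception, so that CPSR/ITSTATE reads see
 * the correct value.
 */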
754 static inline void gen_set_condexec(DisasContext *s)
755 {
756 if (s->condexec_mask) {
757 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
758 TCGv_i32 tmp = tcg_temp_new_i32();
759 tcg_gen_movi_i32(tmp, val);
760 store_cpu_field(tmp, condexec_bits);
761 }
762 }
763
764 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
765 {
766 tcg_gen_movi_i32(cpu_R[15], val);
767 }
768
769 /* Set PC and Thumb state from var. var is marked as dead. */
770 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
771 {
772 s->base.is_jmp = DISAS_JUMP;
773 tcg_gen_andi_i32(cpu_R[15], var, ~1);
774 tcg_gen_andi_i32(var, var, 1);
775 store_cpu_field(var, thumb);
776 }
777
778 /*
779 * Set PC and Thumb state from var. var is marked as dead.
780 * For M-profile CPUs, include logic to detect exception-return
781 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
782 * and BX reg, and no others, and happens only for code in Handler mode.
783 * The Security Extension also requires us to check for the FNC_RETURN
784 * which signals a function return from non-secure state; this can happen
785 * in both Handler and Thread mode.
786 * To avoid having to do multiple comparisons in inline generated code,
787 * we make the check we do here loose, so it will match for EXC_RETURN
788 * in Thread mode. For system emulation do_v7m_exception_exit() checks
789 * for these spurious cases and returns without doing anything (giving
790 * the same behaviour as for a branch to a non-magic address).
791 *
792 * In linux-user mode it is unclear what the right behaviour for an
793 * attempted FNC_RETURN should be, because in real hardware this will go
794 * directly to Secure code (ie not the Linux kernel) which will then treat
795 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
796 * attempt behave the way it would on a CPU without the security extension,
797 * which is to say "like a normal branch". That means we can simply treat
798 * all branches as normal with no magic address behaviour.
799 */
800 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
801 {
802 /* Generate the same code here as for a simple bx, but flag via
803 * s->base.is_jmp that we need to do the rest of the work later.
804 */
805 gen_bx(s, var);
806 #ifndef CONFIG_USER_ONLY
807 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
808 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
809 s->base.is_jmp = DISAS_BX_EXCRET;
810 }
811 #endif
812 }
813
814 static inline void gen_bx_excret_final_code(DisasContext *s)
815 {
816 /* Generate the code to finish possible exception return and end the TB */
817 TCGLabel *excret_label = gen_new_label();
818 uint32_t min_magic;
819
820 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
821 /* Covers FNC_RETURN and EXC_RETURN magic */
822 min_magic = FNC_RETURN_MIN_MAGIC;
823 } else {
824 /* EXC_RETURN magic only */
825 min_magic = EXC_RETURN_MIN_MAGIC;
826 }
827
828 /* Is the new PC value in the magic range indicating exception return? */
829 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
830 /* No: end the TB as we would for a DISAS_JMP */
831 if (is_singlestepping(s)) {
832 gen_singlestep_exception(s);
833 } else {
834 tcg_gen_exit_tb(NULL, 0);
835 }
836 gen_set_label(excret_label);
837 /* Yes: this is an exception return.
838 * At this point in runtime env->regs[15] and env->thumb will hold
839 * the exception-return magic number, which do_v7m_exception_exit()
840 * will read. Nothing else will be able to see those values because
841 * the cpu-exec main loop guarantees that we will always go straight
842 * from raising the exception to the exception-handling code.
843 *
844 * gen_ss_advance(s) does nothing on M profile currently but
845 * calling it is conceptually the right thing as we have executed
846 * this instruction (compare SWI, HVC, SMC handling).
847 */
848 gen_ss_advance(s);
849 gen_exception_internal(EXCP_EXCEPTION_EXIT);
850 }
851
852 static inline void gen_bxns(DisasContext *s, int rm)
853 {
854 TCGv_i32 var = load_reg(s, rm);
855
856 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
857 * we need to sync state before calling it, but:
858 * - we don't need to do gen_set_pc_im() because the bxns helper will
859 * always set the PC itself
860 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
861 * unless it's outside an IT block or the last insn in an IT block,
862 * so we know that condexec == 0 (already set at the top of the TB)
863 * is correct in the non-UNPREDICTABLE cases, and we can choose
864 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
865 */
866 gen_helper_v7m_bxns(cpu_env, var);
867 tcg_temp_free_i32(var);
868 s->base.is_jmp = DISAS_EXIT;
869 }
870
871 static inline void gen_blxns(DisasContext *s, int rm)
872 {
873 TCGv_i32 var = load_reg(s, rm);
874
875 /* We don't need to sync condexec state, for the same reason as bxns.
876 * We do however need to set the PC, because the blxns helper reads it.
877 * The blxns helper may throw an exception.
878 */
879 gen_set_pc_im(s, s->base.pc_next);
880 gen_helper_v7m_blxns(cpu_env, var);
881 tcg_temp_free_i32(var);
882 s->base.is_jmp = DISAS_EXIT;
883 }
884
885 /* Variant of store_reg which uses branch&exchange logic when storing
886 to r15 in ARM architecture v7 and above. The source must be a temporary
887 and will be marked as dead. */
888 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
889 {
890 if (reg == 15 && ENABLE_ARCH_7) {
891 gen_bx(s, var);
892 } else {
893 store_reg(s, reg, var);
894 }
895 }
896
897 /* Variant of store_reg which uses branch&exchange logic when storing
898 * to r15 in ARM architecture v5T and above. This is used for storing
899 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
900 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
901 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
902 {
903 if (reg == 15 && ENABLE_ARCH_5) {
904 gen_bx_excret(s, var);
905 } else {
906 store_reg(s, reg, var);
907 }
908 }
909
910 #ifdef CONFIG_USER_ONLY
911 #define IS_USER_ONLY 1
912 #else
913 #define IS_USER_ONLY 0
914 #endif
915
916 /* Abstractions of "generate code to do a guest load/store for
917 * AArch32", where a vaddr is always 32 bits (and is zero
918 * extended if we're a 64 bit core) and data is also
919 * 32 bits unless specifically doing a 64 bit access.
920 * These functions work like tcg_gen_qemu_{ld,st}* except
921 * that the address argument is TCGv_i32 rather than TCGv.
922 */
923
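/* Widen a 32-bit guest address to target_ulong. For system-mode BE32
 * (SCTLR.B set), sub-word accesses have the address XORed with
 * (4 - size): e.g. a byte access at A is performed at A ^ 3.
 */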
924 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
925 {
926 TCGv addr = tcg_temp_new();
927 tcg_gen_extu_i32_tl(addr, a32);
928
929 /* Not needed for user-mode BE32, where we use MO_BE instead. */
930 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
932 }
933 return addr;
934 }
935
936 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
937 int index, MemOp opc)
938 {
939 TCGv addr;
940
941 if (arm_dc_feature(s, ARM_FEATURE_M) &&
942 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
943 opc |= MO_ALIGN;
944 }
945
946 addr = gen_aa32_addr(s, a32, opc);
947 tcg_gen_qemu_ld_i32(val, addr, index, opc);
948 tcg_temp_free(addr);
949 }
950
951 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
952 int index, MemOp opc)
953 {
954 TCGv addr;
955
956 if (arm_dc_feature(s, ARM_FEATURE_M) &&
957 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
958 opc |= MO_ALIGN;
959 }
960
961 addr = gen_aa32_addr(s, a32, opc);
962 tcg_gen_qemu_st_i32(val, addr, index, opc);
963 tcg_temp_free(addr);
964 }
965
966 #define DO_GEN_LD(SUFF, OPC) \
967 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
968 TCGv_i32 a32, int index) \
969 { \
970 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
971 }
972
973 #define DO_GEN_ST(SUFF, OPC) \
974 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
975 TCGv_i32 a32, int index) \
976 { \
977 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
978 }
979
980 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
981 {
982 /* Not needed for user-mode BE32, where we use MO_BE instead. */
983 if (!IS_USER_ONLY && s->sctlr_b) {
984 tcg_gen_rotri_i64(val, val, 32);
985 }
986 }
987
988 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
989 int index, MemOp opc)
990 {
991 TCGv addr = gen_aa32_addr(s, a32, opc);
992 tcg_gen_qemu_ld_i64(val, addr, index, opc);
993 gen_aa32_frob64(s, val);
994 tcg_temp_free(addr);
995 }
996
997 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
998 TCGv_i32 a32, int index)
999 {
1000 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1001 }
1002
1003 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1004 int index, MemOp opc)
1005 {
1006 TCGv addr = gen_aa32_addr(s, a32, opc);
1007
1008 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1009 if (!IS_USER_ONLY && s->sctlr_b) {
1010 TCGv_i64 tmp = tcg_temp_new_i64();
1011 tcg_gen_rotri_i64(tmp, val, 32);
1012 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1013 tcg_temp_free_i64(tmp);
1014 } else {
1015 tcg_gen_qemu_st_i64(val, addr, index, opc);
1016 }
1017 tcg_temp_free(addr);
1018 }
1019
1020 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1021 TCGv_i32 a32, int index)
1022 {
1023 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1024 }
1025
1026 DO_GEN_LD(8u, MO_UB)
1027 DO_GEN_LD(16u, MO_UW)
1028 DO_GEN_LD(32u, MO_UL)
1029 DO_GEN_ST(8, MO_UB)
1030 DO_GEN_ST(16, MO_UW)
1031 DO_GEN_ST(32, MO_UL)
1032
1033 static inline void gen_hvc(DisasContext *s, int imm16)
1034 {
1035 /* The pre HVC helper handles cases when HVC gets trapped
1036 * as an undefined insn by runtime configuration (ie before
1037 * the insn really executes).
1038 */
1039 gen_set_pc_im(s, s->pc_curr);
1040 gen_helper_pre_hvc(cpu_env);
1041 /* Otherwise we will treat this as a real exception which
1042 * happens after execution of the insn. (The distinction matters
1043 * for the PC value reported to the exception handler and also
1044 * for single stepping.)
1045 */
1046 s->svc_imm = imm16;
1047 gen_set_pc_im(s, s->base.pc_next);
1048 s->base.is_jmp = DISAS_HVC;
1049 }
1050
1051 static inline void gen_smc(DisasContext *s)
1052 {
1053 /* As with HVC, we may take an exception either before or after
1054 * the insn executes.
1055 */
1056 TCGv_i32 tmp;
1057
1058 gen_set_pc_im(s, s->pc_curr);
1059 tmp = tcg_const_i32(syn_aa32_smc());
1060 gen_helper_pre_smc(cpu_env, tmp);
1061 tcg_temp_free_i32(tmp);
1062 gen_set_pc_im(s, s->base.pc_next);
1063 s->base.is_jmp = DISAS_SMC;
1064 }
1065
1066 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1067 {
1068 gen_set_condexec(s);
1069 gen_set_pc_im(s, pc);
1070 gen_exception_internal(excp);
1071 s->base.is_jmp = DISAS_NORETURN;
1072 }
1073
1074 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1075 int syn, uint32_t target_el)
1076 {
1077 gen_set_condexec(s);
1078 gen_set_pc_im(s, pc);
1079 gen_exception(excp, syn, target_el);
1080 s->base.is_jmp = DISAS_NORETURN;
1081 }
1082
1083 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1084 {
1085 TCGv_i32 tcg_syn;
1086
1087 gen_set_condexec(s);
1088 gen_set_pc_im(s, s->pc_curr);
1089 tcg_syn = tcg_const_i32(syn);
1090 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1091 tcg_temp_free_i32(tcg_syn);
1092 s->base.is_jmp = DISAS_NORETURN;
1093 }
1094
1095 static void unallocated_encoding(DisasContext *s)
1096 {
1097 /* Unallocated and reserved encodings are uncategorized */
1098 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1099 default_exception_el(s));
1100 }
1101
1102 /* Force a TB lookup after an instruction that changes the CPU state. */
1103 static inline void gen_lookup_tb(DisasContext *s)
1104 {
1105 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1106 s->base.is_jmp = DISAS_EXIT;
1107 }
1108
1109 static inline void gen_hlt(DisasContext *s, int imm)
1110 {
1111 /* HLT. This has two purposes.
1112 * Architecturally, it is an external halting debug instruction.
1113 * Since QEMU doesn't implement external debug, we treat this as the
1114 * architecture requires when halting debug is disabled: it will UNDEF.
1115 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1116 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1117 * must trigger semihosting even for ARMv7 and earlier, where
1118 * HLT was an undefined encoding.
1119 * In system mode, we don't allow userspace access to
1120 * semihosting, to provide some semblance of security
1121 * (and for consistency with our 32-bit semihosting).
1122 */
1123 if (semihosting_enabled() &&
1124 #ifndef CONFIG_USER_ONLY
1125 s->current_el != 0 &&
1126 #endif
1127 (imm == (s->thumb ? 0x3c : 0xf000))) {
1128 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1129 return;
1130 }
1131
1132 unallocated_encoding(s);
1133 }
1134
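/* Return a pointer to the float_status to use: the Neon "standard FP"
 * status (flush-to-zero, default NaN) when neon is set, otherwise the
 * FPSCR-controlled vfp.fp_status.
 */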
1135 static TCGv_ptr get_fpstatus_ptr(int neon)
1136 {
1137 TCGv_ptr statusptr = tcg_temp_new_ptr();
1138 int offset;
1139 if (neon) {
1140 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1141 } else {
1142 offset = offsetof(CPUARMState, vfp.fp_status);
1143 }
1144 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1145 return statusptr;
1146 }
1147
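/* Return the byte offset into CPUARMState of VFP register reg:
 * a D register if dp, otherwise an S register (stored as one 32-bit
 * half of the corresponding zregs[] double).
 */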
1148 static inline long vfp_reg_offset(bool dp, unsigned reg)
1149 {
1150 if (dp) {
1151 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1152 } else {
1153 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1154 if (reg & 1) {
1155 ofs += offsetof(CPU_DoubleU, l.upper);
1156 } else {
1157 ofs += offsetof(CPU_DoubleU, l.lower);
1158 }
1159 return ofs;
1160 }
1161 }
1162
1163 /* Return the offset of a 32-bit piece of a NEON register.
1164 zero is the least significant end of the register. */
1165 static inline long
1166 neon_reg_offset (int reg, int n)
1167 {
1168 int sreg;
1169 sreg = reg * 2 + n;
1170 return vfp_reg_offset(0, sreg);
1171 }
1172
1173 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1174 * where 0 is the least significant end of the register.
1175 */
1176 static inline long
1177 neon_element_offset(int reg, int element, MemOp size)
1178 {
1179 int element_size = 1 << size;
1180 int ofs = element * element_size;
1181 #ifdef HOST_WORDS_BIGENDIAN
1182 /* Calculate the offset assuming fully little-endian,
1183 * then XOR to account for the order of the 8-byte units.
1184 */
1185 if (element_size < 8) {
1186 ofs ^= 8 - element_size;
1187 }
1188 #endif
1189 return neon_reg_offset(reg, 0) + ofs;
1190 }
1191
1192 static TCGv_i32 neon_load_reg(int reg, int pass)
1193 {
1194 TCGv_i32 tmp = tcg_temp_new_i32();
1195 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1196 return tmp;
1197 }
1198
1199 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1200 {
1201 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1202
1203 switch (mop) {
1204 case MO_UB:
1205 tcg_gen_ld8u_i32(var, cpu_env, offset);
1206 break;
1207 case MO_UW:
1208 tcg_gen_ld16u_i32(var, cpu_env, offset);
1209 break;
1210 case MO_UL:
1211 tcg_gen_ld_i32(var, cpu_env, offset);
1212 break;
1213 default:
1214 g_assert_not_reached();
1215 }
1216 }
1217
1218 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1219 {
1220 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1221
1222 switch (mop) {
1223 case MO_UB:
1224 tcg_gen_ld8u_i64(var, cpu_env, offset);
1225 break;
1226 case MO_UW:
1227 tcg_gen_ld16u_i64(var, cpu_env, offset);
1228 break;
1229 case MO_UL:
1230 tcg_gen_ld32u_i64(var, cpu_env, offset);
1231 break;
1232 case MO_Q:
1233 tcg_gen_ld_i64(var, cpu_env, offset);
1234 break;
1235 default:
1236 g_assert_not_reached();
1237 }
1238 }
1239
1240 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1241 {
1242 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1243 tcg_temp_free_i32(var);
1244 }
1245
1246 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1247 {
1248 long offset = neon_element_offset(reg, ele, size);
1249
1250 switch (size) {
1251 case MO_8:
1252 tcg_gen_st8_i32(var, cpu_env, offset);
1253 break;
1254 case MO_16:
1255 tcg_gen_st16_i32(var, cpu_env, offset);
1256 break;
1257 case MO_32:
1258 tcg_gen_st_i32(var, cpu_env, offset);
1259 break;
1260 default:
1261 g_assert_not_reached();
1262 }
1263 }
1264
1265 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1266 {
1267 long offset = neon_element_offset(reg, ele, size);
1268
1269 switch (size) {
1270 case MO_8:
1271 tcg_gen_st8_i64(var, cpu_env, offset);
1272 break;
1273 case MO_16:
1274 tcg_gen_st16_i64(var, cpu_env, offset);
1275 break;
1276 case MO_32:
1277 tcg_gen_st32_i64(var, cpu_env, offset);
1278 break;
1279 case MO_64:
1280 tcg_gen_st_i64(var, cpu_env, offset);
1281 break;
1282 default:
1283 g_assert_not_reached();
1284 }
1285 }
1286
1287 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1288 {
1289 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1290 }
1291
1292 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1293 {
1294 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1295 }
1296
1297 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1298 {
1299 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1300 }
1301
1302 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1303 {
1304 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1305 }
1306
1307 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1308 {
1309 TCGv_ptr ret = tcg_temp_new_ptr();
1310 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1311 return ret;
1312 }
1313
1314 #define ARM_CP_RW_BIT (1 << 20)
1315
1316 /* Include the VFP and Neon decoders */
1317 #include "translate-vfp.inc.c"
1318 #include "translate-neon.inc.c"
1319
1320 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1321 {
1322 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1323 }
1324
1325 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1326 {
1327 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1328 }
1329
1330 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1331 {
1332 TCGv_i32 var = tcg_temp_new_i32();
1333 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1334 return var;
1335 }
1336
1337 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1338 {
1339 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1340 tcg_temp_free_i32(var);
1341 }
1342
1343 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1344 {
1345 iwmmxt_store_reg(cpu_M0, rn);
1346 }
1347
1348 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1349 {
1350 iwmmxt_load_reg(cpu_M0, rn);
1351 }
1352
1353 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1354 {
1355 iwmmxt_load_reg(cpu_V1, rn);
1356 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1357 }
1358
1359 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1360 {
1361 iwmmxt_load_reg(cpu_V1, rn);
1362 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1363 }
1364
1365 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1366 {
1367 iwmmxt_load_reg(cpu_V1, rn);
1368 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1369 }
1370
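/* Generate gen_op_iwmmxt_*_M0_wRn() wrappers which load wRn into V1 and
 * apply the matching iwMMXt helper to M0; the _ENV variants also pass
 * cpu_env for helpers that need CPU state (e.g. to update saturation
 * flags).
 */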
1371 #define IWMMXT_OP(name) \
1372 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1373 { \
1374 iwmmxt_load_reg(cpu_V1, rn); \
1375 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1376 }
1377
1378 #define IWMMXT_OP_ENV(name) \
1379 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1380 { \
1381 iwmmxt_load_reg(cpu_V1, rn); \
1382 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1383 }
1384
1385 #define IWMMXT_OP_ENV_SIZE(name) \
1386 IWMMXT_OP_ENV(name##b) \
1387 IWMMXT_OP_ENV(name##w) \
1388 IWMMXT_OP_ENV(name##l)
1389
1390 #define IWMMXT_OP_ENV1(name) \
1391 static inline void gen_op_iwmmxt_##name##_M0(void) \
1392 { \
1393 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1394 }
1395
1396 IWMMXT_OP(maddsq)
1397 IWMMXT_OP(madduq)
1398 IWMMXT_OP(sadb)
1399 IWMMXT_OP(sadw)
1400 IWMMXT_OP(mulslw)
1401 IWMMXT_OP(mulshw)
1402 IWMMXT_OP(mululw)
1403 IWMMXT_OP(muluhw)
1404 IWMMXT_OP(macsw)
1405 IWMMXT_OP(macuw)
1406
1407 IWMMXT_OP_ENV_SIZE(unpackl)
1408 IWMMXT_OP_ENV_SIZE(unpackh)
1409
1410 IWMMXT_OP_ENV1(unpacklub)
1411 IWMMXT_OP_ENV1(unpackluw)
1412 IWMMXT_OP_ENV1(unpacklul)
1413 IWMMXT_OP_ENV1(unpackhub)
1414 IWMMXT_OP_ENV1(unpackhuw)
1415 IWMMXT_OP_ENV1(unpackhul)
1416 IWMMXT_OP_ENV1(unpacklsb)
1417 IWMMXT_OP_ENV1(unpacklsw)
1418 IWMMXT_OP_ENV1(unpacklsl)
1419 IWMMXT_OP_ENV1(unpackhsb)
1420 IWMMXT_OP_ENV1(unpackhsw)
1421 IWMMXT_OP_ENV1(unpackhsl)
1422
1423 IWMMXT_OP_ENV_SIZE(cmpeq)
1424 IWMMXT_OP_ENV_SIZE(cmpgtu)
1425 IWMMXT_OP_ENV_SIZE(cmpgts)
1426
1427 IWMMXT_OP_ENV_SIZE(mins)
1428 IWMMXT_OP_ENV_SIZE(minu)
1429 IWMMXT_OP_ENV_SIZE(maxs)
1430 IWMMXT_OP_ENV_SIZE(maxu)
1431
1432 IWMMXT_OP_ENV_SIZE(subn)
1433 IWMMXT_OP_ENV_SIZE(addn)
1434 IWMMXT_OP_ENV_SIZE(subu)
1435 IWMMXT_OP_ENV_SIZE(addu)
1436 IWMMXT_OP_ENV_SIZE(subs)
1437 IWMMXT_OP_ENV_SIZE(adds)
1438
1439 IWMMXT_OP_ENV(avgb0)
1440 IWMMXT_OP_ENV(avgb1)
1441 IWMMXT_OP_ENV(avgw0)
1442 IWMMXT_OP_ENV(avgw1)
1443
1444 IWMMXT_OP_ENV(packuw)
1445 IWMMXT_OP_ENV(packul)
1446 IWMMXT_OP_ENV(packuq)
1447 IWMMXT_OP_ENV(packsw)
1448 IWMMXT_OP_ENV(packsl)
1449 IWMMXT_OP_ENV(packsq)
1450
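/* Set the MUP (bit 1) / CUP (bit 0) status bits in the wCon control
 * register.
 */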
1451 static void gen_op_iwmmxt_set_mup(void)
1452 {
1453 TCGv_i32 tmp;
1454 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455 tcg_gen_ori_i32(tmp, tmp, 2);
1456 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1457 }
1458
1459 static void gen_op_iwmmxt_set_cup(void)
1460 {
1461 TCGv_i32 tmp;
1462 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463 tcg_gen_ori_i32(tmp, tmp, 1);
1464 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1465 }
1466
1467 static void gen_op_iwmmxt_setpsr_nz(void)
1468 {
1469 TCGv_i32 tmp = tcg_temp_new_i32();
1470 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1471 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1472 }
1473
1474 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1475 {
1476 iwmmxt_load_reg(cpu_V1, rn);
1477 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1478 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1479 }
1480
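/* Compute the address for an iwMMXt load/store into dest, handling
 * pre/post-indexing and base-register writeback; returns nonzero for
 * an addressing form that we treat as UNDEF.
 */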
1481 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1482 TCGv_i32 dest)
1483 {
1484 int rd;
1485 uint32_t offset;
1486 TCGv_i32 tmp;
1487
1488 rd = (insn >> 16) & 0xf;
1489 tmp = load_reg(s, rd);
1490
1491 offset = (insn & 0xff) << ((insn >> 7) & 2);
1492 if (insn & (1 << 24)) {
1493 /* Pre indexed */
1494 if (insn & (1 << 23))
1495 tcg_gen_addi_i32(tmp, tmp, offset);
1496 else
1497 tcg_gen_addi_i32(tmp, tmp, -offset);
1498 tcg_gen_mov_i32(dest, tmp);
1499 if (insn & (1 << 21))
1500 store_reg(s, rd, tmp);
1501 else
1502 tcg_temp_free_i32(tmp);
1503 } else if (insn & (1 << 21)) {
1504 /* Post indexed */
1505 tcg_gen_mov_i32(dest, tmp);
1506 if (insn & (1 << 23))
1507 tcg_gen_addi_i32(tmp, tmp, offset);
1508 else
1509 tcg_gen_addi_i32(tmp, tmp, -offset);
1510 store_reg(s, rd, tmp);
1511 } else if (!(insn & (1 << 23)))
1512 return 1;
1513 return 0;
1514 }
1515
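/* Fetch the shift amount for an iwMMXt shift insn into dest: either from
 * wCGR0..wCGR3 (bit 8 set) or from the low half of wRn, masked with mask;
 * returns nonzero for an invalid register choice.
 */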
1516 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1517 {
1518 int rd = (insn >> 0) & 0xf;
1519 TCGv_i32 tmp;
1520
1521 if (insn & (1 << 8)) {
1522 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1523 return 1;
1524 } else {
1525 tmp = iwmmxt_load_creg(rd);
1526 }
1527 } else {
1528 tmp = tcg_temp_new_i32();
1529 iwmmxt_load_reg(cpu_V0, rd);
1530 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1531 }
1532 tcg_gen_andi_i32(tmp, tmp, mask);
1533 tcg_gen_mov_i32(dest, tmp);
1534 tcg_temp_free_i32(tmp);
1535 return 0;
1536 }
1537
1538 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1539 (ie. an undefined instruction). */
1540 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1541 {
1542 int rd, wrd;
1543 int rdhi, rdlo, rd0, rd1, i;
1544 TCGv_i32 addr;
1545 TCGv_i32 tmp, tmp2, tmp3;
1546
1547 if ((insn & 0x0e000e00) == 0x0c000000) {
1548 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1549 wrd = insn & 0xf;
1550 rdlo = (insn >> 12) & 0xf;
1551 rdhi = (insn >> 16) & 0xf;
1552 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1553 iwmmxt_load_reg(cpu_V0, wrd);
1554 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1555 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1556 } else { /* TMCRR */
1557 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1558 iwmmxt_store_reg(cpu_V0, wrd);
1559 gen_op_iwmmxt_set_mup();
1560 }
1561 return 0;
1562 }
1563
1564 wrd = (insn >> 12) & 0xf;
1565 addr = tcg_temp_new_i32();
1566 if (gen_iwmmxt_address(s, insn, addr)) {
1567 tcg_temp_free_i32(addr);
1568 return 1;
1569 }
1570 if (insn & ARM_CP_RW_BIT) {
1571 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1572 tmp = tcg_temp_new_i32();
1573 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1574 iwmmxt_store_creg(wrd, tmp);
1575 } else {
1576 i = 1;
1577 if (insn & (1 << 8)) {
1578 if (insn & (1 << 22)) { /* WLDRD */
1579 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1580 i = 0;
1581 } else { /* WLDRW wRd */
1582 tmp = tcg_temp_new_i32();
1583 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1584 }
1585 } else {
1586 tmp = tcg_temp_new_i32();
1587 if (insn & (1 << 22)) { /* WLDRH */
1588 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1589 } else { /* WLDRB */
1590 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1591 }
1592 }
1593 if (i) {
1594 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1595 tcg_temp_free_i32(tmp);
1596 }
1597 gen_op_iwmmxt_movq_wRn_M0(wrd);
1598 }
1599 } else {
1600 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1601 tmp = iwmmxt_load_creg(wrd);
1602 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1603 } else {
1604 gen_op_iwmmxt_movq_M0_wRn(wrd);
1605 tmp = tcg_temp_new_i32();
1606 if (insn & (1 << 8)) {
1607 if (insn & (1 << 22)) { /* WSTRD */
1608 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1609 } else { /* WSTRW wRd */
1610 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1611 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1612 }
1613 } else {
1614 if (insn & (1 << 22)) { /* WSTRH */
1615 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1616 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1617 } else { /* WSTRB */
1618 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1619 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1620 }
1621 }
1622 }
1623 tcg_temp_free_i32(tmp);
1624 }
1625 tcg_temp_free_i32(addr);
1626 return 0;
1627 }
1628
1629 if ((insn & 0x0f000000) != 0x0e000000)
1630 return 1;
1631
1632 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1633 case 0x000: /* WOR */
1634 wrd = (insn >> 12) & 0xf;
1635 rd0 = (insn >> 0) & 0xf;
1636 rd1 = (insn >> 16) & 0xf;
1637 gen_op_iwmmxt_movq_M0_wRn(rd0);
1638 gen_op_iwmmxt_orq_M0_wRn(rd1);
1639 gen_op_iwmmxt_setpsr_nz();
1640 gen_op_iwmmxt_movq_wRn_M0(wrd);
1641 gen_op_iwmmxt_set_mup();
1642 gen_op_iwmmxt_set_cup();
1643 break;
1644 case 0x011: /* TMCR */
1645 if (insn & 0xf)
1646 return 1;
1647 rd = (insn >> 12) & 0xf;
1648 wrd = (insn >> 16) & 0xf;
1649 switch (wrd) {
1650 case ARM_IWMMXT_wCID:
1651 case ARM_IWMMXT_wCASF:
1652 break;
1653 case ARM_IWMMXT_wCon:
1654 gen_op_iwmmxt_set_cup();
1655 /* Fall through. */
1656 case ARM_IWMMXT_wCSSF:
1657 tmp = iwmmxt_load_creg(wrd);
1658 tmp2 = load_reg(s, rd);
1659 tcg_gen_andc_i32(tmp, tmp, tmp2);
1660 tcg_temp_free_i32(tmp2);
1661 iwmmxt_store_creg(wrd, tmp);
1662 break;
1663 case ARM_IWMMXT_wCGR0:
1664 case ARM_IWMMXT_wCGR1:
1665 case ARM_IWMMXT_wCGR2:
1666 case ARM_IWMMXT_wCGR3:
1667 gen_op_iwmmxt_set_cup();
1668 tmp = load_reg(s, rd);
1669 iwmmxt_store_creg(wrd, tmp);
1670 break;
1671 default:
1672 return 1;
1673 }
1674 break;
1675 case 0x100: /* WXOR */
1676 wrd = (insn >> 12) & 0xf;
1677 rd0 = (insn >> 0) & 0xf;
1678 rd1 = (insn >> 16) & 0xf;
1679 gen_op_iwmmxt_movq_M0_wRn(rd0);
1680 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1681 gen_op_iwmmxt_setpsr_nz();
1682 gen_op_iwmmxt_movq_wRn_M0(wrd);
1683 gen_op_iwmmxt_set_mup();
1684 gen_op_iwmmxt_set_cup();
1685 break;
1686 case 0x111: /* TMRC */
1687 if (insn & 0xf)
1688 return 1;
1689 rd = (insn >> 12) & 0xf;
1690 wrd = (insn >> 16) & 0xf;
1691 tmp = iwmmxt_load_creg(wrd);
1692 store_reg(s, rd, tmp);
1693 break;
1694 case 0x300: /* WANDN */
1695 wrd = (insn >> 12) & 0xf;
1696 rd0 = (insn >> 0) & 0xf;
1697 rd1 = (insn >> 16) & 0xf;
1698 gen_op_iwmmxt_movq_M0_wRn(rd0);
1699 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1700 gen_op_iwmmxt_andq_M0_wRn(rd1);
1701 gen_op_iwmmxt_setpsr_nz();
1702 gen_op_iwmmxt_movq_wRn_M0(wrd);
1703 gen_op_iwmmxt_set_mup();
1704 gen_op_iwmmxt_set_cup();
1705 break;
1706 case 0x200: /* WAND */
1707 wrd = (insn >> 12) & 0xf;
1708 rd0 = (insn >> 0) & 0xf;
1709 rd1 = (insn >> 16) & 0xf;
1710 gen_op_iwmmxt_movq_M0_wRn(rd0);
1711 gen_op_iwmmxt_andq_M0_wRn(rd1);
1712 gen_op_iwmmxt_setpsr_nz();
1713 gen_op_iwmmxt_movq_wRn_M0(wrd);
1714 gen_op_iwmmxt_set_mup();
1715 gen_op_iwmmxt_set_cup();
1716 break;
1717 case 0x810: case 0xa10: /* WMADD */
1718 wrd = (insn >> 12) & 0xf;
1719 rd0 = (insn >> 0) & 0xf;
1720 rd1 = (insn >> 16) & 0xf;
1721 gen_op_iwmmxt_movq_M0_wRn(rd0);
1722 if (insn & (1 << 21))
1723 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1724 else
1725 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1726 gen_op_iwmmxt_movq_wRn_M0(wrd);
1727 gen_op_iwmmxt_set_mup();
1728 break;
1729 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1730 wrd = (insn >> 12) & 0xf;
1731 rd0 = (insn >> 16) & 0xf;
1732 rd1 = (insn >> 0) & 0xf;
1733 gen_op_iwmmxt_movq_M0_wRn(rd0);
1734 switch ((insn >> 22) & 3) {
1735 case 0:
1736 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1737 break;
1738 case 1:
1739 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1740 break;
1741 case 2:
1742 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1743 break;
1744 case 3:
1745 return 1;
1746 }
1747 gen_op_iwmmxt_movq_wRn_M0(wrd);
1748 gen_op_iwmmxt_set_mup();
1749 gen_op_iwmmxt_set_cup();
1750 break;
1751 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1752 wrd = (insn >> 12) & 0xf;
1753 rd0 = (insn >> 16) & 0xf;
1754 rd1 = (insn >> 0) & 0xf;
1755 gen_op_iwmmxt_movq_M0_wRn(rd0);
1756 switch ((insn >> 22) & 3) {
1757 case 0:
1758 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1759 break;
1760 case 1:
1761 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1762 break;
1763 case 2:
1764 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1765 break;
1766 case 3:
1767 return 1;
1768 }
1769 gen_op_iwmmxt_movq_wRn_M0(wrd);
1770 gen_op_iwmmxt_set_mup();
1771 gen_op_iwmmxt_set_cup();
1772 break;
1773 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1774 wrd = (insn >> 12) & 0xf;
1775 rd0 = (insn >> 16) & 0xf;
1776 rd1 = (insn >> 0) & 0xf;
1777 gen_op_iwmmxt_movq_M0_wRn(rd0);
1778 if (insn & (1 << 22))
1779 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1780 else
1781 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1782 if (!(insn & (1 << 20)))
1783 gen_op_iwmmxt_addl_M0_wRn(wrd);
1784 gen_op_iwmmxt_movq_wRn_M0(wrd);
1785 gen_op_iwmmxt_set_mup();
1786 break;
1787 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1788 wrd = (insn >> 12) & 0xf;
1789 rd0 = (insn >> 16) & 0xf;
1790 rd1 = (insn >> 0) & 0xf;
1791 gen_op_iwmmxt_movq_M0_wRn(rd0);
1792 if (insn & (1 << 21)) {
1793 if (insn & (1 << 20))
1794 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1795 else
1796 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1797 } else {
1798 if (insn & (1 << 20))
1799 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1800 else
1801 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1802 }
1803 gen_op_iwmmxt_movq_wRn_M0(wrd);
1804 gen_op_iwmmxt_set_mup();
1805 break;
1806 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1807 wrd = (insn >> 12) & 0xf;
1808 rd0 = (insn >> 16) & 0xf;
1809 rd1 = (insn >> 0) & 0xf;
1810 gen_op_iwmmxt_movq_M0_wRn(rd0);
1811 if (insn & (1 << 21))
1812 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1813 else
1814 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1815 if (!(insn & (1 << 20))) {
1816 iwmmxt_load_reg(cpu_V1, wrd);
1817 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1818 }
1819 gen_op_iwmmxt_movq_wRn_M0(wrd);
1820 gen_op_iwmmxt_set_mup();
1821 break;
1822 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1823 wrd = (insn >> 12) & 0xf;
1824 rd0 = (insn >> 16) & 0xf;
1825 rd1 = (insn >> 0) & 0xf;
1826 gen_op_iwmmxt_movq_M0_wRn(rd0);
1827 switch ((insn >> 22) & 3) {
1828 case 0:
1829 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1830 break;
1831 case 1:
1832 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1833 break;
1834 case 2:
1835 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1836 break;
1837 case 3:
1838 return 1;
1839 }
1840 gen_op_iwmmxt_movq_wRn_M0(wrd);
1841 gen_op_iwmmxt_set_mup();
1842 gen_op_iwmmxt_set_cup();
1843 break;
1844 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1845 wrd = (insn >> 12) & 0xf;
1846 rd0 = (insn >> 16) & 0xf;
1847 rd1 = (insn >> 0) & 0xf;
1848 gen_op_iwmmxt_movq_M0_wRn(rd0);
1849 if (insn & (1 << 22)) {
1850 if (insn & (1 << 20))
1851 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1852 else
1853 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1854 } else {
1855 if (insn & (1 << 20))
1856 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1857 else
1858 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1859 }
1860 gen_op_iwmmxt_movq_wRn_M0(wrd);
1861 gen_op_iwmmxt_set_mup();
1862 gen_op_iwmmxt_set_cup();
1863 break;
1864 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1865 wrd = (insn >> 12) & 0xf;
1866 rd0 = (insn >> 16) & 0xf;
1867 rd1 = (insn >> 0) & 0xf;
1868 gen_op_iwmmxt_movq_M0_wRn(rd0);
1869 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1870 tcg_gen_andi_i32(tmp, tmp, 7);
1871 iwmmxt_load_reg(cpu_V1, rd1);
1872 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1873 tcg_temp_free_i32(tmp);
1874 gen_op_iwmmxt_movq_wRn_M0(wrd);
1875 gen_op_iwmmxt_set_mup();
1876 break;
1877 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1878 if (((insn >> 6) & 3) == 3)
1879 return 1;
1880 rd = (insn >> 12) & 0xf;
1881 wrd = (insn >> 16) & 0xf;
1882 tmp = load_reg(s, rd);
1883 gen_op_iwmmxt_movq_M0_wRn(wrd);
1884 switch ((insn >> 6) & 3) {
1885 case 0:
1886 tmp2 = tcg_const_i32(0xff);
1887 tmp3 = tcg_const_i32((insn & 7) << 3);
1888 break;
1889 case 1:
1890 tmp2 = tcg_const_i32(0xffff);
1891 tmp3 = tcg_const_i32((insn & 3) << 4);
1892 break;
1893 case 2:
1894 tmp2 = tcg_const_i32(0xffffffff);
1895 tmp3 = tcg_const_i32((insn & 1) << 5);
1896 break;
1897 default:
1898 tmp2 = NULL;
1899 tmp3 = NULL;
1900 }
1901 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1902 tcg_temp_free_i32(tmp3);
1903 tcg_temp_free_i32(tmp2);
1904 tcg_temp_free_i32(tmp);
1905 gen_op_iwmmxt_movq_wRn_M0(wrd);
1906 gen_op_iwmmxt_set_mup();
1907 break;
1908 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1909 rd = (insn >> 12) & 0xf;
1910 wrd = (insn >> 16) & 0xf;
1911 if (rd == 15 || ((insn >> 22) & 3) == 3)
1912 return 1;
1913 gen_op_iwmmxt_movq_M0_wRn(wrd);
1914 tmp = tcg_temp_new_i32();
1915 switch ((insn >> 22) & 3) {
1916 case 0:
1917 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1918 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1919 if (insn & 8) {
1920 tcg_gen_ext8s_i32(tmp, tmp);
1921 } else {
1922 tcg_gen_andi_i32(tmp, tmp, 0xff);
1923 }
1924 break;
1925 case 1:
1926 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1927 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1928 if (insn & 8) {
1929 tcg_gen_ext16s_i32(tmp, tmp);
1930 } else {
1931 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1932 }
1933 break;
1934 case 2:
1935 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1936 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1937 break;
1938 }
1939 store_reg(s, rd, tmp);
1940 break;
1941 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1942 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1943 return 1;
1944 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1945 switch ((insn >> 22) & 3) {
1946 case 0:
1947 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1948 break;
1949 case 1:
1950 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1951 break;
1952 case 2:
1953 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1954 break;
1955 }
1956 tcg_gen_shli_i32(tmp, tmp, 28);
1957 gen_set_nzcv(tmp);
1958 tcg_temp_free_i32(tmp);
1959 break;
1960 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1961 if (((insn >> 6) & 3) == 3)
1962 return 1;
1963 rd = (insn >> 12) & 0xf;
1964 wrd = (insn >> 16) & 0xf;
1965 tmp = load_reg(s, rd);
1966 switch ((insn >> 6) & 3) {
1967 case 0:
1968 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1969 break;
1970 case 1:
1971 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1972 break;
1973 case 2:
1974 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1975 break;
1976 }
1977 tcg_temp_free_i32(tmp);
1978 gen_op_iwmmxt_movq_wRn_M0(wrd);
1979 gen_op_iwmmxt_set_mup();
1980 break;
1981 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1982 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1983 return 1;
1984 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1985 tmp2 = tcg_temp_new_i32();
1986 tcg_gen_mov_i32(tmp2, tmp);
1987 switch ((insn >> 22) & 3) {
1988 case 0:
1989 for (i = 0; i < 7; i ++) {
1990 tcg_gen_shli_i32(tmp2, tmp2, 4);
1991 tcg_gen_and_i32(tmp, tmp, tmp2);
1992 }
1993 break;
1994 case 1:
1995 for (i = 0; i < 3; i ++) {
1996 tcg_gen_shli_i32(tmp2, tmp2, 8);
1997 tcg_gen_and_i32(tmp, tmp, tmp2);
1998 }
1999 break;
2000 case 2:
2001 tcg_gen_shli_i32(tmp2, tmp2, 16);
2002 tcg_gen_and_i32(tmp, tmp, tmp2);
2003 break;
2004 }
2005 gen_set_nzcv(tmp);
2006 tcg_temp_free_i32(tmp2);
2007 tcg_temp_free_i32(tmp);
2008 break;
2009 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2010 wrd = (insn >> 12) & 0xf;
2011 rd0 = (insn >> 16) & 0xf;
2012 gen_op_iwmmxt_movq_M0_wRn(rd0);
2013 switch ((insn >> 22) & 3) {
2014 case 0:
2015 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2016 break;
2017 case 1:
2018 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2019 break;
2020 case 2:
2021 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2022 break;
2023 case 3:
2024 return 1;
2025 }
2026 gen_op_iwmmxt_movq_wRn_M0(wrd);
2027 gen_op_iwmmxt_set_mup();
2028 break;
2029 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2030 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2031 return 1;
2032 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2033 tmp2 = tcg_temp_new_i32();
2034 tcg_gen_mov_i32(tmp2, tmp);
2035 switch ((insn >> 22) & 3) {
2036 case 0:
2037 for (i = 0; i < 7; i ++) {
2038 tcg_gen_shli_i32(tmp2, tmp2, 4);
2039 tcg_gen_or_i32(tmp, tmp, tmp2);
2040 }
2041 break;
2042 case 1:
2043 for (i = 0; i < 3; i ++) {
2044 tcg_gen_shli_i32(tmp2, tmp2, 8);
2045 tcg_gen_or_i32(tmp, tmp, tmp2);
2046 }
2047 break;
2048 case 2:
2049 tcg_gen_shli_i32(tmp2, tmp2, 16);
2050 tcg_gen_or_i32(tmp, tmp, tmp2);
2051 break;
2052 }
2053 gen_set_nzcv(tmp);
2054 tcg_temp_free_i32(tmp2);
2055 tcg_temp_free_i32(tmp);
2056 break;
2057 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2058 rd = (insn >> 12) & 0xf;
2059 rd0 = (insn >> 16) & 0xf;
2060 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2061 return 1;
2062 gen_op_iwmmxt_movq_M0_wRn(rd0);
2063 tmp = tcg_temp_new_i32();
2064 switch ((insn >> 22) & 3) {
2065 case 0:
2066 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2067 break;
2068 case 1:
2069 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2070 break;
2071 case 2:
2072 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2073 break;
2074 }
2075 store_reg(s, rd, tmp);
2076 break;
2077 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2078 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2079 wrd = (insn >> 12) & 0xf;
2080 rd0 = (insn >> 16) & 0xf;
2081 rd1 = (insn >> 0) & 0xf;
2082 gen_op_iwmmxt_movq_M0_wRn(rd0);
2083 switch ((insn >> 22) & 3) {
2084 case 0:
2085 if (insn & (1 << 21))
2086 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2087 else
2088 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2089 break;
2090 case 1:
2091 if (insn & (1 << 21))
2092 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2093 else
2094 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2095 break;
2096 case 2:
2097 if (insn & (1 << 21))
2098 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2099 else
2100 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2101 break;
2102 case 3:
2103 return 1;
2104 }
2105 gen_op_iwmmxt_movq_wRn_M0(wrd);
2106 gen_op_iwmmxt_set_mup();
2107 gen_op_iwmmxt_set_cup();
2108 break;
2109 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2110 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2111 wrd = (insn >> 12) & 0xf;
2112 rd0 = (insn >> 16) & 0xf;
2113 gen_op_iwmmxt_movq_M0_wRn(rd0);
2114 switch ((insn >> 22) & 3) {
2115 case 0:
2116 if (insn & (1 << 21))
2117 gen_op_iwmmxt_unpacklsb_M0();
2118 else
2119 gen_op_iwmmxt_unpacklub_M0();
2120 break;
2121 case 1:
2122 if (insn & (1 << 21))
2123 gen_op_iwmmxt_unpacklsw_M0();
2124 else
2125 gen_op_iwmmxt_unpackluw_M0();
2126 break;
2127 case 2:
2128 if (insn & (1 << 21))
2129 gen_op_iwmmxt_unpacklsl_M0();
2130 else
2131 gen_op_iwmmxt_unpacklul_M0();
2132 break;
2133 case 3:
2134 return 1;
2135 }
2136 gen_op_iwmmxt_movq_wRn_M0(wrd);
2137 gen_op_iwmmxt_set_mup();
2138 gen_op_iwmmxt_set_cup();
2139 break;
2140 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2141 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2142 wrd = (insn >> 12) & 0xf;
2143 rd0 = (insn >> 16) & 0xf;
2144 gen_op_iwmmxt_movq_M0_wRn(rd0);
2145 switch ((insn >> 22) & 3) {
2146 case 0:
2147 if (insn & (1 << 21))
2148 gen_op_iwmmxt_unpackhsb_M0();
2149 else
2150 gen_op_iwmmxt_unpackhub_M0();
2151 break;
2152 case 1:
2153 if (insn & (1 << 21))
2154 gen_op_iwmmxt_unpackhsw_M0();
2155 else
2156 gen_op_iwmmxt_unpackhuw_M0();
2157 break;
2158 case 2:
2159 if (insn & (1 << 21))
2160 gen_op_iwmmxt_unpackhsl_M0();
2161 else
2162 gen_op_iwmmxt_unpackhul_M0();
2163 break;
2164 case 3:
2165 return 1;
2166 }
2167 gen_op_iwmmxt_movq_wRn_M0(wrd);
2168 gen_op_iwmmxt_set_mup();
2169 gen_op_iwmmxt_set_cup();
2170 break;
2171 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2172 case 0x214: case 0x614: case 0xa14: case 0xe14:
2173 if (((insn >> 22) & 3) == 0)
2174 return 1;
2175 wrd = (insn >> 12) & 0xf;
2176 rd0 = (insn >> 16) & 0xf;
2177 gen_op_iwmmxt_movq_M0_wRn(rd0);
2178 tmp = tcg_temp_new_i32();
2179 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2180 tcg_temp_free_i32(tmp);
2181 return 1;
2182 }
2183 switch ((insn >> 22) & 3) {
2184 case 1:
2185 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2186 break;
2187 case 2:
2188 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2189 break;
2190 case 3:
2191 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2192 break;
2193 }
2194 tcg_temp_free_i32(tmp);
2195 gen_op_iwmmxt_movq_wRn_M0(wrd);
2196 gen_op_iwmmxt_set_mup();
2197 gen_op_iwmmxt_set_cup();
2198 break;
2199 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2200 case 0x014: case 0x414: case 0x814: case 0xc14:
2201 if (((insn >> 22) & 3) == 0)
2202 return 1;
2203 wrd = (insn >> 12) & 0xf;
2204 rd0 = (insn >> 16) & 0xf;
2205 gen_op_iwmmxt_movq_M0_wRn(rd0);
2206 tmp = tcg_temp_new_i32();
2207 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2208 tcg_temp_free_i32(tmp);
2209 return 1;
2210 }
2211 switch ((insn >> 22) & 3) {
2212 case 1:
2213 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2214 break;
2215 case 2:
2216 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2217 break;
2218 case 3:
2219 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2220 break;
2221 }
2222 tcg_temp_free_i32(tmp);
2223 gen_op_iwmmxt_movq_wRn_M0(wrd);
2224 gen_op_iwmmxt_set_mup();
2225 gen_op_iwmmxt_set_cup();
2226 break;
2227 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2228 case 0x114: case 0x514: case 0x914: case 0xd14:
2229 if (((insn >> 22) & 3) == 0)
2230 return 1;
2231 wrd = (insn >> 12) & 0xf;
2232 rd0 = (insn >> 16) & 0xf;
2233 gen_op_iwmmxt_movq_M0_wRn(rd0);
2234 tmp = tcg_temp_new_i32();
2235 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2236 tcg_temp_free_i32(tmp);
2237 return 1;
2238 }
2239 switch ((insn >> 22) & 3) {
2240 case 1:
2241 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2242 break;
2243 case 2:
2244 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2245 break;
2246 case 3:
2247 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2248 break;
2249 }
2250 tcg_temp_free_i32(tmp);
2251 gen_op_iwmmxt_movq_wRn_M0(wrd);
2252 gen_op_iwmmxt_set_mup();
2253 gen_op_iwmmxt_set_cup();
2254 break;
2255 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2256 case 0x314: case 0x714: case 0xb14: case 0xf14:
2257 if (((insn >> 22) & 3) == 0)
2258 return 1;
2259 wrd = (insn >> 12) & 0xf;
2260 rd0 = (insn >> 16) & 0xf;
2261 gen_op_iwmmxt_movq_M0_wRn(rd0);
2262 tmp = tcg_temp_new_i32();
2263 switch ((insn >> 22) & 3) {
2264 case 1:
2265 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2266 tcg_temp_free_i32(tmp);
2267 return 1;
2268 }
2269 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2270 break;
2271 case 2:
2272 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2273 tcg_temp_free_i32(tmp);
2274 return 1;
2275 }
2276 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2277 break;
2278 case 3:
2279 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2280 tcg_temp_free_i32(tmp);
2281 return 1;
2282 }
2283 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2284 break;
2285 }
2286 tcg_temp_free_i32(tmp);
2287 gen_op_iwmmxt_movq_wRn_M0(wrd);
2288 gen_op_iwmmxt_set_mup();
2289 gen_op_iwmmxt_set_cup();
2290 break;
2291 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2292 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2293 wrd = (insn >> 12) & 0xf;
2294 rd0 = (insn >> 16) & 0xf;
2295 rd1 = (insn >> 0) & 0xf;
2296 gen_op_iwmmxt_movq_M0_wRn(rd0);
2297 switch ((insn >> 22) & 3) {
2298 case 0:
2299 if (insn & (1 << 21))
2300 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2301 else
2302 gen_op_iwmmxt_minub_M0_wRn(rd1);
2303 break;
2304 case 1:
2305 if (insn & (1 << 21))
2306 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2307 else
2308 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2309 break;
2310 case 2:
2311 if (insn & (1 << 21))
2312 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2313 else
2314 gen_op_iwmmxt_minul_M0_wRn(rd1);
2315 break;
2316 case 3:
2317 return 1;
2318 }
2319 gen_op_iwmmxt_movq_wRn_M0(wrd);
2320 gen_op_iwmmxt_set_mup();
2321 break;
2322 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2323 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2324 wrd = (insn >> 12) & 0xf;
2325 rd0 = (insn >> 16) & 0xf;
2326 rd1 = (insn >> 0) & 0xf;
2327 gen_op_iwmmxt_movq_M0_wRn(rd0);
2328 switch ((insn >> 22) & 3) {
2329 case 0:
2330 if (insn & (1 << 21))
2331 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2332 else
2333 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2334 break;
2335 case 1:
2336 if (insn & (1 << 21))
2337 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2338 else
2339 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2340 break;
2341 case 2:
2342 if (insn & (1 << 21))
2343 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2344 else
2345 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2346 break;
2347 case 3:
2348 return 1;
2349 }
2350 gen_op_iwmmxt_movq_wRn_M0(wrd);
2351 gen_op_iwmmxt_set_mup();
2352 break;
2353 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2354 case 0x402: case 0x502: case 0x602: case 0x702:
2355 wrd = (insn >> 12) & 0xf;
2356 rd0 = (insn >> 16) & 0xf;
2357 rd1 = (insn >> 0) & 0xf;
2358 gen_op_iwmmxt_movq_M0_wRn(rd0);
2359 tmp = tcg_const_i32((insn >> 20) & 3);
2360 iwmmxt_load_reg(cpu_V1, rd1);
2361 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2362 tcg_temp_free_i32(tmp);
2363 gen_op_iwmmxt_movq_wRn_M0(wrd);
2364 gen_op_iwmmxt_set_mup();
2365 break;
2366 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2367 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2368 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2369 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2370 wrd = (insn >> 12) & 0xf;
2371 rd0 = (insn >> 16) & 0xf;
2372 rd1 = (insn >> 0) & 0xf;
2373 gen_op_iwmmxt_movq_M0_wRn(rd0);
2374 switch ((insn >> 20) & 0xf) {
2375 case 0x0:
2376 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2377 break;
2378 case 0x1:
2379 gen_op_iwmmxt_subub_M0_wRn(rd1);
2380 break;
2381 case 0x3:
2382 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2383 break;
2384 case 0x4:
2385 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2386 break;
2387 case 0x5:
2388 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2389 break;
2390 case 0x7:
2391 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2392 break;
2393 case 0x8:
2394 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2395 break;
2396 case 0x9:
2397 gen_op_iwmmxt_subul_M0_wRn(rd1);
2398 break;
2399 case 0xb:
2400 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2401 break;
2402 default:
2403 return 1;
2404 }
2405 gen_op_iwmmxt_movq_wRn_M0(wrd);
2406 gen_op_iwmmxt_set_mup();
2407 gen_op_iwmmxt_set_cup();
2408 break;
2409 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2410 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2411 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2412 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2413 wrd = (insn >> 12) & 0xf;
2414 rd0 = (insn >> 16) & 0xf;
2415 gen_op_iwmmxt_movq_M0_wRn(rd0);
2416 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2417 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2418 tcg_temp_free_i32(tmp);
2419 gen_op_iwmmxt_movq_wRn_M0(wrd);
2420 gen_op_iwmmxt_set_mup();
2421 gen_op_iwmmxt_set_cup();
2422 break;
2423 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2424 case 0x418: case 0x518: case 0x618: case 0x718:
2425 case 0x818: case 0x918: case 0xa18: case 0xb18:
2426 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2427 wrd = (insn >> 12) & 0xf;
2428 rd0 = (insn >> 16) & 0xf;
2429 rd1 = (insn >> 0) & 0xf;
2430 gen_op_iwmmxt_movq_M0_wRn(rd0);
2431 switch ((insn >> 20) & 0xf) {
2432 case 0x0:
2433 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2434 break;
2435 case 0x1:
2436 gen_op_iwmmxt_addub_M0_wRn(rd1);
2437 break;
2438 case 0x3:
2439 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2440 break;
2441 case 0x4:
2442 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2443 break;
2444 case 0x5:
2445 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2446 break;
2447 case 0x7:
2448 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2449 break;
2450 case 0x8:
2451 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2452 break;
2453 case 0x9:
2454 gen_op_iwmmxt_addul_M0_wRn(rd1);
2455 break;
2456 case 0xb:
2457 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2458 break;
2459 default:
2460 return 1;
2461 }
2462 gen_op_iwmmxt_movq_wRn_M0(wrd);
2463 gen_op_iwmmxt_set_mup();
2464 gen_op_iwmmxt_set_cup();
2465 break;
2466 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2467 case 0x408: case 0x508: case 0x608: case 0x708:
2468 case 0x808: case 0x908: case 0xa08: case 0xb08:
2469 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2470 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2471 return 1;
2472 wrd = (insn >> 12) & 0xf;
2473 rd0 = (insn >> 16) & 0xf;
2474 rd1 = (insn >> 0) & 0xf;
2475 gen_op_iwmmxt_movq_M0_wRn(rd0);
2476 switch ((insn >> 22) & 3) {
2477 case 1:
2478 if (insn & (1 << 21))
2479 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2480 else
2481 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2482 break;
2483 case 2:
2484 if (insn & (1 << 21))
2485 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2486 else
2487 gen_op_iwmmxt_packul_M0_wRn(rd1);
2488 break;
2489 case 3:
2490 if (insn & (1 << 21))
2491 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2492 else
2493 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2494 break;
2495 }
2496 gen_op_iwmmxt_movq_wRn_M0(wrd);
2497 gen_op_iwmmxt_set_mup();
2498 gen_op_iwmmxt_set_cup();
2499 break;
2500 case 0x201: case 0x203: case 0x205: case 0x207:
2501 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2502 case 0x211: case 0x213: case 0x215: case 0x217:
2503 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2504 wrd = (insn >> 5) & 0xf;
2505 rd0 = (insn >> 12) & 0xf;
2506 rd1 = (insn >> 0) & 0xf;
2507 if (rd0 == 0xf || rd1 == 0xf)
2508 return 1;
2509 gen_op_iwmmxt_movq_M0_wRn(wrd);
2510 tmp = load_reg(s, rd0);
2511 tmp2 = load_reg(s, rd1);
2512 switch ((insn >> 16) & 0xf) {
2513 case 0x0: /* TMIA */
2514 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2515 break;
2516 case 0x8: /* TMIAPH */
2517 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2518 break;
2519 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2520 if (insn & (1 << 16))
2521 tcg_gen_shri_i32(tmp, tmp, 16);
2522 if (insn & (1 << 17))
2523 tcg_gen_shri_i32(tmp2, tmp2, 16);
2524 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2525 break;
2526 default:
2527 tcg_temp_free_i32(tmp2);
2528 tcg_temp_free_i32(tmp);
2529 return 1;
2530 }
2531 tcg_temp_free_i32(tmp2);
2532 tcg_temp_free_i32(tmp);
2533 gen_op_iwmmxt_movq_wRn_M0(wrd);
2534 gen_op_iwmmxt_set_mup();
2535 break;
2536 default:
2537 return 1;
2538 }
2539
2540 return 0;
2541 }
2542
2543 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2544 (i.e. an undefined instruction). */
2545 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2546 {
2547 int acc, rd0, rd1, rdhi, rdlo;
2548 TCGv_i32 tmp, tmp2;
2549
2550 if ((insn & 0x0ff00f10) == 0x0e200010) {
2551 /* Multiply with Internal Accumulate Format */
2552 rd0 = (insn >> 12) & 0xf;
2553 rd1 = insn & 0xf;
2554 acc = (insn >> 5) & 7;
2555
2556 if (acc != 0)
2557 return 1;
2558
2559 tmp = load_reg(s, rd0);
2560 tmp2 = load_reg(s, rd1);
2561 switch ((insn >> 16) & 0xf) {
2562 case 0x0: /* MIA */
2563 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2564 break;
2565 case 0x8: /* MIAPH */
2566 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2567 break;
2568 case 0xc: /* MIABB */
2569 case 0xd: /* MIABT */
2570 case 0xe: /* MIATB */
2571 case 0xf: /* MIATT */
2572 if (insn & (1 << 16))
2573 tcg_gen_shri_i32(tmp, tmp, 16);
2574 if (insn & (1 << 17))
2575 tcg_gen_shri_i32(tmp2, tmp2, 16);
2576 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2577 break;
2578 default:
2579 return 1;
2580 }
2581 tcg_temp_free_i32(tmp2);
2582 tcg_temp_free_i32(tmp);
2583
2584 gen_op_iwmmxt_movq_wRn_M0(acc);
2585 return 0;
2586 }
2587
2588 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2589 /* Internal Accumulator Access Format */
2590 rdhi = (insn >> 16) & 0xf;
2591 rdlo = (insn >> 12) & 0xf;
2592 acc = insn & 7;
2593
2594 if (acc != 0)
2595 return 1;
2596
2597 if (insn & ARM_CP_RW_BIT) { /* MRA */
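            /*
             * The XScale internal accumulator is 40 bits wide: MRA returns
             * bits [31:0] in rdlo and bits [39:32] in rdhi, hence the 0xff
             * mask applied to rdhi below.
             */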
2598 iwmmxt_load_reg(cpu_V0, acc);
2599 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2600 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2601 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2602 } else { /* MAR */
2603 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2604 iwmmxt_store_reg(cpu_V0, acc);
2605 }
2606 return 0;
2607 }
2608
2609 return 1;
2610 }
2611
2612 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2613 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2614 if (dc_isar_feature(aa32_simd_r32, s)) { \
2615 reg = (((insn) >> (bigbit)) & 0x0f) \
2616 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2617 } else { \
2618 if (insn & (1 << (smallbit))) \
2619 return 1; \
2620 reg = ((insn) >> (bigbit)) & 0x0f; \
2621 }} while (0)
2622
2623 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2624 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2625 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
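/*
 * As an example of the decode above: for the D variant (bigbit 12,
 * smallbit 22), insn[15:12] = 0b0101 with insn[22] = 1 gives
 * reg = 0x05 | 0x10 = 21 (d21) when 32 D registers are implemented;
 * without aa32_simd_r32 the set bit 22 makes the macro return 1
 * (UNDEF), since only d0..d15 exist.
 */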
2626
2627 static void gen_neon_dup_low16(TCGv_i32 var)
2628 {
2629 TCGv_i32 tmp = tcg_temp_new_i32();
2630 tcg_gen_ext16u_i32(var, var);
2631 tcg_gen_shli_i32(tmp, var, 16);
2632 tcg_gen_or_i32(var, var, tmp);
2633 tcg_temp_free_i32(tmp);
2634 }
2635
2636 static void gen_neon_dup_high16(TCGv_i32 var)
2637 {
2638 TCGv_i32 tmp = tcg_temp_new_i32();
2639 tcg_gen_andi_i32(var, var, 0xffff0000);
2640 tcg_gen_shri_i32(tmp, var, 16);
2641 tcg_gen_or_i32(var, var, tmp);
2642 tcg_temp_free_i32(tmp);
2643 }
2644
2645 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2646 {
2647 #ifndef CONFIG_USER_ONLY
2648 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2649 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2650 #else
2651 return true;
2652 #endif
2653 }
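/*
 * Direct block chaining is only used when the destination lies in the
 * same guest page as the start of this TB or as the end of the current
 * instruction; the mapping of other pages may change underneath us, so
 * gen_goto_tb() falls back to the lookup-and-jump path in that case.
 */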
2654
2655 static void gen_goto_ptr(void)
2656 {
2657 tcg_gen_lookup_and_goto_ptr();
2658 }
2659
2660 /* This will end the TB but doesn't guarantee we'll return to
2661 * cpu_loop_exec. Any live exit_requests will be processed as we
2662 * enter the next TB.
2663 */
2664 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2665 {
2666 if (use_goto_tb(s, dest)) {
2667 tcg_gen_goto_tb(n);
2668 gen_set_pc_im(s, dest);
2669 tcg_gen_exit_tb(s->base.tb, n);
2670 } else {
2671 gen_set_pc_im(s, dest);
2672 gen_goto_ptr();
2673 }
2674 s->base.is_jmp = DISAS_NORETURN;
2675 }
2676
2677 static inline void gen_jmp (DisasContext *s, uint32_t dest)
2678 {
2679 if (unlikely(is_singlestepping(s))) {
2680 /* An indirect jump so that we still trigger the debug exception. */
2681 gen_set_pc_im(s, dest);
2682 s->base.is_jmp = DISAS_JUMP;
2683 } else {
2684 gen_goto_tb(s, 0, dest);
2685 }
2686 }
2687
2688 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2689 {
2690 if (x)
2691 tcg_gen_sari_i32(t0, t0, 16);
2692 else
2693 gen_sxth(t0);
2694 if (y)
2695 tcg_gen_sari_i32(t1, t1, 16);
2696 else
2697 gen_sxth(t1);
2698 tcg_gen_mul_i32(t0, t0, t1);
2699 }
2700
2701 /* Return the mask of PSR bits set by a MSR instruction. */
2702 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2703 {
2704 uint32_t mask = 0;
2705
2706 if (flags & (1 << 0)) {
2707 mask |= 0xff;
2708 }
2709 if (flags & (1 << 1)) {
2710 mask |= 0xff00;
2711 }
2712 if (flags & (1 << 2)) {
2713 mask |= 0xff0000;
2714 }
2715 if (flags & (1 << 3)) {
2716 mask |= 0xff000000;
2717 }
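    /*
     * For example, an MSR ..._fc form (flags == 0b1001) yields
     * 0xff0000ff at this point, before the filtering below.
     */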
2718
2719 /* Mask out undefined and reserved bits. */
2720 mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2721
2722 /* Mask out execution state. */
2723 if (!spsr) {
2724 mask &= ~CPSR_EXEC;
2725 }
2726
2727 /* Mask out privileged bits. */
2728 if (IS_USER(s)) {
2729 mask &= CPSR_USER;
2730 }
2731 return mask;
2732 }
2733
2734 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2735 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2736 {
2737 TCGv_i32 tmp;
2738 if (spsr) {
2739 /* ??? This is also undefined in system mode. */
2740 if (IS_USER(s))
2741 return 1;
2742
2743 tmp = load_cpu_field(spsr);
2744 tcg_gen_andi_i32(tmp, tmp, ~mask);
2745 tcg_gen_andi_i32(t0, t0, mask);
2746 tcg_gen_or_i32(tmp, tmp, t0);
2747 store_cpu_field(tmp, spsr);
2748 } else {
2749 gen_set_cpsr(t0, mask);
2750 }
2751 tcg_temp_free_i32(t0);
2752 gen_lookup_tb(s);
2753 return 0;
2754 }
2755
2756 /* Returns nonzero if access to the PSR is not permitted. */
2757 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2758 {
2759 TCGv_i32 tmp;
2760 tmp = tcg_temp_new_i32();
2761 tcg_gen_movi_i32(tmp, val);
2762 return gen_set_psr(s, mask, spsr, tmp);
2763 }
2764
2765 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2766 int *tgtmode, int *regno)
2767 {
2768 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2769 * the target mode and register number, and identify the various
2770 * unpredictable cases.
2771 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2772 * + executed in user mode
2773 * + using R15 as the src/dest register
2774 * + accessing an unimplemented register
2775 * + accessing a register that's inaccessible at current PL/security state*
2776 * + accessing a register that you could access with a different insn
2777 * We choose to UNDEF in all these cases.
2778 * Since we don't know which of the various AArch32 modes we are in
2779 * we have to defer some checks to runtime.
2780 * Accesses to Monitor mode registers from Secure EL1 (which implies
2781 * that EL3 is AArch64) must trap to EL3.
2782 *
2783 * If the access checks fail this function will emit code to take
2784 * an exception and return false. Otherwise it will return true,
2785 * and set *tgtmode and *regno appropriately.
2786 */
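    /*
     * For example, r == 0 with sysm == 0xa (0b01010) selects r10_fiq
     * (ARM_CPU_MODE_FIQ, regno 10), while r == 1 with sysm == 0x10
     * selects SPSR_irq (ARM_CPU_MODE_IRQ, regno 16).
     */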
2787 int exc_target = default_exception_el(s);
2788
2789 /* These instructions are present only in ARMv8, or in ARMv7 with the
2790 * Virtualization Extensions.
2791 */
2792 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2793 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2794 goto undef;
2795 }
2796
2797 if (IS_USER(s) || rn == 15) {
2798 goto undef;
2799 }
2800
2801 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2802 * of registers into (r, sysm).
2803 */
2804 if (r) {
2805 /* SPSRs for other modes */
2806 switch (sysm) {
2807 case 0xe: /* SPSR_fiq */
2808 *tgtmode = ARM_CPU_MODE_FIQ;
2809 break;
2810 case 0x10: /* SPSR_irq */
2811 *tgtmode = ARM_CPU_MODE_IRQ;
2812 break;
2813 case 0x12: /* SPSR_svc */
2814 *tgtmode = ARM_CPU_MODE_SVC;
2815 break;
2816 case 0x14: /* SPSR_abt */
2817 *tgtmode = ARM_CPU_MODE_ABT;
2818 break;
2819 case 0x16: /* SPSR_und */
2820 *tgtmode = ARM_CPU_MODE_UND;
2821 break;
2822 case 0x1c: /* SPSR_mon */
2823 *tgtmode = ARM_CPU_MODE_MON;
2824 break;
2825 case 0x1e: /* SPSR_hyp */
2826 *tgtmode = ARM_CPU_MODE_HYP;
2827 break;
2828 default: /* unallocated */
2829 goto undef;
2830 }
2831 /* We arbitrarily assign SPSR a register number of 16. */
2832 *regno = 16;
2833 } else {
2834 /* general purpose registers for other modes */
2835 switch (sysm) {
2836 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2837 *tgtmode = ARM_CPU_MODE_USR;
2838 *regno = sysm + 8;
2839 break;
2840 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2841 *tgtmode = ARM_CPU_MODE_FIQ;
2842 *regno = sysm;
2843 break;
2844 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2845 *tgtmode = ARM_CPU_MODE_IRQ;
2846 *regno = sysm & 1 ? 13 : 14;
2847 break;
2848 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2849 *tgtmode = ARM_CPU_MODE_SVC;
2850 *regno = sysm & 1 ? 13 : 14;
2851 break;
2852 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2853 *tgtmode = ARM_CPU_MODE_ABT;
2854 *regno = sysm & 1 ? 13 : 14;
2855 break;
2856 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2857 *tgtmode = ARM_CPU_MODE_UND;
2858 *regno = sysm & 1 ? 13 : 14;
2859 break;
2860 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2861 *tgtmode = ARM_CPU_MODE_MON;
2862 *regno = sysm & 1 ? 13 : 14;
2863 break;
2864 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2865 *tgtmode = ARM_CPU_MODE_HYP;
2866 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2867 *regno = sysm & 1 ? 13 : 17;
2868 break;
2869 default: /* unallocated */
2870 goto undef;
2871 }
2872 }
2873
2874 /* Catch the 'accessing inaccessible register' cases we can detect
2875 * at translate time.
2876 */
2877 switch (*tgtmode) {
2878 case ARM_CPU_MODE_MON:
2879 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2880 goto undef;
2881 }
2882 if (s->current_el == 1) {
2883 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2884 * then accesses to Mon registers trap to EL3
2885 */
2886 exc_target = 3;
2887 goto undef;
2888 }
2889 break;
2890 case ARM_CPU_MODE_HYP:
2891 /*
2892 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2893 * (and so we can forbid accesses from EL2 or below). elr_hyp
2894 * can be accessed also from Hyp mode, so forbid accesses from
2895 * EL0 or EL1.
2896 */
2897 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2898 (s->current_el < 3 && *regno != 17)) {
2899 goto undef;
2900 }
2901 break;
2902 default:
2903 break;
2904 }
2905
2906 return true;
2907
2908 undef:
2909 /* If we get here then some access check did not pass */
2910 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2911 syn_uncategorized(), exc_target);
2912 return false;
2913 }
2914
2915 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2916 {
2917 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2918 int tgtmode = 0, regno = 0;
2919
2920 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2921 return;
2922 }
2923
2924 /* Sync state because msr_banked() can raise exceptions */
2925 gen_set_condexec(s);
2926 gen_set_pc_im(s, s->pc_curr);
2927 tcg_reg = load_reg(s, rn);
2928 tcg_tgtmode = tcg_const_i32(tgtmode);
2929 tcg_regno = tcg_const_i32(regno);
2930 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2931 tcg_temp_free_i32(tcg_tgtmode);
2932 tcg_temp_free_i32(tcg_regno);
2933 tcg_temp_free_i32(tcg_reg);
2934 s->base.is_jmp = DISAS_UPDATE;
2935 }
2936
2937 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2938 {
2939 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2940 int tgtmode = 0, regno = 0;
2941
2942 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2943 return;
2944 }
2945
2946 /* Sync state because mrs_banked() can raise exceptions */
2947 gen_set_condexec(s);
2948 gen_set_pc_im(s, s->pc_curr);
2949 tcg_reg = tcg_temp_new_i32();
2950 tcg_tgtmode = tcg_const_i32(tgtmode);
2951 tcg_regno = tcg_const_i32(regno);
2952 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2953 tcg_temp_free_i32(tcg_tgtmode);
2954 tcg_temp_free_i32(tcg_regno);
2955 store_reg(s, rn, tcg_reg);
2956 s->base.is_jmp = DISAS_UPDATE;
2957 }
2958
2959 /* Store value to PC as for an exception return (i.e. don't
2960 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2961 * will do the masking based on the new value of the Thumb bit.
2962 */
2963 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2964 {
2965 tcg_gen_mov_i32(cpu_R[15], pc);
2966 tcg_temp_free_i32(pc);
2967 }
2968
2969 /* Generate a v6 exception return. Marks both values as dead. */
2970 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2971 {
2972 store_pc_exc_ret(s, pc);
2973 /* The cpsr_write_eret helper will mask the low bits of PC
2974 * appropriately depending on the new Thumb bit, so it must
2975 * be called after storing the new PC.
2976 */
2977 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2978 gen_io_start();
2979 }
2980 gen_helper_cpsr_write_eret(cpu_env, cpsr);
2981 tcg_temp_free_i32(cpsr);
2982 /* Must exit loop to check un-masked IRQs */
2983 s->base.is_jmp = DISAS_EXIT;
2984 }
2985
2986 /* Generate an old-style exception return. Marks pc as dead. */
2987 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2988 {
2989 gen_rfe(s, pc, load_cpu_field(spsr));
2990 }
2991
2992 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
2993
2994 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
2995 {
2996 switch (size) {
2997 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
2998 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
2999 case 2: tcg_gen_add_i32(t0, t0, t1); break;
3000 default: abort();
3001 }
3002 }
3003
3004 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3005 {
3006 switch (size) {
3007 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3008 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3009 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3010 default: return;
3011 }
3012 }
3013
3014 /* 32-bit pairwise ops end up the same as the elementwise versions. */
3015 #define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
3016 #define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
3017 #define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
3018 #define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
3019
3020 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3021 switch ((size << 1) | u) { \
3022 case 0: \
3023 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3024 break; \
3025 case 1: \
3026 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3027 break; \
3028 case 2: \
3029 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3030 break; \
3031 case 3: \
3032 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3033 break; \
3034 case 4: \
3035 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3036 break; \
3037 case 5: \
3038 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3039 break; \
3040 default: return 1; \
3041 }} while (0)
3042
3043 #define GEN_NEON_INTEGER_OP(name) do { \
3044 switch ((size << 1) | u) { \
3045 case 0: \
3046 gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3047 break; \
3048 case 1: \
3049 gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3050 break; \
3051 case 2: \
3052 gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3053 break; \
3054 case 3: \
3055 gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3056 break; \
3057 case 4: \
3058 gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3059 break; \
3060 case 5: \
3061 gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3062 break; \
3063 default: return 1; \
3064 }} while (0)
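/*
 * In both macros the case index is (size << 1) | u; case 5, for example,
 * selects the unsigned 32-bit helper.
 */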
3065
3066 static TCGv_i32 neon_load_scratch(int scratch)
3067 {
3068 TCGv_i32 tmp = tcg_temp_new_i32();
3069 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3070 return tmp;
3071 }
3072
3073 static void neon_store_scratch(int scratch, TCGv_i32 var)
3074 {
3075 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3076 tcg_temp_free_i32(var);
3077 }
3078
3079 static inline TCGv_i32 neon_get_scalar(int size, int reg)
3080 {
3081 TCGv_i32 tmp;
3082 if (size == 1) {
3083 tmp = neon_load_reg(reg & 7, reg >> 4);
3084 if (reg & 8) {
3085 gen_neon_dup_high16(tmp);
3086 } else {
3087 gen_neon_dup_low16(tmp);
3088 }
3089 } else {
3090 tmp = neon_load_reg(reg & 15, reg >> 4);
3091 }
3092 return tmp;
3093 }
3094
3095 static int gen_neon_unzip(int rd, int rm, int size, int q)
3096 {
3097 TCGv_ptr pd, pm;
3098
3099 if (!q && size == 2) {
3100 return 1;
3101 }
3102 pd = vfp_reg_ptr(true, rd);
3103 pm = vfp_reg_ptr(true, rm);
3104 if (q) {
3105 switch (size) {
3106 case 0:
3107 gen_helper_neon_qunzip8(pd, pm);
3108 break;
3109 case 1:
3110 gen_helper_neon_qunzip16(pd, pm);
3111 break;
3112 case 2:
3113 gen_helper_neon_qunzip32(pd, pm);
3114 break;
3115 default:
3116 abort();
3117 }
3118 } else {
3119 switch (size) {
3120 case 0:
3121 gen_helper_neon_unzip8(pd, pm);
3122 break;
3123 case 1:
3124 gen_helper_neon_unzip16(pd, pm);
3125 break;
3126 default:
3127 abort();
3128 }
3129 }
3130 tcg_temp_free_ptr(pd);
3131 tcg_temp_free_ptr(pm);
3132 return 0;
3133 }
3134
3135 static int gen_neon_zip(int rd, int rm, int size, int q)
3136 {
3137 TCGv_ptr pd, pm;
3138
3139 if (!q && size == 2) {
3140 return 1;
3141 }
3142 pd = vfp_reg_ptr(true, rd);
3143 pm = vfp_reg_ptr(true, rm);
3144 if (q) {
3145 switch (size) {
3146 case 0:
3147 gen_helper_neon_qzip8(pd, pm);
3148 break;
3149 case 1:
3150 gen_helper_neon_qzip16(pd, pm);
3151 break;
3152 case 2:
3153 gen_helper_neon_qzip32(pd, pm);
3154 break;
3155 default:
3156 abort();
3157 }
3158 } else {
3159 switch (size) {
3160 case 0:
3161 gen_helper_neon_zip8(pd, pm);
3162 break;
3163 case 1:
3164 gen_helper_neon_zip16(pd, pm);
3165 break;
3166 default:
3167 abort();
3168 }
3169 }
3170 tcg_temp_free_ptr(pd);
3171 tcg_temp_free_ptr(pm);
3172 return 0;
3173 }
3174
3175 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3176 {
3177 TCGv_i32 rd, tmp;
3178
3179 rd = tcg_temp_new_i32();
3180 tmp = tcg_temp_new_i32();
3181
3182 tcg_gen_shli_i32(rd, t0, 8);
3183 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3184 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3185 tcg_gen_or_i32(rd, rd, tmp);
3186
3187 tcg_gen_shri_i32(t1, t1, 8);
3188 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3189 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3190 tcg_gen_or_i32(t1, t1, tmp);
3191 tcg_gen_mov_i32(t0, rd);
3192
3193 tcg_temp_free_i32(tmp);
3194 tcg_temp_free_i32(rd);
3195 }
3196
3197 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3198 {
3199 TCGv_i32 rd, tmp;
3200
3201 rd = tcg_temp_new_i32();
3202 tmp = tcg_temp_new_i32();
3203
3204 tcg_gen_shli_i32(rd, t0, 16);
3205 tcg_gen_andi_i32(tmp, t1, 0xffff);
3206 tcg_gen_or_i32(rd, rd, tmp);
3207 tcg_gen_shri_i32(t1, t1, 16);
3208 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3209 tcg_gen_or_i32(t1, t1, tmp);
3210 tcg_gen_mov_i32(t0, rd);
3211
3212 tcg_temp_free_i32(tmp);
3213 tcg_temp_free_i32(rd);
3214 }
3215
3216 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3217 {
3218 switch (size) {
3219 case 0: gen_helper_neon_narrow_u8(dest, src); break;
3220 case 1: gen_helper_neon_narrow_u16(dest, src); break;
3221 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3222 default: abort();
3223 }
3224 }
3225
3226 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3227 {
3228 switch (size) {
3229 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3230 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3231 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3232 default: abort();
3233 }
3234 }
3235
3236 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3237 {
3238 switch (size) {
3239 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3240 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3241 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3242 default: abort();
3243 }
3244 }
3245
3246 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3247 {
3248 switch (size) {
3249 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3250 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3251 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3252 default: abort();
3253 }
3254 }
3255
3256 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
3257 int q, int u)
3258 {
3259 if (q) {
3260 if (u) {
3261 switch (size) {
3262 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3263 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3264 default: abort();
3265 }
3266 } else {
3267 switch (size) {
3268 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3269 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3270 default: abort();
3271 }
3272 }
3273 } else {
3274 if (u) {
3275 switch (size) {
3276 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
3277 case 2: gen_ushl_i32(var, var, shift); break;
3278 default: abort();
3279 }
3280 } else {
3281 switch (size) {
3282 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3283 case 2: gen_sshl_i32(var, var, shift); break;
3284 default: abort();
3285 }
3286 }
3287 }
3288 }
3289
3290 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3291 {
3292 if (u) {
3293 switch (size) {
3294 case 0: gen_helper_neon_widen_u8(dest, src); break;
3295 case 1: gen_helper_neon_widen_u16(dest, src); break;
3296 case 2: tcg_gen_extu_i32_i64(dest, src); break;
3297 default: abort();
3298 }
3299 } else {
3300 switch (size) {
3301 case 0: gen_helper_neon_widen_s8(dest, src); break;
3302 case 1: gen_helper_neon_widen_s16(dest, src); break;
3303 case 2: tcg_gen_ext_i32_i64(dest, src); break;
3304 default: abort();
3305 }
3306 }
3307 tcg_temp_free_i32(src);
3308 }
3309
3310 static inline void gen_neon_addl(int size)
3311 {
3312 switch (size) {
3313 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3314 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3315 case 2: tcg_gen_add_i64(CPU_V001); break;
3316 default: abort();
3317 }
3318 }
3319
3320 static inline void gen_neon_subl(int size)
3321 {
3322 switch (size) {
3323 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
3324 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
3325 case 2: tcg_gen_sub_i64(CPU_V001); break;
3326 default: abort();
3327 }
3328 }
3329
3330 static inline void gen_neon_negl(TCGv_i64 var, int size)
3331 {
3332 switch (size) {
3333 case 0: gen_helper_neon_negl_u16(var, var); break;
3334 case 1: gen_helper_neon_negl_u32(var, var); break;
3335 case 2:
3336 tcg_gen_neg_i64(var, var);
3337 break;
3338 default: abort();
3339 }
3340 }
3341
3342 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3343 {
3344 switch (size) {
3345 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3346 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3347 default: abort();
3348 }
3349 }
3350
3351 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3352 int size, int u)
3353 {
3354 TCGv_i64 tmp;
3355
3356 switch ((size << 1) | u) {
3357 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3358 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3359 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3360 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3361 case 4:
3362 tmp = gen_muls_i64_i32(a, b);
3363 tcg_gen_mov_i64(dest, tmp);
3364 tcg_temp_free_i64(tmp);
3365 break;
3366 case 5:
3367 tmp = gen_mulu_i64_i32(a, b);
3368 tcg_gen_mov_i64(dest, tmp);
3369 tcg_temp_free_i64(tmp);
3370 break;
3371 default: abort();
3372 }
3373
3374 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
3375 They must be freed here. */
3376 if (size < 2) {
3377 tcg_temp_free_i32(a);
3378 tcg_temp_free_i32(b);
3379 }
3380 }
3381
3382 static void gen_neon_narrow_op(int op, int u, int size,
3383 TCGv_i32 dest, TCGv_i64 src)
3384 {
3385 if (op) {
3386 if (u) {
3387 gen_neon_unarrow_sats(size, dest, src);
3388 } else {
3389 gen_neon_narrow(size, dest, src);
3390 }
3391 } else {
3392 if (u) {
3393 gen_neon_narrow_satu(size, dest, src);
3394 } else {
3395 gen_neon_narrow_sats(size, dest, src);
3396 }
3397 }
3398 }
3399
3400 /* Symbolic constants for op fields for Neon 3-register same-length.
3401 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
3402 * table A7-9.
3403 */
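/*
 * That is, op == (extract32(insn, 8, 4) << 1) | extract32(insn, 4, 1);
 * NEON_3R_VSHL (8 == 0b01000), for example, has insn[11:8] = 0b0100 and
 * insn[4] = 0.
 */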
3404 #define NEON_3R_VHADD 0
3405 #define NEON_3R_VQADD 1
3406 #define NEON_3R_VRHADD 2
3407 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
3408 #define NEON_3R_VHSUB 4
3409 #define NEON_3R_VQSUB 5
3410 #define NEON_3R_VCGT 6
3411 #define NEON_3R_VCGE 7
3412 #define NEON_3R_VSHL 8
3413 #define NEON_3R_VQSHL 9
3414 #define NEON_3R_VRSHL 10
3415 #define NEON_3R_VQRSHL 11
3416 #define NEON_3R_VMAX 12
3417 #define NEON_3R_VMIN 13
3418 #define NEON_3R_VABD 14
3419 #define NEON_3R_VABA 15
3420 #define NEON_3R_VADD_VSUB 16
3421 #define NEON_3R_VTST_VCEQ 17
3422 #define NEON_3R_VML 18 /* VMLA, VMLS */
3423 #define NEON_3R_VMUL 19
3424 #define NEON_3R_VPMAX 20
3425 #define NEON_3R_VPMIN 21
3426 #define NEON_3R_VQDMULH_VQRDMULH 22
3427 #define NEON_3R_VPADD_VQRDMLAH 23
3428 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
3429 #define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
3430 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
3431 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
3432 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
3433 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
3434 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
3435 #define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
3436
3437 static const uint8_t neon_3r_sizes[] = {
3438 [NEON_3R_VHADD] = 0x7,
3439 [NEON_3R_VQADD] = 0xf,
3440 [NEON_3R_VRHADD] = 0x7,
3441 [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
3442 [NEON_3R_VHSUB] = 0x7,
3443 [NEON_3R_VQSUB] = 0xf,
3444 [NEON_3R_VCGT] = 0x7,
3445 [NEON_3R_VCGE] = 0x7,
3446 [NEON_3R_VSHL] = 0xf,
3447 [NEON_3R_VQSHL] = 0xf,
3448 [NEON_3R_VRSHL] = 0xf,
3449 [NEON_3R_VQRSHL] = 0xf,
3450 [NEON_3R_VMAX] = 0x7,
3451 [NEON_3R_VMIN] = 0x7,
3452 [NEON_3R_VABD] = 0x7,
3453 [NEON_3R_VABA] = 0x7,
3454 [NEON_3R_VADD_VSUB] = 0xf,
3455 [NEON_3R_VTST_VCEQ] = 0x7,
3456 [NEON_3R_VML] = 0x7,
3457 [NEON_3R_VMUL] = 0x7,
3458 [NEON_3R_VPMAX] = 0x7,
3459 [NEON_3R_VPMIN] = 0x7,
3460 [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
3461 [NEON_3R_VPADD_VQRDMLAH] = 0x7,
3462 [NEON_3R_SHA] = 0xf, /* size field encodes op type */
3463 [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
3464 [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
3465 [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
3466 [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
3467 [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
3468 [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
3469 [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
3470 };
3471
3472 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
3473 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
3474 * table A7-13.
3475 */
3476 #define NEON_2RM_VREV64 0
3477 #define NEON_2RM_VREV32 1
3478 #define NEON_2RM_VREV16 2
3479 #define NEON_2RM_VPADDL 4
3480 #define NEON_2RM_VPADDL_U 5
3481 #define NEON_2RM_AESE 6 /* Includes AESD */
3482 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
3483 #define NEON_2RM_VCLS 8
3484 #define NEON_2RM_VCLZ 9
3485 #define NEON_2RM_VCNT 10
3486 #define NEON_2RM_VMVN 11
3487 #define NEON_2RM_VPADAL 12
3488 #define NEON_2RM_VPADAL_U 13
3489 #define NEON_2RM_VQABS 14
3490 #define NEON_2RM_VQNEG 15
3491 #define NEON_2RM_VCGT0 16
3492 #define NEON_2RM_VCGE0 17
3493 #define NEON_2RM_VCEQ0 18
3494 #define NEON_2RM_VCLE0 19
3495 #define NEON_2RM_VCLT0 20
3496 #define NEON_2RM_SHA1H 21
3497 #define NEON_2RM_VABS 22
3498 #define NEON_2RM_VNEG 23
3499 #define NEON_2RM_VCGT0_F 24
3500 #define NEON_2RM_VCGE0_F 25
3501 #define NEON_2RM_VCEQ0_F 26
3502 #define NEON_2RM_VCLE0_F 27
3503 #define NEON_2RM_VCLT0_F 28
3504 #define NEON_2RM_VABS_F 30
3505 #define NEON_2RM_VNEG_F 31
3506 #define NEON_2RM_VSWP 32
3507 #define NEON_2RM_VTRN 33
3508 #define NEON_2RM_VUZP 34
3509 #define NEON_2RM_VZIP 35
3510 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3511 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3512 #define NEON_2RM_VSHLL 38
3513 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3514 #define NEON_2RM_VRINTN 40
3515 #define NEON_2RM_VRINTX 41
3516 #define NEON_2RM_VRINTA 42
3517 #define NEON_2RM_VRINTZ 43
3518 #define NEON_2RM_VCVT_F16_F32 44
3519 #define NEON_2RM_VRINTM 45
3520 #define NEON_2RM_VCVT_F32_F16 46
3521 #define NEON_2RM_VRINTP 47
3522 #define NEON_2RM_VCVTAU 48
3523 #define NEON_2RM_VCVTAS 49
3524 #define NEON_2RM_VCVTNU 50
3525 #define NEON_2RM_VCVTNS 51
3526 #define NEON_2RM_VCVTPU 52
3527 #define NEON_2RM_VCVTPS 53
3528 #define NEON_2RM_VCVTMU 54
3529 #define NEON_2RM_VCVTMS 55
3530 #define NEON_2RM_VRECPE 56
3531 #define NEON_2RM_VRSQRTE 57
3532 #define NEON_2RM_VRECPE_F 58
3533 #define NEON_2RM_VRSQRTE_F 59
3534 #define NEON_2RM_VCVT_FS 60
3535 #define NEON_2RM_VCVT_FU 61
3536 #define NEON_2RM_VCVT_SF 62
3537 #define NEON_2RM_VCVT_UF 63
3538
3539 static bool neon_2rm_is_v8_op(int op)
3540 {
3541 /* Return true if this neon 2reg-misc op is ARMv8 and up */
3542 switch (op) {
3543 case NEON_2RM_VRINTN:
3544 case NEON_2RM_VRINTA:
3545 case NEON_2RM_VRINTM:
3546 case NEON_2RM_VRINTP:
3547 case NEON_2RM_VRINTZ:
3548 case NEON_2RM_VRINTX:
3549 case NEON_2RM_VCVTAU:
3550 case NEON_2RM_VCVTAS:
3551 case NEON_2RM_VCVTNU:
3552 case NEON_2RM_VCVTNS:
3553 case NEON_2RM_VCVTPU:
3554 case NEON_2RM_VCVTPS:
3555 case NEON_2RM_VCVTMU:
3556 case NEON_2RM_VCVTMS:
3557 return true;
3558 default:
3559 return false;
3560 }
3561 }
3562
3563 /* Each entry in this array has bit n set if the insn allows
3564 * size value n (otherwise it will UNDEF). Since unallocated
3565 * op values will have no bits set they always UNDEF.
3566 */
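/* For example, 0x7 (0b111) permits sizes 0, 1 and 2; 0x4 permits only size 2. */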
3567 static const uint8_t neon_2rm_sizes[] = {
3568 [NEON_2RM_VREV64] = 0x7,
3569 [NEON_2RM_VREV32] = 0x3,
3570 [NEON_2RM_VREV16] = 0x1,
3571 [NEON_2RM_VPADDL] = 0x7,
3572 [NEON_2RM_VPADDL_U] = 0x7,
3573 [NEON_2RM_AESE] = 0x1,
3574 [NEON_2RM_AESMC] = 0x1,
3575 [NEON_2RM_VCLS] = 0x7,
3576 [NEON_2RM_VCLZ] = 0x7,
3577 [NEON_2RM_VCNT] = 0x1,
3578 [NEON_2RM_VMVN] = 0x1,
3579 [NEON_2RM_VPADAL] = 0x7,
3580 [NEON_2RM_VPADAL_U] = 0x7,
3581 [NEON_2RM_VQABS] = 0x7,
3582 [NEON_2RM_VQNEG] = 0x7,
3583 [NEON_2RM_VCGT0] = 0x7,
3584 [NEON_2RM_VCGE0] = 0x7,
3585 [NEON_2RM_VCEQ0] = 0x7,
3586 [NEON_2RM_VCLE0] = 0x7,
3587 [NEON_2RM_VCLT0] = 0x7,
3588 [NEON_2RM_SHA1H] = 0x4,
3589 [NEON_2RM_VABS] = 0x7,
3590 [NEON_2RM_VNEG] = 0x7,
3591 [NEON_2RM_VCGT0_F] = 0x4,
3592 [NEON_2RM_VCGE0_F] = 0x4,
3593 [NEON_2RM_VCEQ0_F] = 0x4,
3594 [NEON_2RM_VCLE0_F] = 0x4,
3595 [NEON_2RM_VCLT0_F] = 0x4,
3596 [NEON_2RM_VABS_F] = 0x4,
3597 [NEON_2RM_VNEG_F] = 0x4,
3598 [NEON_2RM_VSWP] = 0x1,
3599 [NEON_2RM_VTRN] = 0x7,
3600 [NEON_2RM_VUZP] = 0x7,
3601 [NEON_2RM_VZIP] = 0x7,
3602 [NEON_2RM_VMOVN] = 0x7,
3603 [NEON_2RM_VQMOVN] = 0x7,
3604 [NEON_2RM_VSHLL] = 0x7,
3605 [NEON_2RM_SHA1SU1] = 0x4,
3606 [NEON_2RM_VRINTN] = 0x4,
3607 [NEON_2RM_VRINTX] = 0x4,
3608 [NEON_2RM_VRINTA] = 0x4,
3609 [NEON_2RM_VRINTZ] = 0x4,
3610 [NEON_2RM_VCVT_F16_F32] = 0x2,
3611 [NEON_2RM_VRINTM] = 0x4,
3612 [NEON_2RM_VCVT_F32_F16] = 0x2,
3613 [NEON_2RM_VRINTP] = 0x4,
3614 [NEON_2RM_VCVTAU] = 0x4,
3615 [NEON_2RM_VCVTAS] = 0x4,
3616 [NEON_2RM_VCVTNU] = 0x4,
3617 [NEON_2RM_VCVTNS] = 0x4,
3618 [NEON_2RM_VCVTPU] = 0x4,
3619 [NEON_2RM_VCVTPS] = 0x4,
3620 [NEON_2RM_VCVTMU] = 0x4,
3621 [NEON_2RM_VCVTMS] = 0x4,
3622 [NEON_2RM_VRECPE] = 0x4,
3623 [NEON_2RM_VRSQRTE] = 0x4,
3624 [NEON_2RM_VRECPE_F] = 0x4,
3625 [NEON_2RM_VRSQRTE_F] = 0x4,
3626 [NEON_2RM_VCVT_FS] = 0x4,
3627 [NEON_2RM_VCVT_FU] = 0x4,
3628 [NEON_2RM_VCVT_SF] = 0x4,
3629 [NEON_2RM_VCVT_UF] = 0x4,
3630 };
3631
3632
3633 /* Expand v8.1 simd helper. */
3634 static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
3635 int q, int rd, int rn, int rm)
3636 {
3637 if (dc_isar_feature(aa32_rdm, s)) {
3638 int opr_sz = (1 + q) * 8;
3639 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
3640 vfp_reg_offset(1, rn),
3641 vfp_reg_offset(1, rm), cpu_env,
3642 opr_sz, opr_sz, 0, fn);
3643 return 0;
3644 }
3645 return 1;
3646 }
3647
3648 #define GEN_CMP0(NAME, COND) \
3649 static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
3650 { \
3651 tcg_gen_setcondi_i32(COND, d, a, 0); \
3652 tcg_gen_neg_i32(d, d); \
3653 } \
3654 static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
3655 { \
3656 tcg_gen_setcondi_i64(COND, d, a, 0); \
3657 tcg_gen_neg_i64(d, d); \
3658 } \
3659 static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3660 { \
3661 TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
3662 tcg_gen_cmp_vec(COND, vece, d, a, zero); \
3663 tcg_temp_free_vec(zero); \
3664 } \
3665 void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
3666 uint32_t opr_sz, uint32_t max_sz) \
3667 { \
3668 const GVecGen2 op[4] = { \
3669 { .fno = gen_helper_gvec_##NAME##0_b, \
3670 .fniv = gen_##NAME##0_vec, \
3671 .opt_opc = vecop_list_cmp, \
3672 .vece = MO_8 }, \
3673 { .fno = gen_helper_gvec_##NAME##0_h, \
3674 .fniv = gen_##NAME##0_vec, \
3675 .opt_opc = vecop_list_cmp, \
3676 .vece = MO_16 }, \
3677 { .fni4 = gen_##NAME##0_i32, \
3678 .fniv = gen_##NAME##0_vec, \
3679 .opt_opc = vecop_list_cmp, \
3680 .vece = MO_32 }, \
3681 { .fni8 = gen_##NAME##0_i64, \
3682 .fniv = gen_##NAME##0_vec, \
3683 .opt_opc = vecop_list_cmp, \
3684 .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
3685 .vece = MO_64 }, \
3686 }; \
3687 tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
3688 }
3689
3690 static const TCGOpcode vecop_list_cmp[] = {
3691 INDEX_op_cmp_vec, 0
3692 };
3693
3694 GEN_CMP0(ceq, TCG_COND_EQ)
3695 GEN_CMP0(cle, TCG_COND_LE)
3696 GEN_CMP0(cge, TCG_COND_GE)
3697 GEN_CMP0(clt, TCG_COND_LT)
3698 GEN_CMP0(cgt, TCG_COND_GT)
3699
3700 #undef GEN_CMP0
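/*
 * The scalar expanders above rely on tcg_gen_setcond* producing 0 or 1;
 * negating that value yields the 0 / all-ones element mask that the Neon
 * compare-with-zero operations are defined to produce.
 */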
3701
3702 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3703 {
3704 tcg_gen_vec_sar8i_i64(a, a, shift);
3705 tcg_gen_vec_add8_i64(d, d, a);
3706 }
3707
3708 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3709 {
3710 tcg_gen_vec_sar16i_i64(a, a, shift);
3711 tcg_gen_vec_add16_i64(d, d, a);
3712 }
3713
3714 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3715 {
3716 tcg_gen_sari_i32(a, a, shift);
3717 tcg_gen_add_i32(d, d, a);
3718 }
3719
3720 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3721 {
3722 tcg_gen_sari_i64(a, a, shift);
3723 tcg_gen_add_i64(d, d, a);
3724 }
3725
3726 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3727 {
3728 tcg_gen_sari_vec(vece, a, a, sh);
3729 tcg_gen_add_vec(vece, d, d, a);
3730 }
3731
3732 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3733 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3734 {
3735 static const TCGOpcode vecop_list[] = {
3736 INDEX_op_sari_vec, INDEX_op_add_vec, 0
3737 };
3738 static const GVecGen2i ops[4] = {
3739 { .fni8 = gen_ssra8_i64,
3740 .fniv = gen_ssra_vec,
3741 .fno = gen_helper_gvec_ssra_b,
3742 .load_dest = true,
3743 .opt_opc = vecop_list,
3744 .vece = MO_8 },
3745 { .fni8 = gen_ssra16_i64,
3746 .fniv = gen_ssra_vec,
3747 .fno = gen_helper_gvec_ssra_h,
3748 .load_dest = true,
3749 .opt_opc = vecop_list,
3750 .vece = MO_16 },
3751 { .fni4 = gen_ssra32_i32,
3752 .fniv = gen_ssra_vec,
3753 .fno = gen_helper_gvec_ssra_s,
3754 .load_dest = true,
3755 .opt_opc = vecop_list,
3756 .vece = MO_32 },
3757 { .fni8 = gen_ssra64_i64,
3758 .fniv = gen_ssra_vec,
3759 .fno = gen_helper_gvec_ssra_d,
3760 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3761 .opt_opc = vecop_list,
3762 .load_dest = true,
3763 .vece = MO_64 },
3764 };
3765
3766 /* tszimm encoding produces immediates in the range [1..esize]. */
3767 tcg_debug_assert(shift > 0);
3768 tcg_debug_assert(shift <= (8 << vece));
3769
3770 /*
3771 * Shifts larger than the element size are architecturally valid.
3772 * Signed results in all sign bits.
3773 */
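    /*
     * For MO_8, for instance, an encoded shift of 8 is clamped to 7 by
     * the MIN below, so each byte contributes only its sign (0 or -1)
     * to the accumulation.
     */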
3774 shift = MIN(shift, (8 << vece) - 1);
3775 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3776 }
3777
3778 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3779 {
3780 tcg_gen_vec_shr8i_i64(a, a, shift);
3781 tcg_gen_vec_add8_i64(d, d, a);
3782 }
3783
3784 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3785 {
3786 tcg_gen_vec_shr16i_i64(a, a, shift);
3787 tcg_gen_vec_add16_i64(d, d, a);
3788 }
3789
3790 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3791 {
3792 tcg_gen_shri_i32(a, a, shift);
3793 tcg_gen_add_i32(d, d, a);
3794 }
3795
3796 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3797 {
3798 tcg_gen_shri_i64(a, a, shift);
3799 tcg_gen_add_i64(d, d, a);
3800 }
3801
3802 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3803 {
3804 tcg_gen_shri_vec(vece, a, a, sh);
3805 tcg_gen_add_vec(vece, d, d, a);
3806 }
3807
3808 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3809 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3810 {
3811 static const TCGOpcode vecop_list[] = {
3812 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3813 };
3814 static const GVecGen2i ops[4] = {
3815 { .fni8 = gen_usra8_i64,
3816 .fniv = gen_usra_vec,
3817 .fno = gen_helper_gvec_usra_b,
3818 .load_dest = true,
3819 .opt_opc = vecop_list,
3820 .vece = MO_8, },
3821 { .fni8 = gen_usra16_i64,
3822 .fniv = gen_usra_vec,
3823 .fno = gen_helper_gvec_usra_h,
3824 .load_dest = true,
3825 .opt_opc = vecop_list,
3826 .vece = MO_16, },
3827 { .fni4 = gen_usra32_i32,
3828 .fniv = gen_usra_vec,
3829 .fno = gen_helper_gvec_usra_s,
3830 .load_dest = true,
3831 .opt_opc = vecop_list,
3832 .vece = MO_32, },
3833 { .fni8 = gen_usra64_i64,
3834 .fniv = gen_usra_vec,
3835 .fno = gen_helper_gvec_usra_d,
3836 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3837 .load_dest = true,
3838 .opt_opc = vecop_list,
3839 .vece = MO_64, },
3840 };
3841
3842 /* tszimm encoding produces immediates in the range [1..esize]. */
3843 tcg_debug_assert(shift > 0);
3844 tcg_debug_assert(shift <= (8 << vece));
3845
3846 /*
3847 * Shifts larger than the element size are architecturally valid.
3848 * Unsigned results in all zeros as input to accumulate: nop.
3849 */
3850 if (shift < (8 << vece)) {
3851 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3852 } else {
3853 /* Nop, but we do need to clear the tail. */
3854 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3855 }
3856 }
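/*
 * Worked example (illustrative values, not from the original source): for
 * USRA with vece = MO_16 and shift == 16 every element would shift to zero,
 * so the accumulate adds nothing and only the tail-clearing move above has
 * a visible effect.
 */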
3857
3858 /*
3859 * Shift one less than the requested amount, and the low bit is
3860 * the rounding bit. For the 8 and 16-bit operations, because we
3861 * mask the low bit, we can perform a normal integer shift instead
3862 * of a vector shift.
3863 */
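/*
 * Worked example (illustrative values, not from the original source):
 * srshr with sh == 2 applied to the value 7 computes t = (7 >> 1) & 1 == 1
 * and d = (7 >> 2) + t == 2, i.e. 7/4 == 1.75 rounded to 2 by adding back
 * the last bit shifted out.
 */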
3864 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3865 {
3866 TCGv_i64 t = tcg_temp_new_i64();
3867
3868 tcg_gen_shri_i64(t, a, sh - 1);
3869 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3870 tcg_gen_vec_sar8i_i64(d, a, sh);
3871 tcg_gen_vec_add8_i64(d, d, t);
3872 tcg_temp_free_i64(t);
3873 }
3874
3875 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3876 {
3877 TCGv_i64 t = tcg_temp_new_i64();
3878
3879 tcg_gen_shri_i64(t, a, sh - 1);
3880 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3881 tcg_gen_vec_sar16i_i64(d, a, sh);
3882 tcg_gen_vec_add16_i64(d, d, t);
3883 tcg_temp_free_i64(t);
3884 }
3885
3886 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3887 {
3888 TCGv_i32 t = tcg_temp_new_i32();
3889
3890 tcg_gen_extract_i32(t, a, sh - 1, 1);
3891 tcg_gen_sari_i32(d, a, sh);
3892 tcg_gen_add_i32(d, d, t);
3893 tcg_temp_free_i32(t);
3894 }
3895
3896 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3897 {
3898 TCGv_i64 t = tcg_temp_new_i64();
3899
3900 tcg_gen_extract_i64(t, a, sh - 1, 1);
3901 tcg_gen_sari_i64(d, a, sh);
3902 tcg_gen_add_i64(d, d, t);
3903 tcg_temp_free_i64(t);
3904 }
3905
3906 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3907 {
3908 TCGv_vec t = tcg_temp_new_vec_matching(d);
3909 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3910
3911 tcg_gen_shri_vec(vece, t, a, sh - 1);
3912 tcg_gen_dupi_vec(vece, ones, 1);
3913 tcg_gen_and_vec(vece, t, t, ones);
3914 tcg_gen_sari_vec(vece, d, a, sh);
3915 tcg_gen_add_vec(vece, d, d, t);
3916
3917 tcg_temp_free_vec(t);
3918 tcg_temp_free_vec(ones);
3919 }
3920
3921 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3922 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3923 {
3924 static const TCGOpcode vecop_list[] = {
3925 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3926 };
3927 static const GVecGen2i ops[4] = {
3928 { .fni8 = gen_srshr8_i64,
3929 .fniv = gen_srshr_vec,
3930 .fno = gen_helper_gvec_srshr_b,
3931 .opt_opc = vecop_list,
3932 .vece = MO_8 },
3933 { .fni8 = gen_srshr16_i64,
3934 .fniv = gen_srshr_vec,
3935 .fno = gen_helper_gvec_srshr_h,
3936 .opt_opc = vecop_list,
3937 .vece = MO_16 },
3938 { .fni4 = gen_srshr32_i32,
3939 .fniv = gen_srshr_vec,
3940 .fno = gen_helper_gvec_srshr_s,
3941 .opt_opc = vecop_list,
3942 .vece = MO_32 },
3943 { .fni8 = gen_srshr64_i64,
3944 .fniv = gen_srshr_vec,
3945 .fno = gen_helper_gvec_srshr_d,
3946 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3947 .opt_opc = vecop_list,
3948 .vece = MO_64 },
3949 };
3950
3951 /* tszimm encoding produces immediates in the range [1..esize] */
3952 tcg_debug_assert(shift > 0);
3953 tcg_debug_assert(shift <= (8 << vece));
3954
3955 if (shift == (8 << vece)) {
3956 /*
3957 * Shifts larger than the element size are architecturally valid.
3958 * Signed results in all sign bits. With rounding, this produces
3959 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3960 * I.e. always zero.
3961 */
3962 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3963 } else {
3964 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3965 }
3966 }
3967
3968 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3969 {
3970 TCGv_i64 t = tcg_temp_new_i64();
3971
3972 gen_srshr8_i64(t, a, sh);
3973 tcg_gen_vec_add8_i64(d, d, t);
3974 tcg_temp_free_i64(t);
3975 }
3976
3977 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3978 {
3979 TCGv_i64 t = tcg_temp_new_i64();
3980
3981 gen_srshr16_i64(t, a, sh);
3982 tcg_gen_vec_add16_i64(d, d, t);
3983 tcg_temp_free_i64(t);
3984 }
3985
3986 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3987 {
3988 TCGv_i32 t = tcg_temp_new_i32();
3989
3990 gen_srshr32_i32(t, a, sh);
3991 tcg_gen_add_i32(d, d, t);
3992 tcg_temp_free_i32(t);
3993 }
3994
3995 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3996 {
3997 TCGv_i64 t = tcg_temp_new_i64();
3998
3999 gen_srshr64_i64(t, a, sh);
4000 tcg_gen_add_i64(d, d, t);
4001 tcg_temp_free_i64(t);
4002 }
4003
4004 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4005 {
4006 TCGv_vec t = tcg_temp_new_vec_matching(d);
4007
4008 gen_srshr_vec(vece, t, a, sh);
4009 tcg_gen_add_vec(vece, d, d, t);
4010 tcg_temp_free_vec(t);
4011 }
4012
4013 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4014 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4015 {
4016 static const TCGOpcode vecop_list[] = {
4017 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
4018 };
4019 static const GVecGen2i ops[4] = {
4020 { .fni8 = gen_srsra8_i64,
4021 .fniv = gen_srsra_vec,
4022 .fno = gen_helper_gvec_srsra_b,
4023 .opt_opc = vecop_list,
4024 .load_dest = true,
4025 .vece = MO_8 },
4026 { .fni8 = gen_srsra16_i64,
4027 .fniv = gen_srsra_vec,
4028 .fno = gen_helper_gvec_srsra_h,
4029 .opt_opc = vecop_list,
4030 .load_dest = true,
4031 .vece = MO_16 },
4032 { .fni4 = gen_srsra32_i32,
4033 .fniv = gen_srsra_vec,
4034 .fno = gen_helper_gvec_srsra_s,
4035 .opt_opc = vecop_list,
4036 .load_dest = true,
4037 .vece = MO_32 },
4038 { .fni8 = gen_srsra64_i64,
4039 .fniv = gen_srsra_vec,
4040 .fno = gen_helper_gvec_srsra_d,
4041 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4042 .opt_opc = vecop_list,
4043 .load_dest = true,
4044 .vece = MO_64 },
4045 };
4046
4047 /* tszimm encoding produces immediates in the range [1..esize] */
4048 tcg_debug_assert(shift > 0);
4049 tcg_debug_assert(shift <= (8 << vece));
4050
4051 /*
4052 * Shifts larger than the element size are architecturally valid.
4053 * Signed results in all sign bits. With rounding, this produces
4054 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
4055 * I.e. always zero. With accumulation, this leaves D unchanged.
4056 */
4057 if (shift == (8 << vece)) {
4058 /* Nop, but we do need to clear the tail. */
4059 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4060 } else {
4061 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4062 }
4063 }
4064
4065 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4066 {
4067 TCGv_i64 t = tcg_temp_new_i64();
4068
4069 tcg_gen_shri_i64(t, a, sh - 1);
4070 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
4071 tcg_gen_vec_shr8i_i64(d, a, sh);
4072 tcg_gen_vec_add8_i64(d, d, t);
4073 tcg_temp_free_i64(t);
4074 }
4075
4076 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4077 {
4078 TCGv_i64 t = tcg_temp_new_i64();
4079
4080 tcg_gen_shri_i64(t, a, sh - 1);
4081 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
4082 tcg_gen_vec_shr16i_i64(d, a, sh);
4083 tcg_gen_vec_add16_i64(d, d, t);
4084 tcg_temp_free_i64(t);
4085 }
4086
4087 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4088 {
4089 TCGv_i32 t = tcg_temp_new_i32();
4090
4091 tcg_gen_extract_i32(t, a, sh - 1, 1);
4092 tcg_gen_shri_i32(d, a, sh);
4093 tcg_gen_add_i32(d, d, t);
4094 tcg_temp_free_i32(t);
4095 }
4096
4097 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4098 {
4099 TCGv_i64 t = tcg_temp_new_i64();
4100
4101 tcg_gen_extract_i64(t, a, sh - 1, 1);
4102 tcg_gen_shri_i64(d, a, sh);
4103 tcg_gen_add_i64(d, d, t);
4104 tcg_temp_free_i64(t);
4105 }
4106
4107 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
4108 {
4109 TCGv_vec t = tcg_temp_new_vec_matching(d);
4110 TCGv_vec ones = tcg_temp_new_vec_matching(d);
4111
4112 tcg_gen_shri_vec(vece, t, a, shift - 1);
4113 tcg_gen_dupi_vec(vece, ones, 1);
4114 tcg_gen_and_vec(vece, t, t, ones);
4115 tcg_gen_shri_vec(vece, d, a, shift);
4116 tcg_gen_add_vec(vece, d, d, t);
4117
4118 tcg_temp_free_vec(t);
4119 tcg_temp_free_vec(ones);
4120 }
4121
4122 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4123 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4124 {
4125 static const TCGOpcode vecop_list[] = {
4126 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4127 };
4128 static const GVecGen2i ops[4] = {
4129 { .fni8 = gen_urshr8_i64,
4130 .fniv = gen_urshr_vec,
4131 .fno = gen_helper_gvec_urshr_b,
4132 .opt_opc = vecop_list,
4133 .vece = MO_8 },
4134 { .fni8 = gen_urshr16_i64,
4135 .fniv = gen_urshr_vec,
4136 .fno = gen_helper_gvec_urshr_h,
4137 .opt_opc = vecop_list,
4138 .vece = MO_16 },
4139 { .fni4 = gen_urshr32_i32,
4140 .fniv = gen_urshr_vec,
4141 .fno = gen_helper_gvec_urshr_s,
4142 .opt_opc = vecop_list,
4143 .vece = MO_32 },
4144 { .fni8 = gen_urshr64_i64,
4145 .fniv = gen_urshr_vec,
4146 .fno = gen_helper_gvec_urshr_d,
4147 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4148 .opt_opc = vecop_list,
4149 .vece = MO_64 },
4150 };
4151
4152 /* tszimm encoding produces immediates in the range [1..esize] */
4153 tcg_debug_assert(shift > 0);
4154 tcg_debug_assert(shift <= (8 << vece));
4155
4156 if (shift == (8 << vece)) {
4157 /*
4158 * Shifts larger than the element size are architecturally valid.
4159 * Unsigned results in zero. With rounding, this produces a
4160 * copy of the most significant bit.
4161 */
4162 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
4163 } else {
4164 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4165 }
4166 }
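/*
 * Worked example (illustrative values, not from the original source): for
 * URSHR with vece = MO_8 and shift == 8 only the rounding bit survives, so
 * 0x80 becomes 0x80 >> 7 == 1 and 0x7f becomes 0, i.e. a copy of bit 7 as
 * described above.
 */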
4167
4168 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4169 {
4170 TCGv_i64 t = tcg_temp_new_i64();
4171
4172 if (sh == 8) {
4173 tcg_gen_vec_shr8i_i64(t, a, 7);
4174 } else {
4175 gen_urshr8_i64(t, a, sh);
4176 }
4177 tcg_gen_vec_add8_i64(d, d, t);
4178 tcg_temp_free_i64(t);
4179 }
4180
4181 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4182 {
4183 TCGv_i64 t = tcg_temp_new_i64();
4184
4185 if (sh == 16) {
4186 tcg_gen_vec_shr16i_i64(t, a, 15);
4187 } else {
4188 gen_urshr16_i64(t, a, sh);
4189 }
4190 tcg_gen_vec_add16_i64(d, d, t);
4191 tcg_temp_free_i64(t);
4192 }
4193
4194 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4195 {
4196 TCGv_i32 t = tcg_temp_new_i32();
4197
4198 if (sh == 32) {
4199 tcg_gen_shri_i32(t, a, 31);
4200 } else {
4201 gen_urshr32_i32(t, a, sh);
4202 }
4203 tcg_gen_add_i32(d, d, t);
4204 tcg_temp_free_i32(t);
4205 }
4206
4207 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4208 {
4209 TCGv_i64 t = tcg_temp_new_i64();
4210
4211 if (sh == 64) {
4212 tcg_gen_shri_i64(t, a, 63);
4213 } else {
4214 gen_urshr64_i64(t, a, sh);
4215 }
4216 tcg_gen_add_i64(d, d, t);
4217 tcg_temp_free_i64(t);
4218 }
4219
4220 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4221 {
4222 TCGv_vec t = tcg_temp_new_vec_matching(d);
4223
4224 if (sh == (8 << vece)) {
4225 tcg_gen_shri_vec(vece, t, a, sh - 1);
4226 } else {
4227 gen_urshr_vec(vece, t, a, sh);
4228 }
4229 tcg_gen_add_vec(vece, d, d, t);
4230 tcg_temp_free_vec(t);
4231 }
4232
4233 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4234 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4235 {
4236 static const TCGOpcode vecop_list[] = {
4237 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4238 };
4239 static const GVecGen2i ops[4] = {
4240 { .fni8 = gen_ursra8_i64,
4241 .fniv = gen_ursra_vec,
4242 .fno = gen_helper_gvec_ursra_b,
4243 .opt_opc = vecop_list,
4244 .load_dest = true,
4245 .vece = MO_8 },
4246 { .fni8 = gen_ursra16_i64,
4247 .fniv = gen_ursra_vec,
4248 .fno = gen_helper_gvec_ursra_h,
4249 .opt_opc = vecop_list,
4250 .load_dest = true,
4251 .vece = MO_16 },
4252 { .fni4 = gen_ursra32_i32,
4253 .fniv = gen_ursra_vec,
4254 .fno = gen_helper_gvec_ursra_s,
4255 .opt_opc = vecop_list,
4256 .load_dest = true,
4257 .vece = MO_32 },
4258 { .fni8 = gen_ursra64_i64,
4259 .fniv = gen_ursra_vec,
4260 .fno = gen_helper_gvec_ursra_d,
4261 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4262 .opt_opc = vecop_list,
4263 .load_dest = true,
4264 .vece = MO_64 },
4265 };
4266
4267 /* tszimm encoding produces immediates in the range [1..esize] */
4268 tcg_debug_assert(shift > 0);
4269 tcg_debug_assert(shift <= (8 << vece));
4270
4271 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4272 }
4273
4274 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4275 {
4276 uint64_t mask = dup_const(MO_8, 0xff >> shift);
4277 TCGv_i64 t = tcg_temp_new_i64();
4278
4279 tcg_gen_shri_i64(t, a, shift);
4280 tcg_gen_andi_i64(t, t, mask);
4281 tcg_gen_andi_i64(d, d, ~mask);
4282 tcg_gen_or_i64(d, d, t);
4283 tcg_temp_free_i64(t);
4284 }
4285
4286 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4287 {
4288 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4289 TCGv_i64 t = tcg_temp_new_i64();
4290
4291 tcg_gen_shri_i64(t, a, shift);
4292 tcg_gen_andi_i64(t, t, mask);
4293 tcg_gen_andi_i64(d, d, ~mask);
4294 tcg_gen_or_i64(d, d, t);
4295 tcg_temp_free_i64(t);
4296 }
4297
4298 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4299 {
4300 tcg_gen_shri_i32(a, a, shift);
4301 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4302 }
4303
4304 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4305 {
4306 tcg_gen_shri_i64(a, a, shift);
4307 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4308 }
4309
4310 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4311 {
4312 TCGv_vec t = tcg_temp_new_vec_matching(d);
4313 TCGv_vec m = tcg_temp_new_vec_matching(d);
4314
4315 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4316 tcg_gen_shri_vec(vece, t, a, sh);
4317 tcg_gen_and_vec(vece, d, d, m);
4318 tcg_gen_or_vec(vece, d, d, t);
4319
4320 tcg_temp_free_vec(t);
4321 tcg_temp_free_vec(m);
4322 }
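/*
 * Worked example (illustrative values, not from the original source): SRI
 * with vece = MO_8 and sh == 4 builds the mask 0xf0, so with d = 0xab and
 * a = 0xcd the result is (0xab & 0xf0) | (0xcd >> 4) == 0xac: the top four
 * bits of d are preserved and the shifted bits of a are inserted below them.
 */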
4323
4324 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4325 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4326 {
4327 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
4328 const GVecGen2i ops[4] = {
4329 { .fni8 = gen_shr8_ins_i64,
4330 .fniv = gen_shr_ins_vec,
4331 .fno = gen_helper_gvec_sri_b,
4332 .load_dest = true,
4333 .opt_opc = vecop_list,
4334 .vece = MO_8 },
4335 { .fni8 = gen_shr16_ins_i64,
4336 .fniv = gen_shr_ins_vec,
4337 .fno = gen_helper_gvec_sri_h,
4338 .load_dest = true,
4339 .opt_opc = vecop_list,
4340 .vece = MO_16 },
4341 { .fni4 = gen_shr32_ins_i32,
4342 .fniv = gen_shr_ins_vec,
4343 .fno = gen_helper_gvec_sri_s,
4344 .load_dest = true,
4345 .opt_opc = vecop_list,
4346 .vece = MO_32 },
4347 { .fni8 = gen_shr64_ins_i64,
4348 .fniv = gen_shr_ins_vec,
4349 .fno = gen_helper_gvec_sri_d,
4350 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4351 .load_dest = true,
4352 .opt_opc = vecop_list,
4353 .vece = MO_64 },
4354 };
4355
4356 /* tszimm encoding produces immediates in the range [1..esize]. */
4357 tcg_debug_assert(shift > 0);
4358 tcg_debug_assert(shift <= (8 << vece));
4359
4360 /* Shift of esize leaves destination unchanged. */
4361 if (shift < (8 << vece)) {
4362 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4363 } else {
4364 /* Nop, but we do need to clear the tail. */
4365 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4366 }
4367 }
4368
4369 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4370 {
4371 uint64_t mask = dup_const(MO_8, 0xff << shift);
4372 TCGv_i64 t = tcg_temp_new_i64();
4373
4374 tcg_gen_shli_i64(t, a, shift);
4375 tcg_gen_andi_i64(t, t, mask);
4376 tcg_gen_andi_i64(d, d, ~mask);
4377 tcg_gen_or_i64(d, d, t);
4378 tcg_temp_free_i64(t);
4379 }
4380
4381 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4382 {
4383 uint64_t mask = dup_const(MO_16, 0xffff << shift);
4384 TCGv_i64 t = tcg_temp_new_i64();
4385
4386 tcg_gen_shli_i64(t, a, shift);
4387 tcg_gen_andi_i64(t, t, mask);
4388 tcg_gen_andi_i64(d, d, ~mask);
4389 tcg_gen_or_i64(d, d, t);
4390 tcg_temp_free_i64(t);
4391 }
4392
4393 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4394 {
4395 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4396 }
4397
4398 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4399 {
4400 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
4401 }
4402
4403 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4404 {
4405 TCGv_vec t = tcg_temp_new_vec_matching(d);
4406 TCGv_vec m = tcg_temp_new_vec_matching(d);
4407
4408 tcg_gen_shli_vec(vece, t, a, sh);
4409 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4410 tcg_gen_and_vec(vece, d, d, m);
4411 tcg_gen_or_vec(vece, d, d, t);
4412
4413 tcg_temp_free_vec(t);
4414 tcg_temp_free_vec(m);
4415 }
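/*
 * Worked example (illustrative values, not from the original source): SLI
 * with vece = MO_8 and sh == 4 builds the mask 0x0f, so with d = 0xab and
 * a = 0xcd the result is (0xab & 0x0f) | ((0xcd << 4) & 0xff) == 0xdb: the
 * low four bits of d are preserved and the shifted bits of a are inserted
 * above them.
 */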
4416
4417 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4418 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4419 {
4420 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
4421 const GVecGen2i ops[4] = {
4422 { .fni8 = gen_shl8_ins_i64,
4423 .fniv = gen_shl_ins_vec,
4424 .fno = gen_helper_gvec_sli_b,
4425 .load_dest = true,
4426 .opt_opc = vecop_list,
4427 .vece = MO_8 },
4428 { .fni8 = gen_shl16_ins_i64,
4429 .fniv = gen_shl_ins_vec,
4430 .fno = gen_helper_gvec_sli_h,
4431 .load_dest = true,
4432 .opt_opc = vecop_list,
4433 .vece = MO_16 },
4434 { .fni4 = gen_shl32_ins_i32,
4435 .fniv = gen_shl_ins_vec,
4436 .fno = gen_helper_gvec_sli_s,
4437 .load_dest = true,
4438 .opt_opc = vecop_list,
4439 .vece = MO_32 },
4440 { .fni8 = gen_shl64_ins_i64,
4441 .fniv = gen_shl_ins_vec,
4442 .fno = gen_helper_gvec_sli_d,
4443 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4444 .load_dest = true,
4445 .opt_opc = vecop_list,
4446 .vece = MO_64 },
4447 };
4448
4449 /* tszimm encoding produces immediates in the range [0..esize-1]. */
4450 tcg_debug_assert(shift >= 0);
4451 tcg_debug_assert(shift < (8 << vece));
4452
4453 if (shift == 0) {
4454 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
4455 } else {
4456 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4457 }
4458 }
4459
4460 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4461 {
4462 gen_helper_neon_mul_u8(a, a, b);
4463 gen_helper_neon_add_u8(d, d, a);
4464 }
4465
4466 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4467 {
4468 gen_helper_neon_mul_u8(a, a, b);
4469 gen_helper_neon_sub_u8(d, d, a);
4470 }
4471
4472 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4473 {
4474 gen_helper_neon_mul_u16(a, a, b);
4475 gen_helper_neon_add_u16(d, d, a);
4476 }
4477
4478 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4479 {
4480 gen_helper_neon_mul_u16(a, a, b);
4481 gen_helper_neon_sub_u16(d, d, a);
4482 }
4483
4484 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4485 {
4486 tcg_gen_mul_i32(a, a, b);
4487 tcg_gen_add_i32(d, d, a);
4488 }
4489
4490 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4491 {
4492 tcg_gen_mul_i32(a, a, b);
4493 tcg_gen_sub_i32(d, d, a);
4494 }
4495
4496 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4497 {
4498 tcg_gen_mul_i64(a, a, b);
4499 tcg_gen_add_i64(d, d, a);
4500 }
4501
4502 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4503 {
4504 tcg_gen_mul_i64(a, a, b);
4505 tcg_gen_sub_i64(d, d, a);
4506 }
4507
4508 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4509 {
4510 tcg_gen_mul_vec(vece, a, a, b);
4511 tcg_gen_add_vec(vece, d, d, a);
4512 }
4513
4514 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4515 {
4516 tcg_gen_mul_vec(vece, a, a, b);
4517 tcg_gen_sub_vec(vece, d, d, a);
4518 }
4519
4520 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4521 * these tables are shared with AArch64 which does support them.
4522 */
4523 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4524 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4525 {
4526 static const TCGOpcode vecop_list[] = {
4527 INDEX_op_mul_vec, INDEX_op_add_vec, 0
4528 };
4529 static const GVecGen3 ops[4] = {
4530 { .fni4 = gen_mla8_i32,
4531 .fniv = gen_mla_vec,
4532 .load_dest = true,
4533 .opt_opc = vecop_list,
4534 .vece = MO_8 },
4535 { .fni4 = gen_mla16_i32,
4536 .fniv = gen_mla_vec,
4537 .load_dest = true,
4538 .opt_opc = vecop_list,
4539 .vece = MO_16 },
4540 { .fni4 = gen_mla32_i32,
4541 .fniv = gen_mla_vec,
4542 .load_dest = true,
4543 .opt_opc = vecop_list,
4544 .vece = MO_32 },
4545 { .fni8 = gen_mla64_i64,
4546 .fniv = gen_mla_vec,
4547 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4548 .load_dest = true,
4549 .opt_opc = vecop_list,
4550 .vece = MO_64 },
4551 };
4552 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4553 }
4554
4555 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4556 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4557 {
4558 static const TCGOpcode vecop_list[] = {
4559 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4560 };
4561 static const GVecGen3 ops[4] = {
4562 { .fni4 = gen_mls8_i32,
4563 .fniv = gen_mls_vec,
4564 .load_dest = true,
4565 .opt_opc = vecop_list,
4566 .vece = MO_8 },
4567 { .fni4 = gen_mls16_i32,
4568 .fniv = gen_mls_vec,
4569 .load_dest = true,
4570 .opt_opc = vecop_list,
4571 .vece = MO_16 },
4572 { .fni4 = gen_mls32_i32,
4573 .fniv = gen_mls_vec,
4574 .load_dest = true,
4575 .opt_opc = vecop_list,
4576 .vece = MO_32 },
4577 { .fni8 = gen_mls64_i64,
4578 .fniv = gen_mls_vec,
4579 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4580 .load_dest = true,
4581 .opt_opc = vecop_list,
4582 .vece = MO_64 },
4583 };
4584 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4585 }
4586
4587 /* CMTST : test is "if ((X & Y) != 0)". */
4588 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4589 {
4590 tcg_gen_and_i32(d, a, b);
4591 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4592 tcg_gen_neg_i32(d, d);
4593 }
4594
4595 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4596 {
4597 tcg_gen_and_i64(d, a, b);
4598 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4599 tcg_gen_neg_i64(d, d);
4600 }
4601
4602 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4603 {
4604 tcg_gen_and_vec(vece, d, a, b);
4605 tcg_gen_dupi_vec(vece, a, 0);
4606 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4607 }
4608
4609 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4610 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4611 {
4612 static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
4613 static const GVecGen3 ops[4] = {
4614 { .fni4 = gen_helper_neon_tst_u8,
4615 .fniv = gen_cmtst_vec,
4616 .opt_opc = vecop_list,
4617 .vece = MO_8 },
4618 { .fni4 = gen_helper_neon_tst_u16,
4619 .fniv = gen_cmtst_vec,
4620 .opt_opc = vecop_list,
4621 .vece = MO_16 },
4622 { .fni4 = gen_cmtst_i32,
4623 .fniv = gen_cmtst_vec,
4624 .opt_opc = vecop_list,
4625 .vece = MO_32 },
4626 { .fni8 = gen_cmtst_i64,
4627 .fniv = gen_cmtst_vec,
4628 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4629 .opt_opc = vecop_list,
4630 .vece = MO_64 },
4631 };
4632 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4633 }
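/*
 * Worked example (illustrative values, not from the original source): for
 * MO_8, 0x0f CMTST 0x10 gives 0x00 because the operands share no set bits,
 * while 0x0f CMTST 0x11 gives 0xff because (0x0f & 0x11) != 0.
 */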
4634
4635 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4636 {
4637 TCGv_i32 lval = tcg_temp_new_i32();
4638 TCGv_i32 rval = tcg_temp_new_i32();
4639 TCGv_i32 lsh = tcg_temp_new_i32();
4640 TCGv_i32 rsh = tcg_temp_new_i32();
4641 TCGv_i32 zero = tcg_const_i32(0);
4642 TCGv_i32 max = tcg_const_i32(32);
4643
4644 /*
4645 * Rely on the TCG guarantee that out of range shifts produce
4646 * unspecified results, not undefined behaviour (i.e. no trap).
4647 * Discard out-of-range results after the fact.
4648 */
4649 tcg_gen_ext8s_i32(lsh, shift);
4650 tcg_gen_neg_i32(rsh, lsh);
4651 tcg_gen_shl_i32(lval, src, lsh);
4652 tcg_gen_shr_i32(rval, src, rsh);
4653 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4654 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4655
4656 tcg_temp_free_i32(lval);
4657 tcg_temp_free_i32(rval);
4658 tcg_temp_free_i32(lsh);
4659 tcg_temp_free_i32(rsh);
4660 tcg_temp_free_i32(zero);
4661 tcg_temp_free_i32(max);
4662 }
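/*
 * Worked example (illustrative values, not from the original source): a
 * shift operand whose low byte is 0xfe sign-extends to lsh = -2 and
 * rsh = 2, so the right-shift result is selected; a low byte of 40 leaves
 * both lsh and rsh outside [0, 32) under the unsigned compares and both
 * movconds above fall through to zero.
 */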
4663
4664 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4665 {
4666 TCGv_i64 lval = tcg_temp_new_i64();
4667 TCGv_i64 rval = tcg_temp_new_i64();
4668 TCGv_i64 lsh = tcg_temp_new_i64();
4669 TCGv_i64 rsh = tcg_temp_new_i64();
4670 TCGv_i64 zero = tcg_const_i64(0);
4671 TCGv_i64 max = tcg_const_i64(64);
4672
4673 /*
4674 * Rely on the TCG guarantee that out of range shifts produce
4675 * unspecified results, not undefined behaviour (i.e. no trap).
4676 * Discard out-of-range results after the fact.
4677 */
4678 tcg_gen_ext8s_i64(lsh, shift);
4679 tcg_gen_neg_i64(rsh, lsh);
4680 tcg_gen_shl_i64(lval, src, lsh);
4681 tcg_gen_shr_i64(rval, src, rsh);
4682 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4683 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4684
4685 tcg_temp_free_i64(lval);
4686 tcg_temp_free_i64(rval);
4687 tcg_temp_free_i64(lsh);
4688 tcg_temp_free_i64(rsh);
4689 tcg_temp_free_i64(zero);
4690 tcg_temp_free_i64(max);
4691 }
4692
4693 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4694 TCGv_vec src, TCGv_vec shift)
4695 {
4696 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4697 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4698 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4699 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4700 TCGv_vec msk, max;
4701
4702 tcg_gen_neg_vec(vece, rsh, shift);
4703 if (vece == MO_8) {
4704 tcg_gen_mov_vec(lsh, shift);
4705 } else {
4706 msk = tcg_temp_new_vec_matching(dst);
4707 tcg_gen_dupi_vec(vece, msk, 0xff);
4708 tcg_gen_and_vec(vece, lsh, shift, msk);
4709 tcg_gen_and_vec(vece, rsh, rsh, msk);
4710 tcg_temp_free_vec(msk);
4711 }
4712
4713 /*
4714 * Rely on the TCG guarantee that out of range shifts produce
4715 * unspecified results, not undefined behaviour (i.e. no trap).
4716 * Discard out-of-range results after the fact.
4717 */
4718 tcg_gen_shlv_vec(vece, lval, src, lsh);
4719 tcg_gen_shrv_vec(vece, rval, src, rsh);
4720
4721 max = tcg_temp_new_vec_matching(dst);
4722 tcg_gen_dupi_vec(vece, max, 8 << vece);
4723
4724 /*
4725 * The choice of LT (signed) and GEU (unsigned) is biased toward
4726 * the instructions of the x86_64 host. For MO_8, the whole byte
4727 * is significant so we must use an unsigned compare; otherwise we
4728 * have already masked to a byte and so a signed compare works.
4729 * Other tcg hosts have a full set of comparisons and do not care.
4730 */
4731 if (vece == MO_8) {
4732 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4733 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4734 tcg_gen_andc_vec(vece, lval, lval, lsh);
4735 tcg_gen_andc_vec(vece, rval, rval, rsh);
4736 } else {
4737 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4738 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4739 tcg_gen_and_vec(vece, lval, lval, lsh);
4740 tcg_gen_and_vec(vece, rval, rval, rsh);
4741 }
4742 tcg_gen_or_vec(vece, dst, lval, rval);
4743
4744 tcg_temp_free_vec(max);
4745 tcg_temp_free_vec(lval);
4746 tcg_temp_free_vec(rval);
4747 tcg_temp_free_vec(lsh);
4748 tcg_temp_free_vec(rsh);
4749 }
4750
4751 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4752 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4753 {
4754 static const TCGOpcode vecop_list[] = {
4755 INDEX_op_neg_vec, INDEX_op_shlv_vec,
4756 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4757 };
4758 static const GVecGen3 ops[4] = {
4759 { .fniv = gen_ushl_vec,
4760 .fno = gen_helper_gvec_ushl_b,
4761 .opt_opc = vecop_list,
4762 .vece = MO_8 },
4763 { .fniv = gen_ushl_vec,
4764 .fno = gen_helper_gvec_ushl_h,
4765 .opt_opc = vecop_list,
4766 .vece = MO_16 },
4767 { .fni4 = gen_ushl_i32,
4768 .fniv = gen_ushl_vec,
4769 .opt_opc = vecop_list,
4770 .vece = MO_32 },
4771 { .fni8 = gen_ushl_i64,
4772 .fniv = gen_ushl_vec,
4773 .opt_opc = vecop_list,
4774 .vece = MO_64 },
4775 };
4776 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4777 }
4778
4779 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4780 {
4781 TCGv_i32 lval = tcg_temp_new_i32();
4782 TCGv_i32 rval = tcg_temp_new_i32();
4783 TCGv_i32 lsh = tcg_temp_new_i32();
4784 TCGv_i32 rsh = tcg_temp_new_i32();
4785 TCGv_i32 zero = tcg_const_i32(0);
4786 TCGv_i32 max = tcg_const_i32(31);
4787
4788 /*
4789 * Rely on the TCG guarantee that out of range shifts produce
4790 * unspecified results, not undefined behaviour (i.e. no trap).
4791 * Discard out-of-range results after the fact.
4792 */
4793 tcg_gen_ext8s_i32(lsh, shift);
4794 tcg_gen_neg_i32(rsh, lsh);
4795 tcg_gen_shl_i32(lval, src, lsh);
4796 tcg_gen_umin_i32(rsh, rsh, max);
4797 tcg_gen_sar_i32(rval, src, rsh);
4798 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4799 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4800
4801 tcg_temp_free_i32(lval);
4802 tcg_temp_free_i32(rval);
4803 tcg_temp_free_i32(lsh);
4804 tcg_temp_free_i32(rsh);
4805 tcg_temp_free_i32(zero);
4806 tcg_temp_free_i32(max);
4807 }
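/*
 * Worked example (illustrative values, not from the original source): a
 * shift operand whose low byte is -70 gives rsh = 70, which umin clamps to
 * 31 so the arithmetic shift still produces 0 or -1 for an out-of-range
 * right shift instead of relying on an unspecified shift result.
 */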
4808
4809 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4810 {
4811 TCGv_i64 lval = tcg_temp_new_i64();
4812 TCGv_i64 rval = tcg_temp_new_i64();
4813 TCGv_i64 lsh = tcg_temp_new_i64();
4814 TCGv_i64 rsh = tcg_temp_new_i64();
4815 TCGv_i64 zero = tcg_const_i64(0);
4816 TCGv_i64 max = tcg_const_i64(63);
4817
4818 /*
4819 * Rely on the TCG guarantee that out of range shifts produce
4820 * unspecified results, not undefined behaviour (i.e. no trap).
4821 * Discard out-of-range results after the fact.
4822 */
4823 tcg_gen_ext8s_i64(lsh, shift);
4824 tcg_gen_neg_i64(rsh, lsh);
4825 tcg_gen_shl_i64(lval, src, lsh);
4826 tcg_gen_umin_i64(rsh, rsh, max);
4827 tcg_gen_sar_i64(rval, src, rsh);
4828 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4829 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4830
4831 tcg_temp_free_i64(lval);
4832 tcg_temp_free_i64(rval);
4833 tcg_temp_free_i64(lsh);
4834 tcg_temp_free_i64(rsh);
4835 tcg_temp_free_i64(zero);
4836 tcg_temp_free_i64(max);
4837 }
4838
4839 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4840 TCGv_vec src, TCGv_vec shift)
4841 {
4842 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4843 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4844 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4845 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4846 TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4847
4848 /*
4849 * Rely on the TCG guarantee that out of range shifts produce
4850 * unspecified results, not undefined behaviour (i.e. no trap).
4851 * Discard out-of-range results after the fact.
4852 */
4853 tcg_gen_neg_vec(vece, rsh, shift);
4854 if (vece == MO_8) {
4855 tcg_gen_mov_vec(lsh, shift);
4856 } else {
4857 tcg_gen_dupi_vec(vece, tmp, 0xff);
4858 tcg_gen_and_vec(vece, lsh, shift, tmp);
4859 tcg_gen_and_vec(vece, rsh, rsh, tmp);
4860 }
4861
4862 /* Bound rsh so out of bound right shift gets -1. */
4863 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4864 tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4865 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4866
4867 tcg_gen_shlv_vec(vece, lval, src, lsh);
4868 tcg_gen_sarv_vec(vece, rval, src, rsh);
4869
4870 /* Select in-bound left shift. */
4871 tcg_gen_andc_vec(vece, lval, lval, tmp);
4872
4873 /* Select between left and right shift. */
4874 if (vece == MO_8) {
4875 tcg_gen_dupi_vec(vece, tmp, 0);
4876 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4877 } else {
4878 tcg_gen_dupi_vec(vece, tmp, 0x80);
4879 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4880 }
4881
4882 tcg_temp_free_vec(lval);
4883 tcg_temp_free_vec(rval);
4884 tcg_temp_free_vec(lsh);
4885 tcg_temp_free_vec(rsh);
4886 tcg_temp_free_vec(tmp);
4887 }
4888
4889 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4890 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4891 {
4892 static const TCGOpcode vecop_list[] = {
4893 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4894 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4895 };
4896 static const GVecGen3 ops[4] = {
4897 { .fniv = gen_sshl_vec,
4898 .fno = gen_helper_gvec_sshl_b,
4899 .opt_opc = vecop_list,
4900 .vece = MO_8 },
4901 { .fniv = gen_sshl_vec,
4902 .fno = gen_helper_gvec_sshl_h,
4903 .opt_opc = vecop_list,
4904 .vece = MO_16 },
4905 { .fni4 = gen_sshl_i32,
4906 .fniv = gen_sshl_vec,
4907 .opt_opc = vecop_list,
4908 .vece = MO_32 },
4909 { .fni8 = gen_sshl_i64,
4910 .fniv = gen_sshl_vec,
4911 .opt_opc = vecop_list,
4912 .vece = MO_64 },
4913 };
4914 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4915 }
4916
4917 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4918 TCGv_vec a, TCGv_vec b)
4919 {
4920 TCGv_vec x = tcg_temp_new_vec_matching(t);
4921 tcg_gen_add_vec(vece, x, a, b);
4922 tcg_gen_usadd_vec(vece, t, a, b);
4923 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4924 tcg_gen_or_vec(vece, sat, sat, x);
4925 tcg_temp_free_vec(x);
4926 }
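/*
 * Worked example (illustrative values, not from the original source): for
 * UQADD with vece = MO_8, 0xf0 + 0x20 saturates to 0xff while the plain add
 * wraps to 0x10; the two results differ, so the compare writes all-ones
 * into that lane of the QC accumulator.
 */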
4927
4928 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4929 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4930 {
4931 static const TCGOpcode vecop_list[] = {
4932 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4933 };
4934 static const GVecGen4 ops[4] = {
4935 { .fniv = gen_uqadd_vec,
4936 .fno = gen_helper_gvec_uqadd_b,
4937 .write_aofs = true,
4938 .opt_opc = vecop_list,
4939 .vece = MO_8 },
4940 { .fniv = gen_uqadd_vec,
4941 .fno = gen_helper_gvec_uqadd_h,
4942 .write_aofs = true,
4943 .opt_opc = vecop_list,
4944 .vece = MO_16 },
4945 { .fniv = gen_uqadd_vec,
4946 .fno = gen_helper_gvec_uqadd_s,
4947 .write_aofs = true,
4948 .opt_opc = vecop_list,
4949 .vece = MO_32 },
4950 { .fniv = gen_uqadd_vec,
4951 .fno = gen_helper_gvec_uqadd_d,
4952 .write_aofs = true,
4953 .opt_opc = vecop_list,
4954 .vece = MO_64 },
4955 };
4956 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4957 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4958 }
4959
4960 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4961 TCGv_vec a, TCGv_vec b)
4962 {
4963 TCGv_vec x = tcg_temp_new_vec_matching(t);
4964 tcg_gen_add_vec(vece, x, a, b);
4965 tcg_gen_ssadd_vec(vece, t, a, b);
4966 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4967 tcg_gen_or_vec(vece, sat, sat, x);
4968 tcg_temp_free_vec(x);
4969 }
4970
4971 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4972 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4973 {
4974 static const TCGOpcode vecop_list[] = {
4975 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4976 };
4977 static const GVecGen4 ops[4] = {
4978 { .fniv = gen_sqadd_vec,
4979 .fno = gen_helper_gvec_sqadd_b,
4980 .opt_opc = vecop_list,
4981 .write_aofs = true,
4982 .vece = MO_8 },
4983 { .fniv = gen_sqadd_vec,
4984 .fno = gen_helper_gvec_sqadd_h,
4985 .opt_opc = vecop_list,
4986 .write_aofs = true,
4987 .vece = MO_16 },
4988 { .fniv = gen_sqadd_vec,
4989 .fno = gen_helper_gvec_sqadd_s,
4990 .opt_opc = vecop_list,
4991 .write_aofs = true,
4992 .vece = MO_32 },
4993 { .fniv = gen_sqadd_vec,
4994 .fno = gen_helper_gvec_sqadd_d,
4995 .opt_opc = vecop_list,
4996 .write_aofs = true,
4997 .vece = MO_64 },
4998 };
4999 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5000 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5001 }
5002
5003 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5004 TCGv_vec a, TCGv_vec b)
5005 {
5006 TCGv_vec x = tcg_temp_new_vec_matching(t);
5007 tcg_gen_sub_vec(vece, x, a, b);
5008 tcg_gen_ussub_vec(vece, t, a, b);
5009 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5010 tcg_gen_or_vec(vece, sat, sat, x);
5011 tcg_temp_free_vec(x);
5012 }
5013
5014 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5015 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5016 {
5017 static const TCGOpcode vecop_list[] = {
5018 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
5019 };
5020 static const GVecGen4 ops[4] = {
5021 { .fniv = gen_uqsub_vec,
5022 .fno = gen_helper_gvec_uqsub_b,
5023 .opt_opc = vecop_list,
5024 .write_aofs = true,
5025 .vece = MO_8 },
5026 { .fniv = gen_uqsub_vec,
5027 .fno = gen_helper_gvec_uqsub_h,
5028 .opt_opc = vecop_list,
5029 .write_aofs = true,
5030 .vece = MO_16 },
5031 { .fniv = gen_uqsub_vec,
5032 .fno = gen_helper_gvec_uqsub_s,
5033 .opt_opc = vecop_list,
5034 .write_aofs = true,
5035 .vece = MO_32 },
5036 { .fniv = gen_uqsub_vec,
5037 .fno = gen_helper_gvec_uqsub_d,
5038 .opt_opc = vecop_list,
5039 .write_aofs = true,
5040 .vece = MO_64 },
5041 };
5042 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5043 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5044 }
5045
5046 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5047 TCGv_vec a, TCGv_vec b)
5048 {
5049 TCGv_vec x = tcg_temp_new_vec_matching(t);
5050 tcg_gen_sub_vec(vece, x, a, b);
5051 tcg_gen_sssub_vec(vece, t, a, b);
5052 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5053 tcg_gen_or_vec(vece, sat, sat, x);
5054 tcg_temp_free_vec(x);
5055 }
5056
5057 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5058 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5059 {
5060 static const TCGOpcode vecop_list[] = {
5061 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
5062 };
5063 static const GVecGen4 ops[4] = {
5064 { .fniv = gen_sqsub_vec,
5065 .fno = gen_helper_gvec_sqsub_b,
5066 .opt_opc = vecop_list,
5067 .write_aofs = true,
5068 .vece = MO_8 },
5069 { .fniv = gen_sqsub_vec,
5070 .fno = gen_helper_gvec_sqsub_h,
5071 .opt_opc = vecop_list,
5072 .write_aofs = true,
5073 .vece = MO_16 },
5074 { .fniv = gen_sqsub_vec,
5075 .fno = gen_helper_gvec_sqsub_s,
5076 .opt_opc = vecop_list,
5077 .write_aofs = true,
5078 .vece = MO_32 },
5079 { .fniv = gen_sqsub_vec,
5080 .fno = gen_helper_gvec_sqsub_d,
5081 .opt_opc = vecop_list,
5082 .write_aofs = true,
5083 .vece = MO_64 },
5084 };
5085 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5086 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5087 }
5088
5089 /* Translate a NEON data processing instruction. Return nonzero if the
5090 instruction is invalid.
5091 We process data in a mixture of 32-bit and 64-bit chunks.
5092 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
5093
5094 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5095 {
5096 int op;
5097 int q;
5098 int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
5099 int size;
5100 int shift;
5101 int pass;
5102 int count;
5103 int pairwise;
5104 int u;
5105 int vec_size;
5106 uint32_t imm;
5107 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5108 TCGv_ptr ptr1, ptr2, ptr3;
5109 TCGv_i64 tmp64;
5110
5111 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
5112 return 1;
5113 }
5114
5115 /* FIXME: this access check should not take precedence over UNDEF
5116 * for invalid encodings; we will generate incorrect syndrome information
5117 * for attempts to execute invalid vfp/neon encodings with FP disabled.
5118 */
5119 if (s->fp_excp_el) {
5120 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
5121 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
5122 return 0;
5123 }
5124
5125 if (!s->vfp_enabled)
5126 return 1;
5127 q = (insn & (1 << 6)) != 0;
5128 u = (insn >> 24) & 1;
5129 VFP_DREG_D(rd, insn);
5130 VFP_DREG_N(rn, insn);
5131 VFP_DREG_M(rm, insn);
5132 size = (insn >> 20) & 3;
5133 vec_size = q ? 16 : 8;
5134 rd_ofs = neon_reg_offset(rd, 0);
5135 rn_ofs = neon_reg_offset(rn, 0);
5136 rm_ofs = neon_reg_offset(rm, 0);
5137
5138 if ((insn & (1 << 23)) == 0) {
5139 /* Three register same length. */
5140 op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
5141 /* Catch invalid op and bad size combinations: UNDEF */
5142 if ((neon_3r_sizes[op] & (1 << size)) == 0) {
5143 return 1;
5144 }
5145 /* All insns of this form UNDEF for either this condition or the
5146 * superset of cases "Q==1"; we catch the latter later.
5147 */
5148 if (q && ((rd | rn | rm) & 1)) {
5149 return 1;
5150 }
5151 switch (op) {
5152 case NEON_3R_SHA:
5153 /* The SHA-1/SHA-256 3-register instructions require special
5154 * treatment here, as their size field is overloaded as an
5155 * op type selector, and they all consume their input in a
5156 * single pass.
5157 */
5158 if (!q) {
5159 return 1;
5160 }
5161 if (!u) { /* SHA-1 */
5162 if (!dc_isar_feature(aa32_sha1, s)) {
5163 return 1;
5164 }
5165 ptr1 = vfp_reg_ptr(true, rd);
5166 ptr2 = vfp_reg_ptr(true, rn);
5167 ptr3 = vfp_reg_ptr(true, rm);
5168 tmp4 = tcg_const_i32(size);
5169 gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
5170 tcg_temp_free_i32(tmp4);
5171 } else { /* SHA-256 */
5172 if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
5173 return 1;
5174 }
5175 ptr1 = vfp_reg_ptr(true, rd);
5176 ptr2 = vfp_reg_ptr(true, rn);
5177 ptr3 = vfp_reg_ptr(true, rm);
5178 switch (size) {
5179 case 0:
5180 gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
5181 break;
5182 case 1:
5183 gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
5184 break;
5185 case 2:
5186 gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
5187 break;
5188 }
5189 }
5190 tcg_temp_free_ptr(ptr1);
5191 tcg_temp_free_ptr(ptr2);
5192 tcg_temp_free_ptr(ptr3);
5193 return 0;
5194
5195 case NEON_3R_VPADD_VQRDMLAH:
5196 if (!u) {
5197 break; /* VPADD */
5198 }
5199 /* VQRDMLAH */
5200 switch (size) {
5201 case 1:
5202 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
5203 q, rd, rn, rm);
5204 case 2:
5205 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
5206 q, rd, rn, rm);
5207 }
5208 return 1;
5209
5210 case NEON_3R_VFM_VQRDMLSH:
5211 if (!u) {
5212 /* VFM, VFMS */
5213 if (size == 1) {
5214 return 1;
5215 }
5216 break;
5217 }
5218 /* VQRDMLSH */
5219 switch (size) {
5220 case 1:
5221 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
5222 q, rd, rn, rm);
5223 case 2:
5224 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
5225 q, rd, rn, rm);
5226 }
5227 return 1;
5228
5229 case NEON_3R_VADD_VSUB:
5230 case NEON_3R_LOGIC:
5231 case NEON_3R_VMAX:
5232 case NEON_3R_VMIN:
5233 case NEON_3R_VTST_VCEQ:
5234 case NEON_3R_VCGT:
5235 case NEON_3R_VCGE:
5236 case NEON_3R_VQADD:
5237 case NEON_3R_VQSUB:
5238 case NEON_3R_VMUL:
5239 case NEON_3R_VML:
5240 case NEON_3R_VSHL:
5241 /* Already handled by decodetree */
5242 return 1;
5243 }
5244
5245 if (size == 3) {
5246 /* 64-bit element instructions. */
5247 for (pass = 0; pass < (q ? 2 : 1); pass++) {
5248 neon_load_reg64(cpu_V0, rn + pass);
5249 neon_load_reg64(cpu_V1, rm + pass);
5250 switch (op) {
5251 case NEON_3R_VQSHL:
5252 if (u) {
5253 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5254 cpu_V1, cpu_V0);
5255 } else {
5256 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5257 cpu_V1, cpu_V0);
5258 }
5259 break;
5260 case NEON_3R_VRSHL:
5261 if (u) {
5262 gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
5263 } else {
5264 gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
5265 }
5266 break;
5267 case NEON_3R_VQRSHL:
5268 if (u) {
5269 gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
5270 cpu_V1, cpu_V0);
5271 } else {
5272 gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
5273 cpu_V1, cpu_V0);
5274 }
5275 break;
5276 default:
5277 abort();
5278 }
5279 neon_store_reg64(cpu_V0, rd + pass);
5280 }
5281 return 0;
5282 }
5283 pairwise = 0;
5284 switch (op) {
5285 case NEON_3R_VQSHL:
5286 case NEON_3R_VRSHL:
5287 case NEON_3R_VQRSHL:
5288 {
5289 int rtmp;
5290 /* Shift instruction operands are reversed. */
5291 rtmp = rn;
5292 rn = rm;
5293 rm = rtmp;
5294 }
5295 break;
5296 case NEON_3R_VPADD_VQRDMLAH:
5297 case NEON_3R_VPMAX:
5298 case NEON_3R_VPMIN:
5299 pairwise = 1;
5300 break;
5301 case NEON_3R_FLOAT_ARITH:
5302 pairwise = (u && size < 2); /* if VPADD (float) */
5303 break;
5304 case NEON_3R_FLOAT_MINMAX:
5305 pairwise = u; /* if VPMIN/VPMAX (float) */
5306 break;
5307 case NEON_3R_FLOAT_CMP:
5308 if (!u && size) {
5309 /* no encoding for U=0 C=1x */
5310 return 1;
5311 }
5312 break;
5313 case NEON_3R_FLOAT_ACMP:
5314 if (!u) {
5315 return 1;
5316 }
5317 break;
5318 case NEON_3R_FLOAT_MISC:
5319 /* VMAXNM/VMINNM in ARMv8 */
5320 if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
5321 return 1;
5322 }
5323 break;
5324 case NEON_3R_VFM_VQRDMLSH:
5325 if (!dc_isar_feature(aa32_simdfmac, s)) {
5326 return 1;
5327 }
5328 break;
5329 default:
5330 break;
5331 }
5332
5333 if (pairwise && q) {
5334 /* All the pairwise insns UNDEF if Q is set */
5335 return 1;
5336 }
5337
5338 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5339
5340 if (pairwise) {
5341 /* Pairwise. */
5342 if (pass < 1) {
5343 tmp = neon_load_reg(rn, 0);
5344 tmp2 = neon_load_reg(rn, 1);
5345 } else {
5346 tmp = neon_load_reg(rm, 0);
5347 tmp2 = neon_load_reg(rm, 1);
5348 }
5349 } else {
5350 /* Elementwise. */
5351 tmp = neon_load_reg(rn, pass);
5352 tmp2 = neon_load_reg(rm, pass);
5353 }
5354 switch (op) {
5355 case NEON_3R_VHADD:
5356 GEN_NEON_INTEGER_OP(hadd);
5357 break;
5358 case NEON_3R_VRHADD:
5359 GEN_NEON_INTEGER_OP(rhadd);
5360 break;
5361 case NEON_3R_VHSUB:
5362 GEN_NEON_INTEGER_OP(hsub);
5363 break;
5364 case NEON_3R_VQSHL:
5365 GEN_NEON_INTEGER_OP_ENV(qshl);
5366 break;
5367 case NEON_3R_VRSHL:
5368 GEN_NEON_INTEGER_OP(rshl);
5369 break;
5370 case NEON_3R_VQRSHL:
5371 GEN_NEON_INTEGER_OP_ENV(qrshl);
5372 break;
5373 case NEON_3R_VABD:
5374 GEN_NEON_INTEGER_OP(abd);
5375 break;
5376 case NEON_3R_VABA:
5377 GEN_NEON_INTEGER_OP(abd);
5378 tcg_temp_free_i32(tmp2);
5379 tmp2 = neon_load_reg(rd, pass);
5380 gen_neon_add(size, tmp, tmp2);
5381 break;
5382 case NEON_3R_VPMAX:
5383 GEN_NEON_INTEGER_OP(pmax);
5384 break;
5385 case NEON_3R_VPMIN:
5386 GEN_NEON_INTEGER_OP(pmin);
5387 break;
5388 case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
5389 if (!u) { /* VQDMULH */
5390 switch (size) {
5391 case 1:
5392 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5393 break;
5394 case 2:
5395 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5396 break;
5397 default: abort();
5398 }
5399 } else { /* VQRDMULH */
5400 switch (size) {
5401 case 1:
5402 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5403 break;
5404 case 2:
5405 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5406 break;
5407 default: abort();
5408 }
5409 }
5410 break;
5411 case NEON_3R_VPADD_VQRDMLAH:
5412 switch (size) {
5413 case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
5414 case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
5415 case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
5416 default: abort();
5417 }
5418 break;
5419 case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
5420 {
5421 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5422 switch ((u << 2) | size) {
5423 case 0: /* VADD */
5424 case 4: /* VPADD */
5425 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5426 break;
5427 case 2: /* VSUB */
5428 gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
5429 break;
5430 case 6: /* VABD */
5431 gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
5432 break;
5433 default:
5434 abort();
5435 }
5436 tcg_temp_free_ptr(fpstatus);
5437 break;
5438 }
5439 case NEON_3R_FLOAT_MULTIPLY:
5440 {
5441 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5442 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5443 if (!u) {
5444 tcg_temp_free_i32(tmp2);
5445 tmp2 = neon_load_reg(rd, pass);
5446 if (size == 0) {
5447 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5448 } else {
5449 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5450 }
5451 }
5452 tcg_temp_free_ptr(fpstatus);
5453 break;
5454 }
5455 case NEON_3R_FLOAT_CMP:
5456 {
5457 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5458 if (!u) {
5459 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5460 } else {
5461 if (size == 0) {
5462 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5463 } else {
5464 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5465 }
5466 }
5467 tcg_temp_free_ptr(fpstatus);
5468 break;
5469 }
5470 case NEON_3R_FLOAT_ACMP:
5471 {
5472 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5473 if (size == 0) {
5474 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
5475 } else {
5476 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
5477 }
5478 tcg_temp_free_ptr(fpstatus);
5479 break;
5480 }
5481 case NEON_3R_FLOAT_MINMAX:
5482 {
5483 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5484 if (size == 0) {
5485 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5486 } else {
5487 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5488 }
5489 tcg_temp_free_ptr(fpstatus);
5490 break;
5491 }
5492 case NEON_3R_FLOAT_MISC:
5493 if (u) {
5494 /* VMAXNM/VMINNM */
5495 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5496 if (size == 0) {
5497 gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5498 } else {
5499 gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5500 }
5501 tcg_temp_free_ptr(fpstatus);
5502 } else {
5503 if (size == 0) {
5504 gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5505 } else {
5506 gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5507 }
5508 }
5509 break;
5510 case NEON_3R_VFM_VQRDMLSH:
5511 {
5512 /* VFMA, VFMS: fused multiply-add */
5513 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5514 TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5515 if (size) {
5516 /* VFMS */
5517 gen_helper_vfp_negs(tmp, tmp);
5518 }
5519 gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5520 tcg_temp_free_i32(tmp3);
5521 tcg_temp_free_ptr(fpstatus);
5522 break;
5523 }
5524 default:
5525 abort();
5526 }
5527 tcg_temp_free_i32(tmp2);
5528
5529 /* Save the result. For elementwise operations we can put it
5530 straight into the destination register. For pairwise operations
5531 we have to be careful to avoid clobbering the source operands. */
5532 if (pairwise && rd == rm) {
5533 neon_store_scratch(pass, tmp);
5534 } else {
5535 neon_store_reg(rd, pass, tmp);
5536 }
5537
5538 } /* for pass */
5539 if (pairwise && rd == rm) {
5540 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5541 tmp = neon_load_scratch(pass);
5542 neon_store_reg(rd, pass, tmp);
5543 }
5544 }
5545 /* End of 3 register same size operations. */
5546 } else if (insn & (1 << 4)) {
5547 if ((insn & 0x00380080) != 0) {
5548 /* Two registers and shift. */
5549 op = (insn >> 8) & 0xf;
5550 if (insn & (1 << 7)) {
5551 /* 64-bit shift. */
5552 if (op > 7) {
5553 return 1;
5554 }
5555 size = 3;
5556 } else {
5557 size = 2;
5558 while ((insn & (1 << (size + 19))) == 0)
5559 size--;
5560 }
5561 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5562 if (op < 8) {
5563 /* Shift by immediate:
5564 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
5565 if (q && ((rd | rm) & 1)) {
5566 return 1;
5567 }
5568 if (!u && (op == 4 || op == 6)) {
5569 return 1;
5570 }
5571 /* Right shifts are encoded as N - shift, where N is the
5572 element size in bits. */
5573 if (op <= 4) {
5574 shift = shift - (1 << (size + 3));
5575 }
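/*
 * Worked example (illustrative values, not from the original source): for
 * a 16-bit VSHR by 3 the field extracted above holds 16 - 3 == 13, the
 * subtraction here yields -3, and the VSHR case below negates it back to
 * a right shift by 3.
 */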
5576
5577 switch (op) {
5578 case 0: /* VSHR */
5579 /* Right shift comes here negative. */
5580 shift = -shift;
5581 /* Shifts larger than the element size are architecturally
5582 * valid. Unsigned results in all zeros; signed results
5583 * in all sign bits.
5584 */
5585 if (!u) {
5586 tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
5587 MIN(shift, (8 << size) - 1),
5588 vec_size, vec_size);
5589 } else if (shift >= 8 << size) {
5590 tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size,
5591 vec_size, 0);
5592 } else {
5593 tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
5594 vec_size, vec_size);
5595 }
5596 return 0;
5597
5598 case 1: /* VSRA */
5599 /* Right shift comes here negative. */
5600 shift = -shift;
5601 if (u) {
5602 gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
5603 vec_size, vec_size);
5604 } else {
5605 gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
5606 vec_size, vec_size);
5607 }
5608 return 0;
5609
5610 case 2: /* VRSHR */
5611 /* Right shift comes here negative. */
5612 shift = -shift;
5613 if (u) {
5614 gen_gvec_urshr(size, rd_ofs, rm_ofs, shift,
5615 vec_size, vec_size);
5616 } else {
5617 gen_gvec_srshr(size, rd_ofs, rm_ofs, shift,
5618 vec_size, vec_size);
5619 }
5620 return 0;
5621
5622 case 3: /* VRSRA */
5623 /* Right shift comes here negative. */
5624 shift = -shift;
5625 if (u) {
5626 gen_gvec_ursra(size, rd_ofs, rm_ofs, shift,
5627 vec_size, vec_size);
5628 } else {
5629 gen_gvec_srsra(size, rd_ofs, rm_ofs, shift,
5630 vec_size, vec_size);
5631 }
5632 return 0;
5633
5634 case 4: /* VSRI */
5635 if (!u) {
5636 return 1;
5637 }
5638 /* Right shift comes here negative. */
5639 shift = -shift;
5640 gen_gvec_sri(size, rd_ofs, rm_ofs, shift,
5641 vec_size, vec_size);
5642 return 0;
5643
5644 case 5: /* VSHL, VSLI */
5645 if (u) { /* VSLI */
5646 gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
5647 vec_size, vec_size);
5648 } else { /* VSHL */
5649 tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
5650 vec_size, vec_size);
5651 }
5652 return 0;
5653 }
5654
5655 if (size == 3) {
5656 count = q + 1;
5657 } else {
5658 count = q ? 4 : 2;
5659 }
5660
5661 /* To avoid excessive duplication of ops we implement shift
5662 * by immediate using the variable shift operations.
5663 */
5664 imm = dup_const(size, shift);
5665
5666 for (pass = 0; pass < count; pass++) {
5667 if (size == 3) {
5668 neon_load_reg64(cpu_V0, rm + pass);
5669 tcg_gen_movi_i64(cpu_V1, imm);
5670 switch (op) {
5671 case 6: /* VQSHLU */
5672 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5673 cpu_V0, cpu_V1);
5674 break;
5675 case 7: /* VQSHL */
5676 if (u) {
5677 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5678 cpu_V0, cpu_V1);
5679 } else {
5680 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5681 cpu_V0, cpu_V1);
5682 }
5683 break;
5684 default:
5685 g_assert_not_reached();
5686 }
5687 neon_store_reg64(cpu_V0, rd + pass);
5688 } else { /* size < 3 */
5689 /* Operands in T0 and T1. */
5690 tmp = neon_load_reg(rm, pass);
5691 tmp2 = tcg_temp_new_i32();
5692 tcg_gen_movi_i32(tmp2, imm);
5693 switch (op) {
5694 case 6: /* VQSHLU */
5695 switch (size) {
5696 case 0:
5697 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5698 tmp, tmp2);
5699 break;
5700 case 1:
5701 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5702 tmp, tmp2);
5703 break;
5704 case 2:
5705 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5706 tmp, tmp2);
5707 break;
5708 default:
5709 abort();
5710 }
5711 break;
5712 case 7: /* VQSHL */
5713 GEN_NEON_INTEGER_OP_ENV(qshl);
5714 break;
5715 default:
5716 g_assert_not_reached();
5717 }
5718 tcg_temp_free_i32(tmp2);
5719 neon_store_reg(rd, pass, tmp);
5720 }
5721 } /* for pass */
5722 } else if (op < 10) {
5723 /* Shift by immediate and narrow:
5724 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
5725 int input_unsigned = (op == 8) ? !u : u;
5726 if (rm & 1) {
5727 return 1;
5728 }
5729 shift = shift - (1 << (size + 3));
5730 size++;
5731 if (size == 3) {
5732 tmp64 = tcg_const_i64(shift);
5733 neon_load_reg64(cpu_V0, rm);
5734 neon_load_reg64(cpu_V1, rm + 1);
5735 for (pass = 0; pass < 2; pass++) {
5736 TCGv_i64 in;
5737 if (pass == 0) {
5738 in = cpu_V0;
5739 } else {
5740 in = cpu_V1;
5741 }
5742 if (q) {
5743 if (input_unsigned) {
5744 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5745 } else {
5746 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5747 }
5748 } else {
5749 if (input_unsigned) {
5750 gen_ushl_i64(cpu_V0, in, tmp64);
5751 } else {
5752 gen_sshl_i64(cpu_V0, in, tmp64);
5753 }
5754 }
5755 tmp = tcg_temp_new_i32();
5756 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5757 neon_store_reg(rd, pass, tmp);
5758 } /* for pass */
5759 tcg_temp_free_i64(tmp64);
5760 } else {
5761 if (size == 1) {
5762 imm = (uint16_t)shift;
5763 imm |= imm << 16;
5764 } else {
5765 /* size == 2 */
5766 imm = (uint32_t)shift;
5767 }
5768 tmp2 = tcg_const_i32(imm);
5769 tmp4 = neon_load_reg(rm + 1, 0);
5770 tmp5 = neon_load_reg(rm + 1, 1);
5771 for (pass = 0; pass < 2; pass++) {
5772 if (pass == 0) {
5773 tmp = neon_load_reg(rm, 0);
5774 } else {
5775 tmp = tmp4;
5776 }
5777 gen_neon_shift_narrow(size, tmp, tmp2, q,
5778 input_unsigned);
5779 if (pass == 0) {
5780 tmp3 = neon_load_reg(rm, 1);
5781 } else {
5782 tmp3 = tmp5;
5783 }
5784 gen_neon_shift_narrow(size, tmp3, tmp2, q,
5785 input_unsigned);
5786 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5787 tcg_temp_free_i32(tmp);
5788 tcg_temp_free_i32(tmp3);
5789 tmp = tcg_temp_new_i32();
5790 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5791 neon_store_reg(rd, pass, tmp);
5792 } /* for pass */
5793 tcg_temp_free_i32(tmp2);
5794 }
5795 } else if (op == 10) {
5796 /* VSHLL, VMOVL */
5797 if (q || (rd & 1)) {
5798 return 1;
5799 }
5800 tmp = neon_load_reg(rm, 0);
5801 tmp2 = neon_load_reg(rm, 1);
5802 for (pass = 0; pass < 2; pass++) {
5803 if (pass == 1)
5804 tmp = tmp2;
5805
5806 gen_neon_widen(cpu_V0, tmp, size, u);
5807
5808 if (shift != 0) {
5809 /* The shift is less than the width of the source
5810 type, so we can just shift the whole register. */
5811 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5812 /* Widen the result of shift: we need to clear
5813 * the potential overflow bits resulting from
5814 * left bits of the narrow input appearing as
5815 * right bits of the left neighbour narrow
5816 * input. */
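/* For example (VSHLL.S8 with shift == 3, size == 0): the mask below comes
 * out as 0x0007000700070007, clearing the low 3 bits of each 16-bit lane,
 * which is where the top bits of the lane to the right would otherwise
 * have landed; a genuine per-lane left shift by 3 leaves those bits zero
 * anyway.
 */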
5817 if (size < 2 || !u) {
5818 uint64_t imm64;
5819 if (size == 0) {
5820 imm = (0xffu >> (8 - shift));
5821 imm |= imm << 16;
5822 } else if (size == 1) {
5823 imm = 0xffff >> (16 - shift);
5824 } else {
5825 /* size == 2 */
5826 imm = 0xffffffff >> (32 - shift);
5827 }
5828 if (size < 2) {
5829 imm64 = imm | (((uint64_t)imm) << 32);
5830 } else {
5831 imm64 = imm;
5832 }
5833 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5834 }
5835 }
5836 neon_store_reg64(cpu_V0, rd + pass);
5837 }
5838 } else if (op >= 14) {
5839 /* VCVT fixed-point. */
5840 TCGv_ptr fpst;
5841 TCGv_i32 shiftv;
5842 VFPGenFixPointFn *fn;
5843
5844 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5845 return 1;
5846 }
5847
5848 if (!(op & 1)) {
5849 if (u) {
5850 fn = gen_helper_vfp_ultos;
5851 } else {
5852 fn = gen_helper_vfp_sltos;
5853 }
5854 } else {
5855 if (u) {
5856 fn = gen_helper_vfp_touls_round_to_zero;
5857 } else {
5858 fn = gen_helper_vfp_tosls_round_to_zero;
5859 }
5860 }
5861
5862 /* We have already masked out the must-be-1 top bit of imm6,
5863 * hence this 32-shift where the ARM ARM has 64-imm6.
5864 */
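/* For example, imm6 == 60 encodes 64 - 60 == 4 fraction bits; with the
 * top bit already masked, 'shift' arrives here as 28 and 32 - 28 gives
 * the same 4.
 */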
5865 shift = 32 - shift;
5866 fpst = get_fpstatus_ptr(1);
5867 shiftv = tcg_const_i32(shift);
5868 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5869 TCGv_i32 tmpf = neon_load_reg(rm, pass);
5870 fn(tmpf, tmpf, shiftv, fpst);
5871 neon_store_reg(rd, pass, tmpf);
5872 }
5873 tcg_temp_free_ptr(fpst);
5874 tcg_temp_free_i32(shiftv);
5875 } else {
5876 return 1;
5877 }
5878 } else { /* (insn & 0x00380080) == 0 */
5879 int invert, reg_ofs, vec_size;
5880
5881 if (q && (rd & 1)) {
5882 return 1;
5883 }
5884
5885 op = (insn >> 8) & 0xf;
5886 /* One register and immediate. */
5887 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
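/* imm now holds the 8-bit 'abcdefgh' value (insn[24], insn[18:16],
 * insn[3:0]); the switch below expands it according to the op (cmode)
 * field.
 */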
5888 invert = (insn & (1 << 5)) != 0;
5889 /* Note that op = 2,3,4,5,6,7,10,11,12,13 with imm == 0 is UNPREDICTABLE.
5890 * We choose to not special-case this and will behave as if a
5891 * valid constant encoding of 0 had been given.
5892 */
5893 switch (op) {
5894 case 0: case 1:
5895 /* no-op */
5896 break;
5897 case 2: case 3:
5898 imm <<= 8;
5899 break;
5900 case 4: case 5:
5901 imm <<= 16;
5902 break;
5903 case 6: case 7:
5904 imm <<= 24;
5905 break;
5906 case 8: case 9:
5907 imm |= imm << 16;
5908 break;
5909 case 10: case 11:
5910 imm = (imm << 8) | (imm << 24);
5911 break;
5912 case 12:
5913 imm = (imm << 8) | 0xff;
5914 break;
5915 case 13:
5916 imm = (imm << 16) | 0xffff;
5917 break;
5918 case 14:
5919 imm |= (imm << 8) | (imm << 16) | (imm << 24);
5920 if (invert) {
5921 imm = ~imm;
5922 }
5923 break;
5924 case 15:
5925 if (invert) {
5926 return 1;
5927 }
5928 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
5929 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
5930 break;
5931 }
5932 if (invert) {
5933 imm = ~imm;
5934 }
5935
5936 reg_ofs = neon_reg_offset(rd, 0);
5937 vec_size = q ? 16 : 8;
5938
5939 if (op & 1 && op < 12) {
5940 if (invert) {
5941 /* The immediate value has already been inverted,
5942 * so BIC becomes AND.
5943 */
5944 tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
5945 vec_size, vec_size);
5946 } else {
5947 tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
5948 vec_size, vec_size);
5949 }
5950 } else {
5951 /* VMOV, VMVN. */
5952 if (op == 14 && invert) {
5953 TCGv_i64 t64 = tcg_temp_new_i64();
5954
5955 for (pass = 0; pass <= q; ++pass) {
5956 uint64_t val = 0;
5957 int n;
5958
5959 for (n = 0; n < 8; n++) {
5960 if (imm & (1 << (n + pass * 8))) {
5961 val |= 0xffull << (n * 8);
5962 }
5963 }
5964 tcg_gen_movi_i64(t64, val);
5965 neon_store_reg64(t64, rd + pass);
5966 }
5967 tcg_temp_free_i64(t64);
5968 } else {
5969 tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size,
5970 vec_size, imm);
5971 }
5972 }
5973 }
5974 } else { /* (insn & 0x00800010 == 0x00800000) */
5975 if (size != 3) {
5976 op = (insn >> 8) & 0xf;
5977 if ((insn & (1 << 6)) == 0) {
5978 /* Three registers of different lengths. */
5979 int src1_wide;
5980 int src2_wide;
5981 int prewiden;
5982 /* undefreq: bit 0 : UNDEF if size == 0
5983 * bit 1 : UNDEF if size == 1
5984 * bit 2 : UNDEF if size == 2
5985 * bit 3 : UNDEF if U == 1
5986 * Note that [2:0] set implies 'always UNDEF'
5987 */
5988 int undefreq;
5989 /* prewiden, src1_wide, src2_wide, undefreq */
5990 static const int neon_3reg_wide[16][4] = {
5991 {1, 0, 0, 0}, /* VADDL */
5992 {1, 1, 0, 0}, /* VADDW */
5993 {1, 0, 0, 0}, /* VSUBL */
5994 {1, 1, 0, 0}, /* VSUBW */
5995 {0, 1, 1, 0}, /* VADDHN */
5996 {0, 0, 0, 0}, /* VABAL */
5997 {0, 1, 1, 0}, /* VSUBHN */
5998 {0, 0, 0, 0}, /* VABDL */
5999 {0, 0, 0, 0}, /* VMLAL */
6000 {0, 0, 0, 9}, /* VQDMLAL */
6001 {0, 0, 0, 0}, /* VMLSL */
6002 {0, 0, 0, 9}, /* VQDMLSL */
6003 {0, 0, 0, 0}, /* Integer VMULL */
6004 {0, 0, 0, 9}, /* VQDMULL */
6005 {0, 0, 0, 0xa}, /* Polynomial VMULL */
6006 {0, 0, 0, 7}, /* Reserved: always UNDEF */
6007 };
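/* For example, undefreq == 9 (bits 0 and 3) makes VQDMLAL UNDEF for
 * size == 0 or U == 1, and 0xa does the same for polynomial VMULL with
 * size == 1 or U == 1.
 */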
6008
6009 prewiden = neon_3reg_wide[op][0];
6010 src1_wide = neon_3reg_wide[op][1];
6011 src2_wide = neon_3reg_wide[op][2];
6012 undefreq = neon_3reg_wide[op][3];
6013
6014 if ((undefreq & (1 << size)) ||
6015 ((undefreq & 8) && u)) {
6016 return 1;
6017 }
6018 if ((src1_wide && (rn & 1)) ||
6019 (src2_wide && (rm & 1)) ||
6020 (!src2_wide && (rd & 1))) {
6021 return 1;
6022 }
6023
6024 /* Handle polynomial VMULL in a single pass. */
6025 if (op == 14) {
6026 if (size == 0) {
6027 /* VMULL.P8 */
6028 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
6029 0, gen_helper_neon_pmull_h);
6030 } else {
6031 /* VMULL.P64 */
6032 if (!dc_isar_feature(aa32_pmull, s)) {
6033 return 1;
6034 }
6035 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
6036 0, gen_helper_gvec_pmull_q);
6037 }
6038 return 0;
6039 }
6040
6041 /* Avoid overlapping operands. Wide source operands are
6042 always aligned so will never overlap with wide
6043 destinations in problematic ways. */
6044 if (rd == rm && !src2_wide) {
6045 tmp = neon_load_reg(rm, 1);
6046 neon_store_scratch(2, tmp);
6047 } else if (rd == rn && !src1_wide) {
6048 tmp = neon_load_reg(rn, 1);
6049 neon_store_scratch(2, tmp);
6050 }
6051 tmp3 = NULL;
6052 for (pass = 0; pass < 2; pass++) {
6053 if (src1_wide) {
6054 neon_load_reg64(cpu_V0, rn + pass);
6055 tmp = NULL;
6056 } else {
6057 if (pass == 1 && rd == rn) {
6058 tmp = neon_load_scratch(2);
6059 } else {
6060 tmp = neon_load_reg(rn, pass);
6061 }
6062 if (prewiden) {
6063 gen_neon_widen(cpu_V0, tmp, size, u);
6064 }
6065 }
6066 if (src2_wide) {
6067 neon_load_reg64(cpu_V1, rm + pass);
6068 tmp2 = NULL;
6069 } else {
6070 if (pass == 1 && rd == rm) {
6071 tmp2 = neon_load_scratch(2);
6072 } else {
6073 tmp2 = neon_load_reg(rm, pass);
6074 }
6075 if (prewiden) {
6076 gen_neon_widen(cpu_V1, tmp2, size, u);
6077 }
6078 }
6079 switch (op) {
6080 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
6081 gen_neon_addl(size);
6082 break;
6083 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
6084 gen_neon_subl(size);
6085 break;
6086 case 5: case 7: /* VABAL, VABDL */
6087 switch ((size << 1) | u) {
6088 case 0:
6089 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
6090 break;
6091 case 1:
6092 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
6093 break;
6094 case 2:
6095 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
6096 break;
6097 case 3:
6098 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
6099 break;
6100 case 4:
6101 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
6102 break;
6103 case 5:
6104 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
6105 break;
6106 default: abort();
6107 }
6108 tcg_temp_free_i32(tmp2);
6109 tcg_temp_free_i32(tmp);
6110 break;
6111 case 8: case 9: case 10: case 11: case 12: case 13:
6112 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
6113 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6114 break;
6115 default: /* 15 is RESERVED: caught earlier */
6116 abort();
6117 }
6118 if (op == 13) {
6119 /* VQDMULL */
6120 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6121 neon_store_reg64(cpu_V0, rd + pass);
6122 } else if (op == 5 || (op >= 8 && op <= 11)) {
6123 /* Accumulate. */
6124 neon_load_reg64(cpu_V1, rd + pass);
6125 switch (op) {
6126 case 10: /* VMLSL */
6127 gen_neon_negl(cpu_V0, size);
6128 /* Fall through */
6129 case 5: case 8: /* VABAL, VMLAL */
6130 gen_neon_addl(size);
6131 break;
6132 case 9: case 11: /* VQDMLAL, VQDMLSL */
6133 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6134 if (op == 11) {
6135 gen_neon_negl(cpu_V0, size);
6136 }
6137 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6138 break;
6139 default:
6140 abort();
6141 }
6142 neon_store_reg64(cpu_V0, rd + pass);
6143 } else if (op == 4 || op == 6) {
6144 /* Narrowing operation. */
6145 tmp = tcg_temp_new_i32();
6146 if (!u) {
6147 switch (size) {
6148 case 0:
6149 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
6150 break;
6151 case 1:
6152 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
6153 break;
6154 case 2:
6155 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
6156 break;
6157 default: abort();
6158 }
6159 } else {
6160 switch (size) {
6161 case 0:
6162 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
6163 break;
6164 case 1:
6165 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
6166 break;
6167 case 2:
6168 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
6169 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
6170 break;
6171 default: abort();
6172 }
6173 }
6174 if (pass == 0) {
6175 tmp3 = tmp;
6176 } else {
6177 neon_store_reg(rd, 0, tmp3);
6178 neon_store_reg(rd, 1, tmp);
6179 }
6180 } else {
6181 /* Write back the result. */
6182 neon_store_reg64(cpu_V0, rd + pass);
6183 }
6184 }
6185 } else {
6186 /* Two registers and a scalar. NB that for ops of this form
6187 * the ARM ARM labels bit 24 as Q, but it is in our variable
6188 * 'u', not 'q'.
6189 */
6190 if (size == 0) {
6191 return 1;
6192 }
6193 switch (op) {
6194 case 1: /* Float VMLA scalar */
6195 case 5: /* Floating point VMLS scalar */
6196 case 9: /* Floating point VMUL scalar */
6197 if (size == 1) {
6198 return 1;
6199 }
6200 /* fall through */
6201 case 0: /* Integer VMLA scalar */
6202 case 4: /* Integer VMLS scalar */
6203 case 8: /* Integer VMUL scalar */
6204 case 12: /* VQDMULH scalar */
6205 case 13: /* VQRDMULH scalar */
6206 if (u && ((rd | rn) & 1)) {
6207 return 1;
6208 }
6209 tmp = neon_get_scalar(size, rm);
6210 neon_store_scratch(0, tmp);
6211 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6212 tmp = neon_load_scratch(0);
6213 tmp2 = neon_load_reg(rn, pass);
6214 if (op == 12) {
6215 if (size == 1) {
6216 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6217 } else {
6218 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6219 }
6220 } else if (op == 13) {
6221 if (size == 1) {
6222 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6223 } else {
6224 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6225 }
6226 } else if (op & 1) {
6227 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6228 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6229 tcg_temp_free_ptr(fpstatus);
6230 } else {
6231 switch (size) {
6232 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6233 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6234 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6235 default: abort();
6236 }
6237 }
6238 tcg_temp_free_i32(tmp2);
6239 if (op < 8) {
6240 /* Accumulate. */
6241 tmp2 = neon_load_reg(rd, pass);
6242 switch (op) {
6243 case 0:
6244 gen_neon_add(size, tmp, tmp2);
6245 break;
6246 case 1:
6247 {
6248 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6249 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6250 tcg_temp_free_ptr(fpstatus);
6251 break;
6252 }
6253 case 4:
6254 gen_neon_rsb(size, tmp, tmp2);
6255 break;
6256 case 5:
6257 {
6258 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6259 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6260 tcg_temp_free_ptr(fpstatus);
6261 break;
6262 }
6263 default:
6264 abort();
6265 }
6266 tcg_temp_free_i32(tmp2);
6267 }
6268 neon_store_reg(rd, pass, tmp);
6269 }
6270 break;
6271 case 3: /* VQDMLAL scalar */
6272 case 7: /* VQDMLSL scalar */
6273 case 11: /* VQDMULL scalar */
6274 if (u == 1) {
6275 return 1;
6276 }
6277 /* fall through */
6278 case 2: /* VMLAL scalar */
6279 case 6: /* VMLSL scalar */
6280 case 10: /* VMULL scalar */
6281 if (rd & 1) {
6282 return 1;
6283 }
6284 tmp2 = neon_get_scalar(size, rm);
6285 /* We need a copy of tmp2 because gen_neon_mull
6286 * deletes it during pass 0. */
6287 tmp4 = tcg_temp_new_i32();
6288 tcg_gen_mov_i32(tmp4, tmp2);
6289 tmp3 = neon_load_reg(rn, 1);
6290
6291 for (pass = 0; pass < 2; pass++) {
6292 if (pass == 0) {
6293 tmp = neon_load_reg(rn, 0);
6294 } else {
6295 tmp = tmp3;
6296 tmp2 = tmp4;
6297 }
6298 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6299 if (op != 11) {
6300 neon_load_reg64(cpu_V1, rd + pass);
6301 }
6302 switch (op) {
6303 case 6:
6304 gen_neon_negl(cpu_V0, size);
6305 /* Fall through */
6306 case 2:
6307 gen_neon_addl(size);
6308 break;
6309 case 3: case 7:
6310 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6311 if (op == 7) {
6312 gen_neon_negl(cpu_V0, size);
6313 }
6314 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6315 break;
6316 case 10:
6317 /* no-op */
6318 break;
6319 case 11:
6320 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6321 break;
6322 default:
6323 abort();
6324 }
6325 neon_store_reg64(cpu_V0, rd + pass);
6326 }
6327 break;
6328 case 14: /* VQRDMLAH scalar */
6329 case 15: /* VQRDMLSH scalar */
6330 {
6331 NeonGenThreeOpEnvFn *fn;
6332
6333 if (!dc_isar_feature(aa32_rdm, s)) {
6334 return 1;
6335 }
6336 if (u && ((rd | rn) & 1)) {
6337 return 1;
6338 }
6339 if (op == 14) {
6340 if (size == 1) {
6341 fn = gen_helper_neon_qrdmlah_s16;
6342 } else {
6343 fn = gen_helper_neon_qrdmlah_s32;
6344 }
6345 } else {
6346 if (size == 1) {
6347 fn = gen_helper_neon_qrdmlsh_s16;
6348 } else {
6349 fn = gen_helper_neon_qrdmlsh_s32;
6350 }
6351 }
6352
6353 tmp2 = neon_get_scalar(size, rm);
6354 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6355 tmp = neon_load_reg(rn, pass);
6356 tmp3 = neon_load_reg(rd, pass);
6357 fn(tmp, cpu_env, tmp, tmp2, tmp3);
6358 tcg_temp_free_i32(tmp3);
6359 neon_store_reg(rd, pass, tmp);
6360 }
6361 tcg_temp_free_i32(tmp2);
6362 }
6363 break;
6364 default:
6365 g_assert_not_reached();
6366 }
6367 }
6368 } else { /* size == 3 */
6369 if (!u) {
6370 /* Extract. */
6371 imm = (insn >> 8) & 0xf;
6372
6373 if (imm > 7 && !q)
6374 return 1;
6375
6376 if (q && ((rd | rn | rm) & 1)) {
6377 return 1;
6378 }
6379
6380 if (imm == 0) {
6381 neon_load_reg64(cpu_V0, rn);
6382 if (q) {
6383 neon_load_reg64(cpu_V1, rn + 1);
6384 }
6385 } else if (imm == 8) {
6386 neon_load_reg64(cpu_V0, rn + 1);
6387 if (q) {
6388 neon_load_reg64(cpu_V1, rm);
6389 }
6390 } else if (q) {
6391 tmp64 = tcg_temp_new_i64();
6392 if (imm < 8) {
6393 neon_load_reg64(cpu_V0, rn);
6394 neon_load_reg64(tmp64, rn + 1);
6395 } else {
6396 neon_load_reg64(cpu_V0, rn + 1);
6397 neon_load_reg64(tmp64, rm);
6398 }
6399 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6400 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6401 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6402 if (imm < 8) {
6403 neon_load_reg64(cpu_V1, rm);
6404 } else {
6405 neon_load_reg64(cpu_V1, rm + 1);
6406 imm -= 8;
6407 }
6408 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6409 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6410 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6411 tcg_temp_free_i64(tmp64);
6412 } else {
6413 /* BUGFIX */
6414 neon_load_reg64(cpu_V0, rn);
6415 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6416 neon_load_reg64(cpu_V1, rm);
6417 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6418 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6419 }
6420 neon_store_reg64(cpu_V0, rd);
6421 if (q) {
6422 neon_store_reg64(cpu_V1, rd + 1);
6423 }
6424 } else if ((insn & (1 << 11)) == 0) {
6425 /* Two register misc. */
6426 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6427 size = (insn >> 18) & 3;
6428 /* UNDEF for unknown op values and bad op-size combinations */
6429 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6430 return 1;
6431 }
6432 if (neon_2rm_is_v8_op(op) &&
6433 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6434 return 1;
6435 }
6436 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6437 q && ((rm | rd) & 1)) {
6438 return 1;
6439 }
6440 switch (op) {
6441 case NEON_2RM_VREV64:
6442 for (pass = 0; pass < (q ? 2 : 1); pass++) {
6443 tmp = neon_load_reg(rm, pass * 2);
6444 tmp2 = neon_load_reg(rm, pass * 2 + 1);
6445 switch (size) {
6446 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6447 case 1: gen_swap_half(tmp); break;
6448 case 2: /* no-op */ break;
6449 default: abort();
6450 }
6451 neon_store_reg(rd, pass * 2 + 1, tmp);
6452 if (size == 2) {
6453 neon_store_reg(rd, pass * 2, tmp2);
6454 } else {
6455 switch (size) {
6456 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6457 case 1: gen_swap_half(tmp2); break;
6458 default: abort();
6459 }
6460 neon_store_reg(rd, pass * 2, tmp2);
6461 }
6462 }
6463 break;
6464 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6465 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6466 for (pass = 0; pass < q + 1; pass++) {
6467 tmp = neon_load_reg(rm, pass * 2);
6468 gen_neon_widen(cpu_V0, tmp, size, op & 1);
6469 tmp = neon_load_reg(rm, pass * 2 + 1);
6470 gen_neon_widen(cpu_V1, tmp, size, op & 1);
6471 switch (size) {
6472 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6473 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6474 case 2: tcg_gen_add_i64(CPU_V001); break;
6475 default: abort();
6476 }
6477 if (op >= NEON_2RM_VPADAL) {
6478 /* Accumulate. */
6479 neon_load_reg64(cpu_V1, rd + pass);
6480 gen_neon_addl(size);
6481 }
6482 neon_store_reg64(cpu_V0, rd + pass);
6483 }
6484 break;
6485 case NEON_2RM_VTRN:
6486 if (size == 2) {
6487 int n;
6488 for (n = 0; n < (q ? 4 : 2); n += 2) {
6489 tmp = neon_load_reg(rm, n);
6490 tmp2 = neon_load_reg(rd, n + 1);
6491 neon_store_reg(rm, n, tmp2);
6492 neon_store_reg(rd, n + 1, tmp);
6493 }
6494 } else {
6495 goto elementwise;
6496 }
6497 break;
6498 case NEON_2RM_VUZP:
6499 if (gen_neon_unzip(rd, rm, size, q)) {
6500 return 1;
6501 }
6502 break;
6503 case NEON_2RM_VZIP:
6504 if (gen_neon_zip(rd, rm, size, q)) {
6505 return 1;
6506 }
6507 break;
6508 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6509 /* also VQMOVUN; op field and mnemonics don't line up */
6510 if (rm & 1) {
6511 return 1;
6512 }
6513 tmp2 = NULL;
6514 for (pass = 0; pass < 2; pass++) {
6515 neon_load_reg64(cpu_V0, rm + pass);
6516 tmp = tcg_temp_new_i32();
6517 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6518 tmp, cpu_V0);
6519 if (pass == 0) {
6520 tmp2 = tmp;
6521 } else {
6522 neon_store_reg(rd, 0, tmp2);
6523 neon_store_reg(rd, 1, tmp);
6524 }
6525 }
6526 break;
6527 case NEON_2RM_VSHLL:
6528 if (q || (rd & 1)) {
6529 return 1;
6530 }
6531 tmp = neon_load_reg(rm, 0);
6532 tmp2 = neon_load_reg(rm, 1);
6533 for (pass = 0; pass < 2; pass++) {
6534 if (pass == 1)
6535 tmp = tmp2;
6536 gen_neon_widen(cpu_V0, tmp, size, 1);
6537 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6538 neon_store_reg64(cpu_V0, rd + pass);
6539 }
6540 break;
6541 case NEON_2RM_VCVT_F16_F32:
6542 {
6543 TCGv_ptr fpst;
6544 TCGv_i32 ahp;
6545
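/* Narrow the four single-precision elements of the Q-sized source to
 * half precision and pack them pairwise into the two words of the
 * D-sized destination.
 */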
6546 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6547 q || (rm & 1)) {
6548 return 1;
6549 }
6550 fpst = get_fpstatus_ptr(true);
6551 ahp = get_ahp_flag();
6552 tmp = neon_load_reg(rm, 0);
6553 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6554 tmp2 = neon_load_reg(rm, 1);
6555 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
6556 tcg_gen_shli_i32(tmp2, tmp2, 16);
6557 tcg_gen_or_i32(tmp2, tmp2, tmp);
6558 tcg_temp_free_i32(tmp);
6559 tmp = neon_load_reg(rm, 2);
6560 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6561 tmp3 = neon_load_reg(rm, 3);
6562 neon_store_reg(rd, 0, tmp2);
6563 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
6564 tcg_gen_shli_i32(tmp3, tmp3, 16);
6565 tcg_gen_or_i32(tmp3, tmp3, tmp);
6566 neon_store_reg(rd, 1, tmp3);
6567 tcg_temp_free_i32(tmp);
6568 tcg_temp_free_i32(ahp);
6569 tcg_temp_free_ptr(fpst);
6570 break;
6571 }
6572 case NEON_2RM_VCVT_F32_F16:
6573 {
6574 TCGv_ptr fpst;
6575 TCGv_i32 ahp;
6576 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6577 q || (rd & 1)) {
6578 return 1;
6579 }
6580 fpst = get_fpstatus_ptr(true);
6581 ahp = get_ahp_flag();
6582 tmp3 = tcg_temp_new_i32();
6583 tmp = neon_load_reg(rm, 0);
6584 tmp2 = neon_load_reg(rm, 1);
6585 tcg_gen_ext16u_i32(tmp3, tmp);
6586 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6587 neon_store_reg(rd, 0, tmp3);
6588 tcg_gen_shri_i32(tmp, tmp, 16);
6589 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
6590 neon_store_reg(rd, 1, tmp);
6591 tmp3 = tcg_temp_new_i32();
6592 tcg_gen_ext16u_i32(tmp3, tmp2);
6593 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6594 neon_store_reg(rd, 2, tmp3);
6595 tcg_gen_shri_i32(tmp2, tmp2, 16);
6596 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
6597 neon_store_reg(rd, 3, tmp2);
6598 tcg_temp_free_i32(ahp);
6599 tcg_temp_free_ptr(fpst);
6600 break;
6601 }
6602 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6603 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
6604 return 1;
6605 }
6606 ptr1 = vfp_reg_ptr(true, rd);
6607 ptr2 = vfp_reg_ptr(true, rm);
6608
6609 /* Bit 6 is the lowest opcode bit; it distinguishes between
6610 * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
6611 */
6612 tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6613
6614 if (op == NEON_2RM_AESE) {
6615 gen_helper_crypto_aese(ptr1, ptr2, tmp3);
6616 } else {
6617 gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
6618 }
6619 tcg_temp_free_ptr(ptr1);
6620 tcg_temp_free_ptr(ptr2);
6621 tcg_temp_free_i32(tmp3);
6622 break;
6623 case NEON_2RM_SHA1H:
6624 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
6625 return 1;
6626 }
6627 ptr1 = vfp_reg_ptr(true, rd);
6628 ptr2 = vfp_reg_ptr(true, rm);
6629
6630 gen_helper_crypto_sha1h(ptr1, ptr2);
6631
6632 tcg_temp_free_ptr(ptr1);
6633 tcg_temp_free_ptr(ptr2);
6634 break;
6635 case NEON_2RM_SHA1SU1:
6636 if ((rm | rd) & 1) {
6637 return 1;
6638 }
6639 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6640 if (q) {
6641 if (!dc_isar_feature(aa32_sha2, s)) {
6642 return 1;
6643 }
6644 } else if (!dc_isar_feature(aa32_sha1, s)) {
6645 return 1;
6646 }
6647 ptr1 = vfp_reg_ptr(true, rd);
6648 ptr2 = vfp_reg_ptr(true, rm);
6649 if (q) {
6650 gen_helper_crypto_sha256su0(ptr1, ptr2);
6651 } else {
6652 gen_helper_crypto_sha1su1(ptr1, ptr2);
6653 }
6654 tcg_temp_free_ptr(ptr1);
6655 tcg_temp_free_ptr(ptr2);
6656 break;
6657
6658 case NEON_2RM_VMVN:
6659 tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
6660 break;
6661 case NEON_2RM_VNEG:
6662 tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
6663 break;
6664 case NEON_2RM_VABS:
6665 tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
6666 break;
6667
6668 case NEON_2RM_VCEQ0:
6669 gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6670 break;
6671 case NEON_2RM_VCGT0:
6672 gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6673 break;
6674 case NEON_2RM_VCLE0:
6675 gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6676 break;
6677 case NEON_2RM_VCGE0:
6678 gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6679 break;
6680 case NEON_2RM_VCLT0:
6681 gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6682 break;
6683
6684 default:
6685 elementwise:
6686 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6687 tmp = neon_load_reg(rm, pass);
6688 switch (op) {
6689 case NEON_2RM_VREV32:
6690 switch (size) {
6691 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6692 case 1: gen_swap_half(tmp); break;
6693 default: abort();
6694 }
6695 break;
6696 case NEON_2RM_VREV16:
6697 gen_rev16(tmp, tmp);
6698 break;
6699 case NEON_2RM_VCLS:
6700 switch (size) {
6701 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6702 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6703 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6704 default: abort();
6705 }
6706 break;
6707 case NEON_2RM_VCLZ:
6708 switch (size) {
6709 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6710 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6711 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
6712 default: abort();
6713 }
6714 break;
6715 case NEON_2RM_VCNT:
6716 gen_helper_neon_cnt_u8(tmp, tmp);
6717 break;
6718 case NEON_2RM_VQABS:
6719 switch (size) {
6720 case 0:
6721 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6722 break;
6723 case 1:
6724 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6725 break;
6726 case 2:
6727 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6728 break;
6729 default: abort();
6730 }
6731 break;
6732 case NEON_2RM_VQNEG:
6733 switch (size) {
6734 case 0:
6735 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6736 break;
6737 case 1:
6738 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6739 break;
6740 case 2:
6741 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6742 break;
6743 default: abort();
6744 }
6745 break;
6746 case NEON_2RM_VCGT0_F:
6747 {
6748 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6749 tmp2 = tcg_const_i32(0);
6750 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6751 tcg_temp_free_i32(tmp2);
6752 tcg_temp_free_ptr(fpstatus);
6753 break;
6754 }
6755 case NEON_2RM_VCGE0_F:
6756 {
6757 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6758 tmp2 = tcg_const_i32(0);
6759 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6760 tcg_temp_free_i32(tmp2);
6761 tcg_temp_free_ptr(fpstatus);
6762 break;
6763 }
6764 case NEON_2RM_VCEQ0_F:
6765 {
6766 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6767 tmp2 = tcg_const_i32(0);
6768 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6769 tcg_temp_free_i32(tmp2);
6770 tcg_temp_free_ptr(fpstatus);
6771 break;
6772 }
6773 case NEON_2RM_VCLE0_F:
6774 {
6775 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6776 tmp2 = tcg_const_i32(0);
6777 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6778 tcg_temp_free_i32(tmp2);
6779 tcg_temp_free_ptr(fpstatus);
6780 break;
6781 }
6782 case NEON_2RM_VCLT0_F:
6783 {
6784 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6785 tmp2 = tcg_const_i32(0);
6786 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6787 tcg_temp_free_i32(tmp2);
6788 tcg_temp_free_ptr(fpstatus);
6789 break;
6790 }
6791 case NEON_2RM_VABS_F:
6792 gen_helper_vfp_abss(tmp, tmp);
6793 break;
6794 case NEON_2RM_VNEG_F:
6795 gen_helper_vfp_negs(tmp, tmp);
6796 break;
6797 case NEON_2RM_VSWP:
6798 tmp2 = neon_load_reg(rd, pass);
6799 neon_store_reg(rm, pass, tmp2);
6800 break;
6801 case NEON_2RM_VTRN:
6802 tmp2 = neon_load_reg(rd, pass);
6803 switch (size) {
6804 case 0: gen_neon_trn_u8(tmp, tmp2); break;
6805 case 1: gen_neon_trn_u16(tmp, tmp2); break;
6806 default: abort();
6807 }
6808 neon_store_reg(rm, pass, tmp2);
6809 break;
6810 case NEON_2RM_VRINTN:
6811 case NEON_2RM_VRINTA:
6812 case NEON_2RM_VRINTM:
6813 case NEON_2RM_VRINTP:
6814 case NEON_2RM_VRINTZ:
6815 {
6816 TCGv_i32 tcg_rmode;
6817 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6818 int rmode;
6819
6820 if (op == NEON_2RM_VRINTZ) {
6821 rmode = FPROUNDING_ZERO;
6822 } else {
6823 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6824 }
6825
6826 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6827 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6828 cpu_env);
6829 gen_helper_rints(tmp, tmp, fpstatus);
6830 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6831 cpu_env);
6832 tcg_temp_free_ptr(fpstatus);
6833 tcg_temp_free_i32(tcg_rmode);
6834 break;
6835 }
6836 case NEON_2RM_VRINTX:
6837 {
6838 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6839 gen_helper_rints_exact(tmp, tmp, fpstatus);
6840 tcg_temp_free_ptr(fpstatus);
6841 break;
6842 }
6843 case NEON_2RM_VCVTAU:
6844 case NEON_2RM_VCVTAS:
6845 case NEON_2RM_VCVTNU:
6846 case NEON_2RM_VCVTNS:
6847 case NEON_2RM_VCVTPU:
6848 case NEON_2RM_VCVTPS:
6849 case NEON_2RM_VCVTMU:
6850 case NEON_2RM_VCVTMS:
6851 {
6852 bool is_signed = !extract32(insn, 7, 1);
6853 TCGv_ptr fpst = get_fpstatus_ptr(1);
6854 TCGv_i32 tcg_rmode, tcg_shift;
6855 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6856
6857 tcg_shift = tcg_const_i32(0);
6858 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6859 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6860 cpu_env);
6861
6862 if (is_signed) {
6863 gen_helper_vfp_tosls(tmp, tmp,
6864 tcg_shift, fpst);
6865 } else {
6866 gen_helper_vfp_touls(tmp, tmp,
6867 tcg_shift, fpst);
6868 }
6869
6870 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6871 cpu_env);
6872 tcg_temp_free_i32(tcg_rmode);
6873 tcg_temp_free_i32(tcg_shift);
6874 tcg_temp_free_ptr(fpst);
6875 break;
6876 }
6877 case NEON_2RM_VRECPE:
6878 gen_helper_recpe_u32(tmp, tmp);
6879 break;
6880 case NEON_2RM_VRSQRTE:
6881 gen_helper_rsqrte_u32(tmp, tmp);
6882 break;
6883 case NEON_2RM_VRECPE_F:
6884 {
6885 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6886 gen_helper_recpe_f32(tmp, tmp, fpstatus);
6887 tcg_temp_free_ptr(fpstatus);
6888 break;
6889 }
6890 case NEON_2RM_VRSQRTE_F:
6891 {
6892 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6893 gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
6894 tcg_temp_free_ptr(fpstatus);
6895 break;
6896 }
6897 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6898 {
6899 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6900 gen_helper_vfp_sitos(tmp, tmp, fpstatus);
6901 tcg_temp_free_ptr(fpstatus);
6902 break;
6903 }
6904 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6905 {
6906 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6907 gen_helper_vfp_uitos(tmp, tmp, fpstatus);
6908 tcg_temp_free_ptr(fpstatus);
6909 break;
6910 }
6911 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6912 {
6913 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6914 gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
6915 tcg_temp_free_ptr(fpstatus);
6916 break;
6917 }
6918 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6919 {
6920 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6921 gen_helper_vfp_touizs(tmp, tmp, fpstatus);
6922 tcg_temp_free_ptr(fpstatus);
6923 break;
6924 }
6925 default:
6926 /* Reserved op values were caught by the
6927 * neon_2rm_sizes[] check earlier.
6928 */
6929 abort();
6930 }
6931 neon_store_reg(rd, pass, tmp);
6932 }
6933 break;
6934 }
6935 } else if ((insn & (1 << 10)) == 0) {
6936 /* VTBL, VTBX. */
6937 int n = ((insn >> 8) & 3) + 1;
6938 if ((rn + n) > 32) {
6939 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6940 * helper function running off the end of the register file.
6941 */
6942 return 1;
6943 }
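/* n is the number of D registers in the table list (1..4); the table
 * helper takes the table length in bytes, hence the shift by 3
 * (8 bytes per D register) below.
 */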
6944 n <<= 3;
6945 if (insn & (1 << 6)) {
6946 tmp = neon_load_reg(rd, 0);
6947 } else {
6948 tmp = tcg_temp_new_i32();
6949 tcg_gen_movi_i32(tmp, 0);
6950 }
6951 tmp2 = neon_load_reg(rm, 0);
6952 ptr1 = vfp_reg_ptr(true, rn);
6953 tmp5 = tcg_const_i32(n);
6954 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
6955 tcg_temp_free_i32(tmp);
6956 if (insn & (1 << 6)) {
6957 tmp = neon_load_reg(rd, 1);
6958 } else {
6959 tmp = tcg_temp_new_i32();
6960 tcg_gen_movi_i32(tmp, 0);
6961 }
6962 tmp3 = neon_load_reg(rm, 1);
6963 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
6964 tcg_temp_free_i32(tmp5);
6965 tcg_temp_free_ptr(ptr1);
6966 neon_store_reg(rd, 0, tmp2);
6967 neon_store_reg(rd, 1, tmp3);
6968 tcg_temp_free_i32(tmp);
6969 } else if ((insn & 0x380) == 0) {
6970 /* VDUP */
6971 int element;
6972 MemOp size;
6973
6974 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
6975 return 1;
6976 }
6977 if (insn & (1 << 16)) {
6978 size = MO_8;
6979 element = (insn >> 17) & 7;
6980 } else if (insn & (1 << 17)) {
6981 size = MO_16;
6982 element = (insn >> 18) & 3;
6983 } else {
6984 size = MO_32;
6985 element = (insn >> 19) & 1;
6986 }
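/* The lowest set bit of insn[18:16] selects the element size (bit 16 ->
 * byte, bit 17 -> halfword, bit 18 -> word), and the bits above it give
 * the scalar index within the source register.
 */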
6987 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
6988 neon_element_offset(rm, element, size),
6989 q ? 16 : 8, q ? 16 : 8);
6990 } else {
6991 return 1;
6992 }
6993 }
6994 }
6995 return 0;
6996 }
6997
6998 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
6999 {
7000 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
7001 const ARMCPRegInfo *ri;
7002
7003 cpnum = (insn >> 8) & 0xf;
7004
7005 /* First check for coprocessor space used for XScale/iwMMXt insns */
7006 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
7007 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
7008 return 1;
7009 }
7010 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7011 return disas_iwmmxt_insn(s, insn);
7012 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
7013 return disas_dsp_insn(s, insn);
7014 }
7015 return 1;
7016 }
7017
7018 /* Otherwise treat as a generic register access */
7019 is64 = (insn & (1 << 25)) == 0;
7020 if (!is64 && ((insn & (1 << 4)) == 0)) {
7021 /* cdp */
7022 return 1;
7023 }
7024
7025 crm = insn & 0xf;
7026 if (is64) {
7027 crn = 0;
7028 opc1 = (insn >> 4) & 0xf;
7029 opc2 = 0;
7030 rt2 = (insn >> 16) & 0xf;
7031 } else {
7032 crn = (insn >> 16) & 0xf;
7033 opc1 = (insn >> 21) & 7;
7034 opc2 = (insn >> 5) & 7;
7035 rt2 = 0;
7036 }
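/* i.e. the 64-bit (MCRR/MRRC style) form carries Rt2 in bits [19:16],
 * where the 32-bit form has CRn.
 */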
7037 isread = (insn >> 20) & 1;
7038 rt = (insn >> 12) & 0xf;
7039
7040 ri = get_arm_cp_reginfo(s->cp_regs,
7041 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
7042 if (ri) {
7043 bool need_exit_tb;
7044
7045 /* Check access permissions */
7046 if (!cp_access_ok(s->current_el, ri, isread)) {
7047 return 1;
7048 }
7049
7050 if (s->hstr_active || ri->accessfn ||
7051 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
7052 /* Emit code to perform further access permission checks at
7053 * runtime; this may result in an exception.
7054 * Note that on XScale all cp0..cp13 registers do an access check
7055 * call in order to handle c15_cpar.
7056 */
7057 TCGv_ptr tmpptr;
7058 TCGv_i32 tcg_syn, tcg_isread;
7059 uint32_t syndrome;
7060
7061 /* Note that since we are an implementation which takes an
7062 * exception on a trapped conditional instruction only if the
7063 * instruction passes its condition code check, we can take
7064 * advantage of the clause in the ARM ARM that allows us to set
7065 * the COND field in the instruction to 0xE in all cases.
7066 * We could fish the actual condition out of the insn (ARM)
7067 * or the condexec bits (Thumb) but it isn't necessary.
7068 */
7069 switch (cpnum) {
7070 case 14:
7071 if (is64) {
7072 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7073 isread, false);
7074 } else {
7075 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7076 rt, isread, false);
7077 }
7078 break;
7079 case 15:
7080 if (is64) {
7081 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7082 isread, false);
7083 } else {
7084 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7085 rt, isread, false);
7086 }
7087 break;
7088 default:
7089 /* ARMv8 defines that only coprocessors 14 and 15 exist,
7090 * so this can only happen if this is an ARMv7 or earlier CPU,
7091 * in which case the syndrome information won't actually be
7092 * guest visible.
7093 */
7094 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
7095 syndrome = syn_uncategorized();
7096 break;
7097 }
7098
7099 gen_set_condexec(s);
7100 gen_set_pc_im(s, s->pc_curr);
7101 tmpptr = tcg_const_ptr(ri);
7102 tcg_syn = tcg_const_i32(syndrome);
7103 tcg_isread = tcg_const_i32(isread);
7104 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
7105 tcg_isread);
7106 tcg_temp_free_ptr(tmpptr);
7107 tcg_temp_free_i32(tcg_syn);
7108 tcg_temp_free_i32(tcg_isread);
7109 } else if (ri->type & ARM_CP_RAISES_EXC) {
7110 /*
7111 * The readfn or writefn might raise an exception;
7112 * synchronize the CPU state in case it does.
7113 */
7114 gen_set_condexec(s);
7115 gen_set_pc_im(s, s->pc_curr);
7116 }
7117
7118 /* Handle special cases first */
7119 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
7120 case ARM_CP_NOP:
7121 return 0;
7122 case ARM_CP_WFI:
7123 if (isread) {
7124 return 1;
7125 }
7126 gen_set_pc_im(s, s->base.pc_next);
7127 s->base.is_jmp = DISAS_WFI;
7128 return 0;
7129 default:
7130 break;
7131 }
7132
7133 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7134 gen_io_start();
7135 }
7136
7137 if (isread) {
7138 /* Read */
7139 if (is64) {
7140 TCGv_i64 tmp64;
7141 TCGv_i32 tmp;
7142 if (ri->type & ARM_CP_CONST) {
7143 tmp64 = tcg_const_i64(ri->resetvalue);
7144 } else if (ri->readfn) {
7145 TCGv_ptr tmpptr;
7146 tmp64 = tcg_temp_new_i64();
7147 tmpptr = tcg_const_ptr(ri);
7148 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
7149 tcg_temp_free_ptr(tmpptr);
7150 } else {
7151 tmp64 = tcg_temp_new_i64();
7152 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7153 }
7154 tmp = tcg_temp_new_i32();
7155 tcg_gen_extrl_i64_i32(tmp, tmp64);
7156 store_reg(s, rt, tmp);
7157 tmp = tcg_temp_new_i32();
7158 tcg_gen_extrh_i64_i32(tmp, tmp64);
7159 tcg_temp_free_i64(tmp64);
7160 store_reg(s, rt2, tmp);
7161 } else {
7162 TCGv_i32 tmp;
7163 if (ri->type & ARM_CP_CONST) {
7164 tmp = tcg_const_i32(ri->resetvalue);
7165 } else if (ri->readfn) {
7166 TCGv_ptr tmpptr;
7167 tmp = tcg_temp_new_i32();
7168 tmpptr = tcg_const_ptr(ri);
7169 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7170 tcg_temp_free_ptr(tmpptr);
7171 } else {
7172 tmp = load_cpu_offset(ri->fieldoffset);
7173 }
7174 if (rt == 15) {
7175 /* A destination register of r15 for 32-bit loads sets
7176 * the condition codes from the high 4 bits of the value.
7177 */
7178 gen_set_nzcv(tmp);
7179 tcg_temp_free_i32(tmp);
7180 } else {
7181 store_reg(s, rt, tmp);
7182 }
7183 }
7184 } else {
7185 /* Write */
7186 if (ri->type & ARM_CP_CONST) {
7187 /* If not forbidden by access permissions, treat as WI */
7188 return 0;
7189 }
7190
7191 if (is64) {
7192 TCGv_i32 tmplo, tmphi;
7193 TCGv_i64 tmp64 = tcg_temp_new_i64();
7194 tmplo = load_reg(s, rt);
7195 tmphi = load_reg(s, rt2);
7196 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7197 tcg_temp_free_i32(tmplo);
7198 tcg_temp_free_i32(tmphi);
7199 if (ri->writefn) {
7200 TCGv_ptr tmpptr = tcg_const_ptr(ri);
7201 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7202 tcg_temp_free_ptr(tmpptr);
7203 } else {
7204 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7205 }
7206 tcg_temp_free_i64(tmp64);
7207 } else {
7208 if (ri->writefn) {
7209 TCGv_i32 tmp;
7210 TCGv_ptr tmpptr;
7211 tmp = load_reg(s, rt);
7212 tmpptr = tcg_const_ptr(ri);
7213 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7214 tcg_temp_free_ptr(tmpptr);
7215 tcg_temp_free_i32(tmp);
7216 } else {
7217 TCGv_i32 tmp = load_reg(s, rt);
7218 store_cpu_offset(tmp, ri->fieldoffset);
7219 }
7220 }
7221 }
7222
7223 /* I/O operations must end the TB here (whether read or write) */
7224 need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
7225 (ri->type & ARM_CP_IO));
7226
7227 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7228 /*
7229 * A write to any coprocessor register that ends a TB
7230 * must rebuild the hflags for the next TB.
7231 */
7232 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
7233 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7234 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
7235 } else {
7236 if (ri->type & ARM_CP_NEWEL) {
7237 gen_helper_rebuild_hflags_a32_newel(cpu_env);
7238 } else {
7239 gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
7240 }
7241 }
7242 tcg_temp_free_i32(tcg_el);
7243 /*
7244 * We default to ending the TB on a coprocessor register write,
7245 * but allow this to be suppressed by the register definition
7246 * (usually only necessary to work around guest bugs).
7247 */
7248 need_exit_tb = true;
7249 }
7250 if (need_exit_tb) {
7251 gen_lookup_tb(s);
7252 }
7253
7254 return 0;
7255 }
7256
7257 /* Unknown register; this might be a guest error or a QEMU
7258 * unimplemented feature.
7259 */
7260 if (is64) {
7261 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7262 "64 bit system register cp:%d opc1: %d crm:%d "
7263 "(%s)\n",
7264 isread ? "read" : "write", cpnum, opc1, crm,
7265 s->ns ? "non-secure" : "secure");
7266 } else {
7267 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7268 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7269 "(%s)\n",
7270 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7271 s->ns ? "non-secure" : "secure");
7272 }
7273
7274 return 1;
7275 }
7276
7277
7278 /* Store a 64-bit value to a register pair. Clobbers val. */
7279 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7280 {
7281 TCGv_i32 tmp;
7282 tmp = tcg_temp_new_i32();
7283 tcg_gen_extrl_i64_i32(tmp, val);
7284 store_reg(s, rlow, tmp);
7285 tmp = tcg_temp_new_i32();
7286 tcg_gen_extrh_i64_i32(tmp, val);
7287 store_reg(s, rhigh, tmp);
7288 }
7289
7290 /* Load and add a 64-bit value from a register pair. */
7291 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7292 {
7293 TCGv_i64 tmp;
7294 TCGv_i32 tmpl;
7295 TCGv_i32 tmph;
7296
7297 /* Load 64-bit value rd:rn. */
7298 tmpl = load_reg(s, rlow);
7299 tmph = load_reg(s, rhigh);
7300 tmp = tcg_temp_new_i64();
7301 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7302 tcg_temp_free_i32(tmpl);
7303 tcg_temp_free_i32(tmph);
7304 tcg_gen_add_i64(val, val, tmp);
7305 tcg_temp_free_i64(tmp);
7306 }
7307
7308 /* Set N and Z flags from hi|lo. */
7309 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7310 {
7311 tcg_gen_mov_i32(cpu_NF, hi);
7312 tcg_gen_or_i32(cpu_ZF, lo, hi);
7313 }
7314
7315 /* Load/Store exclusive instructions are implemented by remembering
7316 the value/address loaded, and seeing if these are the same
7317 when the store is performed. This should be sufficient to implement
7318 the architecturally mandated semantics, and avoids having to monitor
7319 regular stores. The compare vs the remembered value is done during
7320 the cmpxchg operation, but we must compare the addresses manually. */
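/* An illustrative guest sequence (not generated here):
 *     ldrex r1, [r0]      @ records r0 and [r0]
 *     add   r1, r1, #1
 *     strex r2, r1, [r0]  @ r2 == 0 only if the address still matches
 *                         @ and the cmpxchg sees the remembered value
 */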
7321 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7322 TCGv_i32 addr, int size)
7323 {
7324 TCGv_i32 tmp = tcg_temp_new_i32();
7325 MemOp opc = size | MO_ALIGN | s->be_data;
7326
7327 s->is_ldex = true;
7328
7329 if (size == 3) {
7330 TCGv_i32 tmp2 = tcg_temp_new_i32();
7331 TCGv_i64 t64 = tcg_temp_new_i64();
7332
7333 /* For AArch32, architecturally the 32-bit word at the lowest
7334 * address is always Rt and the one at addr+4 is Rt2, even if
7335 * the CPU is big-endian. That means we don't want to do a
7336 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
7337 * for an architecturally 64-bit access, but instead do a
7338 * 64-bit access using MO_BE if appropriate and then split
7339 * the two halves.
7340 * This only makes a difference for BE32 user-mode, where
7341 * frob64() must not flip the two halves of the 64-bit data
7342 * but this code must treat BE32 user-mode like BE32 system.
7343 */
7344 TCGv taddr = gen_aa32_addr(s, addr, opc);
7345
7346 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
7347 tcg_temp_free(taddr);
7348 tcg_gen_mov_i64(cpu_exclusive_val, t64);
7349 if (s->be_data == MO_BE) {
7350 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
7351 } else {
7352 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
7353 }
7354 tcg_temp_free_i64(t64);
7355
7356 store_reg(s, rt2, tmp2);
7357 } else {
7358 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
7359 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7360 }
7361
7362 store_reg(s, rt, tmp);
7363 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7364 }
7365
7366 static void gen_clrex(DisasContext *s)
7367 {
7368 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7369 }
7370
7371 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7372 TCGv_i32 addr, int size)
7373 {
7374 TCGv_i32 t0, t1, t2;
7375 TCGv_i64 extaddr;
7376 TCGv taddr;
7377 TCGLabel *done_label;
7378 TCGLabel *fail_label;
7379 MemOp opc = size | MO_ALIGN | s->be_data;
7380
7381 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
7382 [addr] = {Rt};
7383 {Rd} = 0;
7384 } else {
7385 {Rd} = 1;
7386 } */
7387 fail_label = gen_new_label();
7388 done_label = gen_new_label();
7389 extaddr = tcg_temp_new_i64();
7390 tcg_gen_extu_i32_i64(extaddr, addr);
7391 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7392 tcg_temp_free_i64(extaddr);
7393
7394 taddr = gen_aa32_addr(s, addr, opc);
7395 t0 = tcg_temp_new_i32();
7396 t1 = load_reg(s, rt);
7397 if (size == 3) {
7398 TCGv_i64 o64 = tcg_temp_new_i64();
7399 TCGv_i64 n64 = tcg_temp_new_i64();
7400
7401 t2 = load_reg(s, rt2);
7402 /* For AArch32, architecturally the 32-bit word at the lowest
7403 * address is always Rt and the one at addr+4 is Rt2, even if
7404 * the CPU is big-endian. Since we're going to treat this as a
7405 * single 64-bit BE store, we need to put the two halves in the
7406 * opposite order for BE to LE, so that they end up in the right
7407 * places.
7408 * We don't want gen_aa32_frob64() because that does the wrong
7409 * thing for BE32 usermode.
7410 */
7411 if (s->be_data == MO_BE) {
7412 tcg_gen_concat_i32_i64(n64, t2, t1);
7413 } else {
7414 tcg_gen_concat_i32_i64(n64, t1, t2);
7415 }
7416 tcg_temp_free_i32(t2);
7417
7418 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
7419 get_mem_index(s), opc);
7420 tcg_temp_free_i64(n64);
7421
7422 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
7423 tcg_gen_extrl_i64_i32(t0, o64);
7424
7425 tcg_temp_free_i64(o64);
7426 } else {
7427 t2 = tcg_temp_new_i32();
7428 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
7429 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
7430 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
7431 tcg_temp_free_i32(t2);
7432 }
7433 tcg_temp_free_i32(t1);
7434 tcg_temp_free(taddr);
7435 tcg_gen_mov_i32(cpu_R[rd], t0);
7436 tcg_temp_free_i32(t0);
7437 tcg_gen_br(done_label);
7438
7439 gen_set_label(fail_label);
7440 tcg_gen_movi_i32(cpu_R[rd], 1);
7441 gen_set_label(done_label);
7442 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7443 }
7444
7445 /* gen_srs:
7446 * @env: CPUARMState
7447 * @s: DisasContext
7448 * @mode: mode field from insn (which stack to store to)
7449 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7450 * @writeback: true if writeback bit set
7451 *
7452 * Generate code for the SRS (Store Return State) insn.
7453 */
7454 static void gen_srs(DisasContext *s,
7455 uint32_t mode, uint32_t amode, bool writeback)
7456 {
7457 int32_t offset;
7458 TCGv_i32 addr, tmp;
7459 bool undef = false;
7460
7461 /* SRS is:
7462 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
7463 * and specified mode is monitor mode
7464 * - UNDEFINED in Hyp mode
7465 * - UNPREDICTABLE in User or System mode
7466 * - UNPREDICTABLE if the specified mode is:
7467 * -- not implemented
7468 * -- not a valid mode number
7469 * -- a mode that's at a higher exception level
7470 * -- Monitor, if we are Non-secure
7471 * For the UNPREDICTABLE cases we choose to UNDEF.
7472 */
7473 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
7474 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
7475 return;
7476 }
7477
7478 if (s->current_el == 0 || s->current_el == 2) {
7479 undef = true;
7480 }
7481
7482 switch (mode) {
7483 case ARM_CPU_MODE_USR:
7484 case ARM_CPU_MODE_FIQ:
7485 case ARM_CPU_MODE_IRQ:
7486 case ARM_CPU_MODE_SVC:
7487 case ARM_CPU_MODE_ABT:
7488 case ARM_CPU_MODE_UND:
7489 case ARM_CPU_MODE_SYS:
7490 break;
7491 case ARM_CPU_MODE_HYP:
7492 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
7493 undef = true;
7494 }
7495 break;
7496 case ARM_CPU_MODE_MON:
7497 /* No need to check specifically for "are we non-secure" because
7498 * we've already made EL0 UNDEF and handled the trap for S-EL1;
7499 * so if this isn't EL3 then we must be non-secure.
7500 */
7501 if (s->current_el != 3) {
7502 undef = true;
7503 }
7504 break;
7505 default:
7506 undef = true;
7507 }
7508
7509 if (undef) {
7510 unallocated_encoding(s);
7511 return;
7512 }
7513
7514 addr = tcg_temp_new_i32();
7515 tmp = tcg_const_i32(mode);
7516 /* get_r13_banked() will raise an exception if called from System mode */
7517 gen_set_condexec(s);
7518 gen_set_pc_im(s, s->pc_curr);
7519 gen_helper_get_r13_banked(addr, cpu_env, tmp);
7520 tcg_temp_free_i32(tmp);
7521 switch (amode) {
7522 case 0: /* DA */
7523 offset = -4;
7524 break;
7525 case 1: /* IA */
7526 offset = 0;
7527 break;
7528 case 2: /* DB */
7529 offset = -8;
7530 break;
7531 case 3: /* IB */
7532 offset = 4;
7533 break;
7534 default:
7535 abort();
7536 }
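/* offset positions the lower of the two stored words relative to the
 * banked SP: e.g. DB (amode 2) stores LR at SP-8 and the SPSR at SP-4,
 * IB (amode 3) at SP+4 and SP+8.
 */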
7537 tcg_gen_addi_i32(addr, addr, offset);
7538 tmp = load_reg(s, 14);
7539 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7540 tcg_temp_free_i32(tmp);
7541 tmp = load_cpu_field(spsr);
7542 tcg_gen_addi_i32(addr, addr, 4);
7543 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7544 tcg_temp_free_i32(tmp);
7545 if (writeback) {
7546 switch (amode) {
7547 case 0:
7548 offset = -8;
7549 break;
7550 case 1:
7551 offset = 4;
7552 break;
7553 case 2:
7554 offset = -4;
7555 break;
7556 case 3:
7557 offset = 0;
7558 break;
7559 default:
7560 abort();
7561 }
7562 tcg_gen_addi_i32(addr, addr, offset);
7563 tmp = tcg_const_i32(mode);
7564 gen_helper_set_r13_banked(cpu_env, tmp, addr);
7565 tcg_temp_free_i32(tmp);
7566 }
7567 tcg_temp_free_i32(addr);
7568 s->base.is_jmp = DISAS_UPDATE;
7569 }
7570
7571 /* Generate a label used for skipping this instruction */
7572 static void arm_gen_condlabel(DisasContext *s)
7573 {
7574 if (!s->condjmp) {
7575 s->condlabel = gen_new_label();
7576 s->condjmp = 1;
7577 }
7578 }
7579
7580 /* Skip this instruction if the ARM condition is false */
7581 static void arm_skip_unless(DisasContext *s, uint32_t cond)
7582 {
7583 arm_gen_condlabel(s);
7584 arm_gen_test_cc(cond ^ 1, s->condlabel);
7585 }
7586
7587
7588 /*
7589 * Constant expanders for the decoders.
7590 */
7591
7592 static int negate(DisasContext *s, int x)
7593 {
7594 return -x;
7595 }
7596
7597 static int plus_2(DisasContext *s, int x)
7598 {
7599 return x + 2;
7600 }
7601
7602 static int times_2(DisasContext *s, int x)
7603 {
7604 return x * 2;
7605 }
7606
7607 static int times_4(DisasContext *s, int x)
7608 {
7609 return x * 4;
7610 }
7611
7612 /* Return only the rotation part of T32ExpandImm. */
7613 static int t32_expandimm_rot(DisasContext *s, int x)
7614 {
7615 return x & 0xc00 ? extract32(x, 7, 5) : 0;
7616 }
7617
7618 /* Return the unrotated immediate from T32ExpandImm. */
7619 static int t32_expandimm_imm(DisasContext *s, int x)
7620 {
7621 int imm = extract32(x, 0, 8);
7622
7623 switch (extract32(x, 8, 4)) {
7624 case 0: /* XY */
7625 /* Nothing to do. */
7626 break;
7627 case 1: /* 00XY00XY */
7628 imm *= 0x00010001;
7629 break;
7630 case 2: /* XY00XY00 */
7631 imm *= 0x01000100;
7632 break;
7633 case 3: /* XYXYXYXY */
7634 imm *= 0x01010101;
7635 break;
7636 default:
7637 /* Rotated constant. */
7638 imm |= 0x80;
7639 break;
7640 }
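/* For example, imm12 == 0x155 selects the 00XY00XY pattern, giving
 * 0x00550055 here with a rotation of 0 from t32_expandimm_rot().
 */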
7641 return imm;
7642 }
7643
7644 static int t32_branch24(DisasContext *s, int x)
7645 {
7646 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
7647 x ^= !(x < 0) * (3 << 21);
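/* Since x is already sign-extended, x < 0 tests S; flipping J1:J2 only
 * when S == 0 implements I = NOT(J EOR S).
 */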
7648 /* Append the final zero. */
7649 return x << 1;
7650 }
7651
7652 static int t16_setflags(DisasContext *s)
7653 {
7654 return s->condexec_mask == 0;
7655 }
7656
7657 static int t16_push_list(DisasContext *s, int x)
7658 {
7659 return (x & 0xff) | (x & 0x100) << (14 - 8);
7660 }
7661
7662 static int t16_pop_list(DisasContext *s, int x)
7663 {
7664 return (x & 0xff) | (x & 0x100) << (15 - 8);
7665 }
7666
7667 /*
7668 * Include the generated decoders.
7669 */
7670
7671 #include "decode-a32.inc.c"
7672 #include "decode-a32-uncond.inc.c"
7673 #include "decode-t32.inc.c"
7674 #include "decode-t16.inc.c"
7675
7676 /* Helpers to swap operands for reverse-subtract. */
7677 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7678 {
7679 tcg_gen_sub_i32(dst, b, a);
7680 }
7681
7682 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7683 {
7684 gen_sub_CC(dst, b, a);
7685 }
7686
7687 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7688 {
7689 gen_sub_carry(dest, b, a);
7690 }
7691
7692 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7693 {
7694 gen_sbc_CC(dest, b, a);
7695 }
7696
7697 /*
7698 * Helpers for the data processing routines.
7699 *
7700 * After the computation store the results back.
7701 * This may be suppressed altogether (STREG_NONE), require a runtime
7702 * check against the stack limits (STREG_SP_CHECK), or generate an
7703 * exception return. Oh, or store into a register.
7704 *
7705 * Always return true, indicating success for a trans_* function.
7706 */
7707 typedef enum {
7708 STREG_NONE,
7709 STREG_NORMAL,
7710 STREG_SP_CHECK,
7711 STREG_EXC_RET,
7712 } StoreRegKind;
7713
7714 static bool store_reg_kind(DisasContext *s, int rd,
7715 TCGv_i32 val, StoreRegKind kind)
7716 {
7717 switch (kind) {
7718 case STREG_NONE:
7719 tcg_temp_free_i32(val);
7720 return true;
7721 case STREG_NORMAL:
7722 /* See ALUWritePC: Interworking only from a32 mode. */
7723 if (s->thumb) {
7724 store_reg(s, rd, val);
7725 } else {
7726 store_reg_bx(s, rd, val);
7727 }
7728 return true;
7729 case STREG_SP_CHECK:
7730 store_sp_checked(s, val);
7731 return true;
7732 case STREG_EXC_RET:
7733 gen_exception_return(s, val);
7734 return true;
7735 }
7736 g_assert_not_reached();
7737 }
7738
7739 /*
7740 * Data Processing (register)
7741 *
7742 * Operate, with set flags, one register source,
7743 * one immediate shifted register source, and a destination.
7744 */
7745 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
7746 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7747 int logic_cc, StoreRegKind kind)
7748 {
7749 TCGv_i32 tmp1, tmp2;
7750
7751 tmp2 = load_reg(s, a->rm);
7752 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
7753 tmp1 = load_reg(s, a->rn);
7754
7755 gen(tmp1, tmp1, tmp2);
7756 tcg_temp_free_i32(tmp2);
7757
7758 if (logic_cc) {
7759 gen_logic_CC(tmp1);
7760 }
7761 return store_reg_kind(s, a->rd, tmp1, kind);
7762 }
7763
7764 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
7765 void (*gen)(TCGv_i32, TCGv_i32),
7766 int logic_cc, StoreRegKind kind)
7767 {
7768 TCGv_i32 tmp;
7769
7770 tmp = load_reg(s, a->rm);
7771 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
7772
7773 gen(tmp, tmp);
7774 if (logic_cc) {
7775 gen_logic_CC(tmp);
7776 }
7777 return store_reg_kind(s, a->rd, tmp, kind);
7778 }
7779
7780 /*
7781 * Data-processing (register-shifted register)
7782 *
7783 * Operate, with set flags, one register source,
7784 * one register shifted register source, and a destination.
7785 */
7786 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
7787 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7788 int logic_cc, StoreRegKind kind)
7789 {
7790 TCGv_i32 tmp1, tmp2;
7791
7792 tmp1 = load_reg(s, a->rs);
7793 tmp2 = load_reg(s, a->rm);
7794 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7795 tmp1 = load_reg(s, a->rn);
7796
7797 gen(tmp1, tmp1, tmp2);
7798 tcg_temp_free_i32(tmp2);
7799
7800 if (logic_cc) {
7801 gen_logic_CC(tmp1);
7802 }
7803 return store_reg_kind(s, a->rd, tmp1, kind);
7804 }
7805
7806 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
7807 void (*gen)(TCGv_i32, TCGv_i32),
7808 int logic_cc, StoreRegKind kind)
7809 {
7810 TCGv_i32 tmp1, tmp2;
7811
7812 tmp1 = load_reg(s, a->rs);
7813 tmp2 = load_reg(s, a->rm);
7814 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7815
7816 gen(tmp2, tmp2);
7817 if (logic_cc) {
7818 gen_logic_CC(tmp2);
7819 }
7820 return store_reg_kind(s, a->rd, tmp2, kind);
7821 }
7822
7823 /*
7824 * Data-processing (immediate)
7825 *
7826 * Operate, with set flags, one register source,
7827 * one rotated immediate, and a destination.
7828 *
7829 * Note that logic_cc && a->rot setting CF based on the msb of the
7830 * immediate is the reason why we must pass in the unrotated form
7831 * of the immediate.
7832 */
7833 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
7834 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7835 int logic_cc, StoreRegKind kind)
7836 {
7837 TCGv_i32 tmp1, tmp2;
7838 uint32_t imm;
7839
7840 imm = ror32(a->imm, a->rot);
7841 if (logic_cc && a->rot) {
7842 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7843 }
7844 tmp2 = tcg_const_i32(imm);
7845 tmp1 = load_reg(s, a->rn);
7846
7847 gen(tmp1, tmp1, tmp2);
7848 tcg_temp_free_i32(tmp2);
7849
7850 if (logic_cc) {
7851 gen_logic_CC(tmp1);
7852 }
7853 return store_reg_kind(s, a->rd, tmp1, kind);
7854 }
7855
7856 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
7857 void (*gen)(TCGv_i32, TCGv_i32),
7858 int logic_cc, StoreRegKind kind)
7859 {
7860 TCGv_i32 tmp;
7861 uint32_t imm;
7862
7863 imm = ror32(a->imm, a->rot);
7864 if (logic_cc && a->rot) {
7865 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7866 }
7867 tmp = tcg_const_i32(imm);
7868
7869 gen(tmp, tmp);
7870 if (logic_cc) {
7871 gen_logic_CC(tmp);
7872 }
7873 return store_reg_kind(s, a->rd, tmp, kind);
7874 }
7875
7876 #define DO_ANY3(NAME, OP, L, K) \
7877 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
7878 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
7879 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
7880 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
7881 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
7882 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
7883
7884 #define DO_ANY2(NAME, OP, L, K) \
7885 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
7886 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
7887 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
7888 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
7889 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
7890 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
7891
7892 #define DO_CMP2(NAME, OP, L) \
7893 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
7894 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
7895 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
7896 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
7897 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
7898 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
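/*
 * Each DO_ANY3 use below expands to the trans_<NAME>_rrri,
 * trans_<NAME>_rrrr and trans_<NAME>_rri functions expected by the
 * decodetree-generated decoder, forwarding to op_s_rrr_shi,
 * op_s_rrr_shr and op_s_rri_rot respectively; DO_ANY2 and DO_CMP2
 * do the same for the one-source and compare-only forms.
 */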
7899
7900 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
7901 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
7902 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
7903 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
7904
7905 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
7906 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
7907 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
7908 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
7909
7910 DO_CMP2(TST, tcg_gen_and_i32, true)
7911 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
7912 DO_CMP2(CMN, gen_add_CC, false)
7913 DO_CMP2(CMP, gen_sub_CC, false)
7914
7915 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
7916 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
7917
7918 /*
7919 * Note for the computation of StoreRegKind we return out of the
7920 * middle of the functions that are expanded by DO_ANY3, and that
7921 * we modify a->s via that parameter before it is used by OP.
7922 */
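/*
 * The K argument below is a GNU C statement expression: its value (the
 * trailing "ret;") supplies the StoreRegKind, a "return true" inside it
 * returns from the expanded trans_SUB_x or trans_MOV_x function itself,
 * and the "a->s = 0" assignment takes effect before OP reads a->s.
 */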
7923 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
7924 ({
7925 StoreRegKind ret = STREG_NORMAL;
7926 if (a->rd == 15 && a->s) {
7927 /*
7928 * See ALUExceptionReturn:
7929 * In User mode, UNPREDICTABLE; we choose UNDEF.
7930 * In Hyp mode, UNDEFINED.
7931 */
7932 if (IS_USER(s) || s->current_el == 2) {
7933 unallocated_encoding(s);
7934 return true;
7935 }
7936 /* There is no writeback of nzcv to PSTATE. */
7937 a->s = 0;
7938 ret = STREG_EXC_RET;
7939 } else if (a->rd == 13 && a->rn == 13) {
7940 ret = STREG_SP_CHECK;
7941 }
7942 ret;
7943 }))
7944
7945 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
7946 ({
7947 StoreRegKind ret = STREG_NORMAL;
7948 if (a->rd == 15 && a->s) {
7949 /*
7950 * See ALUExceptionReturn:
7951 * In User mode, UNPREDICTABLE; we choose UNDEF.
7952 * In Hyp mode, UNDEFINED.
7953 */
7954 if (IS_USER(s) || s->current_el == 2) {
7955 unallocated_encoding(s);
7956 return true;
7957 }
7958 /* There is no writeback of nzcv to PSTATE. */
7959 a->s = 0;
7960 ret = STREG_EXC_RET;
7961 } else if (a->rd == 13) {
7962 ret = STREG_SP_CHECK;
7963 }
7964 ret;
7965 }))
7966
7967 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
7968
7969 /*
7970 * ORN is only available with T32, so there is no register-shifted-register
7971 * form of the insn. Using the DO_ANY3 macro would create an unused function.
7972 */
7973 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
7974 {
7975 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7976 }
7977
7978 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
7979 {
7980 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7981 }
7982
7983 #undef DO_ANY3
7984 #undef DO_ANY2
7985 #undef DO_CMP2
7986
7987 static bool trans_ADR(DisasContext *s, arg_ri *a)
7988 {
7989 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
7990 return true;
7991 }
7992
7993 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
7994 {
7995 TCGv_i32 tmp;
7996
7997 if (!ENABLE_ARCH_6T2) {
7998 return false;
7999 }
8000
8001 tmp = tcg_const_i32(a->imm);
8002 store_reg(s, a->rd, tmp);
8003 return true;
8004 }
8005
8006 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
8007 {
8008 TCGv_i32 tmp;
8009
8010 if (!ENABLE_ARCH_6T2) {
8011 return false;
8012 }
8013
8014 tmp = load_reg(s, a->rd);
8015 tcg_gen_ext16u_i32(tmp, tmp);
8016 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
8017 store_reg(s, a->rd, tmp);
8018 return true;
8019 }
8020
8021 /*
8022 * Multiply and multiply accumulate
8023 */
8024
8025 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
8026 {
8027 TCGv_i32 t1, t2;
8028
8029 t1 = load_reg(s, a->rn);
8030 t2 = load_reg(s, a->rm);
8031 tcg_gen_mul_i32(t1, t1, t2);
8032 tcg_temp_free_i32(t2);
8033 if (add) {
8034 t2 = load_reg(s, a->ra);
8035 tcg_gen_add_i32(t1, t1, t2);
8036 tcg_temp_free_i32(t2);
8037 }
8038 if (a->s) {
8039 gen_logic_CC(t1);
8040 }
8041 store_reg(s, a->rd, t1);
8042 return true;
8043 }
8044
8045 static bool trans_MUL(DisasContext *s, arg_MUL *a)
8046 {
8047 return op_mla(s, a, false);
8048 }
8049
8050 static bool trans_MLA(DisasContext *s, arg_MLA *a)
8051 {
8052 return op_mla(s, a, true);
8053 }
8054
8055 static bool trans_MLS(DisasContext *s, arg_MLS *a)
8056 {
8057 TCGv_i32 t1, t2;
8058
8059 if (!ENABLE_ARCH_6T2) {
8060 return false;
8061 }
8062 t1 = load_reg(s, a->rn);
8063 t2 = load_reg(s, a->rm);
8064 tcg_gen_mul_i32(t1, t1, t2);
8065 tcg_temp_free_i32(t2);
8066 t2 = load_reg(s, a->ra);
8067 tcg_gen_sub_i32(t1, t2, t1);
8068 tcg_temp_free_i32(t2);
8069 store_reg(s, a->rd, t1);
8070 return true;
8071 }
8072
8073 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
8074 {
8075 TCGv_i32 t0, t1, t2, t3;
8076
8077 t0 = load_reg(s, a->rm);
8078 t1 = load_reg(s, a->rn);
8079 if (uns) {
8080 tcg_gen_mulu2_i32(t0, t1, t0, t1);
8081 } else {
8082 tcg_gen_muls2_i32(t0, t1, t0, t1);
8083 }
8084 if (add) {
8085 t2 = load_reg(s, a->ra);
8086 t3 = load_reg(s, a->rd);
8087 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
8088 tcg_temp_free_i32(t2);
8089 tcg_temp_free_i32(t3);
8090 }
8091 if (a->s) {
8092 gen_logicq_cc(t0, t1);
8093 }
8094 store_reg(s, a->ra, t0);
8095 store_reg(s, a->rd, t1);
8096 return true;
8097 }
8098
8099 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
8100 {
8101 return op_mlal(s, a, true, false);
8102 }
8103
8104 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
8105 {
8106 return op_mlal(s, a, false, false);
8107 }
8108
8109 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
8110 {
8111 return op_mlal(s, a, true, true);
8112 }
8113
8114 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
8115 {
8116 return op_mlal(s, a, false, true);
8117 }
8118
8119 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
8120 {
8121 TCGv_i32 t0, t1, t2, zero;
8122
8123 if (s->thumb
8124 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8125 : !ENABLE_ARCH_6) {
8126 return false;
8127 }
8128
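/*
 * UMAAL: 64-bit result = rn * rm + ra + rd (all 32-bit inputs). This
 * cannot overflow 64 bits, since
 * 0xffffffff * 0xffffffff + 2 * 0xffffffff == 0xffffffffffffffff,
 * so the two add2 steps below with a zero high part are sufficient.
 */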
8129 t0 = load_reg(s, a->rm);
8130 t1 = load_reg(s, a->rn);
8131 tcg_gen_mulu2_i32(t0, t1, t0, t1);
8132 zero = tcg_const_i32(0);
8133 t2 = load_reg(s, a->ra);
8134 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
8135 tcg_temp_free_i32(t2);
8136 t2 = load_reg(s, a->rd);
8137 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
8138 tcg_temp_free_i32(t2);
8139 tcg_temp_free_i32(zero);
8140 store_reg(s, a->ra, t0);
8141 store_reg(s, a->rd, t1);
8142 return true;
8143 }
8144
8145 /*
8146 * Saturating addition and subtraction
8147 */
8148
8149 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
8150 {
8151 TCGv_i32 t0, t1;
8152
8153 if (s->thumb
8154 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8155 : !ENABLE_ARCH_5TE) {
8156 return false;
8157 }
8158
8159 t0 = load_reg(s, a->rm);
8160 t1 = load_reg(s, a->rn);
8161 if (doub) {
8162 gen_helper_add_saturate(t1, cpu_env, t1, t1);
8163 }
8164 if (add) {
8165 gen_helper_add_saturate(t0, cpu_env, t0, t1);
8166 } else {
8167 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
8168 }
8169 tcg_temp_free_i32(t1);
8170 store_reg(s, a->rd, t0);
8171 return true;
8172 }
8173
8174 #define DO_QADDSUB(NAME, ADD, DOUB) \
8175 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8176 { \
8177 return op_qaddsub(s, a, ADD, DOUB); \
8178 }
8179
8180 DO_QADDSUB(QADD, true, false)
8181 DO_QADDSUB(QSUB, false, false)
8182 DO_QADDSUB(QDADD, true, true)
8183 DO_QADDSUB(QDSUB, false, true)
8184
8185 #undef DO_QADDSUB
8186
8187 /*
8188 * Halfword multiply and multiply accumulate
8189 */
8190
8191 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
8192 int add_long, bool nt, bool mt)
8193 {
8194 TCGv_i32 t0, t1, tl, th;
8195
8196 if (s->thumb
8197 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8198 : !ENABLE_ARCH_5TE) {
8199 return false;
8200 }
8201
8202 t0 = load_reg(s, a->rn);
8203 t1 = load_reg(s, a->rm);
8204 gen_mulxy(t0, t1, nt, mt);
8205 tcg_temp_free_i32(t1);
8206
8207 switch (add_long) {
8208 case 0:
8209 store_reg(s, a->rd, t0);
8210 break;
8211 case 1:
8212 t1 = load_reg(s, a->ra);
8213 gen_helper_add_setq(t0, cpu_env, t0, t1);
8214 tcg_temp_free_i32(t1);
8215 store_reg(s, a->rd, t0);
8216 break;
8217 case 2:
8218 tl = load_reg(s, a->ra);
8219 th = load_reg(s, a->rd);
8220 /* Sign-extend the 32-bit product to 64 bits. */
8221 t1 = tcg_temp_new_i32();
8222 tcg_gen_sari_i32(t1, t0, 31);
8223 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
8224 tcg_temp_free_i32(t0);
8225 tcg_temp_free_i32(t1);
8226 store_reg(s, a->ra, tl);
8227 store_reg(s, a->rd, th);
8228 break;
8229 default:
8230 g_assert_not_reached();
8231 }
8232 return true;
8233 }
8234
8235 #define DO_SMLAX(NAME, add, nt, mt) \
8236 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8237 { \
8238 return op_smlaxxx(s, a, add, nt, mt); \
8239 }
8240
8241 DO_SMLAX(SMULBB, 0, 0, 0)
8242 DO_SMLAX(SMULBT, 0, 0, 1)
8243 DO_SMLAX(SMULTB, 0, 1, 0)
8244 DO_SMLAX(SMULTT, 0, 1, 1)
8245
8246 DO_SMLAX(SMLABB, 1, 0, 0)
8247 DO_SMLAX(SMLABT, 1, 0, 1)
8248 DO_SMLAX(SMLATB, 1, 1, 0)
8249 DO_SMLAX(SMLATT, 1, 1, 1)
8250
8251 DO_SMLAX(SMLALBB, 2, 0, 0)
8252 DO_SMLAX(SMLALBT, 2, 0, 1)
8253 DO_SMLAX(SMLALTB, 2, 1, 0)
8254 DO_SMLAX(SMLALTT, 2, 1, 1)
8255
8256 #undef DO_SMLAX
8257
8258 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
8259 {
8260 TCGv_i32 t0, t1;
8261
8262 if (!ENABLE_ARCH_5TE) {
8263 return false;
8264 }
8265
8266 t0 = load_reg(s, a->rn);
8267 t1 = load_reg(s, a->rm);
8268 /*
8269 * Since the nominal result is product<47:16>, shift the 16-bit
8270 * input up by 16 bits, so that the result is at product<63:32>.
8271 */
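/*
 * That is, ((a * (b << 16)) >> 32) == ((a * b) >> 16), so the high
 * result word of muls2 below is exactly product<47:16>.
 */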
8272 if (mt) {
8273 tcg_gen_andi_i32(t1, t1, 0xffff0000);
8274 } else {
8275 tcg_gen_shli_i32(t1, t1, 16);
8276 }
8277 tcg_gen_muls2_i32(t0, t1, t0, t1);
8278 tcg_temp_free_i32(t0);
8279 if (add) {
8280 t0 = load_reg(s, a->ra);
8281 gen_helper_add_setq(t1, cpu_env, t1, t0);
8282 tcg_temp_free_i32(t0);
8283 }
8284 store_reg(s, a->rd, t1);
8285 return true;
8286 }
8287
8288 #define DO_SMLAWX(NAME, add, mt) \
8289 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8290 { \
8291 return op_smlawx(s, a, add, mt); \
8292 }
8293
8294 DO_SMLAWX(SMULWB, 0, 0)
8295 DO_SMLAWX(SMULWT, 0, 1)
8296 DO_SMLAWX(SMLAWB, 1, 0)
8297 DO_SMLAWX(SMLAWT, 1, 1)
8298
8299 #undef DO_SMLAWX
8300
8301 /*
8302 * MSR (immediate) and hints
8303 */
8304
8305 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
8306 {
8307 /*
8308 * When running single-threaded TCG code, use the helper to ensure that
8309 * the next round-robin scheduled vCPU gets a crack. When running in
8310 * MTTCG we don't generate jumps to the helper as it won't affect the
8311 * scheduling of other vCPUs.
8312 */
8313 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8314 gen_set_pc_im(s, s->base.pc_next);
8315 s->base.is_jmp = DISAS_YIELD;
8316 }
8317 return true;
8318 }
8319
8320 static bool trans_WFE(DisasContext *s, arg_WFE *a)
8321 {
8322 /*
8323 * When running single-threaded TCG code, use the helper to ensure that
8324 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
8325 * just skip this instruction. Currently the SEV/SEVL instructions,
8326 * which are *one* of many ways to wake the CPU from WFE, are not
8327 * implemented so we can't sleep like WFI does.
8328 */
8329 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8330 gen_set_pc_im(s, s->base.pc_next);
8331 s->base.is_jmp = DISAS_WFE;
8332 }
8333 return true;
8334 }
8335
8336 static bool trans_WFI(DisasContext *s, arg_WFI *a)
8337 {
8338 /* For WFI, halt the vCPU until an IRQ. */
8339 gen_set_pc_im(s, s->base.pc_next);
8340 s->base.is_jmp = DISAS_WFI;
8341 return true;
8342 }
8343
8344 static bool trans_NOP(DisasContext *s, arg_NOP *a)
8345 {
8346 return true;
8347 }
8348
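/*
 * The A32 MSR (immediate) encoding rotates an 8-bit immediate right by
 * twice the 4-bit rotate field, hence the "a->rot * 2" below.
 */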
8349 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
8350 {
8351 uint32_t val = ror32(a->imm, a->rot * 2);
8352 uint32_t mask = msr_mask(s, a->mask, a->r);
8353
8354 if (gen_set_psr_im(s, mask, a->r, val)) {
8355 unallocated_encoding(s);
8356 }
8357 return true;
8358 }
8359
8360 /*
8361 * Cyclic Redundancy Check
8362 */
8363
8364 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
8365 {
8366 TCGv_i32 t1, t2, t3;
8367
8368 if (!dc_isar_feature(aa32_crc32, s)) {
8369 return false;
8370 }
8371
8372 t1 = load_reg(s, a->rn);
8373 t2 = load_reg(s, a->rm);
8374 switch (sz) {
8375 case MO_8:
8376 gen_uxtb(t2);
8377 break;
8378 case MO_16:
8379 gen_uxth(t2);
8380 break;
8381 case MO_32:
8382 break;
8383 default:
8384 g_assert_not_reached();
8385 }
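/* 1 << sz is the access size in bytes: MO_8 -> 1, MO_16 -> 2, MO_32 -> 4. */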
8386 t3 = tcg_const_i32(1 << sz);
8387 if (c) {
8388 gen_helper_crc32c(t1, t1, t2, t3);
8389 } else {
8390 gen_helper_crc32(t1, t1, t2, t3);
8391 }
8392 tcg_temp_free_i32(t2);
8393 tcg_temp_free_i32(t3);
8394 store_reg(s, a->rd, t1);
8395 return true;
8396 }
8397
8398 #define DO_CRC32(NAME, c, sz) \
8399 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8400 { return op_crc32(s, a, c, sz); }
8401
8402 DO_CRC32(CRC32B, false, MO_8)
8403 DO_CRC32(CRC32H, false, MO_16)
8404 DO_CRC32(CRC32W, false, MO_32)
8405 DO_CRC32(CRC32CB, true, MO_8)
8406 DO_CRC32(CRC32CH, true, MO_16)
8407 DO_CRC32(CRC32CW, true, MO_32)
8408
8409 #undef DO_CRC32
8410
8411 /*
8412 * Miscellaneous instructions
8413 */
8414
8415 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
8416 {
8417 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8418 return false;
8419 }
8420 gen_mrs_banked(s, a->r, a->sysm, a->rd);
8421 return true;
8422 }
8423
8424 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
8425 {
8426 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8427 return false;
8428 }
8429 gen_msr_banked(s, a->r, a->sysm, a->rn);
8430 return true;
8431 }
8432
8433 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
8434 {
8435 TCGv_i32 tmp;
8436
8437 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8438 return false;
8439 }
8440 if (a->r) {
8441 if (IS_USER(s)) {
8442 unallocated_encoding(s);
8443 return true;
8444 }
8445 tmp = load_cpu_field(spsr);
8446 } else {
8447 tmp = tcg_temp_new_i32();
8448 gen_helper_cpsr_read(tmp, cpu_env);
8449 }
8450 store_reg(s, a->rd, tmp);
8451 return true;
8452 }
8453
8454 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
8455 {
8456 TCGv_i32 tmp;
8457 uint32_t mask = msr_mask(s, a->mask, a->r);
8458
8459 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8460 return false;
8461 }
8462 tmp = load_reg(s, a->rn);
8463 if (gen_set_psr(s, mask, a->r, tmp)) {
8464 unallocated_encoding(s);
8465 }
8466 return true;
8467 }
8468
8469 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
8470 {
8471 TCGv_i32 tmp;
8472
8473 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8474 return false;
8475 }
8476 tmp = tcg_const_i32(a->sysm);
8477 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
8478 store_reg(s, a->rd, tmp);
8479 return true;
8480 }
8481
8482 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
8483 {
8484 TCGv_i32 addr, reg;
8485
8486 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8487 return false;
8488 }
8489 addr = tcg_const_i32((a->mask << 10) | a->sysm);
8490 reg = load_reg(s, a->rn);
8491 gen_helper_v7m_msr(cpu_env, addr, reg);
8492 tcg_temp_free_i32(addr);
8493 tcg_temp_free_i32(reg);
8494 /* If we wrote to CONTROL, the EL might have changed */
8495 gen_helper_rebuild_hflags_m32_newel(cpu_env);
8496 gen_lookup_tb(s);
8497 return true;
8498 }
8499
8500 static bool trans_BX(DisasContext *s, arg_BX *a)
8501 {
8502 if (!ENABLE_ARCH_4T) {
8503 return false;
8504 }
8505 gen_bx_excret(s, load_reg(s, a->rm));
8506 return true;
8507 }
8508
8509 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
8510 {
8511 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
8512 return false;
8513 }
8514 /* Trivial implementation equivalent to bx. */
8515 gen_bx(s, load_reg(s, a->rm));
8516 return true;
8517 }
8518
8519 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
8520 {
8521 TCGv_i32 tmp;
8522
8523 if (!ENABLE_ARCH_5) {
8524 return false;
8525 }
8526 tmp = load_reg(s, a->rm);
8527 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8528 gen_bx(s, tmp);
8529 return true;
8530 }
8531
8532 /*
8533 * BXNS/BLXNS: only exist for v8M with the security extensions,
8534 * and always UNDEF if NonSecure. We don't implement these in
8535 * the user-only mode either (in theory you can use them from
8536 * Secure User mode but they are too tied in to system emulation).
8537 */
8538 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
8539 {
8540 if (!s->v8m_secure || IS_USER_ONLY) {
8541 unallocated_encoding(s);
8542 } else {
8543 gen_bxns(s, a->rm);
8544 }
8545 return true;
8546 }
8547
8548 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
8549 {
8550 if (!s->v8m_secure || IS_USER_ONLY) {
8551 unallocated_encoding(s);
8552 } else {
8553 gen_blxns(s, a->rm);
8554 }
8555 return true;
8556 }
8557
8558 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
8559 {
8560 TCGv_i32 tmp;
8561
8562 if (!ENABLE_ARCH_5) {
8563 return false;
8564 }
8565 tmp = load_reg(s, a->rm);
8566 tcg_gen_clzi_i32(tmp, tmp, 32);
8567 store_reg(s, a->rd, tmp);
8568 return true;
8569 }
8570
8571 static bool trans_ERET(DisasContext *s, arg_ERET *a)
8572 {
8573 TCGv_i32 tmp;
8574
8575 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
8576 return false;
8577 }
8578 if (IS_USER(s)) {
8579 unallocated_encoding(s);
8580 return true;
8581 }
8582 if (s->current_el == 2) {
8583 /* ERET from Hyp uses ELR_Hyp, not LR */
8584 tmp = load_cpu_field(elr_el[2]);
8585 } else {
8586 tmp = load_reg(s, 14);
8587 }
8588 gen_exception_return(s, tmp);
8589 return true;
8590 }
8591
8592 static bool trans_HLT(DisasContext *s, arg_HLT *a)
8593 {
8594 gen_hlt(s, a->imm);
8595 return true;
8596 }
8597
8598 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
8599 {
8600 if (!ENABLE_ARCH_5) {
8601 return false;
8602 }
8603 if (arm_dc_feature(s, ARM_FEATURE_M) &&
8604 semihosting_enabled() &&
8605 #ifndef CONFIG_USER_ONLY
8606 !IS_USER(s) &&
8607 #endif
8608 (a->imm == 0xab)) {
8609 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8610 } else {
8611 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
8612 }
8613 return true;
8614 }
8615
8616 static bool trans_HVC(DisasContext *s, arg_HVC *a)
8617 {
8618 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
8619 return false;
8620 }
8621 if (IS_USER(s)) {
8622 unallocated_encoding(s);
8623 } else {
8624 gen_hvc(s, a->imm);
8625 }
8626 return true;
8627 }
8628
8629 static bool trans_SMC(DisasContext *s, arg_SMC *a)
8630 {
8631 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
8632 return false;
8633 }
8634 if (IS_USER(s)) {
8635 unallocated_encoding(s);
8636 } else {
8637 gen_smc(s);
8638 }
8639 return true;
8640 }
8641
8642 static bool trans_SG(DisasContext *s, arg_SG *a)
8643 {
8644 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8645 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8646 return false;
8647 }
8648 /*
8649 * SG (v8M only)
8650 * The bulk of the behaviour for this instruction is implemented
8651 * in v7m_handle_execute_nsc(), which deals with the insn when
8652 * it is executed by a CPU in non-secure state from memory
8653 * which is Secure & NonSecure-Callable.
8654 * Here we only need to handle the remaining cases:
8655 * * in NS memory (including the "security extension not
8656 * implemented" case) : NOP
8657 * * in S memory but CPU already secure (clear IT bits)
8658 * We know that the attribute for the memory this insn is
8659 * in must match the current CPU state, because otherwise
8660 * get_phys_addr_pmsav8 would have generated an exception.
8661 */
8662 if (s->v8m_secure) {
8663 /* Like the IT insn, we don't need to generate any code */
8664 s->condexec_cond = 0;
8665 s->condexec_mask = 0;
8666 }
8667 return true;
8668 }
8669
8670 static bool trans_TT(DisasContext *s, arg_TT *a)
8671 {
8672 TCGv_i32 addr, tmp;
8673
8674 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8675 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8676 return false;
8677 }
8678 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
8679 /* We UNDEF for these UNPREDICTABLE cases */
8680 unallocated_encoding(s);
8681 return true;
8682 }
8683 if (a->A && !s->v8m_secure) {
8684 /* This case is UNDEFINED. */
8685 unallocated_encoding(s);
8686 return true;
8687 }
8688
8689 addr = load_reg(s, a->rn);
8690 tmp = tcg_const_i32((a->A << 1) | a->T);
8691 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
8692 tcg_temp_free_i32(addr);
8693 store_reg(s, a->rd, tmp);
8694 return true;
8695 }
8696
8697 /*
8698 * Load/store register index
8699 */
8700
8701 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
8702 {
8703 ISSInfo ret;
8704
8705 /* ISS not valid if writeback */
8706 if (p && !w) {
8707 ret = rd;
8708 if (s->base.pc_next - s->pc_curr == 2) {
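/* A 2-byte PC advance means this was a 16-bit Thumb encoding. */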
8709 ret |= ISSIs16Bit;
8710 }
8711 } else {
8712 ret = ISSInvalid;
8713 }
8714 return ret;
8715 }
8716
8717 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
8718 {
8719 TCGv_i32 addr = load_reg(s, a->rn);
8720
8721 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8722 gen_helper_v8m_stackcheck(cpu_env, addr);
8723 }
8724
8725 if (a->p) {
8726 TCGv_i32 ofs = load_reg(s, a->rm);
8727 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8728 if (a->u) {
8729 tcg_gen_add_i32(addr, addr, ofs);
8730 } else {
8731 tcg_gen_sub_i32(addr, addr, ofs);
8732 }
8733 tcg_temp_free_i32(ofs);
8734 }
8735 return addr;
8736 }
8737
8738 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
8739 TCGv_i32 addr, int address_offset)
8740 {
8741 if (!a->p) {
8742 TCGv_i32 ofs = load_reg(s, a->rm);
8743 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8744 if (a->u) {
8745 tcg_gen_add_i32(addr, addr, ofs);
8746 } else {
8747 tcg_gen_sub_i32(addr, addr, ofs);
8748 }
8749 tcg_temp_free_i32(ofs);
8750 } else if (!a->w) {
8751 tcg_temp_free_i32(addr);
8752 return;
8753 }
8754 tcg_gen_addi_i32(addr, addr, address_offset);
8755 store_reg(s, a->rn, addr);
8756 }
8757
8758 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
8759 MemOp mop, int mem_idx)
8760 {
8761 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8762 TCGv_i32 addr, tmp;
8763
8764 addr = op_addr_rr_pre(s, a);
8765
8766 tmp = tcg_temp_new_i32();
8767 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8768 disas_set_da_iss(s, mop, issinfo);
8769
8770 /*
8771 * Perform base writeback before the loaded value to
8772 * ensure correct behavior with overlapping index registers.
8773 */
8774 op_addr_rr_post(s, a, addr, 0);
8775 store_reg_from_load(s, a->rt, tmp);
8776 return true;
8777 }
8778
8779 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
8780 MemOp mop, int mem_idx)
8781 {
8782 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8783 TCGv_i32 addr, tmp;
8784
8785 addr = op_addr_rr_pre(s, a);
8786
8787 tmp = load_reg(s, a->rt);
8788 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8789 disas_set_da_iss(s, mop, issinfo);
8790 tcg_temp_free_i32(tmp);
8791
8792 op_addr_rr_post(s, a, addr, 0);
8793 return true;
8794 }
8795
8796 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
8797 {
8798 int mem_idx = get_mem_index(s);
8799 TCGv_i32 addr, tmp;
8800
8801 if (!ENABLE_ARCH_5TE) {
8802 return false;
8803 }
8804 if (a->rt & 1) {
8805 unallocated_encoding(s);
8806 return true;
8807 }
8808 addr = op_addr_rr_pre(s, a);
8809
8810 tmp = tcg_temp_new_i32();
8811 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8812 store_reg(s, a->rt, tmp);
8813
8814 tcg_gen_addi_i32(addr, addr, 4);
8815
8816 tmp = tcg_temp_new_i32();
8817 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8818 store_reg(s, a->rt + 1, tmp);
8819
8820 /* LDRD w/ base writeback is undefined if the registers overlap. */
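/*
 * The -4 compensates for the addi of 4 between the two accesses, so
 * that any base writeback is computed from the correct address.
 */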
8821 op_addr_rr_post(s, a, addr, -4);
8822 return true;
8823 }
8824
8825 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
8826 {
8827 int mem_idx = get_mem_index(s);
8828 TCGv_i32 addr, tmp;
8829
8830 if (!ENABLE_ARCH_5TE) {
8831 return false;
8832 }
8833 if (a->rt & 1) {
8834 unallocated_encoding(s);
8835 return true;
8836 }
8837 addr = op_addr_rr_pre(s, a);
8838
8839 tmp = load_reg(s, a->rt);
8840 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8841 tcg_temp_free_i32(tmp);
8842
8843 tcg_gen_addi_i32(addr, addr, 4);
8844
8845 tmp = load_reg(s, a->rt + 1);
8846 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8847 tcg_temp_free_i32(tmp);
8848
8849 op_addr_rr_post(s, a, addr, -4);
8850 return true;
8851 }
8852
8853 /*
8854 * Load/store immediate index
8855 */
8856
8857 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
8858 {
8859 int ofs = a->imm;
8860
8861 if (!a->u) {
8862 ofs = -ofs;
8863 }
8864
8865 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8866 /*
8867 * Stackcheck. Here we know 'addr' is the current SP;
8868 * U is set if we're moving SP up, else down. It is
8869 * UNKNOWN whether the limit check triggers when SP starts
8870 * below the limit and ends up above it; we chose to do so.
8871 */
8872 if (!a->u) {
8873 TCGv_i32 newsp = tcg_temp_new_i32();
8874 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
8875 gen_helper_v8m_stackcheck(cpu_env, newsp);
8876 tcg_temp_free_i32(newsp);
8877 } else {
8878 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
8879 }
8880 }
8881
8882 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
8883 }
8884
8885 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
8886 TCGv_i32 addr, int address_offset)
8887 {
8888 if (!a->p) {
8889 if (a->u) {
8890 address_offset += a->imm;
8891 } else {
8892 address_offset -= a->imm;
8893 }
8894 } else if (!a->w) {
8895 tcg_temp_free_i32(addr);
8896 return;
8897 }
8898 tcg_gen_addi_i32(addr, addr, address_offset);
8899 store_reg(s, a->rn, addr);
8900 }
8901
8902 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
8903 MemOp mop, int mem_idx)
8904 {
8905 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8906 TCGv_i32 addr, tmp;
8907
8908 addr = op_addr_ri_pre(s, a);
8909
8910 tmp = tcg_temp_new_i32();
8911 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8912 disas_set_da_iss(s, mop, issinfo);
8913
8914 /*
8915 * Perform base writeback before the loaded value to
8916 * ensure correct behavior with overlapping index registers.
8917 */
8918 op_addr_ri_post(s, a, addr, 0);
8919 store_reg_from_load(s, a->rt, tmp);
8920 return true;
8921 }
8922
8923 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
8924 MemOp mop, int mem_idx)
8925 {
8926 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8927 TCGv_i32 addr, tmp;
8928
8929 addr = op_addr_ri_pre(s, a);
8930
8931 tmp = load_reg(s, a->rt);
8932 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8933 disas_set_da_iss(s, mop, issinfo);
8934 tcg_temp_free_i32(tmp);
8935
8936 op_addr_ri_post(s, a, addr, 0);
8937 return true;
8938 }
8939
8940 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8941 {
8942 int mem_idx = get_mem_index(s);
8943 TCGv_i32 addr, tmp;
8944
8945 addr = op_addr_ri_pre(s, a);
8946
8947 tmp = tcg_temp_new_i32();
8948 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8949 store_reg(s, a->rt, tmp);
8950
8951 tcg_gen_addi_i32(addr, addr, 4);
8952
8953 tmp = tcg_temp_new_i32();
8954 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8955 store_reg(s, rt2, tmp);
8956
8957 /* LDRD w/ base writeback is undefined if the registers overlap. */
8958 op_addr_ri_post(s, a, addr, -4);
8959 return true;
8960 }
8961
8962 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8963 {
8964 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8965 return false;
8966 }
8967 return op_ldrd_ri(s, a, a->rt + 1);
8968 }
8969
8970 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8971 {
8972 arg_ldst_ri b = {
8973 .u = a->u, .w = a->w, .p = a->p,
8974 .rn = a->rn, .rt = a->rt, .imm = a->imm
8975 };
8976 return op_ldrd_ri(s, &b, a->rt2);
8977 }
8978
8979 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8980 {
8981 int mem_idx = get_mem_index(s);
8982 TCGv_i32 addr, tmp;
8983
8984 addr = op_addr_ri_pre(s, a);
8985
8986 tmp = load_reg(s, a->rt);
8987 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8988 tcg_temp_free_i32(tmp);
8989
8990 tcg_gen_addi_i32(addr, addr, 4);
8991
8992 tmp = load_reg(s, rt2);
8993 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8994 tcg_temp_free_i32(tmp);
8995
8996 op_addr_ri_post(s, a, addr, -4);
8997 return true;
8998 }
8999
9000 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
9001 {
9002 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
9003 return false;
9004 }
9005 return op_strd_ri(s, a, a->rt + 1);
9006 }
9007
9008 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
9009 {
9010 arg_ldst_ri b = {
9011 .u = a->u, .w = a->w, .p = a->p,
9012 .rn = a->rn, .rt = a->rt, .imm = a->imm
9013 };
9014 return op_strd_ri(s, &b, a->rt2);
9015 }
9016
9017 #define DO_LDST(NAME, WHICH, MEMOP) \
9018 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
9019 { \
9020 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
9021 } \
9022 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
9023 { \
9024 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
9025 } \
9026 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
9027 { \
9028 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
9029 } \
9030 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
9031 { \
9032 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
9033 }
9034
9035 DO_LDST(LDR, load, MO_UL)
9036 DO_LDST(LDRB, load, MO_UB)
9037 DO_LDST(LDRH, load, MO_UW)
9038 DO_LDST(LDRSB, load, MO_SB)
9039 DO_LDST(LDRSH, load, MO_SW)
9040
9041 DO_LDST(STR, store, MO_UL)
9042 DO_LDST(STRB, store, MO_UB)
9043 DO_LDST(STRH, store, MO_UW)
9044
9045 #undef DO_LDST
9046
9047 /*
9048 * Synchronization primitives
9049 */
9050
9051 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
9052 {
9053 TCGv_i32 addr, tmp;
9054 TCGv taddr;
9055
9056 opc |= s->be_data;
9057 addr = load_reg(s, a->rn);
9058 taddr = gen_aa32_addr(s, addr, opc);
9059 tcg_temp_free_i32(addr);
9060
9061 tmp = load_reg(s, a->rt2);
9062 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
9063 tcg_temp_free(taddr);
9064
9065 store_reg(s, a->rt, tmp);
9066 return true;
9067 }
9068
9069 static bool trans_SWP(DisasContext *s, arg_SWP *a)
9070 {
9071 return op_swp(s, a, MO_UL | MO_ALIGN);
9072 }
9073
9074 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
9075 {
9076 return op_swp(s, a, MO_UB);
9077 }
9078
9079 /*
9080 * Load/Store Exclusive and Load-Acquire/Store-Release
9081 */
9082
9083 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
9084 {
9085 TCGv_i32 addr;
9086 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
9087 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
9088
9089 /* We UNDEF for these UNPREDICTABLE cases. */
9090 if (a->rd == 15 || a->rn == 15 || a->rt == 15
9091 || a->rd == a->rn || a->rd == a->rt
9092 || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
9093 || (mop == MO_64
9094 && (a->rt2 == 15
9095 || a->rd == a->rt2
9096 || (!v8a && s->thumb && a->rt2 == 13)))) {
9097 unallocated_encoding(s);
9098 return true;
9099 }
9100
9101 if (rel) {
9102 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9103 }
9104
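/*
 * A "local" temp is used for the address because gen_store_exclusive
 * emits branches, and ordinary TCG temps are not preserved across them.
 */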
9105 addr = tcg_temp_local_new_i32();
9106 load_reg_var(s, addr, a->rn);
9107 tcg_gen_addi_i32(addr, addr, a->imm);
9108
9109 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
9110 tcg_temp_free_i32(addr);
9111 return true;
9112 }
9113
9114 static bool trans_STREX(DisasContext *s, arg_STREX *a)
9115 {
9116 if (!ENABLE_ARCH_6) {
9117 return false;
9118 }
9119 return op_strex(s, a, MO_32, false);
9120 }
9121
9122 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
9123 {
9124 if (!ENABLE_ARCH_6K) {
9125 return false;
9126 }
9127 /* We UNDEF for these UNPREDICTABLE cases. */
9128 if (a->rt & 1) {
9129 unallocated_encoding(s);
9130 return true;
9131 }
9132 a->rt2 = a->rt + 1;
9133 return op_strex(s, a, MO_64, false);
9134 }
9135
9136 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
9137 {
9138 return op_strex(s, a, MO_64, false);
9139 }
9140
9141 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
9142 {
9143 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9144 return false;
9145 }
9146 return op_strex(s, a, MO_8, false);
9147 }
9148
9149 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
9150 {
9151 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9152 return false;
9153 }
9154 return op_strex(s, a, MO_16, false);
9155 }
9156
9157 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
9158 {
9159 if (!ENABLE_ARCH_8) {
9160 return false;
9161 }
9162 return op_strex(s, a, MO_32, true);
9163 }
9164
9165 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
9166 {
9167 if (!ENABLE_ARCH_8) {
9168 return false;
9169 }
9170 /* We UNDEF for these UNPREDICTABLE cases. */
9171 if (a->rt & 1) {
9172 unallocated_encoding(s);
9173 return true;
9174 }
9175 a->rt2 = a->rt + 1;
9176 return op_strex(s, a, MO_64, true);
9177 }
9178
9179 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
9180 {
9181 if (!ENABLE_ARCH_8) {
9182 return false;
9183 }
9184 return op_strex(s, a, MO_64, true);
9185 }
9186
9187 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
9188 {
9189 if (!ENABLE_ARCH_8) {
9190 return false;
9191 }
9192 return op_strex(s, a, MO_8, true);
9193 }
9194
9195 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
9196 {
9197 if (!ENABLE_ARCH_8) {
9198 return false;
9199 }
9200 return op_strex(s, a, MO_16, true);
9201 }
9202
9203 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
9204 {
9205 TCGv_i32 addr, tmp;
9206
9207 if (!ENABLE_ARCH_8) {
9208 return false;
9209 }
9210 /* We UNDEF for these UNPREDICTABLE cases. */
9211 if (a->rn == 15 || a->rt == 15) {
9212 unallocated_encoding(s);
9213 return true;
9214 }
9215
9216 addr = load_reg(s, a->rn);
9217 tmp = load_reg(s, a->rt);
9218 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9219 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9220 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
9221
9222 tcg_temp_free_i32(tmp);
9223 tcg_temp_free_i32(addr);
9224 return true;
9225 }
9226
9227 static bool trans_STL(DisasContext *s, arg_STL *a)
9228 {
9229 return op_stl(s, a, MO_UL);
9230 }
9231
9232 static bool trans_STLB(DisasContext *s, arg_STL *a)
9233 {
9234 return op_stl(s, a, MO_UB);
9235 }
9236
9237 static bool trans_STLH(DisasContext *s, arg_STL *a)
9238 {
9239 return op_stl(s, a, MO_UW);
9240 }
9241
9242 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
9243 {
9244 TCGv_i32 addr;
9245 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
9246 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
9247
9248 /* We UNDEF for these UNPREDICTABLE cases. */
9249 if (a->rn == 15 || a->rt == 15
9250 || (!v8a && s->thumb && a->rt == 13)
9251 || (mop == MO_64
9252 && (a->rt2 == 15 || a->rt == a->rt2
9253 || (!v8a && s->thumb && a->rt2 == 13)))) {
9254 unallocated_encoding(s);
9255 return true;
9256 }
9257
9258 addr = tcg_temp_local_new_i32();
9259 load_reg_var(s, addr, a->rn);
9260 tcg_gen_addi_i32(addr, addr, a->imm);
9261
9262 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
9263 tcg_temp_free_i32(addr);
9264
9265 if (acq) {
9266 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
9267 }
9268 return true;
9269 }
9270
9271 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
9272 {
9273 if (!ENABLE_ARCH_6) {
9274 return false;
9275 }
9276 return op_ldrex(s, a, MO_32, false);
9277 }
9278
9279 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
9280 {
9281 if (!ENABLE_ARCH_6K) {
9282 return false;
9283 }
9284 /* We UNDEF for these UNPREDICTABLE cases. */
9285 if (a->rt & 1) {
9286 unallocated_encoding(s);
9287 return true;
9288 }
9289 a->rt2 = a->rt + 1;
9290 return op_ldrex(s, a, MO_64, false);
9291 }
9292
9293 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
9294 {
9295 return op_ldrex(s, a, MO_64, false);
9296 }
9297
9298 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
9299 {
9300 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9301 return false;
9302 }
9303 return op_ldrex(s, a, MO_8, false);
9304 }
9305
9306 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
9307 {
9308 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9309 return false;
9310 }
9311 return op_ldrex(s, a, MO_16, false);
9312 }
9313
9314 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
9315 {
9316 if (!ENABLE_ARCH_8) {
9317 return false;
9318 }
9319 return op_ldrex(s, a, MO_32, true);
9320 }
9321
9322 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
9323 {
9324 if (!ENABLE_ARCH_8) {
9325 return false;
9326 }
9327 /* We UNDEF for these UNPREDICTABLE cases. */
9328 if (a->rt & 1) {
9329 unallocated_encoding(s);
9330 return true;
9331 }
9332 a->rt2 = a->rt + 1;
9333 return op_ldrex(s, a, MO_64, true);
9334 }
9335
9336 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
9337 {
9338 if (!ENABLE_ARCH_8) {
9339 return false;
9340 }
9341 return op_ldrex(s, a, MO_64, true);
9342 }
9343
9344 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
9345 {
9346 if (!ENABLE_ARCH_8) {
9347 return false;
9348 }
9349 return op_ldrex(s, a, MO_8, true);
9350 }
9351
9352 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
9353 {
9354 if (!ENABLE_ARCH_8) {
9355 return false;
9356 }
9357 return op_ldrex(s, a, MO_16, true);
9358 }
9359
9360 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
9361 {
9362 TCGv_i32 addr, tmp;
9363
9364 if (!ENABLE_ARCH_8) {
9365 return false;
9366 }
9367 /* We UNDEF for these UNPREDICTABLE cases. */
9368 if (a->rn == 15 || a->rt == 15) {
9369 unallocated_encoding(s);
9370 return true;
9371 }
9372
9373 addr = load_reg(s, a->rn);
9374 tmp = tcg_temp_new_i32();
9375 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9376 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
9377 tcg_temp_free_i32(addr);
9378
9379 store_reg(s, a->rt, tmp);
9380 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9381 return true;
9382 }
9383
9384 static bool trans_LDA(DisasContext *s, arg_LDA *a)
9385 {
9386 return op_lda(s, a, MO_UL);
9387 }
9388
9389 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
9390 {
9391 return op_lda(s, a, MO_UB);
9392 }
9393
9394 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
9395 {
9396 return op_lda(s, a, MO_UW);
9397 }
9398
9399 /*
9400 * Media instructions
9401 */
9402
9403 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
9404 {
9405 TCGv_i32 t1, t2;
9406
9407 if (!ENABLE_ARCH_6) {
9408 return false;
9409 }
9410
9411 t1 = load_reg(s, a->rn);
9412 t2 = load_reg(s, a->rm);
9413 gen_helper_usad8(t1, t1, t2);
9414 tcg_temp_free_i32(t2);
9415 if (a->ra != 15) {
9416 t2 = load_reg(s, a->ra);
9417 tcg_gen_add_i32(t1, t1, t2);
9418 tcg_temp_free_i32(t2);
9419 }
9420 store_reg(s, a->rd, t1);
9421 return true;
9422 }
9423
9424 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
9425 {
9426 TCGv_i32 tmp;
9427 int width = a->widthm1 + 1;
9428 int shift = a->lsb;
9429
9430 if (!ENABLE_ARCH_6T2) {
9431 return false;
9432 }
9433 if (shift + width > 32) {
9434 /* UNPREDICTABLE; we choose to UNDEF */
9435 unallocated_encoding(s);
9436 return true;
9437 }
9438
9439 tmp = load_reg(s, a->rn);
9440 if (u) {
9441 tcg_gen_extract_i32(tmp, tmp, shift, width);
9442 } else {
9443 tcg_gen_sextract_i32(tmp, tmp, shift, width);
9444 }
9445 store_reg(s, a->rd, tmp);
9446 return true;
9447 }
9448
9449 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
9450 {
9451 return op_bfx(s, a, false);
9452 }
9453
9454 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
9455 {
9456 return op_bfx(s, a, true);
9457 }
9458
9459 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
9460 {
9461 TCGv_i32 tmp;
9462 int msb = a->msb, lsb = a->lsb;
9463 int width;
9464
9465 if (!ENABLE_ARCH_6T2) {
9466 return false;
9467 }
9468 if (msb < lsb) {
9469 /* UNPREDICTABLE; we choose to UNDEF */
9470 unallocated_encoding(s);
9471 return true;
9472 }
9473
9474 width = msb + 1 - lsb;
9475 if (a->rn == 15) {
9476 /* BFC */
9477 tmp = tcg_const_i32(0);
9478 } else {
9479 /* BFI */
9480 tmp = load_reg(s, a->rn);
9481 }
9482 if (width != 32) {
9483 TCGv_i32 tmp2 = load_reg(s, a->rd);
9484 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
9485 tcg_temp_free_i32(tmp2);
9486 }
9487 store_reg(s, a->rd, tmp);
9488 return true;
9489 }
9490
9491 static bool trans_UDF(DisasContext *s, arg_UDF *a)
9492 {
9493 unallocated_encoding(s);
9494 return true;
9495 }
9496
9497 /*
9498 * Parallel addition and subtraction
9499 */
9500
9501 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
9502 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
9503 {
9504 TCGv_i32 t0, t1;
9505
9506 if (s->thumb
9507 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9508 : !ENABLE_ARCH_6) {
9509 return false;
9510 }
9511
9512 t0 = load_reg(s, a->rn);
9513 t1 = load_reg(s, a->rm);
9514
9515 gen(t0, t0, t1);
9516
9517 tcg_temp_free_i32(t1);
9518 store_reg(s, a->rd, t0);
9519 return true;
9520 }
9521
9522 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
9523 void (*gen)(TCGv_i32, TCGv_i32,
9524 TCGv_i32, TCGv_ptr))
9525 {
9526 TCGv_i32 t0, t1;
9527 TCGv_ptr ge;
9528
9529 if (s->thumb
9530 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9531 : !ENABLE_ARCH_6) {
9532 return false;
9533 }
9534
9535 t0 = load_reg(s, a->rn);
9536 t1 = load_reg(s, a->rm);
9537
9538 ge = tcg_temp_new_ptr();
9539 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
9540 gen(t0, t0, t1, ge);
9541
9542 tcg_temp_free_ptr(ge);
9543 tcg_temp_free_i32(t1);
9544 store_reg(s, a->rd, t0);
9545 return true;
9546 }
9547
9548 #define DO_PAR_ADDSUB(NAME, helper) \
9549 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9550 { \
9551 return op_par_addsub(s, a, helper); \
9552 }
9553
9554 #define DO_PAR_ADDSUB_GE(NAME, helper) \
9555 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9556 { \
9557 return op_par_addsub_ge(s, a, helper); \
9558 }
9559
9560 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
9561 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
9562 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
9563 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
9564 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
9565 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
9566
9567 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
9568 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
9569 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
9570 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
9571 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
9572 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
9573
9574 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
9575 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
9576 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
9577 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
9578 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
9579 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
9580
9581 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
9582 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
9583 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
9584 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
9585 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
9586 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
9587
9588 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
9589 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
9590 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
9591 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
9592 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
9593 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
9594
9595 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
9596 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
9597 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
9598 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
9599 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
9600 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
9601
9602 #undef DO_PAR_ADDSUB
9603 #undef DO_PAR_ADDSUB_GE
9604
9605 /*
9606 * Packing, unpacking, saturation, and reversal
9607 */
9608
9609 static bool trans_PKH(DisasContext *s, arg_PKH *a)
9610 {
9611 TCGv_i32 tn, tm;
9612 int shift = a->imm;
9613
9614 if (s->thumb
9615 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9616 : !ENABLE_ARCH_6) {
9617 return false;
9618 }
9619
9620 tn = load_reg(s, a->rn);
9621 tm = load_reg(s, a->rm);
9622 if (a->tb) {
9623 /* PKHTB */
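/*
 * An imm5 of 0 encodes ASR #32 for PKHTB; ASR #31 produces the same
 * low 16 bits (all copies of the sign bit), which is all that the
 * deposit below uses.
 */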
9624 if (shift == 0) {
9625 shift = 31;
9626 }
9627 tcg_gen_sari_i32(tm, tm, shift);
9628 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
9629 } else {
9630 /* PKHBT */
9631 tcg_gen_shli_i32(tm, tm, shift);
9632 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
9633 }
9634 tcg_temp_free_i32(tm);
9635 store_reg(s, a->rd, tn);
9636 return true;
9637 }
9638
9639 static bool op_sat(DisasContext *s, arg_sat *a,
9640 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
9641 {
9642 TCGv_i32 tmp, satimm;
9643 int shift = a->imm;
9644
9645 if (!ENABLE_ARCH_6) {
9646 return false;
9647 }
9648
9649 tmp = load_reg(s, a->rn);
9650 if (a->sh) {
9651 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
9652 } else {
9653 tcg_gen_shli_i32(tmp, tmp, shift);
9654 }
9655
9656 satimm = tcg_const_i32(a->satimm);
9657 gen(tmp, cpu_env, tmp, satimm);
9658 tcg_temp_free_i32(satimm);
9659
9660 store_reg(s, a->rd, tmp);
9661 return true;
9662 }
9663
9664 static bool trans_SSAT(DisasContext *s, arg_sat *a)
9665 {
9666 return op_sat(s, a, gen_helper_ssat);
9667 }
9668
9669 static bool trans_USAT(DisasContext *s, arg_sat *a)
9670 {
9671 return op_sat(s, a, gen_helper_usat);
9672 }
9673
9674 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
9675 {
9676 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9677 return false;
9678 }
9679 return op_sat(s, a, gen_helper_ssat16);
9680 }
9681
9682 static bool trans_USAT16(DisasContext *s, arg_sat *a)
9683 {
9684 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9685 return false;
9686 }
9687 return op_sat(s, a, gen_helper_usat16);
9688 }
9689
9690 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
9691 void (*gen_extract)(TCGv_i32, TCGv_i32),
9692 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
9693 {
9694 TCGv_i32 tmp;
9695
9696 if (!ENABLE_ARCH_6) {
9697 return false;
9698 }
9699
9700 tmp = load_reg(s, a->rm);
9701 /*
9702 * TODO: In many cases we could do a shift instead of a rotate.
9703 * Combined with a simple extend, that becomes an extract.
9704 */
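/*
 * e.g. SXTAB with a->rot == 1 (a rotate by 8) is equivalent to
 * tcg_gen_sextract_i32(tmp, tmp, 8, 8) on its own.
 */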
9705 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
9706 gen_extract(tmp, tmp);
9707
9708 if (a->rn != 15) {
9709 TCGv_i32 tmp2 = load_reg(s, a->rn);
9710 gen_add(tmp, tmp, tmp2);
9711 tcg_temp_free_i32(tmp2);
9712 }
9713 store_reg(s, a->rd, tmp);
9714 return true;
9715 }
9716
9717 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
9718 {
9719 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
9720 }
9721
9722 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
9723 {
9724 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
9725 }
9726
9727 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
9728 {
9729 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9730 return false;
9731 }
9732 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
9733 }
9734
9735 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
9736 {
9737 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
9738 }
9739
9740 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
9741 {
9742 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
9743 }
9744
9745 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
9746 {
9747 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9748 return false;
9749 }
9750 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
9751 }
9752
9753 static bool trans_SEL(DisasContext *s, arg_rrr *a)
9754 {
9755 TCGv_i32 t1, t2, t3;
9756
9757 if (s->thumb
9758 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9759 : !ENABLE_ARCH_6) {
9760 return false;
9761 }
9762
9763 t1 = load_reg(s, a->rn);
9764 t2 = load_reg(s, a->rm);
9765 t3 = tcg_temp_new_i32();
9766 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
9767 gen_helper_sel_flags(t1, t3, t1, t2);
9768 tcg_temp_free_i32(t3);
9769 tcg_temp_free_i32(t2);
9770 store_reg(s, a->rd, t1);
9771 return true;
9772 }
9773
9774 static bool op_rr(DisasContext *s, arg_rr *a,
9775 void (*gen)(TCGv_i32, TCGv_i32))
9776 {
9777 TCGv_i32 tmp;
9778
9779 tmp = load_reg(s, a->rm);
9780 gen(tmp, tmp);
9781 store_reg(s, a->rd, tmp);
9782 return true;
9783 }
9784
9785 static bool trans_REV(DisasContext *s, arg_rr *a)
9786 {
9787 if (!ENABLE_ARCH_6) {
9788 return false;
9789 }
9790 return op_rr(s, a, tcg_gen_bswap32_i32);
9791 }
9792
9793 static bool trans_REV16(DisasContext *s, arg_rr *a)
9794 {
9795 if (!ENABLE_ARCH_6) {
9796 return false;
9797 }
9798 return op_rr(s, a, gen_rev16);
9799 }
9800
9801 static bool trans_REVSH(DisasContext *s, arg_rr *a)
9802 {
9803 if (!ENABLE_ARCH_6) {
9804 return false;
9805 }
9806 return op_rr(s, a, gen_revsh);
9807 }
9808
9809 static bool trans_RBIT(DisasContext *s, arg_rr *a)
9810 {
9811 if (!ENABLE_ARCH_6T2) {
9812 return false;
9813 }
9814 return op_rr(s, a, gen_helper_rbit);
9815 }
9816
9817 /*
9818 * Signed multiply, signed and unsigned divide
9819 */
9820
9821 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9822 {
9823 TCGv_i32 t1, t2;
9824
9825 if (!ENABLE_ARCH_6) {
9826 return false;
9827 }
9828
9829 t1 = load_reg(s, a->rn);
9830 t2 = load_reg(s, a->rm);
9831 if (m_swap) {
9832 gen_swap_half(t2);
9833 }
9834 gen_smul_dual(t1, t2);
9835
9836 if (sub) {
9837 /* This subtraction cannot overflow. */
9838 tcg_gen_sub_i32(t1, t1, t2);
9839 } else {
9840 /*
9841 * This addition cannot overflow 32 bits; however it may
9842 * overflow considered as a signed operation, in which case
9843 * we must set the Q flag.
9844 */
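/*
 * For example, (-32768) * (-32768) + (-32768) * (-32768) is
 * 0x80000000, which is not representable as a positive signed
 * 32-bit value, so Q must be set.
 */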
9845 gen_helper_add_setq(t1, cpu_env, t1, t2);
9846 }
9847 tcg_temp_free_i32(t2);
9848
9849 if (a->ra != 15) {
9850 t2 = load_reg(s, a->ra);
9851 gen_helper_add_setq(t1, cpu_env, t1, t2);
9852 tcg_temp_free_i32(t2);
9853 }
9854 store_reg(s, a->rd, t1);
9855 return true;
9856 }
9857
9858 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
9859 {
9860 return op_smlad(s, a, false, false);
9861 }
9862
9863 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
9864 {
9865 return op_smlad(s, a, true, false);
9866 }
9867
9868 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
9869 {
9870 return op_smlad(s, a, false, true);
9871 }
9872
9873 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
9874 {
9875 return op_smlad(s, a, true, true);
9876 }
9877
9878 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9879 {
9880 TCGv_i32 t1, t2;
9881 TCGv_i64 l1, l2;
9882
9883 if (!ENABLE_ARCH_6) {
9884 return false;
9885 }
9886
9887 t1 = load_reg(s, a->rn);
9888 t2 = load_reg(s, a->rm);
9889 if (m_swap) {
9890 gen_swap_half(t2);
9891 }
9892 gen_smul_dual(t1, t2);
9893
9894 l1 = tcg_temp_new_i64();
9895 l2 = tcg_temp_new_i64();
9896 tcg_gen_ext_i32_i64(l1, t1);
9897 tcg_gen_ext_i32_i64(l2, t2);
9898 tcg_temp_free_i32(t1);
9899 tcg_temp_free_i32(t2);
9900
9901 if (sub) {
9902 tcg_gen_sub_i64(l1, l1, l2);
9903 } else {
9904 tcg_gen_add_i64(l1, l1, l2);
9905 }
9906 tcg_temp_free_i64(l2);
9907
9908 gen_addq(s, l1, a->ra, a->rd);
9909 gen_storeq_reg(s, a->ra, a->rd, l1);
9910 tcg_temp_free_i64(l1);
9911 return true;
9912 }
9913
9914 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
9915 {
9916 return op_smlald(s, a, false, false);
9917 }
9918
9919 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
9920 {
9921 return op_smlald(s, a, true, false);
9922 }
9923
9924 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
9925 {
9926 return op_smlald(s, a, false, true);
9927 }
9928
9929 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
9930 {
9931 return op_smlald(s, a, true, true);
9932 }
9933
9934 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
9935 {
9936 TCGv_i32 t1, t2;
9937
9938 if (s->thumb
9939 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9940 : !ENABLE_ARCH_6) {
9941 return false;
9942 }
9943
9944 t1 = load_reg(s, a->rn);
9945 t2 = load_reg(s, a->rm);
9946 tcg_gen_muls2_i32(t2, t1, t1, t2);
9947
9948 if (a->ra != 15) {
9949 TCGv_i32 t3 = load_reg(s, a->ra);
9950 if (sub) {
9951 /*
9952 * For SMMLS, we need a 64-bit subtract: this accounts for the
9953 * borrow caused by a non-zero multiplicand lowpart, and yields
9954 * the correct result lowpart for rounding.
9955 */
9956 TCGv_i32 zero = tcg_const_i32(0);
9957 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
9958 tcg_temp_free_i32(zero);
9959 } else {
9960 tcg_gen_add_i32(t1, t1, t3);
9961 }
9962 tcg_temp_free_i32(t3);
9963 }
9964 if (round) {
9965 /*
9966 * Adding 0x80000000 to the 64-bit quantity means that we have
9967 * carry in to the high word when the low word has the msb set.
9968 */
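/*
 * t2 >> 31 is exactly that carry, so it can be added straight into
 * the high word instead of doing a full 64-bit addition.
 */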
9969 tcg_gen_shri_i32(t2, t2, 31);
9970 tcg_gen_add_i32(t1, t1, t2);
9971 }
9972 tcg_temp_free_i32(t2);
9973 store_reg(s, a->rd, t1);
9974 return true;
9975 }
9976
9977 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
9978 {
9979 return op_smmla(s, a, false, false);
9980 }
9981
9982 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
9983 {
9984 return op_smmla(s, a, true, false);
9985 }
9986
9987 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
9988 {
9989 return op_smmla(s, a, false, true);
9990 }
9991
9992 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
9993 {
9994 return op_smmla(s, a, true, true);
9995 }
9996
9997 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
9998 {
9999 TCGv_i32 t1, t2;
10000
10001 if (s->thumb
10002 ? !dc_isar_feature(aa32_thumb_div, s)
10003 : !dc_isar_feature(aa32_arm_div, s)) {
10004 return false;
10005 }
10006
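/*
 * The architectural rule that a zero divisor yields a zero result is
 * handled inside the udiv/sdiv helpers rather than here.
 */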
10007 t1 = load_reg(s, a->rn);
10008 t2 = load_reg(s, a->rm);
10009 if (u) {
10010 gen_helper_udiv(t1, t1, t2);
10011 } else {
10012 gen_helper_sdiv(t1, t1, t2);
10013 }
10014 tcg_temp_free_i32(t2);
10015 store_reg(s, a->rd, t1);
10016 return true;
10017 }
10018
10019 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
10020 {
10021 return op_div(s, a, false);
10022 }
10023
10024 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
10025 {
10026 return op_div(s, a, true);
10027 }
10028
10029 /*
10030 * Block data transfer
10031 */
10032
10033 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
10034 {
10035 TCGv_i32 addr = load_reg(s, a->rn);
10036
10037 if (a->b) {
10038 if (a->i) {
10039 /* pre increment */
10040 tcg_gen_addi_i32(addr, addr, 4);
10041 } else {
10042 /* pre decrement */
10043 tcg_gen_addi_i32(addr, addr, -(n * 4));
10044 }
10045 } else if (!a->i && n != 1) {
10046 /* post decrement */
10047 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
10048 }
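/*
 * For example, with n == 3 a decrement-before form makes the first
 * access at Rn - 12 and the last at Rn - 4, while decrement-after
 * accesses Rn - 8 up to Rn.
 */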
10049
10050 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
10051 /*
10052 * If the writeback is incrementing SP rather than
10053 * decrementing it, and the initial SP is below the
10054 * stack limit but the final written-back SP would
10055 * be above, then we must not perform any memory
10056 * accesses, but it is IMPDEF whether we generate
10057 * an exception. We choose to do so in this case.
10058 * At this point 'addr' is the lowest address, so
10059 * either the original SP (if incrementing) or our
10060 * final SP (if decrementing), so that's what we check.
10061 */
10062 gen_helper_v8m_stackcheck(cpu_env, addr);
10063 }
10064
10065 return addr;
10066 }
10067
10068 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
10069 TCGv_i32 addr, int n)
10070 {
10071 if (a->w) {
10072 /* write back */
10073 if (!a->b) {
10074 if (a->i) {
10075 /* post increment */
10076 tcg_gen_addi_i32(addr, addr, 4);
10077 } else {
10078 /* post decrement */
10079 tcg_gen_addi_i32(addr, addr, -(n * 4));
10080 }
10081 } else if (!a->i && n != 1) {
10082 /* pre decrement */
10083 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
10084 }
10085 store_reg(s, a->rn, addr);
10086 } else {
10087 tcg_temp_free_i32(addr);
10088 }
10089 }
10090
10091 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
10092 {
10093 int i, j, n, list, mem_idx;
10094 bool user = a->u;
10095 TCGv_i32 addr, tmp, tmp2;
10096
10097 if (user) {
10098 /* STM (user) */
10099 if (IS_USER(s)) {
10100 /* Only usable in supervisor mode. */
10101 unallocated_encoding(s);
10102 return true;
10103 }
10104 }
10105
10106 list = a->list;
10107 n = ctpop16(list);
10108 if (n < min_n || a->rn == 15) {
10109 unallocated_encoding(s);
10110 return true;
10111 }
10112
10113 addr = op_addr_block_pre(s, a, n);
10114 mem_idx = get_mem_index(s);
10115
10116 for (i = j = 0; i < 16; i++) {
10117 if (!(list & (1 << i))) {
10118 continue;
10119 }
10120
10121 if (user && i != 15) {
10122 tmp = tcg_temp_new_i32();
10123 tmp2 = tcg_const_i32(i);
10124 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
10125 tcg_temp_free_i32(tmp2);
10126 } else {
10127 tmp = load_reg(s, i);
10128 }
10129 gen_aa32_st32(s, tmp, addr, mem_idx);
10130 tcg_temp_free_i32(tmp);
10131
10132 /* No need to add after the last transfer. */
10133 if (++j != n) {
10134 tcg_gen_addi_i32(addr, addr, 4);
10135 }
10136 }
10137
10138 op_addr_block_post(s, a, addr, n);
10139 return true;
10140 }
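/*
 * For example, STMIA r0!, {r1, r3, r7} gives list = 0x008a and
 * n = ctpop16(list) = 3: r1 is stored at [r0], r3 at [r0 + 4] and
 * r7 at [r0 + 8], after which op_addr_block_post writes r0 + 12
 * back to r0.
 */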
10141
10142 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
10143 {
10144 /* BitCount(list) < 1 is UNPREDICTABLE */
10145 return op_stm(s, a, 1);
10146 }
10147
10148 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
10149 {
10150 /* Writeback register in register list is UNPREDICTABLE for T32. */
10151 if (a->w && (a->list & (1 << a->rn))) {
10152 unallocated_encoding(s);
10153 return true;
10154 }
10155 /* BitCount(list) < 2 is UNPREDICTABLE */
10156 return op_stm(s, a, 2);
10157 }
10158
10159 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
10160 {
10161 int i, j, n, list, mem_idx;
10162 bool loaded_base;
10163 bool user = a->u;
10164 bool exc_return = false;
10165 TCGv_i32 addr, tmp, tmp2, loaded_var;
10166
10167 if (user) {
10168 /* LDM (user), LDM (exception return) */
10169 if (IS_USER(s)) {
10170 /* Only usable in supervisor mode. */
10171 unallocated_encoding(s);
10172 return true;
10173 }
10174 if (extract32(a->list, 15, 1)) {
10175 exc_return = true;
10176 user = false;
10177 } else {
10178 /* LDM (user) does not allow writeback. */
10179 if (a->w) {
10180 unallocated_encoding(s);
10181 return true;
10182 }
10183 }
10184 }
10185
10186 list = a->list;
10187 n = ctpop16(list);
10188 if (n < min_n || a->rn == 15) {
10189 unallocated_encoding(s);
10190 return true;
10191 }
10192
10193 addr = op_addr_block_pre(s, a, n);
10194 mem_idx = get_mem_index(s);
10195 loaded_base = false;
10196 loaded_var = NULL;
10197
10198 for (i = j = 0; i < 16; i++) {
10199 if (!(list & (1 << i))) {
10200 continue;
10201 }
10202
10203 tmp = tcg_temp_new_i32();
10204 gen_aa32_ld32u(s, tmp, addr, mem_idx);
10205 if (user) {
10206 tmp2 = tcg_const_i32(i);
10207 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
10208 tcg_temp_free_i32(tmp2);
10209 tcg_temp_free_i32(tmp);
10210 } else if (i == a->rn) {
10211 loaded_var = tmp;
10212 loaded_base = true;
10213 } else if (i == 15 && exc_return) {
10214 store_pc_exc_ret(s, tmp);
10215 } else {
10216 store_reg_from_load(s, i, tmp);
10217 }
10218
10219 /* No need to add after the last transfer. */
10220 if (++j != n) {
10221 tcg_gen_addi_i32(addr, addr, 4);
10222 }
10223 }
10224
10225 op_addr_block_post(s, a, addr, n);
10226
10227 if (loaded_base) {
10228 /* Note that we reject base == pc above. */
10229 store_reg(s, a->rn, loaded_var);
10230 }
10231
10232 if (exc_return) {
10233 /* Restore CPSR from SPSR. */
10234 tmp = load_cpu_field(spsr);
10235 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10236 gen_io_start();
10237 }
10238 gen_helper_cpsr_write_eret(cpu_env, tmp);
10239 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10240 gen_io_end();
10241 }
10242 tcg_temp_free_i32(tmp);
10243 /* Must exit loop to check unmasked IRQs */
10244 s->base.is_jmp = DISAS_EXIT;
10245 }
10246 return true;
10247 }
10248
10249 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
10250 {
10251 /*
10252 * Writeback register in register list is UNPREDICTABLE
10253 * for ArchVersion() >= 7. Prior to v7, A32 would write
10254 * an UNKNOWN value to the base register.
10255 */
10256 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
10257 unallocated_encoding(s);
10258 return true;
10259 }
10260 /* BitCount(list) < 1 is UNPREDICTABLE */
10261 return do_ldm(s, a, 1);
10262 }
10263
10264 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
10265 {
10266 /* Writeback register in register list is UNPREDICTABLE for T32. */
10267 if (a->w && (a->list & (1 << a->rn))) {
10268 unallocated_encoding(s);
10269 return true;
10270 }
10271 /* BitCount(list) < 2 is UNPREDICTABLE */
10272 return do_ldm(s, a, 2);
10273 }
10274
10275 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
10276 {
10277 /* Writeback is conditional on the base register not being loaded. */
10278 a->w = !(a->list & (1 << a->rn));
10279 /* BitCount(list) < 1 is UNPREDICTABLE */
10280 return do_ldm(s, a, 1);
10281 }
10282
10283 /*
10284 * Branch, branch with link
10285 */
10286
10287 static bool trans_B(DisasContext *s, arg_i *a)
10288 {
10289 gen_jmp(s, read_pc(s) + a->imm);
10290 return true;
10291 }
10292
10293 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
10294 {
10295 /* This has cond from encoding, required to be outside IT block. */
10296 if (a->cond >= 0xe) {
10297 return false;
10298 }
10299 if (s->condexec_mask) {
10300 unallocated_encoding(s);
10301 return true;
10302 }
10303 arm_skip_unless(s, a->cond);
10304 gen_jmp(s, read_pc(s) + a->imm);
10305 return true;
10306 }
10307
10308 static bool trans_BL(DisasContext *s, arg_i *a)
10309 {
10310 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10311 gen_jmp(s, read_pc(s) + a->imm);
10312 return true;
10313 }
10314
10315 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
10316 {
10317 TCGv_i32 tmp;
10318
10319 /* For A32, ARCH(5) is checked near the start of the uncond block. */
10320 if (s->thumb && (a->imm & 2)) {
10321 return false;
10322 }
10323 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10324 tmp = tcg_const_i32(!s->thumb);
10325 store_cpu_field(tmp, thumb);
10326 gen_jmp(s, (read_pc(s) & ~3) + a->imm);
10327 return true;
10328 }
10329
10330 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
10331 {
10332 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10333 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
10334 return true;
10335 }
10336
10337 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
10338 {
10339 TCGv_i32 tmp = tcg_temp_new_i32();
10340
10341 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10342 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
10343 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10344 gen_bx(s, tmp);
10345 return true;
10346 }
10347
10348 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
10349 {
10350 TCGv_i32 tmp;
10351
10352 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10353 if (!ENABLE_ARCH_5) {
10354 return false;
10355 }
10356 tmp = tcg_temp_new_i32();
10357 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
10358 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
10359 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10360 gen_bx(s, tmp);
10361 return true;
10362 }
10363
10364 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
10365 {
10366 TCGv_i32 addr, tmp;
10367
10368 tmp = load_reg(s, a->rm);
10369 if (half) {
10370 tcg_gen_add_i32(tmp, tmp, tmp);
10371 }
10372 addr = load_reg(s, a->rn);
10373 tcg_gen_add_i32(addr, addr, tmp);
10374
10375 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
10376 half ? MO_UW | s->be_data : MO_UB);
10377 tcg_temp_free_i32(addr);
10378
10379 tcg_gen_add_i32(tmp, tmp, tmp);
10380 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
10381 store_reg(s, 15, tmp);
10382 return true;
10383 }
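/*
 * For example, TBB [r0, r1] loads the byte at r0 + r1 and branches to
 * read_pc(s) + 2 * byte, while TBH [r0, r1, LSL #1] loads the halfword
 * at r0 + 2 * r1 and branches to read_pc(s) + 2 * halfword; a table
 * entry of 5 therefore lands 10 bytes past the PC value read_pc()
 * returns.
 */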
10384
10385 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
10386 {
10387 return op_tbranch(s, a, false);
10388 }
10389
10390 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
10391 {
10392 return op_tbranch(s, a, true);
10393 }
10394
10395 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
10396 {
10397 TCGv_i32 tmp = load_reg(s, a->rn);
10398
10399 arm_gen_condlabel(s);
10400 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
10401 tmp, 0, s->condlabel);
10402 tcg_temp_free_i32(tmp);
10403 gen_jmp(s, read_pc(s) + a->imm);
10404 return true;
10405 }
10406
10407 /*
10408 * Supervisor call - both T32 & A32 come here so we need to check
10409 * which mode we are in when checking for semihosting.
10410 */
10411
10412 static bool trans_SVC(DisasContext *s, arg_SVC *a)
10413 {
10414 const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
10415
10416 if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
10417 #ifndef CONFIG_USER_ONLY
10418 !IS_USER(s) &&
10419 #endif
10420 (a->imm == semihost_imm)) {
10421 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
10422 } else {
10423 gen_set_pc_im(s, s->base.pc_next);
10424 s->svc_imm = a->imm;
10425 s->base.is_jmp = DISAS_SWI;
10426 }
10427 return true;
10428 }
10429
10430 /*
10431 * Unconditional system instructions
10432 */
10433
10434 static bool trans_RFE(DisasContext *s, arg_RFE *a)
10435 {
10436 static const int8_t pre_offset[4] = {
10437 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
10438 };
10439 static const int8_t post_offset[4] = {
10440 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
10441 };
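/*
 * For example, with a->pu selecting the IA entries (RFEIA), the PC is
 * loaded from [Rn] and the CPSR word from [Rn + 4]; with writeback the
 * +4 post_offset is applied to that second address, so Rn becomes
 * Rn + 8. With the DB entries the loads come from [Rn - 8] and
 * [Rn - 4], and the -4 post_offset writes back Rn - 8.
 */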
10442 TCGv_i32 addr, t1, t2;
10443
10444 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10445 return false;
10446 }
10447 if (IS_USER(s)) {
10448 unallocated_encoding(s);
10449 return true;
10450 }
10451
10452 addr = load_reg(s, a->rn);
10453 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
10454
10455 /* Load PC into t1 and CPSR into t2. */
10456 t1 = tcg_temp_new_i32();
10457 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
10458 tcg_gen_addi_i32(addr, addr, 4);
10459 t2 = tcg_temp_new_i32();
10460 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
10461
10462 if (a->w) {
10463 /* Base writeback. */
10464 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
10465 store_reg(s, a->rn, addr);
10466 } else {
10467 tcg_temp_free_i32(addr);
10468 }
10469 gen_rfe(s, t1, t2);
10470 return true;
10471 }
10472
10473 static bool trans_SRS(DisasContext *s, arg_SRS *a)
10474 {
10475 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10476 return false;
10477 }
10478 gen_srs(s, a->mode, a->pu, a->w);
10479 return true;
10480 }
10481
10482 static bool trans_CPS(DisasContext *s, arg_CPS *a)
10483 {
10484 uint32_t mask, val;
10485
10486 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10487 return false;
10488 }
10489 if (IS_USER(s)) {
10490 /* Implemented as NOP in user mode. */
10491 return true;
10492 }
10493 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
10494
10495 mask = val = 0;
10496 if (a->imod & 2) {
10497 if (a->A) {
10498 mask |= CPSR_A;
10499 }
10500 if (a->I) {
10501 mask |= CPSR_I;
10502 }
10503 if (a->F) {
10504 mask |= CPSR_F;
10505 }
10506 if (a->imod & 1) {
10507 val |= mask;
10508 }
10509 }
10510 if (a->M) {
10511 mask |= CPSR_M;
10512 val |= a->mode;
10513 }
10514 if (mask) {
10515 gen_set_psr_im(s, mask, 0, val);
10516 }
10517 return true;
10518 }
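/*
 * Illustratively, assuming a->imod carries the architectural imod field:
 * CPSID i arrives with imod = 0b11 and a->I set, so mask and val both
 * become CPSR_I and the I bit is set (IRQs masked), while CPSIE i has
 * imod = 0b10, leaving val at 0 so the I bit is cleared.
 */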
10519
10520 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
10521 {
10522 TCGv_i32 tmp, addr, el;
10523
10524 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
10525 return false;
10526 }
10527 if (IS_USER(s)) {
10528 /* Implemented as NOP in user mode. */
10529 return true;
10530 }
10531
10532 tmp = tcg_const_i32(a->im);
10533 /* FAULTMASK */
10534 if (a->F) {
10535 addr = tcg_const_i32(19);
10536 gen_helper_v7m_msr(cpu_env, addr, tmp);
10537 tcg_temp_free_i32(addr);
10538 }
10539 /* PRIMASK */
10540 if (a->I) {
10541 addr = tcg_const_i32(16);
10542 gen_helper_v7m_msr(cpu_env, addr, tmp);
10543 tcg_temp_free_i32(addr);
10544 }
10545 el = tcg_const_i32(s->current_el);
10546 gen_helper_rebuild_hflags_m32(cpu_env, el);
10547 tcg_temp_free_i32(el);
10548 tcg_temp_free_i32(tmp);
10549 gen_lookup_tb(s);
10550 return true;
10551 }
10552
10553 /*
10554 * Clear-Exclusive, Barriers
10555 */
10556
10557 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
10558 {
10559 if (s->thumb
10560 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
10561 : !ENABLE_ARCH_6K) {
10562 return false;
10563 }
10564 gen_clrex(s);
10565 return true;
10566 }
10567
10568 static bool trans_DSB(DisasContext *s, arg_DSB *a)
10569 {
10570 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10571 return false;
10572 }
10573 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10574 return true;
10575 }
10576
10577 static bool trans_DMB(DisasContext *s, arg_DMB *a)
10578 {
10579 return trans_DSB(s, NULL);
10580 }
10581
10582 static bool trans_ISB(DisasContext *s, arg_ISB *a)
10583 {
10584 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10585 return false;
10586 }
10587 /*
10588 * We need to break the TB after this insn to execute
10589 * self-modifying code correctly and also to take
10590 * any pending interrupts immediately.
10591 */
10592 gen_goto_tb(s, 0, s->base.pc_next);
10593 return true;
10594 }
10595
10596 static bool trans_SB(DisasContext *s, arg_SB *a)
10597 {
10598 if (!dc_isar_feature(aa32_sb, s)) {
10599 return false;
10600 }
10601 /*
10602 * TODO: There is no speculation barrier opcode
10603 * for TCG; MB and end the TB instead.
10604 */
10605 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10606 gen_goto_tb(s, 0, s->base.pc_next);
10607 return true;
10608 }
10609
10610 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
10611 {
10612 if (!ENABLE_ARCH_6) {
10613 return false;
10614 }
10615 if (a->E != (s->be_data == MO_BE)) {
10616 gen_helper_setend(cpu_env);
10617 s->base.is_jmp = DISAS_UPDATE;
10618 }
10619 return true;
10620 }
10621
10622 /*
10623 * Preload instructions
10624 * All are nops, contingent on the appropriate arch level.
10625 */
10626
10627 static bool trans_PLD(DisasContext *s, arg_PLD *a)
10628 {
10629 return ENABLE_ARCH_5TE;
10630 }
10631
10632 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
10633 {
10634 return arm_dc_feature(s, ARM_FEATURE_V7MP);
10635 }
10636
10637 static bool trans_PLI(DisasContext *s, arg_PLD *a)
10638 {
10639 return ENABLE_ARCH_7;
10640 }
10641
10642 /*
10643 * If-then
10644 */
10645
10646 static bool trans_IT(DisasContext *s, arg_IT *a)
10647 {
10648 int cond_mask = a->cond_mask;
10649
10650 /*
10651 * No actual code generated for this insn, just setup state.
10652 *
10653 * Combinations of firstcond and mask which set up an 0b1111
10654 * condition are UNPREDICTABLE; we take the CONSTRAINED
10655 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
10656 * i.e. both meaning "execute always".
10657 */
10658 s->condexec_cond = (cond_mask >> 4) & 0xe;
10659 s->condexec_mask = cond_mask & 0x1f;
10660 return true;
10661 }
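/*
 * For example, assuming a->cond_mask packs {firstcond, mask} as in the
 * encoding, a single-insn "IT EQ" arrives with a->cond_mask = 0x08
 * (firstcond 0b0000, mask 0b1000), so condexec_cond becomes 0 and
 * condexec_mask becomes 0x08; thumb_tr_translate_insn then shifts the
 * mask left after every insn (including the IT itself) until it
 * reaches zero, which ends the IT block.
 */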
10662
10663 /*
10664 * Legacy decoder.
10665 */
10666
10667 static void disas_arm_insn(DisasContext *s, unsigned int insn)
10668 {
10669 unsigned int cond = insn >> 28;
10670
10671 /* M variants do not implement ARM mode; this must raise the INVSTATE
10672 * UsageFault exception.
10673 */
10674 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10675 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
10676 default_exception_el(s));
10677 return;
10678 }
10679
10680 if (cond == 0xf) {
10681 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
10682 * choose to UNDEF. In ARMv5 and above the space is used
10683 * for miscellaneous unconditional instructions.
10684 */
10685 ARCH(5);
10686
10687 /* Unconditional instructions. */
10688 /* TODO: Perhaps merge these into one decodetree output file. */
10689 if (disas_a32_uncond(s, insn) ||
10690 disas_vfp_uncond(s, insn) ||
10691 disas_neon_dp(s, insn) ||
10692 disas_neon_ls(s, insn) ||
10693 disas_neon_shared(s, insn)) {
10694 return;
10695 }
10696 /* fall back to legacy decoder */
10697
10698 if (((insn >> 25) & 7) == 1) {
10699 /* NEON Data processing. */
10700 if (disas_neon_data_insn(s, insn)) {
10701 goto illegal_op;
10702 }
10703 return;
10704 }
10705 if ((insn & 0x0e000f00) == 0x0c000100) {
10706 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
10707 /* iWMMXt register transfer. */
10708 if (extract32(s->c15_cpar, 1, 1)) {
10709 if (!disas_iwmmxt_insn(s, insn)) {
10710 return;
10711 }
10712 }
10713 }
10714 }
10715 goto illegal_op;
10716 }
10717 if (cond != 0xe) {
10718 /* If the condition is not 'always', generate a conditional
10719 jump to the next instruction. */
10720 arm_skip_unless(s, cond);
10721 }
10722
10723 /* TODO: Perhaps merge these into one decodetree output file. */
10724 if (disas_a32(s, insn) ||
10725 disas_vfp(s, insn)) {
10726 return;
10727 }
10728 /* fall back to legacy decoder */
10729
10730 switch ((insn >> 24) & 0xf) {
10731 case 0xc:
10732 case 0xd:
10733 case 0xe:
10734 if (((insn >> 8) & 0xe) == 10) {
10735 /* VFP, but failed disas_vfp. */
10736 goto illegal_op;
10737 }
10738 if (disas_coproc_insn(s, insn)) {
10739 /* Coprocessor. */
10740 goto illegal_op;
10741 }
10742 break;
10743 default:
10744 illegal_op:
10745 unallocated_encoding(s);
10746 break;
10747 }
10748 }
10749
10750 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
10751 {
10752 /*
10753 * Return true if this is a 16 bit instruction. We must be precise
10754 * about this (matching the decode).
10755 */
10756 if ((insn >> 11) < 0x1d) {
10757 /* Definitely a 16-bit instruction */
10758 return true;
10759 }
10760
10761 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
10762 * first half of a 32-bit Thumb insn. Thumb-1 cores might
10763 * end up actually treating this as two 16-bit insns, though,
10764 * if it's half of a bl/blx pair that might span a page boundary.
10765 */
10766 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
10767 arm_dc_feature(s, ARM_FEATURE_M)) {
10768 /* Thumb2 cores (including all M profile ones) always treat
10769 * 32-bit insns as 32-bit.
10770 */
10771 return false;
10772 }
10773
10774 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
10775 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
10776 * is not on the next page; we merge this into a 32-bit
10777 * insn.
10778 */
10779 return false;
10780 }
10781 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
10782 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
10783 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
10784 * -- handle as single 16 bit insn
10785 */
10786 return true;
10787 }
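/*
 * Some concrete first-halfword values, for illustration: 0xbf00 (NOP)
 * has (insn >> 11) == 0x17 and is always 16-bit; 0xe92d has top bits
 * 0b11101 and 0xf000 has 0b11110, so Thumb-2 and M-profile cores treat
 * both as the first half of a 32-bit insn, whereas a Thumb-1 core only
 * merges the 0xf000 BL/BLX prefix when its suffix is on the same page.
 */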
10788
10789 /* Translate a 32-bit thumb instruction. */
10790 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
10791 {
10792 /*
10793 * ARMv6-M supports a limited subset of Thumb2 instructions.
10794 * Other Thumb1 architectures allow only 32-bit
10795 * combined BL/BLX prefix and suffix.
10796 */
10797 if (arm_dc_feature(s, ARM_FEATURE_M) &&
10798 !arm_dc_feature(s, ARM_FEATURE_V7)) {
10799 int i;
10800 bool found = false;
10801 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
10802 0xf3b08040 /* dsb */,
10803 0xf3b08050 /* dmb */,
10804 0xf3b08060 /* isb */,
10805 0xf3e08000 /* mrs */,
10806 0xf000d000 /* bl */};
10807 static const uint32_t armv6m_mask[] = {0xffe0d000,
10808 0xfff0d0f0,
10809 0xfff0d0f0,
10810 0xfff0d0f0,
10811 0xffe0d000,
10812 0xf800d000};
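/*
 * For example, the T32 encoding 0xf3bf8f4f (DSB SY) ANDed with
 * 0xfff0d0f0 gives 0xf3b08040, which matches the dsb entry above and
 * is accepted on ARMv6-M; anything not matching one of the six
 * insn/mask pairs falls through to illegal_op.
 */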
10813
10814 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
10815 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
10816 found = true;
10817 break;
10818 }
10819 }
10820 if (!found) {
10821 goto illegal_op;
10822 }
10823 } else if ((insn & 0xf800e800) != 0xf000e800) {
10824 ARCH(6T2);
10825 }
10826
10827 if ((insn & 0xef000000) == 0xef000000) {
10828 /*
10829 * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10830 * transform into
10831 * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10832 */
10833 uint32_t a32_insn = (insn & 0xe2ffffff) |
10834 ((insn & (1 << 28)) >> 4) | (1 << 28);
10835
10836 if (disas_neon_dp(s, a32_insn)) {
10837 return;
10838 }
10839 }
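/*
 * As a concrete example of the transform above: the T32 word
 * 0xef123456 (p = 0) becomes (0xef123456 & 0xe2ffffff) | (1 << 28)
 * = 0xf2123456, while 0xff123456 (p = 1) additionally has bit 28
 * moved down to bit 24, giving 0xf3123456 -- i.e. the A32
 * 0b1111_001p form described above.
 */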
10840
10841 if ((insn & 0xff100000) == 0xf9000000) {
10842 /*
10843 * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10844 * transform into
10845 * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10846 */
10847 uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
10848
10849 if (disas_neon_ls(s, a32_insn)) {
10850 return;
10851 }
10852 }
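/*
 * Likewise, a T32 load/store word such as 0xf9212345 maps to
 * (0xf9212345 & 0x00ffffff) | 0xf4000000 = 0xf4212345, matching the
 * A32 0b1111_0100_ppp0 pattern.
 */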
10853
10854 /*
10855 * TODO: Perhaps merge these into one decodetree output file.
10856 * Note disas_vfp is written for a32 with cond field in the
10857 * top nibble. The t32 encoding requires 0xe in the top nibble.
10858 */
10859 if (disas_t32(s, insn) ||
10860 disas_vfp_uncond(s, insn) ||
10861 disas_neon_shared(s, insn) ||
10862 ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
10863 return;
10864 }
10865 /* fall back to legacy decoder */
10866
10867 switch ((insn >> 25) & 0xf) {
10868 case 0: case 1: case 2: case 3:
10869 /* 16-bit instructions. Should never happen. */
10870 abort();
10871 case 6: case 7: case 14: case 15:
10872 /* Coprocessor. */
10873 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10874 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
10875 if (extract32(insn, 24, 2) == 3) {
10876 goto illegal_op; /* op0 = 0b11 : unallocated */
10877 }
10878
10879 if (((insn >> 8) & 0xe) == 10 &&
10880 dc_isar_feature(aa32_fpsp_v2, s)) {
10881 /* FP, and the CPU supports it */
10882 goto illegal_op;
10883 } else {
10884 /* All other insns: NOCP */
10885 gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
10886 syn_uncategorized(),
10887 default_exception_el(s));
10888 }
10889 break;
10890 }
10891 if (((insn >> 24) & 3) == 3) {
10892 /* Translate into the equivalent ARM encoding. */
10893 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
10894 if (disas_neon_data_insn(s, insn)) {
10895 goto illegal_op;
10896 }
10897 } else if (((insn >> 8) & 0xe) == 10) {
10898 /* VFP, but failed disas_vfp. */
10899 goto illegal_op;
10900 } else {
10901 if (insn & (1 << 28))
10902 goto illegal_op;
10903 if (disas_coproc_insn(s, insn)) {
10904 goto illegal_op;
10905 }
10906 }
10907 break;
10908 case 12:
10909 goto illegal_op;
10910 default:
10911 illegal_op:
10912 unallocated_encoding(s);
10913 }
10914 }
10915
10916 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
10917 {
10918 if (!disas_t16(s, insn)) {
10919 unallocated_encoding(s);
10920 }
10921 }
10922
10923 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
10924 {
10925 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
10926 * (False positives are OK, false negatives are not.)
10927 * We know this is a Thumb insn, and our caller ensures we are
10928 * only called if dc->base.pc_next is less than 4 bytes from the page
10929 * boundary, so we cross the page if the first 16 bits indicate
10930 * that this is a 32 bit insn.
10931 */
10932 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
10933
10934 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
10935 }
10936
10937 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
10938 {
10939 DisasContext *dc = container_of(dcbase, DisasContext, base);
10940 CPUARMState *env = cs->env_ptr;
10941 ARMCPU *cpu = env_archcpu(env);
10942 uint32_t tb_flags = dc->base.tb->flags;
10943 uint32_t condexec, core_mmu_idx;
10944
10945 dc->isar = &cpu->isar;
10946 dc->condjmp = 0;
10947
10948 dc->aarch64 = 0;
10949 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
10950 * there is no secure EL1, so we route exceptions to EL3.
10951 */
10952 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
10953 !arm_el_is_aa64(env, 3);
10954 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
10955 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10956 condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
10957 dc->condexec_mask = (condexec & 0xf) << 1;
10958 dc->condexec_cond = condexec >> 4;
10959
10960 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
10961 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
10962 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10963 #if !defined(CONFIG_USER_ONLY)
10964 dc->user = (dc->current_el == 0);
10965 #endif
10966 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
10967
10968 if (arm_feature(env, ARM_FEATURE_M)) {
10969 dc->vfp_enabled = 1;
10970 dc->be_data = MO_TE;
10971 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
10972 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
10973 regime_is_secure(env, dc->mmu_idx);
10974 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
10975 dc->v8m_fpccr_s_wrong =
10976 FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
10977 dc->v7m_new_fp_ctxt_needed =
10978 FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
10979 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
10980 } else {
10981 dc->be_data =
10982 FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10983 dc->debug_target_el =
10984 FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
10985 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
10986 dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
10987 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
10988 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
10989 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
10990 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
10991 } else {
10992 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
10993 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
10994 }
10995 }
10996 dc->cp_regs = cpu->cp_regs;
10997 dc->features = env->features;
10998
10999 /* Single step state. The code-generation logic here is:
11000 * SS_ACTIVE == 0:
11001 * generate code with no special handling for single-stepping (except
11002 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11003 * this happens anyway because those changes are all system register or
11004 * PSTATE writes).
11005 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11006 * emit code for one insn
11007 * emit code to clear PSTATE.SS
11008 * emit code to generate software step exception for completed step
11009 * end TB (as usual for having generated an exception)
11010 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11011 * emit code to generate a software step exception
11012 * end the TB
11013 */
11014 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
11015 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
11016 dc->is_ldex = false;
11017
11018 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
11019
11020 /* If architectural single step active, limit to 1. */
11021 if (is_singlestepping(dc)) {
11022 dc->base.max_insns = 1;
11023 }
11024
11025 /* ARM is a fixed-length ISA. Bound the number of insns to execute
11026 to those left on the page. */
11027 if (!dc->thumb) {
11028 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
11029 dc->base.max_insns = MIN(dc->base.max_insns, bound);
11030 }
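/*
 * For example, assuming 4K pages (TARGET_PAGE_MASK == ~0xfff) and
 * pc_first == 0x80000f00, -(pc_first | TARGET_PAGE_MASK) is 0x100,
 * so at most 64 further 4-byte A32 insns fit on the page.
 */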
11031
11032 cpu_V0 = tcg_temp_new_i64();
11033 cpu_V1 = tcg_temp_new_i64();
11034 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
11035 cpu_M0 = tcg_temp_new_i64();
11036 }
11037
11038 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
11039 {
11040 DisasContext *dc = container_of(dcbase, DisasContext, base);
11041
11042 /* A note on handling of the condexec (IT) bits:
11043 *
11044 * We want to avoid the overhead of having to write the updated condexec
11045 * bits back to the CPUARMState for every instruction in an IT block. So:
11046 * (1) if the condexec bits are not already zero then we write
11047 * zero back into the CPUARMState now. This avoids complications trying
11048 * to do it at the end of the block. (For example if we don't do this
11049 * it's hard to identify whether we can safely skip writing condexec
11050 * at the end of the TB, which we definitely want to do for the case
11051 * where a TB doesn't do anything with the IT state at all.)
11052 * (2) if we are going to leave the TB then we call gen_set_condexec()
11053 * which will write the correct value into CPUARMState if zero is wrong.
11054 * This is done both for leaving the TB at the end, and for leaving
11055 * it because of an exception we know will happen, which is done in
11056 * gen_exception_insn(). The latter is necessary because we need to
11057 * leave the TB with the PC/IT state just prior to execution of the
11058 * instruction which caused the exception.
11059 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
11060 * then the CPUARMState will be wrong and we need to reset it.
11061 * This is handled in the same way as restoration of the
11062 * PC in these situations; we save the value of the condexec bits
11063 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
11064 * then uses this to restore them after an exception.
11065 *
11066 * Note that there are no instructions which can read the condexec
11067 * bits, and none which can write non-static values to them, so
11068 * we don't need to care about whether CPUARMState is correct in the
11069 * middle of a TB.
11070 */
11071
11072 /* Reset the conditional execution bits immediately. This avoids
11073 complications trying to do it at the end of the block. */
11074 if (dc->condexec_mask || dc->condexec_cond) {
11075 TCGv_i32 tmp = tcg_temp_new_i32();
11076 tcg_gen_movi_i32(tmp, 0);
11077 store_cpu_field(tmp, condexec_bits);
11078 }
11079 }
11080
11081 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
11082 {
11083 DisasContext *dc = container_of(dcbase, DisasContext, base);
11084
11085 tcg_gen_insn_start(dc->base.pc_next,
11086 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
11087 0);
11088 dc->insn_start = tcg_last_op();
11089 }
11090
11091 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
11092 const CPUBreakpoint *bp)
11093 {
11094 DisasContext *dc = container_of(dcbase, DisasContext, base);
11095
11096 if (bp->flags & BP_CPU) {
11097 gen_set_condexec(dc);
11098 gen_set_pc_im(dc, dc->base.pc_next);
11099 gen_helper_check_breakpoints(cpu_env);
11100 /* End the TB early; it's likely not going to be executed */
11101 dc->base.is_jmp = DISAS_TOO_MANY;
11102 } else {
11103 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
11104 /* The address covered by the breakpoint must be
11105 included in [tb->pc, tb->pc + tb->size) in order
11106 for it to be properly cleared -- thus we
11107 increment the PC here so that the logic setting
11108 tb->size below does the right thing. */
11109 /* TODO: Advance PC by correct instruction length to
11110 * avoid disassembler error messages */
11111 dc->base.pc_next += 2;
11112 dc->base.is_jmp = DISAS_NORETURN;
11113 }
11114
11115 return true;
11116 }
11117
11118 static bool arm_pre_translate_insn(DisasContext *dc)
11119 {
11120 #ifdef CONFIG_USER_ONLY
11121 /* Intercept jump to the magic kernel page. */
11122 if (dc->base.pc_next >= 0xffff0000) {
11123 /* We always get here via a jump, so we know we are not in a
11124 conditional execution block. */
11125 gen_exception_internal(EXCP_KERNEL_TRAP);
11126 dc->base.is_jmp = DISAS_NORETURN;
11127 return true;
11128 }
11129 #endif
11130
11131 if (dc->ss_active && !dc->pstate_ss) {
11132 /* Singlestep state is Active-pending.
11133 * If we're in this state at the start of a TB then either
11134 * a) we just took an exception to an EL which is being debugged
11135 * and this is the first insn in the exception handler
11136 * b) debug exceptions were masked and we just unmasked them
11137 * without changing EL (eg by clearing PSTATE.D)
11138 * In either case we're going to take a swstep exception in the
11139 * "did not step an insn" case, and so the syndrome ISV and EX
11140 * bits should be zero.
11141 */
11142 assert(dc->base.num_insns == 1);
11143 gen_swstep_exception(dc, 0, 0);
11144 dc->base.is_jmp = DISAS_NORETURN;
11145 return true;
11146 }
11147
11148 return false;
11149 }
11150
11151 static void arm_post_translate_insn(DisasContext *dc)
11152 {
11153 if (dc->condjmp && !dc->base.is_jmp) {
11154 gen_set_label(dc->condlabel);
11155 dc->condjmp = 0;
11156 }
11157 translator_loop_temp_check(&dc->base);
11158 }
11159
11160 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11161 {
11162 DisasContext *dc = container_of(dcbase, DisasContext, base);
11163 CPUARMState *env = cpu->env_ptr;
11164 unsigned int insn;
11165
11166 if (arm_pre_translate_insn(dc)) {
11167 return;
11168 }
11169
11170 dc->pc_curr = dc->base.pc_next;
11171 insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
11172 dc->insn = insn;
11173 dc->base.pc_next += 4;
11174 disas_arm_insn(dc, insn);
11175
11176 arm_post_translate_insn(dc);
11177
11178 /* ARM is a fixed-length ISA. We performed the cross-page check
11179 in init_disas_context by adjusting max_insns. */
11180 }
11181
11182 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
11183 {
11184 /* Return true if this Thumb insn is always unconditional,
11185 * even inside an IT block. This is true of only a very few
11186 * instructions: BKPT, HLT, and SG.
11187 *
11188 * A larger class of instructions are UNPREDICTABLE if used
11189 * inside an IT block; we do not need to detect those here, because
11190 * what we do by default (perform the cc check and update the IT
11191 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
11192 * choice for those situations.
11193 *
11194 * insn is either a 16-bit or a 32-bit instruction; the two are
11195 * distinguishable because for the 16-bit case the top 16 bits
11196 * are zeroes, and that isn't a valid 32-bit encoding.
11197 */
11198 if ((insn & 0xffffff00) == 0xbe00) {
11199 /* BKPT */
11200 return true;
11201 }
11202
11203 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
11204 !arm_dc_feature(s, ARM_FEATURE_M)) {
11205 /* HLT: v8A only. This is unconditional even when it is going to
11206 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
11207 * For v7 cores this was a plain old undefined encoding and so
11208 * honours its cc check. (We might be using the encoding as
11209 * a semihosting trap, but we don't change the cc check behaviour
11210 * on that account, because a debugger connected to a real v7A
11211 * core and emulating semihosting traps by catching the UNDEF
11212 * exception would also only see cases where the cc check passed.
11213 * No guest code should be trying to do a HLT semihosting trap
11214 * in an IT block anyway.
11215 */
11216 return true;
11217 }
11218
11219 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
11220 arm_dc_feature(s, ARM_FEATURE_M)) {
11221 /* SG: v8M only */
11222 return true;
11223 }
11224
11225 return false;
11226 }
11227
11228 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11229 {
11230 DisasContext *dc = container_of(dcbase, DisasContext, base);
11231 CPUARMState *env = cpu->env_ptr;
11232 uint32_t insn;
11233 bool is_16bit;
11234
11235 if (arm_pre_translate_insn(dc)) {
11236 return;
11237 }
11238
11239 dc->pc_curr = dc->base.pc_next;
11240 insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11241 is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
11242 dc->base.pc_next += 2;
11243 if (!is_16bit) {
11244 uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11245
11246 insn = insn << 16 | insn2;
11247 dc->base.pc_next += 2;
11248 }
11249 dc->insn = insn;
11250
11251 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
11252 uint32_t cond = dc->condexec_cond;
11253
11254 /*
11255 * Conditionally skip the insn. Note that both 0xe and 0xf mean
11256 * "always"; 0xf is not "never".
11257 */
11258 if (cond < 0x0e) {
11259 arm_skip_unless(dc, cond);
11260 }
11261 }
11262
11263 if (is_16bit) {
11264 disas_thumb_insn(dc, insn);
11265 } else {
11266 disas_thumb2_insn(dc, insn);
11267 }
11268
11269 /* Advance the Thumb condexec condition. */
11270 if (dc->condexec_mask) {
11271 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
11272 ((dc->condexec_mask >> 4) & 1));
11273 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
11274 if (dc->condexec_mask == 0) {
11275 dc->condexec_cond = 0;
11276 }
11277 }
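/*
 * Continuing the single-insn "IT EQ" example from trans_IT:
 * condexec_mask goes from 0x08 (set by the IT insn) to 0x10 (after
 * the IT insn itself advances here) to 0 after the one conditional
 * insn, at which point condexec_cond is reset to 0 and the block is
 * over.
 */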
11278
11279 arm_post_translate_insn(dc);
11280
11281 /* Thumb is a variable-length ISA. Stop translation when the next insn
11282 * will touch a new page. This ensures that prefetch aborts occur at
11283 * the right place.
11284 *
11285 * We want to stop the TB if the next insn starts in a new page,
11286 * or if it spans between this page and the next. This means that
11287 * if we're looking at the last halfword in the page we need to
11288 * see if it's a 16-bit Thumb insn (which will fit in this TB)
11289 * or a 32-bit Thumb insn (which won't).
11290 * This is to avoid generating a silly TB with a single 16-bit insn
11291 * in it at the end of this page (which would execute correctly
11292 * but isn't very efficient).
11293 */
11294 if (dc->base.is_jmp == DISAS_NEXT
11295 && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
11296 || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
11297 && insn_crosses_page(env, dc)))) {
11298 dc->base.is_jmp = DISAS_TOO_MANY;
11299 }
11300 }
11301
11302 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
11303 {
11304 DisasContext *dc = container_of(dcbase, DisasContext, base);
11305
11306 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
11307 /* FIXME: This can theoretically happen with self-modifying code. */
11308 cpu_abort(cpu, "IO on conditional branch instruction");
11309 }
11310
11311 /* At this stage dc->condjmp will only be set when the skipped
11312 instruction was a conditional branch or trap, and the PC has
11313 already been written. */
11314 gen_set_condexec(dc);
11315 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
11316 /* Exception return branches need some special case code at the
11317 * end of the TB, which is complex enough that it has to
11318 * handle the single-step vs not and the condition-failed
11319 * insn codepath itself.
11320 */
11321 gen_bx_excret_final_code(dc);
11322 } else if (unlikely(is_singlestepping(dc))) {
11323 /* Unconditional and "condition passed" instruction codepath. */
11324 switch (dc->base.is_jmp) {
11325 case DISAS_SWI:
11326 gen_ss_advance(dc);
11327 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11328 default_exception_el(dc));
11329 break;
11330 case DISAS_HVC:
11331 gen_ss_advance(dc);
11332 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11333 break;
11334 case DISAS_SMC:
11335 gen_ss_advance(dc);
11336 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11337 break;
11338 case DISAS_NEXT:
11339 case DISAS_TOO_MANY:
11340 case DISAS_UPDATE:
11341 gen_set_pc_im(dc, dc->base.pc_next);
11342 /* fall through */
11343 default:
11344 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
11345 gen_singlestep_exception(dc);
11346 break;
11347 case DISAS_NORETURN:
11348 break;
11349 }
11350 } else {
11351 /* While branches must always occur at the end of an IT block,
11352 there are a few other things that can cause us to terminate
11353 the TB in the middle of an IT block:
11354 - Exception generating instructions (bkpt, swi, undefined).
11355 - Page boundaries.
11356 - Hardware watchpoints.
11357 Hardware breakpoints have already been handled and skip this code.
11358 */
11359 switch (dc->base.is_jmp) {
11360 case DISAS_NEXT:
11361 case DISAS_TOO_MANY:
11362 gen_goto_tb(dc, 1, dc->base.pc_next);
11363 break;
11364 case DISAS_JUMP:
11365 gen_goto_ptr();
11366 break;
11367 case DISAS_UPDATE:
11368 gen_set_pc_im(dc, dc->base.pc_next);
11369 /* fall through */
11370 default:
11371 /* indicate that the hash table must be used to find the next TB */
11372 tcg_gen_exit_tb(NULL, 0);
11373 break;
11374 case DISAS_NORETURN:
11375 /* nothing more to generate */
11376 break;
11377 case DISAS_WFI:
11378 {
11379 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
11380 !(dc->insn & (1U << 31))) ? 2 : 4);
11381
11382 gen_helper_wfi(cpu_env, tmp);
11383 tcg_temp_free_i32(tmp);
11384 /* The helper doesn't necessarily throw an exception, but we
11385 * must go back to the main loop to check for interrupts anyway.
11386 */
11387 tcg_gen_exit_tb(NULL, 0);
11388 break;
11389 }
11390 case DISAS_WFE:
11391 gen_helper_wfe(cpu_env);
11392 break;
11393 case DISAS_YIELD:
11394 gen_helper_yield(cpu_env);
11395 break;
11396 case DISAS_SWI:
11397 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11398 default_exception_el(dc));
11399 break;
11400 case DISAS_HVC:
11401 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11402 break;
11403 case DISAS_SMC:
11404 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11405 break;
11406 }
11407 }
11408
11409 if (dc->condjmp) {
11410 /* "Condition failed" instruction codepath for the branch/trap insn */
11411 gen_set_label(dc->condlabel);
11412 gen_set_condexec(dc);
11413 if (unlikely(is_singlestepping(dc))) {
11414 gen_set_pc_im(dc, dc->base.pc_next);
11415 gen_singlestep_exception(dc);
11416 } else {
11417 gen_goto_tb(dc, 1, dc->base.pc_next);
11418 }
11419 }
11420 }
11421
11422 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
11423 {
11424 DisasContext *dc = container_of(dcbase, DisasContext, base);
11425
11426 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
11427 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
11428 }
11429
11430 static const TranslatorOps arm_translator_ops = {
11431 .init_disas_context = arm_tr_init_disas_context,
11432 .tb_start = arm_tr_tb_start,
11433 .insn_start = arm_tr_insn_start,
11434 .breakpoint_check = arm_tr_breakpoint_check,
11435 .translate_insn = arm_tr_translate_insn,
11436 .tb_stop = arm_tr_tb_stop,
11437 .disas_log = arm_tr_disas_log,
11438 };
11439
11440 static const TranslatorOps thumb_translator_ops = {
11441 .init_disas_context = arm_tr_init_disas_context,
11442 .tb_start = arm_tr_tb_start,
11443 .insn_start = arm_tr_insn_start,
11444 .breakpoint_check = arm_tr_breakpoint_check,
11445 .translate_insn = thumb_tr_translate_insn,
11446 .tb_stop = arm_tr_tb_stop,
11447 .disas_log = arm_tr_disas_log,
11448 };
11449
11450 /* generate intermediate code for basic block 'tb'. */
11451 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
11452 {
11453 DisasContext dc = { };
11454 const TranslatorOps *ops = &arm_translator_ops;
11455
11456 if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
11457 ops = &thumb_translator_ops;
11458 }
11459 #ifdef TARGET_AARCH64
11460 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
11461 ops = &aarch64_translator_ops;
11462 }
11463 #endif
11464
11465 translator_loop(ops, &dc.base, cpu, tb, max_insns);
11466 }
11467
11468 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
11469 target_ulong *data)
11470 {
11471 if (is_a64(env)) {
11472 env->pc = data[0];
11473 env->condexec_bits = 0;
11474 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11475 } else {
11476 env->regs[15] = data[0];
11477 env->condexec_bits = data[1];
11478 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11479 }
11480 }