target/arm/translate.c
1 /*
2 * ARM translation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "qemu/osdep.h"
22
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "hw/semihosting/semihost.h"
33
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
36
37 #include "trace-tcg.h"
38 #include "exec/log.h"
39
40
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
51
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
53
54 #include "translate.h"
55
56 #if defined(CONFIG_USER_ONLY)
57 #define IS_USER(s) 1
58 #else
59 #define IS_USER(s) (s->user)
60 #endif
61
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
68
69 #include "exec/gen-icount.h"
70
71 static const char * const regnames[] =
72 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
73 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
74
75 /* Function prototypes for gen_ functions calling Neon helpers. */
76 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
77 TCGv_i32, TCGv_i32);
78 /* Function prototypes for gen_ functions for fix point conversions */
79 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
80
81 /* initialize TCG globals. */
82 void arm_translate_init(void)
83 {
84 int i;
85
86 for (i = 0; i < 16; i++) {
87 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
88 offsetof(CPUARMState, regs[i]),
89 regnames[i]);
90 }
91 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
92 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
93 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
94 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
95
96 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
97 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
98 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_val), "exclusive_val");
100
101 a64_translate_init();
102 }
103
104 /* Flags for the disas_set_da_iss info argument:
105 * lower bits hold the Rt register number, higher bits are flags.
106 */
107 typedef enum ISSInfo {
108 ISSNone = 0,
109 ISSRegMask = 0x1f,
110 ISSInvalid = (1 << 5),
111 ISSIsAcqRel = (1 << 6),
112 ISSIsWrite = (1 << 7),
113 ISSIs16Bit = (1 << 8),
114 } ISSInfo;
115
116 /* Save the syndrome information for a Data Abort */
117 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
118 {
119 uint32_t syn;
120 int sas = memop & MO_SIZE;
121 bool sse = memop & MO_SIGN;
122 bool is_acqrel = issinfo & ISSIsAcqRel;
123 bool is_write = issinfo & ISSIsWrite;
124 bool is_16bit = issinfo & ISSIs16Bit;
125 int srt = issinfo & ISSRegMask;
126
127 if (issinfo & ISSInvalid) {
128 /* Some callsites want to conditionally provide ISS info,
129 * eg "only if this was not a writeback"
130 */
131 return;
132 }
133
134 if (srt == 15) {
135 /* For AArch32, insns where the src/dest is R15 never generate
136 * ISS information. Catching that here saves checking at all
137 * the call sites.
138 */
139 return;
140 }
141
142 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
143 0, 0, 0, is_write, 0, is_16bit);
144 disas_set_insn_syndrome(s, syn);
145 }
146
147 static inline int get_a32_user_mem_index(DisasContext *s)
148 {
149 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
150 * insns:
151 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
152 * otherwise, access as if at PL0.
153 */
154 switch (s->mmu_idx) {
155 case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
156 case ARMMMUIdx_E10_0:
157 case ARMMMUIdx_E10_1:
158 case ARMMMUIdx_E10_1_PAN:
159 return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
160 case ARMMMUIdx_SE3:
161 case ARMMMUIdx_SE10_0:
162 case ARMMMUIdx_SE10_1:
163 case ARMMMUIdx_SE10_1_PAN:
164 return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
177 default:
178 g_assert_not_reached();
179 }
180 }
181
182 static inline TCGv_i32 load_cpu_offset(int offset)
183 {
184 TCGv_i32 tmp = tcg_temp_new_i32();
185 tcg_gen_ld_i32(tmp, cpu_env, offset);
186 return tmp;
187 }
188
189 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
190
191 static inline void store_cpu_offset(TCGv_i32 var, int offset)
192 {
193 tcg_gen_st_i32(var, cpu_env, offset);
194 tcg_temp_free_i32(var);
195 }
196
197 #define store_cpu_field(var, name) \
198 store_cpu_offset(var, offsetof(CPUARMState, name))
199
200 /* The architectural value of PC. */
201 static uint32_t read_pc(DisasContext *s)
202 {
203 return s->pc_curr + (s->thumb ? 4 : 8);
204 }
205
206 /* Set a variable to the value of a CPU register. */
207 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
208 {
209 if (reg == 15) {
210 tcg_gen_movi_i32(var, read_pc(s));
211 } else {
212 tcg_gen_mov_i32(var, cpu_R[reg]);
213 }
214 }
215
216 /* Create a new temporary and set it to the value of a CPU register. */
217 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
218 {
219 TCGv_i32 tmp = tcg_temp_new_i32();
220 load_reg_var(s, tmp, reg);
221 return tmp;
222 }
223
224 /*
225 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
226 * This is used for load/store for which use of PC implies (literal),
227 * or ADD that implies ADR.
228 */
229 static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
230 {
231 TCGv_i32 tmp = tcg_temp_new_i32();
232
233 if (reg == 15) {
234 tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
235 } else {
236 tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
237 }
238 return tmp;
239 }
240
241 /* Set a CPU register. The source must be a temporary and will be
242 marked as dead. */
243 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
244 {
245 if (reg == 15) {
246 /* In Thumb mode, we must ignore bit 0.
247 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
248 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
249 * We choose to ignore [1:0] in ARM mode for all architecture versions.
250 */
251 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
252 s->base.is_jmp = DISAS_JUMP;
253 }
254 tcg_gen_mov_i32(cpu_R[reg], var);
255 tcg_temp_free_i32(var);
256 }
257
258 /*
259 * Variant of store_reg which applies v8M stack-limit checks before updating
260 * SP. If the check fails this will result in an exception being taken.
261 * We disable the stack checks for CONFIG_USER_ONLY because we have
262 * no idea what the stack limits should be in that case.
263 * If stack checking is not being done this just acts like store_reg().
264 */
265 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
266 {
267 #ifndef CONFIG_USER_ONLY
268 if (s->v8m_stackcheck) {
269 gen_helper_v8m_stackcheck(cpu_env, var);
270 }
271 #endif
272 store_reg(s, 13, var);
273 }
274
275 /* Value extensions. */
276 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
277 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
278 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
279 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
280
281 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
282 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
283
284
285 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
286 {
287 TCGv_i32 tmp_mask = tcg_const_i32(mask);
288 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
289 tcg_temp_free_i32(tmp_mask);
290 }
291 /* Set NZCV flags from the high 4 bits of var. */
292 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
293
294 static void gen_exception_internal(int excp)
295 {
296 TCGv_i32 tcg_excp = tcg_const_i32(excp);
297
298 assert(excp_is_internal(excp));
299 gen_helper_exception_internal(cpu_env, tcg_excp);
300 tcg_temp_free_i32(tcg_excp);
301 }
302
303 static void gen_step_complete_exception(DisasContext *s)
304 {
305 * We just completed a step of an insn. Move from Active-not-pending
306 * to Active-pending, and then also take the swstep exception.
307 * This corresponds to making the (IMPDEF) choice to prioritize
308 * swstep exceptions over asynchronous exceptions taken to an exception
309 * level where debug is disabled. This choice has the advantage that
310 * we do not need to maintain internal state corresponding to the
311 * ISV/EX syndrome bits between completion of the step and generation
312 * of the exception, and our syndrome information is always correct.
313 */
314 gen_ss_advance(s);
315 gen_swstep_exception(s, 1, s->is_ldex);
316 s->base.is_jmp = DISAS_NORETURN;
317 }
318
319 static void gen_singlestep_exception(DisasContext *s)
320 {
321 /* Generate the right kind of exception for singlestep, which is
322 * either the architectural singlestep or EXCP_DEBUG for QEMU's
323 * gdb singlestepping.
324 */
325 if (s->ss_active) {
326 gen_step_complete_exception(s);
327 } else {
328 gen_exception_internal(EXCP_DEBUG);
329 }
330 }
331
332 static inline bool is_singlestepping(DisasContext *s)
333 {
334 /* Return true if we are singlestepping either because of
335 * architectural singlestep or QEMU gdbstub singlestep. This does
336 * not include the command line '-singlestep' mode which is rather
337 * misnamed as it only means "one instruction per TB" and doesn't
338 * affect the code we generate.
339 */
340 return s->base.singlestep_enabled || s->ss_active;
341 }
342
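/*
 * Dual signed 16x16->32 multiply: on return, a holds the product of the
 * low halfwords and b holds the product of the high halfwords.
 */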
343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
344 {
345 TCGv_i32 tmp1 = tcg_temp_new_i32();
346 TCGv_i32 tmp2 = tcg_temp_new_i32();
347 tcg_gen_ext16s_i32(tmp1, a);
348 tcg_gen_ext16s_i32(tmp2, b);
349 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
350 tcg_temp_free_i32(tmp2);
351 tcg_gen_sari_i32(a, a, 16);
352 tcg_gen_sari_i32(b, b, 16);
353 tcg_gen_mul_i32(b, b, a);
354 tcg_gen_mov_i32(a, tmp1);
355 tcg_temp_free_i32(tmp1);
356 }
357
358 /* Byteswap each halfword. */
359 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
360 {
361 TCGv_i32 tmp = tcg_temp_new_i32();
362 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
363 tcg_gen_shri_i32(tmp, var, 8);
364 tcg_gen_and_i32(tmp, tmp, mask);
365 tcg_gen_and_i32(var, var, mask);
366 tcg_gen_shli_i32(var, var, 8);
367 tcg_gen_or_i32(dest, var, tmp);
368 tcg_temp_free_i32(mask);
369 tcg_temp_free_i32(tmp);
370 }
371
372 /* Byteswap low halfword and sign extend. */
373 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
374 {
375 tcg_gen_ext16u_i32(var, var);
376 tcg_gen_bswap16_i32(var, var);
377 tcg_gen_ext16s_i32(dest, var);
378 }
379
380 /* 32x32->64 multiply. Marks inputs as dead. */
381 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
382 {
383 TCGv_i32 lo = tcg_temp_new_i32();
384 TCGv_i32 hi = tcg_temp_new_i32();
385 TCGv_i64 ret;
386
387 tcg_gen_mulu2_i32(lo, hi, a, b);
388 tcg_temp_free_i32(a);
389 tcg_temp_free_i32(b);
390
391 ret = tcg_temp_new_i64();
392 tcg_gen_concat_i32_i64(ret, lo, hi);
393 tcg_temp_free_i32(lo);
394 tcg_temp_free_i32(hi);
395
396 return ret;
397 }
398
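/* Signed 32x32->64 multiply. Marks inputs as dead. */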
399 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
400 {
401 TCGv_i32 lo = tcg_temp_new_i32();
402 TCGv_i32 hi = tcg_temp_new_i32();
403 TCGv_i64 ret;
404
405 tcg_gen_muls2_i32(lo, hi, a, b);
406 tcg_temp_free_i32(a);
407 tcg_temp_free_i32(b);
408
409 ret = tcg_temp_new_i64();
410 tcg_gen_concat_i32_i64(ret, lo, hi);
411 tcg_temp_free_i32(lo);
412 tcg_temp_free_i32(hi);
413
414 return ret;
415 }
416
417 /* Swap low and high halfwords. */
418 static void gen_swap_half(TCGv_i32 var)
419 {
420 tcg_gen_rotri_i32(var, var, 16);
421 }
422
423 /* Dual 16-bit add. The result is placed in dest; t0 and t1 are clobbered:
424 tmp = (t0 ^ t1) & 0x8000;
425 t0 &= ~0x8000;
426 t1 &= ~0x8000;
427 t0 = (t0 + t1) ^ tmp;
428 */
429
430 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
431 {
432 TCGv_i32 tmp = tcg_temp_new_i32();
433 tcg_gen_xor_i32(tmp, t0, t1);
434 tcg_gen_andi_i32(tmp, tmp, 0x8000);
435 tcg_gen_andi_i32(t0, t0, ~0x8000);
436 tcg_gen_andi_i32(t1, t1, ~0x8000);
437 tcg_gen_add_i32(t0, t0, t1);
438 tcg_gen_xor_i32(dest, t0, tmp);
439 tcg_temp_free_i32(tmp);
440 }
441
442 /* Set N and Z flags from var. */
443 static inline void gen_logic_CC(TCGv_i32 var)
444 {
445 tcg_gen_mov_i32(cpu_NF, var);
446 tcg_gen_mov_i32(cpu_ZF, var);
447 }
448
449 /* dest = T0 + T1 + CF. */
450 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
451 {
452 tcg_gen_add_i32(dest, t0, t1);
453 tcg_gen_add_i32(dest, dest, cpu_CF);
454 }
455
456 /* dest = T0 - T1 + CF - 1. */
457 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
458 {
459 tcg_gen_sub_i32(dest, t0, t1);
460 tcg_gen_add_i32(dest, dest, cpu_CF);
461 tcg_gen_subi_i32(dest, dest, 1);
462 }
463
464 /* dest = T0 + T1. Compute C, N, V and Z flags */
465 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
466 {
467 TCGv_i32 tmp = tcg_temp_new_i32();
468 tcg_gen_movi_i32(tmp, 0);
469 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
470 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
471 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
472 tcg_gen_xor_i32(tmp, t0, t1);
473 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
474 tcg_temp_free_i32(tmp);
475 tcg_gen_mov_i32(dest, cpu_NF);
476 }
477
478 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
479 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
480 {
481 TCGv_i32 tmp = tcg_temp_new_i32();
482 if (TCG_TARGET_HAS_add2_i32) {
483 tcg_gen_movi_i32(tmp, 0);
484 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
485 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
486 } else {
487 TCGv_i64 q0 = tcg_temp_new_i64();
488 TCGv_i64 q1 = tcg_temp_new_i64();
489 tcg_gen_extu_i32_i64(q0, t0);
490 tcg_gen_extu_i32_i64(q1, t1);
491 tcg_gen_add_i64(q0, q0, q1);
492 tcg_gen_extu_i32_i64(q1, cpu_CF);
493 tcg_gen_add_i64(q0, q0, q1);
494 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
495 tcg_temp_free_i64(q0);
496 tcg_temp_free_i64(q1);
497 }
498 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
499 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
500 tcg_gen_xor_i32(tmp, t0, t1);
501 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
502 tcg_temp_free_i32(tmp);
503 tcg_gen_mov_i32(dest, cpu_NF);
504 }
505
506 /* dest = T0 - T1. Compute C, N, V and Z flags */
507 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
508 {
509 TCGv_i32 tmp;
510 tcg_gen_sub_i32(cpu_NF, t0, t1);
511 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
512 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
513 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
514 tmp = tcg_temp_new_i32();
515 tcg_gen_xor_i32(tmp, t0, t1);
516 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
517 tcg_temp_free_i32(tmp);
518 tcg_gen_mov_i32(dest, cpu_NF);
519 }
520
521 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
522 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
523 {
524 TCGv_i32 tmp = tcg_temp_new_i32();
525 tcg_gen_not_i32(tmp, t1);
526 gen_adc_CC(dest, t0, tmp);
527 tcg_temp_free_i32(tmp);
528 }
529
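/*
 * Variable shift by register: the movcond selects 0 when the shift amount
 * (t1 & 0xff) is greater than 31, matching the ARM rule that LSL/LSR by
 * 32 or more produces 0.
 */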
530 #define GEN_SHIFT(name) \
531 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
532 { \
533 TCGv_i32 tmp1, tmp2, tmp3; \
534 tmp1 = tcg_temp_new_i32(); \
535 tcg_gen_andi_i32(tmp1, t1, 0xff); \
536 tmp2 = tcg_const_i32(0); \
537 tmp3 = tcg_const_i32(0x1f); \
538 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
539 tcg_temp_free_i32(tmp3); \
540 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
541 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
542 tcg_temp_free_i32(tmp2); \
543 tcg_temp_free_i32(tmp1); \
544 }
545 GEN_SHIFT(shl)
546 GEN_SHIFT(shr)
547 #undef GEN_SHIFT
548
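/*
 * Variable arithmetic shift right: shift amounts of 32 or more are clamped
 * to 31, which replicates the sign bit as the ARM ASR semantics require.
 */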
549 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
550 {
551 TCGv_i32 tmp1, tmp2;
552 tmp1 = tcg_temp_new_i32();
553 tcg_gen_andi_i32(tmp1, t1, 0xff);
554 tmp2 = tcg_const_i32(0x1f);
555 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
556 tcg_temp_free_i32(tmp2);
557 tcg_gen_sar_i32(dest, t0, tmp1);
558 tcg_temp_free_i32(tmp1);
559 }
560
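/* Set CF to bit SHIFT of var, i.e. the shifter carry-out. */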
561 static void shifter_out_im(TCGv_i32 var, int shift)
562 {
563 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
564 }
565
566 /* Shift by immediate. Includes special handling for shift == 0. */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568 int shift, int flags)
569 {
570 switch (shiftop) {
571 case 0: /* LSL */
572 if (shift != 0) {
573 if (flags)
574 shifter_out_im(var, 32 - shift);
575 tcg_gen_shli_i32(var, var, shift);
576 }
577 break;
578 case 1: /* LSR */
579 if (shift == 0) {
580 if (flags) {
581 tcg_gen_shri_i32(cpu_CF, var, 31);
582 }
583 tcg_gen_movi_i32(var, 0);
584 } else {
585 if (flags)
586 shifter_out_im(var, shift - 1);
587 tcg_gen_shri_i32(var, var, shift);
588 }
589 break;
590 case 2: /* ASR */
591 if (shift == 0)
592 shift = 32;
593 if (flags)
594 shifter_out_im(var, shift - 1);
595 if (shift == 32)
596 shift = 31;
597 tcg_gen_sari_i32(var, var, shift);
598 break;
599 case 3: /* ROR/RRX */
600 if (shift != 0) {
601 if (flags)
602 shifter_out_im(var, shift - 1);
603 tcg_gen_rotri_i32(var, var, shift); break;
604 } else {
605 TCGv_i32 tmp = tcg_temp_new_i32();
606 tcg_gen_shli_i32(tmp, cpu_CF, 31);
607 if (flags)
608 shifter_out_im(var, 0);
609 tcg_gen_shri_i32(var, var, 1);
610 tcg_gen_or_i32(var, var, tmp);
611 tcg_temp_free_i32(tmp);
612 }
613 }
614 }
615
616 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
617 TCGv_i32 shift, int flags)
618 {
619 if (flags) {
620 switch (shiftop) {
621 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
622 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
623 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
624 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
625 }
626 } else {
627 switch (shiftop) {
628 case 0:
629 gen_shl(var, var, shift);
630 break;
631 case 1:
632 gen_shr(var, var, shift);
633 break;
634 case 2:
635 gen_sar(var, var, shift);
636 break;
637 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
638 tcg_gen_rotr_i32(var, var, shift); break;
639 }
640 }
641 tcg_temp_free_i32(shift);
642 }
643
644 /*
645 * Generate a conditional based on ARM condition code cc.
646 * This is common between ARM and Aarch64 targets.
647 */
648 void arm_test_cc(DisasCompare *cmp, int cc)
649 {
650 TCGv_i32 value;
651 TCGCond cond;
652 bool global = true;
653
654 switch (cc) {
655 case 0: /* eq: Z */
656 case 1: /* ne: !Z */
657 cond = TCG_COND_EQ;
658 value = cpu_ZF;
659 break;
660
661 case 2: /* cs: C */
662 case 3: /* cc: !C */
663 cond = TCG_COND_NE;
664 value = cpu_CF;
665 break;
666
667 case 4: /* mi: N */
668 case 5: /* pl: !N */
669 cond = TCG_COND_LT;
670 value = cpu_NF;
671 break;
672
673 case 6: /* vs: V */
674 case 7: /* vc: !V */
675 cond = TCG_COND_LT;
676 value = cpu_VF;
677 break;
678
679 case 8: /* hi: C && !Z */
680 case 9: /* ls: !C || Z -> !(C && !Z) */
681 cond = TCG_COND_NE;
682 value = tcg_temp_new_i32();
683 global = false;
684 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
685 ZF is non-zero for !Z; so AND the two subexpressions. */
686 tcg_gen_neg_i32(value, cpu_CF);
687 tcg_gen_and_i32(value, value, cpu_ZF);
688 break;
689
690 case 10: /* ge: N == V -> N ^ V == 0 */
691 case 11: /* lt: N != V -> N ^ V != 0 */
692 /* Since we're only interested in the sign bit, == 0 is >= 0. */
693 cond = TCG_COND_GE;
694 value = tcg_temp_new_i32();
695 global = false;
696 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
697 break;
698
699 case 12: /* gt: !Z && N == V */
700 case 13: /* le: Z || N != V */
701 cond = TCG_COND_NE;
702 value = tcg_temp_new_i32();
703 global = false;
704 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
705 * the sign bit then AND with ZF to yield the result. */
706 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
707 tcg_gen_sari_i32(value, value, 31);
708 tcg_gen_andc_i32(value, cpu_ZF, value);
709 break;
710
711 case 14: /* always */
712 case 15: /* always */
713 /* Use the ALWAYS condition, which will fold early.
714 * It doesn't matter what we use for the value. */
715 cond = TCG_COND_ALWAYS;
716 value = cpu_ZF;
717 goto no_invert;
718
719 default:
720 fprintf(stderr, "Bad condition code 0x%x\n", cc);
721 abort();
722 }
723
724 if (cc & 1) {
725 cond = tcg_invert_cond(cond);
726 }
727
728 no_invert:
729 cmp->cond = cond;
730 cmp->value = value;
731 cmp->value_global = global;
732 }
733
734 void arm_free_cc(DisasCompare *cmp)
735 {
736 if (!cmp->value_global) {
737 tcg_temp_free_i32(cmp->value);
738 }
739 }
740
741 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
742 {
743 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
744 }
745
746 void arm_gen_test_cc(int cc, TCGLabel *label)
747 {
748 DisasCompare cmp;
749 arm_test_cc(&cmp, cc);
750 arm_jump_cc(&cmp, label);
751 arm_free_cc(&cmp);
752 }
753
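/*
 * Sync the IT-block state into the CPUARMState condexec_bits field;
 * this is a no-op when we are not inside an IT block.
 */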
754 static inline void gen_set_condexec(DisasContext *s)
755 {
756 if (s->condexec_mask) {
757 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
758 TCGv_i32 tmp = tcg_temp_new_i32();
759 tcg_gen_movi_i32(tmp, val);
760 store_cpu_field(tmp, condexec_bits);
761 }
762 }
763
764 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
765 {
766 tcg_gen_movi_i32(cpu_R[15], val);
767 }
768
769 /* Set PC and Thumb state from var. var is marked as dead. */
770 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
771 {
772 s->base.is_jmp = DISAS_JUMP;
773 tcg_gen_andi_i32(cpu_R[15], var, ~1);
774 tcg_gen_andi_i32(var, var, 1);
775 store_cpu_field(var, thumb);
776 }
777
778 /*
779 * Set PC and Thumb state from var. var is marked as dead.
780 * For M-profile CPUs, include logic to detect exception-return
781 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
782 * and BX reg, and no others, and happens only for code in Handler mode.
783 * The Security Extension also requires us to check for the FNC_RETURN
784 * which signals a function return from non-secure state; this can happen
785 * in both Handler and Thread mode.
786 * To avoid having to do multiple comparisons in inline generated code,
787 * we make the check we do here loose, so it will match for EXC_RETURN
788 * in Thread mode. For system emulation do_v7m_exception_exit() checks
789 * for these spurious cases and returns without doing anything (giving
790 * the same behaviour as for a branch to a non-magic address).
791 *
792 * In linux-user mode it is unclear what the right behaviour for an
793 * attempted FNC_RETURN should be, because in real hardware this will go
794 * directly to Secure code (ie not the Linux kernel) which will then treat
795 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
796 * attempt behave the way it would on a CPU without the security extension,
797 * which is to say "like a normal branch". That means we can simply treat
798 * all branches as normal with no magic address behaviour.
799 */
800 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
801 {
802 /* Generate the same code here as for a simple bx, but flag via
803 * s->base.is_jmp that we need to do the rest of the work later.
804 */
805 gen_bx(s, var);
806 #ifndef CONFIG_USER_ONLY
807 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
808 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
809 s->base.is_jmp = DISAS_BX_EXCRET;
810 }
811 #endif
812 }
813
814 static inline void gen_bx_excret_final_code(DisasContext *s)
815 {
816 /* Generate the code to finish possible exception return and end the TB */
817 TCGLabel *excret_label = gen_new_label();
818 uint32_t min_magic;
819
820 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
821 /* Covers FNC_RETURN and EXC_RETURN magic */
822 min_magic = FNC_RETURN_MIN_MAGIC;
823 } else {
824 /* EXC_RETURN magic only */
825 min_magic = EXC_RETURN_MIN_MAGIC;
826 }
827
828 /* Is the new PC value in the magic range indicating exception return? */
829 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
830 /* No: end the TB as we would for a DISAS_JMP */
831 if (is_singlestepping(s)) {
832 gen_singlestep_exception(s);
833 } else {
834 tcg_gen_exit_tb(NULL, 0);
835 }
836 gen_set_label(excret_label);
837 /* Yes: this is an exception return.
838 * At this point in runtime env->regs[15] and env->thumb will hold
839 * the exception-return magic number, which do_v7m_exception_exit()
840 * will read. Nothing else will be able to see those values because
841 * the cpu-exec main loop guarantees that we will always go straight
842 * from raising the exception to the exception-handling code.
843 *
844 * gen_ss_advance(s) does nothing on M profile currently but
845 * calling it is conceptually the right thing as we have executed
846 * this instruction (compare SWI, HVC, SMC handling).
847 */
848 gen_ss_advance(s);
849 gen_exception_internal(EXCP_EXCEPTION_EXIT);
850 }
851
852 static inline void gen_bxns(DisasContext *s, int rm)
853 {
854 TCGv_i32 var = load_reg(s, rm);
855
856 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
857 * we need to sync state before calling it, but:
858 * - we don't need to do gen_set_pc_im() because the bxns helper will
859 * always set the PC itself
860 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
861 * unless it's outside an IT block or the last insn in an IT block,
862 * so we know that condexec == 0 (already set at the top of the TB)
863 * is correct in the non-UNPREDICTABLE cases, and we can choose
864 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
865 */
866 gen_helper_v7m_bxns(cpu_env, var);
867 tcg_temp_free_i32(var);
868 s->base.is_jmp = DISAS_EXIT;
869 }
870
871 static inline void gen_blxns(DisasContext *s, int rm)
872 {
873 TCGv_i32 var = load_reg(s, rm);
874
875 /* We don't need to sync condexec state, for the same reason as bxns.
876 * We do however need to set the PC, because the blxns helper reads it.
877 * The blxns helper may throw an exception.
878 */
879 gen_set_pc_im(s, s->base.pc_next);
880 gen_helper_v7m_blxns(cpu_env, var);
881 tcg_temp_free_i32(var);
882 s->base.is_jmp = DISAS_EXIT;
883 }
884
885 /* Variant of store_reg which uses branch&exchange logic when storing
886 to r15 in ARM architecture v7 and above. The source must be a temporary
887 and will be marked as dead. */
888 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
889 {
890 if (reg == 15 && ENABLE_ARCH_7) {
891 gen_bx(s, var);
892 } else {
893 store_reg(s, reg, var);
894 }
895 }
896
897 /* Variant of store_reg which uses branch&exchange logic when storing
898 * to r15 in ARM architecture v5T and above. This is used for storing
899 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
900 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
901 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
902 {
903 if (reg == 15 && ENABLE_ARCH_5) {
904 gen_bx_excret(s, var);
905 } else {
906 store_reg(s, reg, var);
907 }
908 }
909
910 #ifdef CONFIG_USER_ONLY
911 #define IS_USER_ONLY 1
912 #else
913 #define IS_USER_ONLY 0
914 #endif
915
916 /* Abstractions of "generate code to do a guest load/store for
917 * AArch32", where a vaddr is always 32 bits (and is zero
918 * extended if we're a 64 bit core) and data is also
919 * 32 bits unless specifically doing a 64 bit access.
920 * These functions work like tcg_gen_qemu_{ld,st}* except
921 * that the address argument is TCGv_i32 rather than TCGv.
922 */
923
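/*
 * Widen a 32-bit guest address to the target address type, applying the
 * BE32 sub-word address XOR when SCTLR.B is set.
 */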
924 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
925 {
926 TCGv addr = tcg_temp_new();
927 tcg_gen_extu_i32_tl(addr, a32);
928
929 /* Not needed for user-mode BE32, where we use MO_BE instead. */
930 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
932 }
933 return addr;
934 }
935
936 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
937 int index, MemOp opc)
938 {
939 TCGv addr;
940
941 if (arm_dc_feature(s, ARM_FEATURE_M) &&
942 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
943 opc |= MO_ALIGN;
944 }
945
946 addr = gen_aa32_addr(s, a32, opc);
947 tcg_gen_qemu_ld_i32(val, addr, index, opc);
948 tcg_temp_free(addr);
949 }
950
951 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
952 int index, MemOp opc)
953 {
954 TCGv addr;
955
956 if (arm_dc_feature(s, ARM_FEATURE_M) &&
957 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
958 opc |= MO_ALIGN;
959 }
960
961 addr = gen_aa32_addr(s, a32, opc);
962 tcg_gen_qemu_st_i32(val, addr, index, opc);
963 tcg_temp_free(addr);
964 }
965
966 #define DO_GEN_LD(SUFF, OPC) \
967 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
968 TCGv_i32 a32, int index) \
969 { \
970 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
971 }
972
973 #define DO_GEN_ST(SUFF, OPC) \
974 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
975 TCGv_i32 a32, int index) \
976 { \
977 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
978 }
979
980 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
981 {
982 /* Not needed for user-mode BE32, where we use MO_BE instead. */
983 if (!IS_USER_ONLY && s->sctlr_b) {
984 tcg_gen_rotri_i64(val, val, 32);
985 }
986 }
987
988 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
989 int index, MemOp opc)
990 {
991 TCGv addr = gen_aa32_addr(s, a32, opc);
992 tcg_gen_qemu_ld_i64(val, addr, index, opc);
993 gen_aa32_frob64(s, val);
994 tcg_temp_free(addr);
995 }
996
997 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
998 TCGv_i32 a32, int index)
999 {
1000 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1001 }
1002
1003 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1004 int index, MemOp opc)
1005 {
1006 TCGv addr = gen_aa32_addr(s, a32, opc);
1007
1008 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1009 if (!IS_USER_ONLY && s->sctlr_b) {
1010 TCGv_i64 tmp = tcg_temp_new_i64();
1011 tcg_gen_rotri_i64(tmp, val, 32);
1012 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1013 tcg_temp_free_i64(tmp);
1014 } else {
1015 tcg_gen_qemu_st_i64(val, addr, index, opc);
1016 }
1017 tcg_temp_free(addr);
1018 }
1019
1020 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1021 TCGv_i32 a32, int index)
1022 {
1023 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1024 }
1025
1026 DO_GEN_LD(8u, MO_UB)
1027 DO_GEN_LD(16u, MO_UW)
1028 DO_GEN_LD(32u, MO_UL)
1029 DO_GEN_ST(8, MO_UB)
1030 DO_GEN_ST(16, MO_UW)
1031 DO_GEN_ST(32, MO_UL)
1032
1033 static inline void gen_hvc(DisasContext *s, int imm16)
1034 {
1035 /* The pre HVC helper handles cases when HVC gets trapped
1036 * as an undefined insn by runtime configuration (ie before
1037 * the insn really executes).
1038 */
1039 gen_set_pc_im(s, s->pc_curr);
1040 gen_helper_pre_hvc(cpu_env);
1041 /* Otherwise we will treat this as a real exception which
1042 * happens after execution of the insn. (The distinction matters
1043 * for the PC value reported to the exception handler and also
1044 * for single stepping.)
1045 */
1046 s->svc_imm = imm16;
1047 gen_set_pc_im(s, s->base.pc_next);
1048 s->base.is_jmp = DISAS_HVC;
1049 }
1050
1051 static inline void gen_smc(DisasContext *s)
1052 {
1053 /* As with HVC, we may take an exception either before or after
1054 * the insn executes.
1055 */
1056 TCGv_i32 tmp;
1057
1058 gen_set_pc_im(s, s->pc_curr);
1059 tmp = tcg_const_i32(syn_aa32_smc());
1060 gen_helper_pre_smc(cpu_env, tmp);
1061 tcg_temp_free_i32(tmp);
1062 gen_set_pc_im(s, s->base.pc_next);
1063 s->base.is_jmp = DISAS_SMC;
1064 }
1065
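/* Generate code to raise internal exception EXCP at address PC,
 * syncing the condexec state and PC first.
 */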
1066 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1067 {
1068 gen_set_condexec(s);
1069 gen_set_pc_im(s, pc);
1070 gen_exception_internal(excp);
1071 s->base.is_jmp = DISAS_NORETURN;
1072 }
1073
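/* Generate code to raise exception EXCP with syndrome SYN, targeting
 * exception level TARGET_EL, after syncing the condexec state and PC.
 */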
1074 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1075 int syn, uint32_t target_el)
1076 {
1077 gen_set_condexec(s);
1078 gen_set_pc_im(s, pc);
1079 gen_exception(excp, syn, target_el);
1080 s->base.is_jmp = DISAS_NORETURN;
1081 }
1082
1083 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1084 {
1085 TCGv_i32 tcg_syn;
1086
1087 gen_set_condexec(s);
1088 gen_set_pc_im(s, s->pc_curr);
1089 tcg_syn = tcg_const_i32(syn);
1090 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1091 tcg_temp_free_i32(tcg_syn);
1092 s->base.is_jmp = DISAS_NORETURN;
1093 }
1094
1095 static void unallocated_encoding(DisasContext *s)
1096 {
1097 /* Unallocated and reserved encodings are uncategorized */
1098 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1099 default_exception_el(s));
1100 }
1101
1102 /* Force a TB lookup after an instruction that changes the CPU state. */
1103 static inline void gen_lookup_tb(DisasContext *s)
1104 {
1105 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1106 s->base.is_jmp = DISAS_EXIT;
1107 }
1108
1109 static inline void gen_hlt(DisasContext *s, int imm)
1110 {
1111 /* HLT. This has two purposes.
1112 * Architecturally, it is an external halting debug instruction.
1113 * Since QEMU doesn't implement external debug, we treat this as
1114 * the architecture requires when halting debug is disabled: it will UNDEF.
1115 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1116 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1117 * must trigger semihosting even for ARMv7 and earlier, where
1118 * HLT was an undefined encoding.
1119 * In system mode, we don't allow userspace access to
1120 * semihosting, to provide some semblance of security
1121 * (and for consistency with our 32-bit semihosting).
1122 */
1123 if (semihosting_enabled() &&
1124 #ifndef CONFIG_USER_ONLY
1125 s->current_el != 0 &&
1126 #endif
1127 (imm == (s->thumb ? 0x3c : 0xf000))) {
1128 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1129 return;
1130 }
1131
1132 unallocated_encoding(s);
1133 }
1134
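/* Return a pointer to the float_status to use: the Neon "standard FPSCR"
 * status if NEON is nonzero, otherwise the normal VFP fp_status.
 */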
1135 static TCGv_ptr get_fpstatus_ptr(int neon)
1136 {
1137 TCGv_ptr statusptr = tcg_temp_new_ptr();
1138 int offset;
1139 if (neon) {
1140 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1141 } else {
1142 offset = offsetof(CPUARMState, vfp.fp_status);
1143 }
1144 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1145 return statusptr;
1146 }
1147
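/* Return the byte offset within CPUARMState of VFP register REG,
 * viewed as a double-precision register if DP is true, else as a
 * single-precision register.
 */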
1148 static inline long vfp_reg_offset(bool dp, unsigned reg)
1149 {
1150 if (dp) {
1151 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1152 } else {
1153 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1154 if (reg & 1) {
1155 ofs += offsetof(CPU_DoubleU, l.upper);
1156 } else {
1157 ofs += offsetof(CPU_DoubleU, l.lower);
1158 }
1159 return ofs;
1160 }
1161 }
1162
1163 /* Return the offset of a 32-bit piece of a NEON register.
1164 zero is the least significant end of the register. */
1165 static inline long
1166 neon_reg_offset (int reg, int n)
1167 {
1168 int sreg;
1169 sreg = reg * 2 + n;
1170 return vfp_reg_offset(0, sreg);
1171 }
1172
1173 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1174 * where 0 is the least significant end of the register.
1175 */
1176 static inline long
1177 neon_element_offset(int reg, int element, MemOp size)
1178 {
1179 int element_size = 1 << size;
1180 int ofs = element * element_size;
1181 #ifdef HOST_WORDS_BIGENDIAN
1182 /* Calculate the offset assuming fully little-endian,
1183 * then XOR to account for the order of the 8-byte units.
1184 */
1185 if (element_size < 8) {
1186 ofs ^= 8 - element_size;
1187 }
1188 #endif
1189 return neon_reg_offset(reg, 0) + ofs;
1190 }
1191
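/* Return a new temporary holding the 32-bit word PASS of NEON register REG. */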
1192 static TCGv_i32 neon_load_reg(int reg, int pass)
1193 {
1194 TCGv_i32 tmp = tcg_temp_new_i32();
1195 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1196 return tmp;
1197 }
1198
1199 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1200 {
1201 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1202
1203 switch (mop) {
1204 case MO_UB:
1205 tcg_gen_ld8u_i32(var, cpu_env, offset);
1206 break;
1207 case MO_UW:
1208 tcg_gen_ld16u_i32(var, cpu_env, offset);
1209 break;
1210 case MO_UL:
1211 tcg_gen_ld_i32(var, cpu_env, offset);
1212 break;
1213 default:
1214 g_assert_not_reached();
1215 }
1216 }
1217
1218 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1219 {
1220 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1221
1222 switch (mop) {
1223 case MO_UB:
1224 tcg_gen_ld8u_i64(var, cpu_env, offset);
1225 break;
1226 case MO_UW:
1227 tcg_gen_ld16u_i64(var, cpu_env, offset);
1228 break;
1229 case MO_UL:
1230 tcg_gen_ld32u_i64(var, cpu_env, offset);
1231 break;
1232 case MO_Q:
1233 tcg_gen_ld_i64(var, cpu_env, offset);
1234 break;
1235 default:
1236 g_assert_not_reached();
1237 }
1238 }
1239
1240 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1241 {
1242 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1243 tcg_temp_free_i32(var);
1244 }
1245
1246 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1247 {
1248 long offset = neon_element_offset(reg, ele, size);
1249
1250 switch (size) {
1251 case MO_8:
1252 tcg_gen_st8_i32(var, cpu_env, offset);
1253 break;
1254 case MO_16:
1255 tcg_gen_st16_i32(var, cpu_env, offset);
1256 break;
1257 case MO_32:
1258 tcg_gen_st_i32(var, cpu_env, offset);
1259 break;
1260 default:
1261 g_assert_not_reached();
1262 }
1263 }
1264
1265 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1266 {
1267 long offset = neon_element_offset(reg, ele, size);
1268
1269 switch (size) {
1270 case MO_8:
1271 tcg_gen_st8_i64(var, cpu_env, offset);
1272 break;
1273 case MO_16:
1274 tcg_gen_st16_i64(var, cpu_env, offset);
1275 break;
1276 case MO_32:
1277 tcg_gen_st32_i64(var, cpu_env, offset);
1278 break;
1279 case MO_64:
1280 tcg_gen_st_i64(var, cpu_env, offset);
1281 break;
1282 default:
1283 g_assert_not_reached();
1284 }
1285 }
1286
1287 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1288 {
1289 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1290 }
1291
1292 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1293 {
1294 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1295 }
1296
1297 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1298 {
1299 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1300 }
1301
1302 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1303 {
1304 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1305 }
1306
1307 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1308 {
1309 TCGv_ptr ret = tcg_temp_new_ptr();
1310 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1311 return ret;
1312 }
1313
1314 #define ARM_CP_RW_BIT (1 << 20)
1315
1316 /* Include the VFP and Neon decoders */
1317 #include "translate-vfp.inc.c"
1318 #include "translate-neon.inc.c"
1319
1320 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1321 {
1322 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1323 }
1324
1325 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1326 {
1327 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1328 }
1329
1330 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1331 {
1332 TCGv_i32 var = tcg_temp_new_i32();
1333 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1334 return var;
1335 }
1336
1337 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1338 {
1339 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1340 tcg_temp_free_i32(var);
1341 }
1342
1343 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1344 {
1345 iwmmxt_store_reg(cpu_M0, rn);
1346 }
1347
1348 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1349 {
1350 iwmmxt_load_reg(cpu_M0, rn);
1351 }
1352
1353 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1354 {
1355 iwmmxt_load_reg(cpu_V1, rn);
1356 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1357 }
1358
1359 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1360 {
1361 iwmmxt_load_reg(cpu_V1, rn);
1362 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1363 }
1364
1365 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1366 {
1367 iwmmxt_load_reg(cpu_V1, rn);
1368 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1369 }
1370
1371 #define IWMMXT_OP(name) \
1372 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1373 { \
1374 iwmmxt_load_reg(cpu_V1, rn); \
1375 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1376 }
1377
1378 #define IWMMXT_OP_ENV(name) \
1379 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1380 { \
1381 iwmmxt_load_reg(cpu_V1, rn); \
1382 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1383 }
1384
1385 #define IWMMXT_OP_ENV_SIZE(name) \
1386 IWMMXT_OP_ENV(name##b) \
1387 IWMMXT_OP_ENV(name##w) \
1388 IWMMXT_OP_ENV(name##l)
1389
1390 #define IWMMXT_OP_ENV1(name) \
1391 static inline void gen_op_iwmmxt_##name##_M0(void) \
1392 { \
1393 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1394 }
1395
1396 IWMMXT_OP(maddsq)
1397 IWMMXT_OP(madduq)
1398 IWMMXT_OP(sadb)
1399 IWMMXT_OP(sadw)
1400 IWMMXT_OP(mulslw)
1401 IWMMXT_OP(mulshw)
1402 IWMMXT_OP(mululw)
1403 IWMMXT_OP(muluhw)
1404 IWMMXT_OP(macsw)
1405 IWMMXT_OP(macuw)
1406
1407 IWMMXT_OP_ENV_SIZE(unpackl)
1408 IWMMXT_OP_ENV_SIZE(unpackh)
1409
1410 IWMMXT_OP_ENV1(unpacklub)
1411 IWMMXT_OP_ENV1(unpackluw)
1412 IWMMXT_OP_ENV1(unpacklul)
1413 IWMMXT_OP_ENV1(unpackhub)
1414 IWMMXT_OP_ENV1(unpackhuw)
1415 IWMMXT_OP_ENV1(unpackhul)
1416 IWMMXT_OP_ENV1(unpacklsb)
1417 IWMMXT_OP_ENV1(unpacklsw)
1418 IWMMXT_OP_ENV1(unpacklsl)
1419 IWMMXT_OP_ENV1(unpackhsb)
1420 IWMMXT_OP_ENV1(unpackhsw)
1421 IWMMXT_OP_ENV1(unpackhsl)
1422
1423 IWMMXT_OP_ENV_SIZE(cmpeq)
1424 IWMMXT_OP_ENV_SIZE(cmpgtu)
1425 IWMMXT_OP_ENV_SIZE(cmpgts)
1426
1427 IWMMXT_OP_ENV_SIZE(mins)
1428 IWMMXT_OP_ENV_SIZE(minu)
1429 IWMMXT_OP_ENV_SIZE(maxs)
1430 IWMMXT_OP_ENV_SIZE(maxu)
1431
1432 IWMMXT_OP_ENV_SIZE(subn)
1433 IWMMXT_OP_ENV_SIZE(addn)
1434 IWMMXT_OP_ENV_SIZE(subu)
1435 IWMMXT_OP_ENV_SIZE(addu)
1436 IWMMXT_OP_ENV_SIZE(subs)
1437 IWMMXT_OP_ENV_SIZE(adds)
1438
1439 IWMMXT_OP_ENV(avgb0)
1440 IWMMXT_OP_ENV(avgb1)
1441 IWMMXT_OP_ENV(avgw0)
1442 IWMMXT_OP_ENV(avgw1)
1443
1444 IWMMXT_OP_ENV(packuw)
1445 IWMMXT_OP_ENV(packul)
1446 IWMMXT_OP_ENV(packuq)
1447 IWMMXT_OP_ENV(packsw)
1448 IWMMXT_OP_ENV(packsl)
1449 IWMMXT_OP_ENV(packsq)
1450
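/* Record an iwMMXt wR register update by setting bit 1 (MUP) of wCon;
 * gen_op_iwmmxt_set_cup() below sets bit 0 (CUP) for control register
 * updates in the same way.
 */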
1451 static void gen_op_iwmmxt_set_mup(void)
1452 {
1453 TCGv_i32 tmp;
1454 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455 tcg_gen_ori_i32(tmp, tmp, 2);
1456 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1457 }
1458
1459 static void gen_op_iwmmxt_set_cup(void)
1460 {
1461 TCGv_i32 tmp;
1462 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463 tcg_gen_ori_i32(tmp, tmp, 1);
1464 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1465 }
1466
1467 static void gen_op_iwmmxt_setpsr_nz(void)
1468 {
1469 TCGv_i32 tmp = tcg_temp_new_i32();
1470 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1471 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1472 }
1473
1474 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1475 {
1476 iwmmxt_load_reg(cpu_V1, rn);
1477 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1478 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1479 }
1480
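/* Compute the effective address for an iwMMXt load/store into DEST,
 * handling pre/post indexing and base register writeback; returns
 * nonzero for an invalid addressing mode.
 */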
1481 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1482 TCGv_i32 dest)
1483 {
1484 int rd;
1485 uint32_t offset;
1486 TCGv_i32 tmp;
1487
1488 rd = (insn >> 16) & 0xf;
1489 tmp = load_reg(s, rd);
1490
1491 offset = (insn & 0xff) << ((insn >> 7) & 2);
1492 if (insn & (1 << 24)) {
1493 /* Pre indexed */
1494 if (insn & (1 << 23))
1495 tcg_gen_addi_i32(tmp, tmp, offset);
1496 else
1497 tcg_gen_addi_i32(tmp, tmp, -offset);
1498 tcg_gen_mov_i32(dest, tmp);
1499 if (insn & (1 << 21))
1500 store_reg(s, rd, tmp);
1501 else
1502 tcg_temp_free_i32(tmp);
1503 } else if (insn & (1 << 21)) {
1504 /* Post indexed */
1505 tcg_gen_mov_i32(dest, tmp);
1506 if (insn & (1 << 23))
1507 tcg_gen_addi_i32(tmp, tmp, offset);
1508 else
1509 tcg_gen_addi_i32(tmp, tmp, -offset);
1510 store_reg(s, rd, tmp);
1511 } else if (!(insn & (1 << 23)))
1512 return 1;
1513 return 0;
1514 }
1515
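/* Fetch an iwMMXt shift amount into DEST, either from a wCGR control
 * register (insn bit 8 set) or from the low 32 bits of a wR register,
 * masked with MASK; returns nonzero if the register choice is invalid.
 */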
1516 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1517 {
1518 int rd = (insn >> 0) & 0xf;
1519 TCGv_i32 tmp;
1520
1521 if (insn & (1 << 8)) {
1522 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1523 return 1;
1524 } else {
1525 tmp = iwmmxt_load_creg(rd);
1526 }
1527 } else {
1528 tmp = tcg_temp_new_i32();
1529 iwmmxt_load_reg(cpu_V0, rd);
1530 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1531 }
1532 tcg_gen_andi_i32(tmp, tmp, mask);
1533 tcg_gen_mov_i32(dest, tmp);
1534 tcg_temp_free_i32(tmp);
1535 return 0;
1536 }
1537
1538 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1539 (ie. an undefined instruction). */
1540 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1541 {
1542 int rd, wrd;
1543 int rdhi, rdlo, rd0, rd1, i;
1544 TCGv_i32 addr;
1545 TCGv_i32 tmp, tmp2, tmp3;
1546
1547 if ((insn & 0x0e000e00) == 0x0c000000) {
1548 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1549 wrd = insn & 0xf;
1550 rdlo = (insn >> 12) & 0xf;
1551 rdhi = (insn >> 16) & 0xf;
1552 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1553 iwmmxt_load_reg(cpu_V0, wrd);
1554 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1555 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1556 } else { /* TMCRR */
1557 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1558 iwmmxt_store_reg(cpu_V0, wrd);
1559 gen_op_iwmmxt_set_mup();
1560 }
1561 return 0;
1562 }
1563
1564 wrd = (insn >> 12) & 0xf;
1565 addr = tcg_temp_new_i32();
1566 if (gen_iwmmxt_address(s, insn, addr)) {
1567 tcg_temp_free_i32(addr);
1568 return 1;
1569 }
1570 if (insn & ARM_CP_RW_BIT) {
1571 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1572 tmp = tcg_temp_new_i32();
1573 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1574 iwmmxt_store_creg(wrd, tmp);
1575 } else {
1576 i = 1;
1577 if (insn & (1 << 8)) {
1578 if (insn & (1 << 22)) { /* WLDRD */
1579 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1580 i = 0;
1581 } else { /* WLDRW wRd */
1582 tmp = tcg_temp_new_i32();
1583 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1584 }
1585 } else {
1586 tmp = tcg_temp_new_i32();
1587 if (insn & (1 << 22)) { /* WLDRH */
1588 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1589 } else { /* WLDRB */
1590 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1591 }
1592 }
1593 if (i) {
1594 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1595 tcg_temp_free_i32(tmp);
1596 }
1597 gen_op_iwmmxt_movq_wRn_M0(wrd);
1598 }
1599 } else {
1600 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1601 tmp = iwmmxt_load_creg(wrd);
1602 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1603 } else {
1604 gen_op_iwmmxt_movq_M0_wRn(wrd);
1605 tmp = tcg_temp_new_i32();
1606 if (insn & (1 << 8)) {
1607 if (insn & (1 << 22)) { /* WSTRD */
1608 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1609 } else { /* WSTRW wRd */
1610 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1611 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1612 }
1613 } else {
1614 if (insn & (1 << 22)) { /* WSTRH */
1615 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1616 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1617 } else { /* WSTRB */
1618 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1619 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1620 }
1621 }
1622 }
1623 tcg_temp_free_i32(tmp);
1624 }
1625 tcg_temp_free_i32(addr);
1626 return 0;
1627 }
1628
1629 if ((insn & 0x0f000000) != 0x0e000000)
1630 return 1;
1631
1632 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1633 case 0x000: /* WOR */
1634 wrd = (insn >> 12) & 0xf;
1635 rd0 = (insn >> 0) & 0xf;
1636 rd1 = (insn >> 16) & 0xf;
1637 gen_op_iwmmxt_movq_M0_wRn(rd0);
1638 gen_op_iwmmxt_orq_M0_wRn(rd1);
1639 gen_op_iwmmxt_setpsr_nz();
1640 gen_op_iwmmxt_movq_wRn_M0(wrd);
1641 gen_op_iwmmxt_set_mup();
1642 gen_op_iwmmxt_set_cup();
1643 break;
1644 case 0x011: /* TMCR */
1645 if (insn & 0xf)
1646 return 1;
1647 rd = (insn >> 12) & 0xf;
1648 wrd = (insn >> 16) & 0xf;
1649 switch (wrd) {
1650 case ARM_IWMMXT_wCID:
1651 case ARM_IWMMXT_wCASF:
1652 break;
1653 case ARM_IWMMXT_wCon:
1654 gen_op_iwmmxt_set_cup();
1655 /* Fall through. */
1656 case ARM_IWMMXT_wCSSF:
1657 tmp = iwmmxt_load_creg(wrd);
1658 tmp2 = load_reg(s, rd);
1659 tcg_gen_andc_i32(tmp, tmp, tmp2);
1660 tcg_temp_free_i32(tmp2);
1661 iwmmxt_store_creg(wrd, tmp);
1662 break;
1663 case ARM_IWMMXT_wCGR0:
1664 case ARM_IWMMXT_wCGR1:
1665 case ARM_IWMMXT_wCGR2:
1666 case ARM_IWMMXT_wCGR3:
1667 gen_op_iwmmxt_set_cup();
1668 tmp = load_reg(s, rd);
1669 iwmmxt_store_creg(wrd, tmp);
1670 break;
1671 default:
1672 return 1;
1673 }
1674 break;
1675 case 0x100: /* WXOR */
1676 wrd = (insn >> 12) & 0xf;
1677 rd0 = (insn >> 0) & 0xf;
1678 rd1 = (insn >> 16) & 0xf;
1679 gen_op_iwmmxt_movq_M0_wRn(rd0);
1680 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1681 gen_op_iwmmxt_setpsr_nz();
1682 gen_op_iwmmxt_movq_wRn_M0(wrd);
1683 gen_op_iwmmxt_set_mup();
1684 gen_op_iwmmxt_set_cup();
1685 break;
1686 case 0x111: /* TMRC */
1687 if (insn & 0xf)
1688 return 1;
1689 rd = (insn >> 12) & 0xf;
1690 wrd = (insn >> 16) & 0xf;
1691 tmp = iwmmxt_load_creg(wrd);
1692 store_reg(s, rd, tmp);
1693 break;
1694 case 0x300: /* WANDN */
1695 wrd = (insn >> 12) & 0xf;
1696 rd0 = (insn >> 0) & 0xf;
1697 rd1 = (insn >> 16) & 0xf;
1698 gen_op_iwmmxt_movq_M0_wRn(rd0);
1699 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1700 gen_op_iwmmxt_andq_M0_wRn(rd1);
1701 gen_op_iwmmxt_setpsr_nz();
1702 gen_op_iwmmxt_movq_wRn_M0(wrd);
1703 gen_op_iwmmxt_set_mup();
1704 gen_op_iwmmxt_set_cup();
1705 break;
1706 case 0x200: /* WAND */
1707 wrd = (insn >> 12) & 0xf;
1708 rd0 = (insn >> 0) & 0xf;
1709 rd1 = (insn >> 16) & 0xf;
1710 gen_op_iwmmxt_movq_M0_wRn(rd0);
1711 gen_op_iwmmxt_andq_M0_wRn(rd1);
1712 gen_op_iwmmxt_setpsr_nz();
1713 gen_op_iwmmxt_movq_wRn_M0(wrd);
1714 gen_op_iwmmxt_set_mup();
1715 gen_op_iwmmxt_set_cup();
1716 break;
1717 case 0x810: case 0xa10: /* WMADD */
1718 wrd = (insn >> 12) & 0xf;
1719 rd0 = (insn >> 0) & 0xf;
1720 rd1 = (insn >> 16) & 0xf;
1721 gen_op_iwmmxt_movq_M0_wRn(rd0);
1722 if (insn & (1 << 21))
1723 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1724 else
1725 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1726 gen_op_iwmmxt_movq_wRn_M0(wrd);
1727 gen_op_iwmmxt_set_mup();
1728 break;
1729 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1730 wrd = (insn >> 12) & 0xf;
1731 rd0 = (insn >> 16) & 0xf;
1732 rd1 = (insn >> 0) & 0xf;
1733 gen_op_iwmmxt_movq_M0_wRn(rd0);
1734 switch ((insn >> 22) & 3) {
1735 case 0:
1736 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1737 break;
1738 case 1:
1739 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1740 break;
1741 case 2:
1742 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1743 break;
1744 case 3:
1745 return 1;
1746 }
1747 gen_op_iwmmxt_movq_wRn_M0(wrd);
1748 gen_op_iwmmxt_set_mup();
1749 gen_op_iwmmxt_set_cup();
1750 break;
1751 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1752 wrd = (insn >> 12) & 0xf;
1753 rd0 = (insn >> 16) & 0xf;
1754 rd1 = (insn >> 0) & 0xf;
1755 gen_op_iwmmxt_movq_M0_wRn(rd0);
1756 switch ((insn >> 22) & 3) {
1757 case 0:
1758 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1759 break;
1760 case 1:
1761 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1762 break;
1763 case 2:
1764 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1765 break;
1766 case 3:
1767 return 1;
1768 }
1769 gen_op_iwmmxt_movq_wRn_M0(wrd);
1770 gen_op_iwmmxt_set_mup();
1771 gen_op_iwmmxt_set_cup();
1772 break;
1773 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1774 wrd = (insn >> 12) & 0xf;
1775 rd0 = (insn >> 16) & 0xf;
1776 rd1 = (insn >> 0) & 0xf;
1777 gen_op_iwmmxt_movq_M0_wRn(rd0);
1778 if (insn & (1 << 22))
1779 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1780 else
1781 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1782 if (!(insn & (1 << 20)))
1783 gen_op_iwmmxt_addl_M0_wRn(wrd);
1784 gen_op_iwmmxt_movq_wRn_M0(wrd);
1785 gen_op_iwmmxt_set_mup();
1786 break;
1787 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1788 wrd = (insn >> 12) & 0xf;
1789 rd0 = (insn >> 16) & 0xf;
1790 rd1 = (insn >> 0) & 0xf;
1791 gen_op_iwmmxt_movq_M0_wRn(rd0);
1792 if (insn & (1 << 21)) {
1793 if (insn & (1 << 20))
1794 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1795 else
1796 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1797 } else {
1798 if (insn & (1 << 20))
1799 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1800 else
1801 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1802 }
1803 gen_op_iwmmxt_movq_wRn_M0(wrd);
1804 gen_op_iwmmxt_set_mup();
1805 break;
1806 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1807 wrd = (insn >> 12) & 0xf;
1808 rd0 = (insn >> 16) & 0xf;
1809 rd1 = (insn >> 0) & 0xf;
1810 gen_op_iwmmxt_movq_M0_wRn(rd0);
1811 if (insn & (1 << 21))
1812 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1813 else
1814 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1815 if (!(insn & (1 << 20))) {
1816 iwmmxt_load_reg(cpu_V1, wrd);
1817 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1818 }
1819 gen_op_iwmmxt_movq_wRn_M0(wrd);
1820 gen_op_iwmmxt_set_mup();
1821 break;
1822 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1823 wrd = (insn >> 12) & 0xf;
1824 rd0 = (insn >> 16) & 0xf;
1825 rd1 = (insn >> 0) & 0xf;
1826 gen_op_iwmmxt_movq_M0_wRn(rd0);
1827 switch ((insn >> 22) & 3) {
1828 case 0:
1829 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1830 break;
1831 case 1:
1832 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1833 break;
1834 case 2:
1835 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1836 break;
1837 case 3:
1838 return 1;
1839 }
1840 gen_op_iwmmxt_movq_wRn_M0(wrd);
1841 gen_op_iwmmxt_set_mup();
1842 gen_op_iwmmxt_set_cup();
1843 break;
1844 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1845 wrd = (insn >> 12) & 0xf;
1846 rd0 = (insn >> 16) & 0xf;
1847 rd1 = (insn >> 0) & 0xf;
1848 gen_op_iwmmxt_movq_M0_wRn(rd0);
1849 if (insn & (1 << 22)) {
1850 if (insn & (1 << 20))
1851 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1852 else
1853 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1854 } else {
1855 if (insn & (1 << 20))
1856 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1857 else
1858 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1859 }
1860 gen_op_iwmmxt_movq_wRn_M0(wrd);
1861 gen_op_iwmmxt_set_mup();
1862 gen_op_iwmmxt_set_cup();
1863 break;
1864 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1865 wrd = (insn >> 12) & 0xf;
1866 rd0 = (insn >> 16) & 0xf;
1867 rd1 = (insn >> 0) & 0xf;
1868 gen_op_iwmmxt_movq_M0_wRn(rd0);
1869 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1870 tcg_gen_andi_i32(tmp, tmp, 7);
1871 iwmmxt_load_reg(cpu_V1, rd1);
1872 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1873 tcg_temp_free_i32(tmp);
1874 gen_op_iwmmxt_movq_wRn_M0(wrd);
1875 gen_op_iwmmxt_set_mup();
1876 break;
1877 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1878 if (((insn >> 6) & 3) == 3)
1879 return 1;
1880 rd = (insn >> 12) & 0xf;
1881 wrd = (insn >> 16) & 0xf;
1882 tmp = load_reg(s, rd);
1883 gen_op_iwmmxt_movq_M0_wRn(wrd);
1884 switch ((insn >> 6) & 3) {
1885 case 0:
1886 tmp2 = tcg_const_i32(0xff);
1887 tmp3 = tcg_const_i32((insn & 7) << 3);
1888 break;
1889 case 1:
1890 tmp2 = tcg_const_i32(0xffff);
1891 tmp3 = tcg_const_i32((insn & 3) << 4);
1892 break;
1893 case 2:
1894 tmp2 = tcg_const_i32(0xffffffff);
1895 tmp3 = tcg_const_i32((insn & 1) << 5);
1896 break;
1897 default:
1898 tmp2 = NULL;
1899 tmp3 = NULL;
1900 }
1901 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1902 tcg_temp_free_i32(tmp3);
1903 tcg_temp_free_i32(tmp2);
1904 tcg_temp_free_i32(tmp);
1905 gen_op_iwmmxt_movq_wRn_M0(wrd);
1906 gen_op_iwmmxt_set_mup();
1907 break;
1908 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1909 rd = (insn >> 12) & 0xf;
1910 wrd = (insn >> 16) & 0xf;
1911 if (rd == 15 || ((insn >> 22) & 3) == 3)
1912 return 1;
1913 gen_op_iwmmxt_movq_M0_wRn(wrd);
1914 tmp = tcg_temp_new_i32();
1915 switch ((insn >> 22) & 3) {
1916 case 0:
1917 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1918 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1919 if (insn & 8) {
1920 tcg_gen_ext8s_i32(tmp, tmp);
1921 } else {
1922 tcg_gen_andi_i32(tmp, tmp, 0xff);
1923 }
1924 break;
1925 case 1:
1926 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1927 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1928 if (insn & 8) {
1929 tcg_gen_ext16s_i32(tmp, tmp);
1930 } else {
1931 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1932 }
1933 break;
1934 case 2:
1935 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1936 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1937 break;
1938 }
1939 store_reg(s, rd, tmp);
1940 break;
1941 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1942 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1943 return 1;
1944 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1945 switch ((insn >> 22) & 3) {
1946 case 0:
1947 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1948 break;
1949 case 1:
1950 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1951 break;
1952 case 2:
1953 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1954 break;
1955 }
1956 tcg_gen_shli_i32(tmp, tmp, 28);
1957 gen_set_nzcv(tmp);
1958 tcg_temp_free_i32(tmp);
1959 break;
1960 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1961 if (((insn >> 6) & 3) == 3)
1962 return 1;
1963 rd = (insn >> 12) & 0xf;
1964 wrd = (insn >> 16) & 0xf;
1965 tmp = load_reg(s, rd);
1966 switch ((insn >> 6) & 3) {
1967 case 0:
1968 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1969 break;
1970 case 1:
1971 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1972 break;
1973 case 2:
1974 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1975 break;
1976 }
1977 tcg_temp_free_i32(tmp);
1978 gen_op_iwmmxt_movq_wRn_M0(wrd);
1979 gen_op_iwmmxt_set_mup();
1980 break;
1981 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1982 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1983 return 1;
1984 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1985 tmp2 = tcg_temp_new_i32();
1986 tcg_gen_mov_i32(tmp2, tmp);
1987 switch ((insn >> 22) & 3) {
1988 case 0:
1989 for (i = 0; i < 7; i++) {
1990 tcg_gen_shli_i32(tmp2, tmp2, 4);
1991 tcg_gen_and_i32(tmp, tmp, tmp2);
1992 }
1993 break;
1994 case 1:
1995 for (i = 0; i < 3; i++) {
1996 tcg_gen_shli_i32(tmp2, tmp2, 8);
1997 tcg_gen_and_i32(tmp, tmp, tmp2);
1998 }
1999 break;
2000 case 2:
2001 tcg_gen_shli_i32(tmp2, tmp2, 16);
2002 tcg_gen_and_i32(tmp, tmp, tmp2);
2003 break;
2004 }
2005 gen_set_nzcv(tmp);
2006 tcg_temp_free_i32(tmp2);
2007 tcg_temp_free_i32(tmp);
2008 break;
2009 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2010 wrd = (insn >> 12) & 0xf;
2011 rd0 = (insn >> 16) & 0xf;
2012 gen_op_iwmmxt_movq_M0_wRn(rd0);
2013 switch ((insn >> 22) & 3) {
2014 case 0:
2015 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2016 break;
2017 case 1:
2018 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2019 break;
2020 case 2:
2021 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2022 break;
2023 case 3:
2024 return 1;
2025 }
2026 gen_op_iwmmxt_movq_wRn_M0(wrd);
2027 gen_op_iwmmxt_set_mup();
2028 break;
2029 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2030 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2031 return 1;
2032 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2033 tmp2 = tcg_temp_new_i32();
2034 tcg_gen_mov_i32(tmp2, tmp);
2035 switch ((insn >> 22) & 3) {
2036 case 0:
2037 for (i = 0; i < 7; i++) {
2038 tcg_gen_shli_i32(tmp2, tmp2, 4);
2039 tcg_gen_or_i32(tmp, tmp, tmp2);
2040 }
2041 break;
2042 case 1:
2043 for (i = 0; i < 3; i++) {
2044 tcg_gen_shli_i32(tmp2, tmp2, 8);
2045 tcg_gen_or_i32(tmp, tmp, tmp2);
2046 }
2047 break;
2048 case 2:
2049 tcg_gen_shli_i32(tmp2, tmp2, 16);
2050 tcg_gen_or_i32(tmp, tmp, tmp2);
2051 break;
2052 }
2053 gen_set_nzcv(tmp);
2054 tcg_temp_free_i32(tmp2);
2055 tcg_temp_free_i32(tmp);
2056 break;
2057 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2058 rd = (insn >> 12) & 0xf;
2059 rd0 = (insn >> 16) & 0xf;
2060 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2061 return 1;
2062 gen_op_iwmmxt_movq_M0_wRn(rd0);
2063 tmp = tcg_temp_new_i32();
2064 switch ((insn >> 22) & 3) {
2065 case 0:
2066 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2067 break;
2068 case 1:
2069 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2070 break;
2071 case 2:
2072 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2073 break;
2074 }
2075 store_reg(s, rd, tmp);
2076 break;
2077 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2078 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2079 wrd = (insn >> 12) & 0xf;
2080 rd0 = (insn >> 16) & 0xf;
2081 rd1 = (insn >> 0) & 0xf;
2082 gen_op_iwmmxt_movq_M0_wRn(rd0);
2083 switch ((insn >> 22) & 3) {
2084 case 0:
2085 if (insn & (1 << 21))
2086 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2087 else
2088 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2089 break;
2090 case 1:
2091 if (insn & (1 << 21))
2092 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2093 else
2094 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2095 break;
2096 case 2:
2097 if (insn & (1 << 21))
2098 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2099 else
2100 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2101 break;
2102 case 3:
2103 return 1;
2104 }
2105 gen_op_iwmmxt_movq_wRn_M0(wrd);
2106 gen_op_iwmmxt_set_mup();
2107 gen_op_iwmmxt_set_cup();
2108 break;
2109 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2110 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2111 wrd = (insn >> 12) & 0xf;
2112 rd0 = (insn >> 16) & 0xf;
2113 gen_op_iwmmxt_movq_M0_wRn(rd0);
2114 switch ((insn >> 22) & 3) {
2115 case 0:
2116 if (insn & (1 << 21))
2117 gen_op_iwmmxt_unpacklsb_M0();
2118 else
2119 gen_op_iwmmxt_unpacklub_M0();
2120 break;
2121 case 1:
2122 if (insn & (1 << 21))
2123 gen_op_iwmmxt_unpacklsw_M0();
2124 else
2125 gen_op_iwmmxt_unpackluw_M0();
2126 break;
2127 case 2:
2128 if (insn & (1 << 21))
2129 gen_op_iwmmxt_unpacklsl_M0();
2130 else
2131 gen_op_iwmmxt_unpacklul_M0();
2132 break;
2133 case 3:
2134 return 1;
2135 }
2136 gen_op_iwmmxt_movq_wRn_M0(wrd);
2137 gen_op_iwmmxt_set_mup();
2138 gen_op_iwmmxt_set_cup();
2139 break;
2140 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2141 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2142 wrd = (insn >> 12) & 0xf;
2143 rd0 = (insn >> 16) & 0xf;
2144 gen_op_iwmmxt_movq_M0_wRn(rd0);
2145 switch ((insn >> 22) & 3) {
2146 case 0:
2147 if (insn & (1 << 21))
2148 gen_op_iwmmxt_unpackhsb_M0();
2149 else
2150 gen_op_iwmmxt_unpackhub_M0();
2151 break;
2152 case 1:
2153 if (insn & (1 << 21))
2154 gen_op_iwmmxt_unpackhsw_M0();
2155 else
2156 gen_op_iwmmxt_unpackhuw_M0();
2157 break;
2158 case 2:
2159 if (insn & (1 << 21))
2160 gen_op_iwmmxt_unpackhsl_M0();
2161 else
2162 gen_op_iwmmxt_unpackhul_M0();
2163 break;
2164 case 3:
2165 return 1;
2166 }
2167 gen_op_iwmmxt_movq_wRn_M0(wrd);
2168 gen_op_iwmmxt_set_mup();
2169 gen_op_iwmmxt_set_cup();
2170 break;
2171 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2172 case 0x214: case 0x614: case 0xa14: case 0xe14:
2173 if (((insn >> 22) & 3) == 0)
2174 return 1;
2175 wrd = (insn >> 12) & 0xf;
2176 rd0 = (insn >> 16) & 0xf;
2177 gen_op_iwmmxt_movq_M0_wRn(rd0);
2178 tmp = tcg_temp_new_i32();
2179 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2180 tcg_temp_free_i32(tmp);
2181 return 1;
2182 }
2183 switch ((insn >> 22) & 3) {
2184 case 1:
2185 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2186 break;
2187 case 2:
2188 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2189 break;
2190 case 3:
2191 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2192 break;
2193 }
2194 tcg_temp_free_i32(tmp);
2195 gen_op_iwmmxt_movq_wRn_M0(wrd);
2196 gen_op_iwmmxt_set_mup();
2197 gen_op_iwmmxt_set_cup();
2198 break;
2199 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2200 case 0x014: case 0x414: case 0x814: case 0xc14:
2201 if (((insn >> 22) & 3) == 0)
2202 return 1;
2203 wrd = (insn >> 12) & 0xf;
2204 rd0 = (insn >> 16) & 0xf;
2205 gen_op_iwmmxt_movq_M0_wRn(rd0);
2206 tmp = tcg_temp_new_i32();
2207 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2208 tcg_temp_free_i32(tmp);
2209 return 1;
2210 }
2211 switch ((insn >> 22) & 3) {
2212 case 1:
2213 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2214 break;
2215 case 2:
2216 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2217 break;
2218 case 3:
2219 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2220 break;
2221 }
2222 tcg_temp_free_i32(tmp);
2223 gen_op_iwmmxt_movq_wRn_M0(wrd);
2224 gen_op_iwmmxt_set_mup();
2225 gen_op_iwmmxt_set_cup();
2226 break;
2227 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2228 case 0x114: case 0x514: case 0x914: case 0xd14:
2229 if (((insn >> 22) & 3) == 0)
2230 return 1;
2231 wrd = (insn >> 12) & 0xf;
2232 rd0 = (insn >> 16) & 0xf;
2233 gen_op_iwmmxt_movq_M0_wRn(rd0);
2234 tmp = tcg_temp_new_i32();
2235 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2236 tcg_temp_free_i32(tmp);
2237 return 1;
2238 }
2239 switch ((insn >> 22) & 3) {
2240 case 1:
2241 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2242 break;
2243 case 2:
2244 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2245 break;
2246 case 3:
2247 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2248 break;
2249 }
2250 tcg_temp_free_i32(tmp);
2251 gen_op_iwmmxt_movq_wRn_M0(wrd);
2252 gen_op_iwmmxt_set_mup();
2253 gen_op_iwmmxt_set_cup();
2254 break;
2255 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2256 case 0x314: case 0x714: case 0xb14: case 0xf14:
2257 if (((insn >> 22) & 3) == 0)
2258 return 1;
2259 wrd = (insn >> 12) & 0xf;
2260 rd0 = (insn >> 16) & 0xf;
2261 gen_op_iwmmxt_movq_M0_wRn(rd0);
2262 tmp = tcg_temp_new_i32();
2263 switch ((insn >> 22) & 3) {
2264 case 1:
2265 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2266 tcg_temp_free_i32(tmp);
2267 return 1;
2268 }
2269 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2270 break;
2271 case 2:
2272 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2273 tcg_temp_free_i32(tmp);
2274 return 1;
2275 }
2276 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2277 break;
2278 case 3:
2279 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2280 tcg_temp_free_i32(tmp);
2281 return 1;
2282 }
2283 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2284 break;
2285 }
2286 tcg_temp_free_i32(tmp);
2287 gen_op_iwmmxt_movq_wRn_M0(wrd);
2288 gen_op_iwmmxt_set_mup();
2289 gen_op_iwmmxt_set_cup();
2290 break;
2291 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2292 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2293 wrd = (insn >> 12) & 0xf;
2294 rd0 = (insn >> 16) & 0xf;
2295 rd1 = (insn >> 0) & 0xf;
2296 gen_op_iwmmxt_movq_M0_wRn(rd0);
2297 switch ((insn >> 22) & 3) {
2298 case 0:
2299 if (insn & (1 << 21))
2300 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2301 else
2302 gen_op_iwmmxt_minub_M0_wRn(rd1);
2303 break;
2304 case 1:
2305 if (insn & (1 << 21))
2306 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2307 else
2308 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2309 break;
2310 case 2:
2311 if (insn & (1 << 21))
2312 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2313 else
2314 gen_op_iwmmxt_minul_M0_wRn(rd1);
2315 break;
2316 case 3:
2317 return 1;
2318 }
2319 gen_op_iwmmxt_movq_wRn_M0(wrd);
2320 gen_op_iwmmxt_set_mup();
2321 break;
2322 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2323 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2324 wrd = (insn >> 12) & 0xf;
2325 rd0 = (insn >> 16) & 0xf;
2326 rd1 = (insn >> 0) & 0xf;
2327 gen_op_iwmmxt_movq_M0_wRn(rd0);
2328 switch ((insn >> 22) & 3) {
2329 case 0:
2330 if (insn & (1 << 21))
2331 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2332 else
2333 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2334 break;
2335 case 1:
2336 if (insn & (1 << 21))
2337 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2338 else
2339 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2340 break;
2341 case 2:
2342 if (insn & (1 << 21))
2343 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2344 else
2345 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2346 break;
2347 case 3:
2348 return 1;
2349 }
2350 gen_op_iwmmxt_movq_wRn_M0(wrd);
2351 gen_op_iwmmxt_set_mup();
2352 break;
2353 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2354 case 0x402: case 0x502: case 0x602: case 0x702:
2355 wrd = (insn >> 12) & 0xf;
2356 rd0 = (insn >> 16) & 0xf;
2357 rd1 = (insn >> 0) & 0xf;
2358 gen_op_iwmmxt_movq_M0_wRn(rd0);
2359 tmp = tcg_const_i32((insn >> 20) & 3);
2360 iwmmxt_load_reg(cpu_V1, rd1);
2361 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2362 tcg_temp_free_i32(tmp);
2363 gen_op_iwmmxt_movq_wRn_M0(wrd);
2364 gen_op_iwmmxt_set_mup();
2365 break;
2366 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2367 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2368 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2369 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2370 wrd = (insn >> 12) & 0xf;
2371 rd0 = (insn >> 16) & 0xf;
2372 rd1 = (insn >> 0) & 0xf;
2373 gen_op_iwmmxt_movq_M0_wRn(rd0);
2374 switch ((insn >> 20) & 0xf) {
2375 case 0x0:
2376 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2377 break;
2378 case 0x1:
2379 gen_op_iwmmxt_subub_M0_wRn(rd1);
2380 break;
2381 case 0x3:
2382 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2383 break;
2384 case 0x4:
2385 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2386 break;
2387 case 0x5:
2388 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2389 break;
2390 case 0x7:
2391 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2392 break;
2393 case 0x8:
2394 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2395 break;
2396 case 0x9:
2397 gen_op_iwmmxt_subul_M0_wRn(rd1);
2398 break;
2399 case 0xb:
2400 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2401 break;
2402 default:
2403 return 1;
2404 }
2405 gen_op_iwmmxt_movq_wRn_M0(wrd);
2406 gen_op_iwmmxt_set_mup();
2407 gen_op_iwmmxt_set_cup();
2408 break;
2409 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2410 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2411 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2412 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2413 wrd = (insn >> 12) & 0xf;
2414 rd0 = (insn >> 16) & 0xf;
2415 gen_op_iwmmxt_movq_M0_wRn(rd0);
2416 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2417 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2418 tcg_temp_free_i32(tmp);
2419 gen_op_iwmmxt_movq_wRn_M0(wrd);
2420 gen_op_iwmmxt_set_mup();
2421 gen_op_iwmmxt_set_cup();
2422 break;
2423 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2424 case 0x418: case 0x518: case 0x618: case 0x718:
2425 case 0x818: case 0x918: case 0xa18: case 0xb18:
2426 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2427 wrd = (insn >> 12) & 0xf;
2428 rd0 = (insn >> 16) & 0xf;
2429 rd1 = (insn >> 0) & 0xf;
2430 gen_op_iwmmxt_movq_M0_wRn(rd0);
2431 switch ((insn >> 20) & 0xf) {
2432 case 0x0:
2433 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2434 break;
2435 case 0x1:
2436 gen_op_iwmmxt_addub_M0_wRn(rd1);
2437 break;
2438 case 0x3:
2439 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2440 break;
2441 case 0x4:
2442 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2443 break;
2444 case 0x5:
2445 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2446 break;
2447 case 0x7:
2448 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2449 break;
2450 case 0x8:
2451 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2452 break;
2453 case 0x9:
2454 gen_op_iwmmxt_addul_M0_wRn(rd1);
2455 break;
2456 case 0xb:
2457 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2458 break;
2459 default:
2460 return 1;
2461 }
2462 gen_op_iwmmxt_movq_wRn_M0(wrd);
2463 gen_op_iwmmxt_set_mup();
2464 gen_op_iwmmxt_set_cup();
2465 break;
2466 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2467 case 0x408: case 0x508: case 0x608: case 0x708:
2468 case 0x808: case 0x908: case 0xa08: case 0xb08:
2469 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2470 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2471 return 1;
2472 wrd = (insn >> 12) & 0xf;
2473 rd0 = (insn >> 16) & 0xf;
2474 rd1 = (insn >> 0) & 0xf;
2475 gen_op_iwmmxt_movq_M0_wRn(rd0);
2476 switch ((insn >> 22) & 3) {
2477 case 1:
2478 if (insn & (1 << 21))
2479 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2480 else
2481 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2482 break;
2483 case 2:
2484 if (insn & (1 << 21))
2485 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2486 else
2487 gen_op_iwmmxt_packul_M0_wRn(rd1);
2488 break;
2489 case 3:
2490 if (insn & (1 << 21))
2491 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2492 else
2493 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2494 break;
2495 }
2496 gen_op_iwmmxt_movq_wRn_M0(wrd);
2497 gen_op_iwmmxt_set_mup();
2498 gen_op_iwmmxt_set_cup();
2499 break;
2500 case 0x201: case 0x203: case 0x205: case 0x207:
2501 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2502 case 0x211: case 0x213: case 0x215: case 0x217:
2503 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2504 wrd = (insn >> 5) & 0xf;
2505 rd0 = (insn >> 12) & 0xf;
2506 rd1 = (insn >> 0) & 0xf;
2507 if (rd0 == 0xf || rd1 == 0xf)
2508 return 1;
2509 gen_op_iwmmxt_movq_M0_wRn(wrd);
2510 tmp = load_reg(s, rd0);
2511 tmp2 = load_reg(s, rd1);
2512 switch ((insn >> 16) & 0xf) {
2513 case 0x0: /* TMIA */
2514 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2515 break;
2516 case 0x8: /* TMIAPH */
2517 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2518 break;
2519 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2520 if (insn & (1 << 16))
2521 tcg_gen_shri_i32(tmp, tmp, 16);
2522 if (insn & (1 << 17))
2523 tcg_gen_shri_i32(tmp2, tmp2, 16);
2524 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2525 break;
2526 default:
2527 tcg_temp_free_i32(tmp2);
2528 tcg_temp_free_i32(tmp);
2529 return 1;
2530 }
2531 tcg_temp_free_i32(tmp2);
2532 tcg_temp_free_i32(tmp);
2533 gen_op_iwmmxt_movq_wRn_M0(wrd);
2534 gen_op_iwmmxt_set_mup();
2535 break;
2536 default:
2537 return 1;
2538 }
2539
2540 return 0;
2541 }
2542
2543 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2544 (i.e. an undefined instruction). */
2545 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2546 {
2547 int acc, rd0, rd1, rdhi, rdlo;
2548 TCGv_i32 tmp, tmp2;
2549
2550 if ((insn & 0x0ff00f10) == 0x0e200010) {
2551 /* Multiply with Internal Accumulate Format */
2552 rd0 = (insn >> 12) & 0xf;
2553 rd1 = insn & 0xf;
2554 acc = (insn >> 5) & 7;
2555
2556 if (acc != 0)
2557 return 1;
2558
2559 tmp = load_reg(s, rd0);
2560 tmp2 = load_reg(s, rd1);
2561 switch ((insn >> 16) & 0xf) {
2562 case 0x0: /* MIA */
2563 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2564 break;
2565 case 0x8: /* MIAPH */
2566 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2567 break;
2568 case 0xc: /* MIABB */
2569 case 0xd: /* MIABT */
2570 case 0xe: /* MIATB */
2571 case 0xf: /* MIATT */
2572 if (insn & (1 << 16))
2573 tcg_gen_shri_i32(tmp, tmp, 16);
2574 if (insn & (1 << 17))
2575 tcg_gen_shri_i32(tmp2, tmp2, 16);
2576 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2577 break;
2578 default:
2579 return 1;
2580 }
2581 tcg_temp_free_i32(tmp2);
2582 tcg_temp_free_i32(tmp);
2583
2584 gen_op_iwmmxt_movq_wRn_M0(acc);
2585 return 0;
2586 }
2587
2588 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2589 /* Internal Accumulator Access Format */
2590 rdhi = (insn >> 16) & 0xf;
2591 rdlo = (insn >> 12) & 0xf;
2592 acc = insn & 7;
2593
2594 if (acc != 0)
2595 return 1;
2596
2597 if (insn & ARM_CP_RW_BIT) { /* MRA */
2598 iwmmxt_load_reg(cpu_V0, acc);
2599 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2600 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
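/* acc0 is 40 bits wide, so only bits [39:32] are kept in rdhi. */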
2601 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2602 } else { /* MAR */
2603 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2604 iwmmxt_store_reg(cpu_V0, acc);
2605 }
2606 return 0;
2607 }
2608
2609 return 1;
2610 }
2611
2612 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2613 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2614 if (dc_isar_feature(aa32_simd_r32, s)) { \
2615 reg = (((insn) >> (bigbit)) & 0x0f) \
2616 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2617 } else { \
2618 if (insn & (1 << (smallbit))) \
2619 return 1; \
2620 reg = ((insn) >> (bigbit)) & 0x0f; \
2621 }} while (0)
2622
2623 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2624 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2625 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
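/*
 * For example, VFP_DREG_D(reg, insn) rebuilds the 5-bit D register index
 * from Vd = insn[15:12] and D = insn[22].  When 32 double registers are
 * present (aa32_simd_r32) it expands to
 *     reg = ((insn >> 12) & 0x0f) | ((insn >> 18) & 0x10);
 * otherwise a set bit 22 makes the insn UNDEF (return 1).
 */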
2626
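/*
 * Duplicate one 16-bit lane across a 32-bit value: gen_neon_dup_low16
 * turns 0x????cdef into 0xcdefcdef, gen_neon_dup_high16 turns 0xabcd????
 * into 0xabcdabcd.
 */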
2627 static void gen_neon_dup_low16(TCGv_i32 var)
2628 {
2629 TCGv_i32 tmp = tcg_temp_new_i32();
2630 tcg_gen_ext16u_i32(var, var);
2631 tcg_gen_shli_i32(tmp, var, 16);
2632 tcg_gen_or_i32(var, var, tmp);
2633 tcg_temp_free_i32(tmp);
2634 }
2635
2636 static void gen_neon_dup_high16(TCGv_i32 var)
2637 {
2638 TCGv_i32 tmp = tcg_temp_new_i32();
2639 tcg_gen_andi_i32(var, var, 0xffff0000);
2640 tcg_gen_shri_i32(tmp, var, 16);
2641 tcg_gen_or_i32(var, var, tmp);
2642 tcg_temp_free_i32(tmp);
2643 }
2644
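/*
 * Direct block chaining (goto_tb) is only used when the destination lies
 * on the same guest page as this TB (which may span two pages), so that a
 * change of the page mapping cannot leave a stale direct jump behind;
 * other destinations go through the indirect lookup in gen_goto_tb() below.
 */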
2645 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2646 {
2647 #ifndef CONFIG_USER_ONLY
2648 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2649 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2650 #else
2651 return true;
2652 #endif
2653 }
2654
2655 static void gen_goto_ptr(void)
2656 {
2657 tcg_gen_lookup_and_goto_ptr();
2658 }
2659
2660 /* This will end the TB but doesn't guarantee we'll return to
2661 * cpu_loop_exec. Any live exit_requests will be processed as we
2662 * enter the next TB.
2663 */
2664 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2665 {
2666 if (use_goto_tb(s, dest)) {
2667 tcg_gen_goto_tb(n);
2668 gen_set_pc_im(s, dest);
2669 tcg_gen_exit_tb(s->base.tb, n);
2670 } else {
2671 gen_set_pc_im(s, dest);
2672 gen_goto_ptr();
2673 }
2674 s->base.is_jmp = DISAS_NORETURN;
2675 }
2676
2677 static inline void gen_jmp(DisasContext *s, uint32_t dest)
2678 {
2679 if (unlikely(is_singlestepping(s))) {
2680 /* An indirect jump so that we still trigger the debug exception. */
2681 gen_set_pc_im(s, dest);
2682 s->base.is_jmp = DISAS_JUMP;
2683 } else {
2684 gen_goto_tb(s, 0, dest);
2685 }
2686 }
2687
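/*
 * 16x16->32 signed multiply used by the SMULxy/SMLAxy family: x and y
 * select the top (nonzero) or bottom (zero) halfword of t0 and t1
 * respectively, each sign-extended before the multiply.
 */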
2688 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2689 {
2690 if (x)
2691 tcg_gen_sari_i32(t0, t0, 16);
2692 else
2693 gen_sxth(t0);
2694 if (y)
2695 tcg_gen_sari_i32(t1, t1, 16);
2696 else
2697 gen_sxth(t1);
2698 tcg_gen_mul_i32(t0, t0, t1);
2699 }
2700
2701 /* Return the mask of PSR bits set by a MSR instruction. */
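/*
 * For example, an MSR that selects the c and f fields (flags == 0b1001)
 * produces mask = 0xff0000ff here, before the validity, execution-state
 * and privilege filtering below.
 */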
2702 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2703 {
2704 uint32_t mask = 0;
2705
2706 if (flags & (1 << 0)) {
2707 mask |= 0xff;
2708 }
2709 if (flags & (1 << 1)) {
2710 mask |= 0xff00;
2711 }
2712 if (flags & (1 << 2)) {
2713 mask |= 0xff0000;
2714 }
2715 if (flags & (1 << 3)) {
2716 mask |= 0xff000000;
2717 }
2718
2719 /* Mask out undefined and reserved bits. */
2720 mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2721
2722 /* Mask out execution state. */
2723 if (!spsr) {
2724 mask &= ~CPSR_EXEC;
2725 }
2726
2727 /* Mask out privileged bits. */
2728 if (IS_USER(s)) {
2729 mask &= CPSR_USER;
2730 }
2731 return mask;
2732 }
2733
2734 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2735 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2736 {
2737 TCGv_i32 tmp;
2738 if (spsr) {
2739 /* ??? This is also undefined in system mode. */
2740 if (IS_USER(s))
2741 return 1;
2742
2743 tmp = load_cpu_field(spsr);
2744 tcg_gen_andi_i32(tmp, tmp, ~mask);
2745 tcg_gen_andi_i32(t0, t0, mask);
2746 tcg_gen_or_i32(tmp, tmp, t0);
2747 store_cpu_field(tmp, spsr);
2748 } else {
2749 gen_set_cpsr(t0, mask);
2750 }
2751 tcg_temp_free_i32(t0);
2752 gen_lookup_tb(s);
2753 return 0;
2754 }
2755
2756 /* Returns nonzero if access to the PSR is not permitted. */
2757 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2758 {
2759 TCGv_i32 tmp;
2760 tmp = tcg_temp_new_i32();
2761 tcg_gen_movi_i32(tmp, val);
2762 return gen_set_psr(s, mask, spsr, tmp);
2763 }
2764
2765 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2766 int *tgtmode, int *regno)
2767 {
2768 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2769 * the target mode and register number, and identify the various
2770 * unpredictable cases.
2771 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2772 * + executed in user mode
2773 * + using R15 as the src/dest register
2774 * + accessing an unimplemented register
2775 * + accessing a register that's inaccessible at current PL/security state*
2776 * + accessing a register that you could access with a different insn
2777 * We choose to UNDEF in all these cases.
2778 * Since we don't know which of the various AArch32 modes we are in
2779 * we have to defer some checks to runtime.
2780 * Accesses to Monitor mode registers from Secure EL1 (which implies
2781 * that EL3 is AArch64) must trap to EL3.
2782 *
2783 * If the access checks fail this function will emit code to take
2784 * an exception and return false. Otherwise it will return true,
2785 * and set *tgtmode and *regno appropriately.
2786 */
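/*
 * For example, MRS r2, SPSR_irq has r == 1, sysm == 0x10, which decodes
 * below to *tgtmode = ARM_CPU_MODE_IRQ, *regno = 16; MRS r2, r13_fiq has
 * r == 0, sysm == 0xd, giving ARM_CPU_MODE_FIQ and *regno = 13.
 */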
2787 int exc_target = default_exception_el(s);
2788
2789 /* These instructions are present only in ARMv8, or in ARMv7 with the
2790 * Virtualization Extensions.
2791 */
2792 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2793 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2794 goto undef;
2795 }
2796
2797 if (IS_USER(s) || rn == 15) {
2798 goto undef;
2799 }
2800
2801 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2802 * of registers into (r, sysm).
2803 */
2804 if (r) {
2805 /* SPSRs for other modes */
2806 switch (sysm) {
2807 case 0xe: /* SPSR_fiq */
2808 *tgtmode = ARM_CPU_MODE_FIQ;
2809 break;
2810 case 0x10: /* SPSR_irq */
2811 *tgtmode = ARM_CPU_MODE_IRQ;
2812 break;
2813 case 0x12: /* SPSR_svc */
2814 *tgtmode = ARM_CPU_MODE_SVC;
2815 break;
2816 case 0x14: /* SPSR_abt */
2817 *tgtmode = ARM_CPU_MODE_ABT;
2818 break;
2819 case 0x16: /* SPSR_und */
2820 *tgtmode = ARM_CPU_MODE_UND;
2821 break;
2822 case 0x1c: /* SPSR_mon */
2823 *tgtmode = ARM_CPU_MODE_MON;
2824 break;
2825 case 0x1e: /* SPSR_hyp */
2826 *tgtmode = ARM_CPU_MODE_HYP;
2827 break;
2828 default: /* unallocated */
2829 goto undef;
2830 }
2831 /* We arbitrarily assign SPSR a register number of 16. */
2832 *regno = 16;
2833 } else {
2834 /* general purpose registers for other modes */
2835 switch (sysm) {
2836 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2837 *tgtmode = ARM_CPU_MODE_USR;
2838 *regno = sysm + 8;
2839 break;
2840 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2841 *tgtmode = ARM_CPU_MODE_FIQ;
2842 *regno = sysm;
2843 break;
2844 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2845 *tgtmode = ARM_CPU_MODE_IRQ;
2846 *regno = sysm & 1 ? 13 : 14;
2847 break;
2848 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2849 *tgtmode = ARM_CPU_MODE_SVC;
2850 *regno = sysm & 1 ? 13 : 14;
2851 break;
2852 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2853 *tgtmode = ARM_CPU_MODE_ABT;
2854 *regno = sysm & 1 ? 13 : 14;
2855 break;
2856 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2857 *tgtmode = ARM_CPU_MODE_UND;
2858 *regno = sysm & 1 ? 13 : 14;
2859 break;
2860 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2861 *tgtmode = ARM_CPU_MODE_MON;
2862 *regno = sysm & 1 ? 13 : 14;
2863 break;
2864 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2865 *tgtmode = ARM_CPU_MODE_HYP;
2866 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2867 *regno = sysm & 1 ? 13 : 17;
2868 break;
2869 default: /* unallocated */
2870 goto undef;
2871 }
2872 }
2873
2874 /* Catch the 'accessing inaccessible register' cases we can detect
2875 * at translate time.
2876 */
2877 switch (*tgtmode) {
2878 case ARM_CPU_MODE_MON:
2879 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2880 goto undef;
2881 }
2882 if (s->current_el == 1) {
2883 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2884 * then accesses to Mon registers trap to EL3
2885 */
2886 exc_target = 3;
2887 goto undef;
2888 }
2889 break;
2890 case ARM_CPU_MODE_HYP:
2891 /*
2892 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2893 * (and so we can forbid accesses from EL2 or below). elr_hyp
2894 * can be accessed also from Hyp mode, so forbid accesses from
2895 * EL0 or EL1.
2896 */
2897 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2898 (s->current_el < 3 && *regno != 17)) {
2899 goto undef;
2900 }
2901 break;
2902 default:
2903 break;
2904 }
2905
2906 return true;
2907
2908 undef:
2909 /* If we get here then some access check did not pass */
2910 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2911 syn_uncategorized(), exc_target);
2912 return false;
2913 }
2914
2915 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2916 {
2917 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2918 int tgtmode = 0, regno = 0;
2919
2920 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2921 return;
2922 }
2923
2924 /* Sync state because msr_banked() can raise exceptions */
2925 gen_set_condexec(s);
2926 gen_set_pc_im(s, s->pc_curr);
2927 tcg_reg = load_reg(s, rn);
2928 tcg_tgtmode = tcg_const_i32(tgtmode);
2929 tcg_regno = tcg_const_i32(regno);
2930 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2931 tcg_temp_free_i32(tcg_tgtmode);
2932 tcg_temp_free_i32(tcg_regno);
2933 tcg_temp_free_i32(tcg_reg);
2934 s->base.is_jmp = DISAS_UPDATE;
2935 }
2936
2937 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2938 {
2939 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2940 int tgtmode = 0, regno = 0;
2941
2942 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2943 return;
2944 }
2945
2946 /* Sync state because mrs_banked() can raise exceptions */
2947 gen_set_condexec(s);
2948 gen_set_pc_im(s, s->pc_curr);
2949 tcg_reg = tcg_temp_new_i32();
2950 tcg_tgtmode = tcg_const_i32(tgtmode);
2951 tcg_regno = tcg_const_i32(regno);
2952 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2953 tcg_temp_free_i32(tcg_tgtmode);
2954 tcg_temp_free_i32(tcg_regno);
2955 store_reg(s, rn, tcg_reg);
2956 s->base.is_jmp = DISAS_UPDATE;
2957 }
2958
2959 /* Store value to PC as for an exception return (i.e. don't
2960 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2961 * will do the masking based on the new value of the Thumb bit.
2962 */
2963 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2964 {
2965 tcg_gen_mov_i32(cpu_R[15], pc);
2966 tcg_temp_free_i32(pc);
2967 }
2968
2969 /* Generate a v6 exception return. Marks both values as dead. */
2970 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2971 {
2972 store_pc_exc_ret(s, pc);
2973 /* The cpsr_write_eret helper will mask the low bits of PC
2974 * appropriately depending on the new Thumb bit, so it must
2975 * be called after storing the new PC.
2976 */
2977 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2978 gen_io_start();
2979 }
2980 gen_helper_cpsr_write_eret(cpu_env, cpsr);
2981 tcg_temp_free_i32(cpsr);
2982 /* Must exit loop to check un-masked IRQs */
2983 s->base.is_jmp = DISAS_EXIT;
2984 }
2985
2986 /* Generate an old-style exception return. Marks pc as dead. */
2987 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2988 {
2989 gen_rfe(s, pc, load_cpu_field(spsr));
2990 }
2991
2992 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
2993
2994 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
2995 {
2996 switch (size) {
2997 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
2998 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
2999 case 2: tcg_gen_add_i32(t0, t0, t1); break;
3000 default: abort();
3001 }
3002 }
3003
3004 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3005 {
3006 switch (size) {
3007 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3008 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3009 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3010 default: return;
3011 }
3012 }
3013
3014 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3015 switch ((size << 1) | u) { \
3016 case 0: \
3017 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3018 break; \
3019 case 1: \
3020 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3021 break; \
3022 case 2: \
3023 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3024 break; \
3025 case 3: \
3026 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3027 break; \
3028 case 4: \
3029 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3030 break; \
3031 case 5: \
3032 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3033 break; \
3034 default: return 1; \
3035 }} while (0)
3036
3037 #define GEN_NEON_INTEGER_OP(name) do { \
3038 switch ((size << 1) | u) { \
3039 case 0: \
3040 gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3041 break; \
3042 case 1: \
3043 gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3044 break; \
3045 case 2: \
3046 gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3047 break; \
3048 case 3: \
3049 gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3050 break; \
3051 case 4: \
3052 gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3053 break; \
3054 case 5: \
3055 gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3056 break; \
3057 default: return 1; \
3058 }} while (0)
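/*
 * The case index is (size << 1) | u; e.g. GEN_NEON_INTEGER_OP(hadd) with
 * size == 1 and u == 1 selects case 3 and emits
 * gen_helper_neon_hadd_u16(tmp, tmp, tmp2), while size == 2, u == 0
 * selects the signed 32-bit helper.
 */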
3059
3060 static TCGv_i32 neon_load_scratch(int scratch)
3061 {
3062 TCGv_i32 tmp = tcg_temp_new_i32();
3063 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3064 return tmp;
3065 }
3066
3067 static void neon_store_scratch(int scratch, TCGv_i32 var)
3068 {
3069 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3070 tcg_temp_free_i32(var);
3071 }
3072
3073 static inline TCGv_i32 neon_get_scalar(int size, int reg)
3074 {
3075 TCGv_i32 tmp;
3076 if (size == 1) {
3077 tmp = neon_load_reg(reg & 7, reg >> 4);
3078 if (reg & 8) {
3079 gen_neon_dup_high16(tmp);
3080 } else {
3081 gen_neon_dup_low16(tmp);
3082 }
3083 } else {
3084 tmp = neon_load_reg(reg & 15, reg >> 4);
3085 }
3086 return tmp;
3087 }
3088
3089 static int gen_neon_unzip(int rd, int rm, int size, int q)
3090 {
3091 TCGv_ptr pd, pm;
3092
3093 if (!q && size == 2) {
3094 return 1;
3095 }
3096 pd = vfp_reg_ptr(true, rd);
3097 pm = vfp_reg_ptr(true, rm);
3098 if (q) {
3099 switch (size) {
3100 case 0:
3101 gen_helper_neon_qunzip8(pd, pm);
3102 break;
3103 case 1:
3104 gen_helper_neon_qunzip16(pd, pm);
3105 break;
3106 case 2:
3107 gen_helper_neon_qunzip32(pd, pm);
3108 break;
3109 default:
3110 abort();
3111 }
3112 } else {
3113 switch (size) {
3114 case 0:
3115 gen_helper_neon_unzip8(pd, pm);
3116 break;
3117 case 1:
3118 gen_helper_neon_unzip16(pd, pm);
3119 break;
3120 default:
3121 abort();
3122 }
3123 }
3124 tcg_temp_free_ptr(pd);
3125 tcg_temp_free_ptr(pm);
3126 return 0;
3127 }
3128
3129 static int gen_neon_zip(int rd, int rm, int size, int q)
3130 {
3131 TCGv_ptr pd, pm;
3132
3133 if (!q && size == 2) {
3134 return 1;
3135 }
3136 pd = vfp_reg_ptr(true, rd);
3137 pm = vfp_reg_ptr(true, rm);
3138 if (q) {
3139 switch (size) {
3140 case 0:
3141 gen_helper_neon_qzip8(pd, pm);
3142 break;
3143 case 1:
3144 gen_helper_neon_qzip16(pd, pm);
3145 break;
3146 case 2:
3147 gen_helper_neon_qzip32(pd, pm);
3148 break;
3149 default:
3150 abort();
3151 }
3152 } else {
3153 switch (size) {
3154 case 0:
3155 gen_helper_neon_zip8(pd, pm);
3156 break;
3157 case 1:
3158 gen_helper_neon_zip16(pd, pm);
3159 break;
3160 default:
3161 abort();
3162 }
3163 }
3164 tcg_temp_free_ptr(pd);
3165 tcg_temp_free_ptr(pm);
3166 return 0;
3167 }
3168
3169 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3170 {
3171 TCGv_i32 rd, tmp;
3172
3173 rd = tcg_temp_new_i32();
3174 tmp = tcg_temp_new_i32();
3175
3176 tcg_gen_shli_i32(rd, t0, 8);
3177 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3178 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3179 tcg_gen_or_i32(rd, rd, tmp);
3180
3181 tcg_gen_shri_i32(t1, t1, 8);
3182 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3183 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3184 tcg_gen_or_i32(t1, t1, tmp);
3185 tcg_gen_mov_i32(t0, rd);
3186
3187 tcg_temp_free_i32(tmp);
3188 tcg_temp_free_i32(rd);
3189 }
3190
3191 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3192 {
3193 TCGv_i32 rd, tmp;
3194
3195 rd = tcg_temp_new_i32();
3196 tmp = tcg_temp_new_i32();
3197
3198 tcg_gen_shli_i32(rd, t0, 16);
3199 tcg_gen_andi_i32(tmp, t1, 0xffff);
3200 tcg_gen_or_i32(rd, rd, tmp);
3201 tcg_gen_shri_i32(t1, t1, 16);
3202 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3203 tcg_gen_or_i32(t1, t1, tmp);
3204 tcg_gen_mov_i32(t0, rd);
3205
3206 tcg_temp_free_i32(tmp);
3207 tcg_temp_free_i32(rd);
3208 }
3209
3210 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3211 {
3212 switch (size) {
3213 case 0: gen_helper_neon_narrow_u8(dest, src); break;
3214 case 1: gen_helper_neon_narrow_u16(dest, src); break;
3215 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3216 default: abort();
3217 }
3218 }
3219
3220 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3221 {
3222 switch (size) {
3223 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3224 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3225 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3226 default: abort();
3227 }
3228 }
3229
3230 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3231 {
3232 switch (size) {
3233 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3234 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3235 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3236 default: abort();
3237 }
3238 }
3239
3240 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3241 {
3242 switch (size) {
3243 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3244 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3245 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3246 default: abort();
3247 }
3248 }
3249
3250 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
3251 int q, int u)
3252 {
3253 if (q) {
3254 if (u) {
3255 switch (size) {
3256 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3257 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3258 default: abort();
3259 }
3260 } else {
3261 switch (size) {
3262 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3263 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3264 default: abort();
3265 }
3266 }
3267 } else {
3268 if (u) {
3269 switch (size) {
3270 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
3271 case 2: gen_ushl_i32(var, var, shift); break;
3272 default: abort();
3273 }
3274 } else {
3275 switch (size) {
3276 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3277 case 2: gen_sshl_i32(var, var, shift); break;
3278 default: abort();
3279 }
3280 }
3281 }
3282 }
3283
3284 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3285 {
3286 if (u) {
3287 switch (size) {
3288 case 0: gen_helper_neon_widen_u8(dest, src); break;
3289 case 1: gen_helper_neon_widen_u16(dest, src); break;
3290 case 2: tcg_gen_extu_i32_i64(dest, src); break;
3291 default: abort();
3292 }
3293 } else {
3294 switch (size) {
3295 case 0: gen_helper_neon_widen_s8(dest, src); break;
3296 case 1: gen_helper_neon_widen_s16(dest, src); break;
3297 case 2: tcg_gen_ext_i32_i64(dest, src); break;
3298 default: abort();
3299 }
3300 }
3301 tcg_temp_free_i32(src);
3302 }
3303
3304 static inline void gen_neon_addl(int size)
3305 {
3306 switch (size) {
3307 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3308 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3309 case 2: tcg_gen_add_i64(CPU_V001); break;
3310 default: abort();
3311 }
3312 }
3313
3314 static inline void gen_neon_subl(int size)
3315 {
3316 switch (size) {
3317 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
3318 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
3319 case 2: tcg_gen_sub_i64(CPU_V001); break;
3320 default: abort();
3321 }
3322 }
3323
3324 static inline void gen_neon_negl(TCGv_i64 var, int size)
3325 {
3326 switch (size) {
3327 case 0: gen_helper_neon_negl_u16(var, var); break;
3328 case 1: gen_helper_neon_negl_u32(var, var); break;
3329 case 2:
3330 tcg_gen_neg_i64(var, var);
3331 break;
3332 default: abort();
3333 }
3334 }
3335
3336 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3337 {
3338 switch (size) {
3339 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3340 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3341 default: abort();
3342 }
3343 }
3344
3345 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3346 int size, int u)
3347 {
3348 TCGv_i64 tmp;
3349
3350 switch ((size << 1) | u) {
3351 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3352 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3353 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3354 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3355 case 4:
3356 tmp = gen_muls_i64_i32(a, b);
3357 tcg_gen_mov_i64(dest, tmp);
3358 tcg_temp_free_i64(tmp);
3359 break;
3360 case 5:
3361 tmp = gen_mulu_i64_i32(a, b);
3362 tcg_gen_mov_i64(dest, tmp);
3363 tcg_temp_free_i64(tmp);
3364 break;
3365 default: abort();
3366 }
3367
3368 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
3369 Don't forget to clean them now. */
3370 if (size < 2) {
3371 tcg_temp_free_i32(a);
3372 tcg_temp_free_i32(b);
3373 }
3374 }
3375
3376 static void gen_neon_narrow_op(int op, int u, int size,
3377 TCGv_i32 dest, TCGv_i64 src)
3378 {
3379 if (op) {
3380 if (u) {
3381 gen_neon_unarrow_sats(size, dest, src);
3382 } else {
3383 gen_neon_narrow(size, dest, src);
3384 }
3385 } else {
3386 if (u) {
3387 gen_neon_narrow_satu(size, dest, src);
3388 } else {
3389 gen_neon_narrow_sats(size, dest, src);
3390 }
3391 }
3392 }
3393
3394 /* Symbolic constants for op fields for Neon 3-register same-length.
3395 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
3396 * table A7-9.
3397 */
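/*
 * That is, op = (insn bits [11:8] << 1) | bit [4]; e.g. NEON_3R_VSHL (8)
 * is bits [11:8] = 0b0100 with bit [4] = 0, and NEON_3R_VQADD (1) is
 * bits [11:8] = 0b0000 with bit [4] = 1.
 */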
3398 #define NEON_3R_VHADD 0
3399 #define NEON_3R_VQADD 1
3400 #define NEON_3R_VRHADD 2
3401 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
3402 #define NEON_3R_VHSUB 4
3403 #define NEON_3R_VQSUB 5
3404 #define NEON_3R_VCGT 6
3405 #define NEON_3R_VCGE 7
3406 #define NEON_3R_VSHL 8
3407 #define NEON_3R_VQSHL 9
3408 #define NEON_3R_VRSHL 10
3409 #define NEON_3R_VQRSHL 11
3410 #define NEON_3R_VMAX 12
3411 #define NEON_3R_VMIN 13
3412 #define NEON_3R_VABD 14
3413 #define NEON_3R_VABA 15
3414 #define NEON_3R_VADD_VSUB 16
3415 #define NEON_3R_VTST_VCEQ 17
3416 #define NEON_3R_VML 18 /* VMLA, VMLS */
3417 #define NEON_3R_VMUL 19
3418 #define NEON_3R_VPMAX 20
3419 #define NEON_3R_VPMIN 21
3420 #define NEON_3R_VQDMULH_VQRDMULH 22
3421 #define NEON_3R_VPADD_VQRDMLAH 23
3422 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
3423 #define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
3424 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
3425 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
3426 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
3427 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
3428 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
3429 #define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
3430
3431 static const uint8_t neon_3r_sizes[] = {
3432 [NEON_3R_VHADD] = 0x7,
3433 [NEON_3R_VQADD] = 0xf,
3434 [NEON_3R_VRHADD] = 0x7,
3435 [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
3436 [NEON_3R_VHSUB] = 0x7,
3437 [NEON_3R_VQSUB] = 0xf,
3438 [NEON_3R_VCGT] = 0x7,
3439 [NEON_3R_VCGE] = 0x7,
3440 [NEON_3R_VSHL] = 0xf,
3441 [NEON_3R_VQSHL] = 0xf,
3442 [NEON_3R_VRSHL] = 0xf,
3443 [NEON_3R_VQRSHL] = 0xf,
3444 [NEON_3R_VMAX] = 0x7,
3445 [NEON_3R_VMIN] = 0x7,
3446 [NEON_3R_VABD] = 0x7,
3447 [NEON_3R_VABA] = 0x7,
3448 [NEON_3R_VADD_VSUB] = 0xf,
3449 [NEON_3R_VTST_VCEQ] = 0x7,
3450 [NEON_3R_VML] = 0x7,
3451 [NEON_3R_VMUL] = 0x7,
3452 [NEON_3R_VPMAX] = 0x7,
3453 [NEON_3R_VPMIN] = 0x7,
3454 [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
3455 [NEON_3R_VPADD_VQRDMLAH] = 0x7,
3456 [NEON_3R_SHA] = 0xf, /* size field encodes op type */
3457 [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
3458 [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
3459 [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
3460 [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
3461 [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
3462 [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
3463 [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
3464 };
3465
3466 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
3467 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
3468 * table A7-13.
3469 */
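/*
 * That is, op = (insn bits [17:16] << 4) | bits [10:7]; e.g.
 * NEON_2RM_VSWP (32) is bits [17:16] = 0b10 with bits [10:7] = 0b0000.
 */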
3470 #define NEON_2RM_VREV64 0
3471 #define NEON_2RM_VREV32 1
3472 #define NEON_2RM_VREV16 2
3473 #define NEON_2RM_VPADDL 4
3474 #define NEON_2RM_VPADDL_U 5
3475 #define NEON_2RM_AESE 6 /* Includes AESD */
3476 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
3477 #define NEON_2RM_VCLS 8
3478 #define NEON_2RM_VCLZ 9
3479 #define NEON_2RM_VCNT 10
3480 #define NEON_2RM_VMVN 11
3481 #define NEON_2RM_VPADAL 12
3482 #define NEON_2RM_VPADAL_U 13
3483 #define NEON_2RM_VQABS 14
3484 #define NEON_2RM_VQNEG 15
3485 #define NEON_2RM_VCGT0 16
3486 #define NEON_2RM_VCGE0 17
3487 #define NEON_2RM_VCEQ0 18
3488 #define NEON_2RM_VCLE0 19
3489 #define NEON_2RM_VCLT0 20
3490 #define NEON_2RM_SHA1H 21
3491 #define NEON_2RM_VABS 22
3492 #define NEON_2RM_VNEG 23
3493 #define NEON_2RM_VCGT0_F 24
3494 #define NEON_2RM_VCGE0_F 25
3495 #define NEON_2RM_VCEQ0_F 26
3496 #define NEON_2RM_VCLE0_F 27
3497 #define NEON_2RM_VCLT0_F 28
3498 #define NEON_2RM_VABS_F 30
3499 #define NEON_2RM_VNEG_F 31
3500 #define NEON_2RM_VSWP 32
3501 #define NEON_2RM_VTRN 33
3502 #define NEON_2RM_VUZP 34
3503 #define NEON_2RM_VZIP 35
3504 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3505 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3506 #define NEON_2RM_VSHLL 38
3507 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3508 #define NEON_2RM_VRINTN 40
3509 #define NEON_2RM_VRINTX 41
3510 #define NEON_2RM_VRINTA 42
3511 #define NEON_2RM_VRINTZ 43
3512 #define NEON_2RM_VCVT_F16_F32 44
3513 #define NEON_2RM_VRINTM 45
3514 #define NEON_2RM_VCVT_F32_F16 46
3515 #define NEON_2RM_VRINTP 47
3516 #define NEON_2RM_VCVTAU 48
3517 #define NEON_2RM_VCVTAS 49
3518 #define NEON_2RM_VCVTNU 50
3519 #define NEON_2RM_VCVTNS 51
3520 #define NEON_2RM_VCVTPU 52
3521 #define NEON_2RM_VCVTPS 53
3522 #define NEON_2RM_VCVTMU 54
3523 #define NEON_2RM_VCVTMS 55
3524 #define NEON_2RM_VRECPE 56
3525 #define NEON_2RM_VRSQRTE 57
3526 #define NEON_2RM_VRECPE_F 58
3527 #define NEON_2RM_VRSQRTE_F 59
3528 #define NEON_2RM_VCVT_FS 60
3529 #define NEON_2RM_VCVT_FU 61
3530 #define NEON_2RM_VCVT_SF 62
3531 #define NEON_2RM_VCVT_UF 63
3532
3533 static bool neon_2rm_is_v8_op(int op)
3534 {
3535 /* Return true if this neon 2reg-misc op is ARMv8 and up */
3536 switch (op) {
3537 case NEON_2RM_VRINTN:
3538 case NEON_2RM_VRINTA:
3539 case NEON_2RM_VRINTM:
3540 case NEON_2RM_VRINTP:
3541 case NEON_2RM_VRINTZ:
3542 case NEON_2RM_VRINTX:
3543 case NEON_2RM_VCVTAU:
3544 case NEON_2RM_VCVTAS:
3545 case NEON_2RM_VCVTNU:
3546 case NEON_2RM_VCVTNS:
3547 case NEON_2RM_VCVTPU:
3548 case NEON_2RM_VCVTPS:
3549 case NEON_2RM_VCVTMU:
3550 case NEON_2RM_VCVTMS:
3551 return true;
3552 default:
3553 return false;
3554 }
3555 }
3556
3557 /* Each entry in this array has bit n set if the insn allows
3558 * size value n (otherwise it will UNDEF). Since unallocated
3559 * op values will have no bits set they always UNDEF.
3560 */
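/*
 * For example, [NEON_2RM_VREV32] = 0x3 allows only size 0 (bytes) and
 * size 1 (halfwords), while [NEON_2RM_SHA1H] = 0x4 allows size 2 alone.
 */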
3561 static const uint8_t neon_2rm_sizes[] = {
3562 [NEON_2RM_VREV64] = 0x7,
3563 [NEON_2RM_VREV32] = 0x3,
3564 [NEON_2RM_VREV16] = 0x1,
3565 [NEON_2RM_VPADDL] = 0x7,
3566 [NEON_2RM_VPADDL_U] = 0x7,
3567 [NEON_2RM_AESE] = 0x1,
3568 [NEON_2RM_AESMC] = 0x1,
3569 [NEON_2RM_VCLS] = 0x7,
3570 [NEON_2RM_VCLZ] = 0x7,
3571 [NEON_2RM_VCNT] = 0x1,
3572 [NEON_2RM_VMVN] = 0x1,
3573 [NEON_2RM_VPADAL] = 0x7,
3574 [NEON_2RM_VPADAL_U] = 0x7,
3575 [NEON_2RM_VQABS] = 0x7,
3576 [NEON_2RM_VQNEG] = 0x7,
3577 [NEON_2RM_VCGT0] = 0x7,
3578 [NEON_2RM_VCGE0] = 0x7,
3579 [NEON_2RM_VCEQ0] = 0x7,
3580 [NEON_2RM_VCLE0] = 0x7,
3581 [NEON_2RM_VCLT0] = 0x7,
3582 [NEON_2RM_SHA1H] = 0x4,
3583 [NEON_2RM_VABS] = 0x7,
3584 [NEON_2RM_VNEG] = 0x7,
3585 [NEON_2RM_VCGT0_F] = 0x4,
3586 [NEON_2RM_VCGE0_F] = 0x4,
3587 [NEON_2RM_VCEQ0_F] = 0x4,
3588 [NEON_2RM_VCLE0_F] = 0x4,
3589 [NEON_2RM_VCLT0_F] = 0x4,
3590 [NEON_2RM_VABS_F] = 0x4,
3591 [NEON_2RM_VNEG_F] = 0x4,
3592 [NEON_2RM_VSWP] = 0x1,
3593 [NEON_2RM_VTRN] = 0x7,
3594 [NEON_2RM_VUZP] = 0x7,
3595 [NEON_2RM_VZIP] = 0x7,
3596 [NEON_2RM_VMOVN] = 0x7,
3597 [NEON_2RM_VQMOVN] = 0x7,
3598 [NEON_2RM_VSHLL] = 0x7,
3599 [NEON_2RM_SHA1SU1] = 0x4,
3600 [NEON_2RM_VRINTN] = 0x4,
3601 [NEON_2RM_VRINTX] = 0x4,
3602 [NEON_2RM_VRINTA] = 0x4,
3603 [NEON_2RM_VRINTZ] = 0x4,
3604 [NEON_2RM_VCVT_F16_F32] = 0x2,
3605 [NEON_2RM_VRINTM] = 0x4,
3606 [NEON_2RM_VCVT_F32_F16] = 0x2,
3607 [NEON_2RM_VRINTP] = 0x4,
3608 [NEON_2RM_VCVTAU] = 0x4,
3609 [NEON_2RM_VCVTAS] = 0x4,
3610 [NEON_2RM_VCVTNU] = 0x4,
3611 [NEON_2RM_VCVTNS] = 0x4,
3612 [NEON_2RM_VCVTPU] = 0x4,
3613 [NEON_2RM_VCVTPS] = 0x4,
3614 [NEON_2RM_VCVTMU] = 0x4,
3615 [NEON_2RM_VCVTMS] = 0x4,
3616 [NEON_2RM_VRECPE] = 0x4,
3617 [NEON_2RM_VRSQRTE] = 0x4,
3618 [NEON_2RM_VRECPE_F] = 0x4,
3619 [NEON_2RM_VRSQRTE_F] = 0x4,
3620 [NEON_2RM_VCVT_FS] = 0x4,
3621 [NEON_2RM_VCVT_FU] = 0x4,
3622 [NEON_2RM_VCVT_SF] = 0x4,
3623 [NEON_2RM_VCVT_UF] = 0x4,
3624 };
3625
3626 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
3627 uint32_t opr_sz, uint32_t max_sz,
3628 gen_helper_gvec_3_ptr *fn)
3629 {
3630 TCGv_ptr qc_ptr = tcg_temp_new_ptr();
3631
3632 tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
3633 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
3634 opr_sz, max_sz, 0, fn);
3635 tcg_temp_free_ptr(qc_ptr);
3636 }
3637
3638 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3639 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3640 {
3641 static gen_helper_gvec_3_ptr * const fns[2] = {
3642 gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
3643 };
3644 tcg_debug_assert(vece >= 1 && vece <= 2);
3645 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3646 }
3647
3648 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3649 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3650 {
3651 static gen_helper_gvec_3_ptr * const fns[2] = {
3652 gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
3653 };
3654 tcg_debug_assert(vece >= 1 && vece <= 2);
3655 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3656 }
3657
3658 #define GEN_CMP0(NAME, COND) \
3659 static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
3660 { \
3661 tcg_gen_setcondi_i32(COND, d, a, 0); \
3662 tcg_gen_neg_i32(d, d); \
3663 } \
3664 static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
3665 { \
3666 tcg_gen_setcondi_i64(COND, d, a, 0); \
3667 tcg_gen_neg_i64(d, d); \
3668 } \
3669 static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3670 { \
3671 TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
3672 tcg_gen_cmp_vec(COND, vece, d, a, zero); \
3673 tcg_temp_free_vec(zero); \
3674 } \
3675 void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
3676 uint32_t opr_sz, uint32_t max_sz) \
3677 { \
3678 const GVecGen2 op[4] = { \
3679 { .fno = gen_helper_gvec_##NAME##0_b, \
3680 .fniv = gen_##NAME##0_vec, \
3681 .opt_opc = vecop_list_cmp, \
3682 .vece = MO_8 }, \
3683 { .fno = gen_helper_gvec_##NAME##0_h, \
3684 .fniv = gen_##NAME##0_vec, \
3685 .opt_opc = vecop_list_cmp, \
3686 .vece = MO_16 }, \
3687 { .fni4 = gen_##NAME##0_i32, \
3688 .fniv = gen_##NAME##0_vec, \
3689 .opt_opc = vecop_list_cmp, \
3690 .vece = MO_32 }, \
3691 { .fni8 = gen_##NAME##0_i64, \
3692 .fniv = gen_##NAME##0_vec, \
3693 .opt_opc = vecop_list_cmp, \
3694 .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
3695 .vece = MO_64 }, \
3696 }; \
3697 tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
3698 }
3699
3700 static const TCGOpcode vecop_list_cmp[] = {
3701 INDEX_op_cmp_vec, 0
3702 };
3703
3704 GEN_CMP0(ceq, TCG_COND_EQ)
3705 GEN_CMP0(cle, TCG_COND_LE)
3706 GEN_CMP0(cge, TCG_COND_GE)
3707 GEN_CMP0(clt, TCG_COND_LT)
3708 GEN_CMP0(cgt, TCG_COND_GT)
3709
3710 #undef GEN_CMP0
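/*
 * Each GEN_CMP0 instantiation above provides gen_<name>0_i32/_i64/_vec
 * plus the gen_gvec_<name>0() expander; the setcond+neg pairing turns the
 * 0/1 comparison result into the all-zeros/all-ones element value the
 * Neon compare instructions produce.
 */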
3711
3712 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3713 {
3714 tcg_gen_vec_sar8i_i64(a, a, shift);
3715 tcg_gen_vec_add8_i64(d, d, a);
3716 }
3717
3718 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3719 {
3720 tcg_gen_vec_sar16i_i64(a, a, shift);
3721 tcg_gen_vec_add16_i64(d, d, a);
3722 }
3723
3724 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3725 {
3726 tcg_gen_sari_i32(a, a, shift);
3727 tcg_gen_add_i32(d, d, a);
3728 }
3729
3730 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3731 {
3732 tcg_gen_sari_i64(a, a, shift);
3733 tcg_gen_add_i64(d, d, a);
3734 }
3735
3736 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3737 {
3738 tcg_gen_sari_vec(vece, a, a, sh);
3739 tcg_gen_add_vec(vece, d, d, a);
3740 }
3741
3742 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3743 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3744 {
3745 static const TCGOpcode vecop_list[] = {
3746 INDEX_op_sari_vec, INDEX_op_add_vec, 0
3747 };
3748 static const GVecGen2i ops[4] = {
3749 { .fni8 = gen_ssra8_i64,
3750 .fniv = gen_ssra_vec,
3751 .fno = gen_helper_gvec_ssra_b,
3752 .load_dest = true,
3753 .opt_opc = vecop_list,
3754 .vece = MO_8 },
3755 { .fni8 = gen_ssra16_i64,
3756 .fniv = gen_ssra_vec,
3757 .fno = gen_helper_gvec_ssra_h,
3758 .load_dest = true,
3759 .opt_opc = vecop_list,
3760 .vece = MO_16 },
3761 { .fni4 = gen_ssra32_i32,
3762 .fniv = gen_ssra_vec,
3763 .fno = gen_helper_gvec_ssra_s,
3764 .load_dest = true,
3765 .opt_opc = vecop_list,
3766 .vece = MO_32 },
3767 { .fni8 = gen_ssra64_i64,
3768 .fniv = gen_ssra_vec,
3769 .fno = gen_helper_gvec_ssra_d,
3770 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3771 .opt_opc = vecop_list,
3772 .load_dest = true,
3773 .vece = MO_64 },
3774 };
3775
3776 /* tszimm encoding produces immediates in the range [1..esize]. */
3777 tcg_debug_assert(shift > 0);
3778 tcg_debug_assert(shift <= (8 << vece));
3779
3780 /*
3781 * Shifts larger than the element size are architecturally valid.
3782 * A signed shift of esize or more yields all sign bits, so clamp the shift to esize - 1.
3783 */
3784 shift = MIN(shift, (8 << vece) - 1);
3785 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3786 }
3787
3788 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3789 {
3790 tcg_gen_vec_shr8i_i64(a, a, shift);
3791 tcg_gen_vec_add8_i64(d, d, a);
3792 }
3793
3794 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3795 {
3796 tcg_gen_vec_shr16i_i64(a, a, shift);
3797 tcg_gen_vec_add16_i64(d, d, a);
3798 }
3799
3800 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3801 {
3802 tcg_gen_shri_i32(a, a, shift);
3803 tcg_gen_add_i32(d, d, a);
3804 }
3805
3806 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3807 {
3808 tcg_gen_shri_i64(a, a, shift);
3809 tcg_gen_add_i64(d, d, a);
3810 }
3811
3812 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3813 {
3814 tcg_gen_shri_vec(vece, a, a, sh);
3815 tcg_gen_add_vec(vece, d, d, a);
3816 }
3817
3818 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3819 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3820 {
3821 static const TCGOpcode vecop_list[] = {
3822 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3823 };
3824 static const GVecGen2i ops[4] = {
3825 { .fni8 = gen_usra8_i64,
3826 .fniv = gen_usra_vec,
3827 .fno = gen_helper_gvec_usra_b,
3828 .load_dest = true,
3829 .opt_opc = vecop_list,
3830 .vece = MO_8, },
3831 { .fni8 = gen_usra16_i64,
3832 .fniv = gen_usra_vec,
3833 .fno = gen_helper_gvec_usra_h,
3834 .load_dest = true,
3835 .opt_opc = vecop_list,
3836 .vece = MO_16, },
3837 { .fni4 = gen_usra32_i32,
3838 .fniv = gen_usra_vec,
3839 .fno = gen_helper_gvec_usra_s,
3840 .load_dest = true,
3841 .opt_opc = vecop_list,
3842 .vece = MO_32, },
3843 { .fni8 = gen_usra64_i64,
3844 .fniv = gen_usra_vec,
3845 .fno = gen_helper_gvec_usra_d,
3846 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3847 .load_dest = true,
3848 .opt_opc = vecop_list,
3849 .vece = MO_64, },
3850 };
3851
3852 /* tszimm encoding produces immediates in the range [1..esize]. */
3853 tcg_debug_assert(shift > 0);
3854 tcg_debug_assert(shift <= (8 << vece));
3855
3856 /*
3857 * Shifts larger than the element size are architecturally valid.
3858 * An unsigned shift of that size yields all zeros, so the accumulate is a nop.
3859 */
3860 if (shift < (8 << vece)) {
3861 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3862 } else {
3863 /* Nop, but we do need to clear the tail. */
3864 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3865 }
3866 }
3867
3868 /*
3869 * Shift one less than the requested amount, and the low bit is
3870 * the rounding bit. For the 8 and 16-bit operations, because we
3871 * mask the low bit, we can perform a normal integer shift instead
3872 * of a vector shift.
3873 */
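/*
 * As a sketch of the identity used below:
 *   srshr(x, sh) == (x >> sh) + ((x >> (sh - 1)) & 1)
 * e.g. srshr(7, 2) == (7 >> 2) + ((7 >> 1) & 1) == 1 + 1 == 2,
 * i.e. 7/4 rounded to nearest.
 */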
3874 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3875 {
3876 TCGv_i64 t = tcg_temp_new_i64();
3877
3878 tcg_gen_shri_i64(t, a, sh - 1);
3879 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3880 tcg_gen_vec_sar8i_i64(d, a, sh);
3881 tcg_gen_vec_add8_i64(d, d, t);
3882 tcg_temp_free_i64(t);
3883 }
3884
3885 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3886 {
3887 TCGv_i64 t = tcg_temp_new_i64();
3888
3889 tcg_gen_shri_i64(t, a, sh - 1);
3890 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3891 tcg_gen_vec_sar16i_i64(d, a, sh);
3892 tcg_gen_vec_add16_i64(d, d, t);
3893 tcg_temp_free_i64(t);
3894 }
3895
3896 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3897 {
3898 TCGv_i32 t = tcg_temp_new_i32();
3899
3900 tcg_gen_extract_i32(t, a, sh - 1, 1);
3901 tcg_gen_sari_i32(d, a, sh);
3902 tcg_gen_add_i32(d, d, t);
3903 tcg_temp_free_i32(t);
3904 }
3905
3906 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3907 {
3908 TCGv_i64 t = tcg_temp_new_i64();
3909
3910 tcg_gen_extract_i64(t, a, sh - 1, 1);
3911 tcg_gen_sari_i64(d, a, sh);
3912 tcg_gen_add_i64(d, d, t);
3913 tcg_temp_free_i64(t);
3914 }
3915
3916 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3917 {
3918 TCGv_vec t = tcg_temp_new_vec_matching(d);
3919 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3920
3921 tcg_gen_shri_vec(vece, t, a, sh - 1);
3922 tcg_gen_dupi_vec(vece, ones, 1);
3923 tcg_gen_and_vec(vece, t, t, ones);
3924 tcg_gen_sari_vec(vece, d, a, sh);
3925 tcg_gen_add_vec(vece, d, d, t);
3926
3927 tcg_temp_free_vec(t);
3928 tcg_temp_free_vec(ones);
3929 }
3930
3931 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3932 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3933 {
3934 static const TCGOpcode vecop_list[] = {
3935 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3936 };
3937 static const GVecGen2i ops[4] = {
3938 { .fni8 = gen_srshr8_i64,
3939 .fniv = gen_srshr_vec,
3940 .fno = gen_helper_gvec_srshr_b,
3941 .opt_opc = vecop_list,
3942 .vece = MO_8 },
3943 { .fni8 = gen_srshr16_i64,
3944 .fniv = gen_srshr_vec,
3945 .fno = gen_helper_gvec_srshr_h,
3946 .opt_opc = vecop_list,
3947 .vece = MO_16 },
3948 { .fni4 = gen_srshr32_i32,
3949 .fniv = gen_srshr_vec,
3950 .fno = gen_helper_gvec_srshr_s,
3951 .opt_opc = vecop_list,
3952 .vece = MO_32 },
3953 { .fni8 = gen_srshr64_i64,
3954 .fniv = gen_srshr_vec,
3955 .fno = gen_helper_gvec_srshr_d,
3956 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3957 .opt_opc = vecop_list,
3958 .vece = MO_64 },
3959 };
3960
3961 /* tszimm encoding produces immediates in the range [1..esize] */
3962 tcg_debug_assert(shift > 0);
3963 tcg_debug_assert(shift <= (8 << vece));
3964
3965 if (shift == (8 << vece)) {
3966 /*
3967 * Shifts larger than the element size are architecturally valid.
3968 * Signed results in all sign bits. With rounding, this produces
3969 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3970 * I.e. always zero.
3971 */
3972 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3973 } else {
3974 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3975 }
3976 }
3977
3978 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3979 {
3980 TCGv_i64 t = tcg_temp_new_i64();
3981
3982 gen_srshr8_i64(t, a, sh);
3983 tcg_gen_vec_add8_i64(d, d, t);
3984 tcg_temp_free_i64(t);
3985 }
3986
3987 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3988 {
3989 TCGv_i64 t = tcg_temp_new_i64();
3990
3991 gen_srshr16_i64(t, a, sh);
3992 tcg_gen_vec_add16_i64(d, d, t);
3993 tcg_temp_free_i64(t);
3994 }
3995
3996 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3997 {
3998 TCGv_i32 t = tcg_temp_new_i32();
3999
4000 gen_srshr32_i32(t, a, sh);
4001 tcg_gen_add_i32(d, d, t);
4002 tcg_temp_free_i32(t);
4003 }
4004
4005 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4006 {
4007 TCGv_i64 t = tcg_temp_new_i64();
4008
4009 gen_srshr64_i64(t, a, sh);
4010 tcg_gen_add_i64(d, d, t);
4011 tcg_temp_free_i64(t);
4012 }
4013
4014 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4015 {
4016 TCGv_vec t = tcg_temp_new_vec_matching(d);
4017
4018 gen_srshr_vec(vece, t, a, sh);
4019 tcg_gen_add_vec(vece, d, d, t);
4020 tcg_temp_free_vec(t);
4021 }
4022
4023 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4024 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4025 {
4026 static const TCGOpcode vecop_list[] = {
4027 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
4028 };
4029 static const GVecGen2i ops[4] = {
4030 { .fni8 = gen_srsra8_i64,
4031 .fniv = gen_srsra_vec,
4032 .fno = gen_helper_gvec_srsra_b,
4033 .opt_opc = vecop_list,
4034 .load_dest = true,
4035 .vece = MO_8 },
4036 { .fni8 = gen_srsra16_i64,
4037 .fniv = gen_srsra_vec,
4038 .fno = gen_helper_gvec_srsra_h,
4039 .opt_opc = vecop_list,
4040 .load_dest = true,
4041 .vece = MO_16 },
4042 { .fni4 = gen_srsra32_i32,
4043 .fniv = gen_srsra_vec,
4044 .fno = gen_helper_gvec_srsra_s,
4045 .opt_opc = vecop_list,
4046 .load_dest = true,
4047 .vece = MO_32 },
4048 { .fni8 = gen_srsra64_i64,
4049 .fniv = gen_srsra_vec,
4050 .fno = gen_helper_gvec_srsra_d,
4051 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4052 .opt_opc = vecop_list,
4053 .load_dest = true,
4054 .vece = MO_64 },
4055 };
4056
4057 /* tszimm encoding produces immediates in the range [1..esize] */
4058 tcg_debug_assert(shift > 0);
4059 tcg_debug_assert(shift <= (8 << vece));
4060
4061 /*
4062 * Shifts larger than the element size are architecturally valid.
4063 * Signed results in all sign bits. With rounding, this produces
4064 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
4065 * I.e. always zero. With accumulation, this leaves D unchanged.
4066 */
4067 if (shift == (8 << vece)) {
4068 /* Nop, but we do need to clear the tail. */
4069 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4070 } else {
4071 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4072 }
4073 }
4074
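/*
 * URSHR (unsigned rounding shift right) reuses the same trick as
 * SRSHR above: take the rounding bit from a shift by (sh - 1) and
 * add it to the logically shifted result.
 */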
4075 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4076 {
4077 TCGv_i64 t = tcg_temp_new_i64();
4078
4079 tcg_gen_shri_i64(t, a, sh - 1);
4080 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
4081 tcg_gen_vec_shr8i_i64(d, a, sh);
4082 tcg_gen_vec_add8_i64(d, d, t);
4083 tcg_temp_free_i64(t);
4084 }
4085
4086 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4087 {
4088 TCGv_i64 t = tcg_temp_new_i64();
4089
4090 tcg_gen_shri_i64(t, a, sh - 1);
4091 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
4092 tcg_gen_vec_shr16i_i64(d, a, sh);
4093 tcg_gen_vec_add16_i64(d, d, t);
4094 tcg_temp_free_i64(t);
4095 }
4096
4097 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4098 {
4099 TCGv_i32 t = tcg_temp_new_i32();
4100
4101 tcg_gen_extract_i32(t, a, sh - 1, 1);
4102 tcg_gen_shri_i32(d, a, sh);
4103 tcg_gen_add_i32(d, d, t);
4104 tcg_temp_free_i32(t);
4105 }
4106
4107 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4108 {
4109 TCGv_i64 t = tcg_temp_new_i64();
4110
4111 tcg_gen_extract_i64(t, a, sh - 1, 1);
4112 tcg_gen_shri_i64(d, a, sh);
4113 tcg_gen_add_i64(d, d, t);
4114 tcg_temp_free_i64(t);
4115 }
4116
4117 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
4118 {
4119 TCGv_vec t = tcg_temp_new_vec_matching(d);
4120 TCGv_vec ones = tcg_temp_new_vec_matching(d);
4121
4122 tcg_gen_shri_vec(vece, t, a, shift - 1);
4123 tcg_gen_dupi_vec(vece, ones, 1);
4124 tcg_gen_and_vec(vece, t, t, ones);
4125 tcg_gen_shri_vec(vece, d, a, shift);
4126 tcg_gen_add_vec(vece, d, d, t);
4127
4128 tcg_temp_free_vec(t);
4129 tcg_temp_free_vec(ones);
4130 }
4131
4132 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4133 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4134 {
4135 static const TCGOpcode vecop_list[] = {
4136 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4137 };
4138 static const GVecGen2i ops[4] = {
4139 { .fni8 = gen_urshr8_i64,
4140 .fniv = gen_urshr_vec,
4141 .fno = gen_helper_gvec_urshr_b,
4142 .opt_opc = vecop_list,
4143 .vece = MO_8 },
4144 { .fni8 = gen_urshr16_i64,
4145 .fniv = gen_urshr_vec,
4146 .fno = gen_helper_gvec_urshr_h,
4147 .opt_opc = vecop_list,
4148 .vece = MO_16 },
4149 { .fni4 = gen_urshr32_i32,
4150 .fniv = gen_urshr_vec,
4151 .fno = gen_helper_gvec_urshr_s,
4152 .opt_opc = vecop_list,
4153 .vece = MO_32 },
4154 { .fni8 = gen_urshr64_i64,
4155 .fniv = gen_urshr_vec,
4156 .fno = gen_helper_gvec_urshr_d,
4157 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4158 .opt_opc = vecop_list,
4159 .vece = MO_64 },
4160 };
4161
4162 /* tszimm encoding produces immediates in the range [1..esize] */
4163 tcg_debug_assert(shift > 0);
4164 tcg_debug_assert(shift <= (8 << vece));
4165
4166 if (shift == (8 << vece)) {
4167 /*
4168 * Shifts larger than the element size are architecturally valid.
4169 * Unsigned results in zero. With rounding, this produces a
4170 * copy of the most significant bit.
4171 */
4172 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
4173 } else {
4174 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4175 }
4176 }
4177
4178 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4179 {
4180 TCGv_i64 t = tcg_temp_new_i64();
4181
4182 if (sh == 8) {
4183 tcg_gen_vec_shr8i_i64(t, a, 7);
4184 } else {
4185 gen_urshr8_i64(t, a, sh);
4186 }
4187 tcg_gen_vec_add8_i64(d, d, t);
4188 tcg_temp_free_i64(t);
4189 }
4190
4191 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4192 {
4193 TCGv_i64 t = tcg_temp_new_i64();
4194
4195 if (sh == 16) {
4196 tcg_gen_vec_shr16i_i64(t, a, 15);
4197 } else {
4198 gen_urshr16_i64(t, a, sh);
4199 }
4200 tcg_gen_vec_add16_i64(d, d, t);
4201 tcg_temp_free_i64(t);
4202 }
4203
4204 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4205 {
4206 TCGv_i32 t = tcg_temp_new_i32();
4207
4208 if (sh == 32) {
4209 tcg_gen_shri_i32(t, a, 31);
4210 } else {
4211 gen_urshr32_i32(t, a, sh);
4212 }
4213 tcg_gen_add_i32(d, d, t);
4214 tcg_temp_free_i32(t);
4215 }
4216
4217 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4218 {
4219 TCGv_i64 t = tcg_temp_new_i64();
4220
4221 if (sh == 64) {
4222 tcg_gen_shri_i64(t, a, 63);
4223 } else {
4224 gen_urshr64_i64(t, a, sh);
4225 }
4226 tcg_gen_add_i64(d, d, t);
4227 tcg_temp_free_i64(t);
4228 }
4229
4230 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4231 {
4232 TCGv_vec t = tcg_temp_new_vec_matching(d);
4233
4234 if (sh == (8 << vece)) {
4235 tcg_gen_shri_vec(vece, t, a, sh - 1);
4236 } else {
4237 gen_urshr_vec(vece, t, a, sh);
4238 }
4239 tcg_gen_add_vec(vece, d, d, t);
4240 tcg_temp_free_vec(t);
4241 }
4242
4243 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4244 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4245 {
4246 static const TCGOpcode vecop_list[] = {
4247 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4248 };
4249 static const GVecGen2i ops[4] = {
4250 { .fni8 = gen_ursra8_i64,
4251 .fniv = gen_ursra_vec,
4252 .fno = gen_helper_gvec_ursra_b,
4253 .opt_opc = vecop_list,
4254 .load_dest = true,
4255 .vece = MO_8 },
4256 { .fni8 = gen_ursra16_i64,
4257 .fniv = gen_ursra_vec,
4258 .fno = gen_helper_gvec_ursra_h,
4259 .opt_opc = vecop_list,
4260 .load_dest = true,
4261 .vece = MO_16 },
4262 { .fni4 = gen_ursra32_i32,
4263 .fniv = gen_ursra_vec,
4264 .fno = gen_helper_gvec_ursra_s,
4265 .opt_opc = vecop_list,
4266 .load_dest = true,
4267 .vece = MO_32 },
4268 { .fni8 = gen_ursra64_i64,
4269 .fniv = gen_ursra_vec,
4270 .fno = gen_helper_gvec_ursra_d,
4271 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4272 .opt_opc = vecop_list,
4273 .load_dest = true,
4274 .vece = MO_64 },
4275 };
4276
4277 /* tszimm encoding produces immediates in the range [1..esize] */
4278 tcg_debug_assert(shift > 0);
4279 tcg_debug_assert(shift <= (8 << vece));
4280
4281 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4282 }
4283
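/*
 * SRI (shift right and insert): roughly, the low (esize - shift) bits
 * of each destination element are replaced by (a >> shift), while the
 * top 'shift' bits of the destination are preserved.
 */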
4284 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4285 {
4286 uint64_t mask = dup_const(MO_8, 0xff >> shift);
4287 TCGv_i64 t = tcg_temp_new_i64();
4288
4289 tcg_gen_shri_i64(t, a, shift);
4290 tcg_gen_andi_i64(t, t, mask);
4291 tcg_gen_andi_i64(d, d, ~mask);
4292 tcg_gen_or_i64(d, d, t);
4293 tcg_temp_free_i64(t);
4294 }
4295
4296 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4297 {
4298 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4299 TCGv_i64 t = tcg_temp_new_i64();
4300
4301 tcg_gen_shri_i64(t, a, shift);
4302 tcg_gen_andi_i64(t, t, mask);
4303 tcg_gen_andi_i64(d, d, ~mask);
4304 tcg_gen_or_i64(d, d, t);
4305 tcg_temp_free_i64(t);
4306 }
4307
4308 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4309 {
4310 tcg_gen_shri_i32(a, a, shift);
4311 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4312 }
4313
4314 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4315 {
4316 tcg_gen_shri_i64(a, a, shift);
4317 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4318 }
4319
4320 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4321 {
4322 TCGv_vec t = tcg_temp_new_vec_matching(d);
4323 TCGv_vec m = tcg_temp_new_vec_matching(d);
4324
4325 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4326 tcg_gen_shri_vec(vece, t, a, sh);
4327 tcg_gen_and_vec(vece, d, d, m);
4328 tcg_gen_or_vec(vece, d, d, t);
4329
4330 tcg_temp_free_vec(t);
4331 tcg_temp_free_vec(m);
4332 }
4333
4334 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4335 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4336 {
4337 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
4338 const GVecGen2i ops[4] = {
4339 { .fni8 = gen_shr8_ins_i64,
4340 .fniv = gen_shr_ins_vec,
4341 .fno = gen_helper_gvec_sri_b,
4342 .load_dest = true,
4343 .opt_opc = vecop_list,
4344 .vece = MO_8 },
4345 { .fni8 = gen_shr16_ins_i64,
4346 .fniv = gen_shr_ins_vec,
4347 .fno = gen_helper_gvec_sri_h,
4348 .load_dest = true,
4349 .opt_opc = vecop_list,
4350 .vece = MO_16 },
4351 { .fni4 = gen_shr32_ins_i32,
4352 .fniv = gen_shr_ins_vec,
4353 .fno = gen_helper_gvec_sri_s,
4354 .load_dest = true,
4355 .opt_opc = vecop_list,
4356 .vece = MO_32 },
4357 { .fni8 = gen_shr64_ins_i64,
4358 .fniv = gen_shr_ins_vec,
4359 .fno = gen_helper_gvec_sri_d,
4360 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4361 .load_dest = true,
4362 .opt_opc = vecop_list,
4363 .vece = MO_64 },
4364 };
4365
4366 /* tszimm encoding produces immediates in the range [1..esize]. */
4367 tcg_debug_assert(shift > 0);
4368 tcg_debug_assert(shift <= (8 << vece));
4369
4370 /* Shift of esize leaves destination unchanged. */
4371 if (shift < (8 << vece)) {
4372 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4373 } else {
4374 /* Nop, but we do need to clear the tail. */
4375 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4376 }
4377 }
4378
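/*
 * SLI (shift left and insert): the mirror image of SRI above; the top
 * (esize - shift) bits of each destination element are replaced by
 * (a << shift), while the low 'shift' bits are preserved.
 */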
4379 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4380 {
4381 uint64_t mask = dup_const(MO_8, 0xff << shift);
4382 TCGv_i64 t = tcg_temp_new_i64();
4383
4384 tcg_gen_shli_i64(t, a, shift);
4385 tcg_gen_andi_i64(t, t, mask);
4386 tcg_gen_andi_i64(d, d, ~mask);
4387 tcg_gen_or_i64(d, d, t);
4388 tcg_temp_free_i64(t);
4389 }
4390
4391 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4392 {
4393 uint64_t mask = dup_const(MO_16, 0xffff << shift);
4394 TCGv_i64 t = tcg_temp_new_i64();
4395
4396 tcg_gen_shli_i64(t, a, shift);
4397 tcg_gen_andi_i64(t, t, mask);
4398 tcg_gen_andi_i64(d, d, ~mask);
4399 tcg_gen_or_i64(d, d, t);
4400 tcg_temp_free_i64(t);
4401 }
4402
4403 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4404 {
4405 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4406 }
4407
4408 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4409 {
4410 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
4411 }
4412
4413 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4414 {
4415 TCGv_vec t = tcg_temp_new_vec_matching(d);
4416 TCGv_vec m = tcg_temp_new_vec_matching(d);
4417
4418 tcg_gen_shli_vec(vece, t, a, sh);
4419 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4420 tcg_gen_and_vec(vece, d, d, m);
4421 tcg_gen_or_vec(vece, d, d, t);
4422
4423 tcg_temp_free_vec(t);
4424 tcg_temp_free_vec(m);
4425 }
4426
4427 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4428 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4429 {
4430 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
4431 const GVecGen2i ops[4] = {
4432 { .fni8 = gen_shl8_ins_i64,
4433 .fniv = gen_shl_ins_vec,
4434 .fno = gen_helper_gvec_sli_b,
4435 .load_dest = true,
4436 .opt_opc = vecop_list,
4437 .vece = MO_8 },
4438 { .fni8 = gen_shl16_ins_i64,
4439 .fniv = gen_shl_ins_vec,
4440 .fno = gen_helper_gvec_sli_h,
4441 .load_dest = true,
4442 .opt_opc = vecop_list,
4443 .vece = MO_16 },
4444 { .fni4 = gen_shl32_ins_i32,
4445 .fniv = gen_shl_ins_vec,
4446 .fno = gen_helper_gvec_sli_s,
4447 .load_dest = true,
4448 .opt_opc = vecop_list,
4449 .vece = MO_32 },
4450 { .fni8 = gen_shl64_ins_i64,
4451 .fniv = gen_shl_ins_vec,
4452 .fno = gen_helper_gvec_sli_d,
4453 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4454 .load_dest = true,
4455 .opt_opc = vecop_list,
4456 .vece = MO_64 },
4457 };
4458
4459 /* tszimm encoding produces immediates in the range [0..esize-1]. */
4460 tcg_debug_assert(shift >= 0);
4461 tcg_debug_assert(shift < (8 << vece));
4462
4463 if (shift == 0) {
4464 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
4465 } else {
4466 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4467 }
4468 }
4469
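/*
 * Multiply-accumulate helpers: each element computes d = d + a * b
 * (MLA) or d = d - a * b (MLS). The scalar forms below clobber 'a'
 * with the product to avoid a temporary.
 */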
4470 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4471 {
4472 gen_helper_neon_mul_u8(a, a, b);
4473 gen_helper_neon_add_u8(d, d, a);
4474 }
4475
4476 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4477 {
4478 gen_helper_neon_mul_u8(a, a, b);
4479 gen_helper_neon_sub_u8(d, d, a);
4480 }
4481
4482 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4483 {
4484 gen_helper_neon_mul_u16(a, a, b);
4485 gen_helper_neon_add_u16(d, d, a);
4486 }
4487
4488 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4489 {
4490 gen_helper_neon_mul_u16(a, a, b);
4491 gen_helper_neon_sub_u16(d, d, a);
4492 }
4493
4494 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4495 {
4496 tcg_gen_mul_i32(a, a, b);
4497 tcg_gen_add_i32(d, d, a);
4498 }
4499
4500 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4501 {
4502 tcg_gen_mul_i32(a, a, b);
4503 tcg_gen_sub_i32(d, d, a);
4504 }
4505
4506 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4507 {
4508 tcg_gen_mul_i64(a, a, b);
4509 tcg_gen_add_i64(d, d, a);
4510 }
4511
4512 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4513 {
4514 tcg_gen_mul_i64(a, a, b);
4515 tcg_gen_sub_i64(d, d, a);
4516 }
4517
4518 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4519 {
4520 tcg_gen_mul_vec(vece, a, a, b);
4521 tcg_gen_add_vec(vece, d, d, a);
4522 }
4523
4524 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4525 {
4526 tcg_gen_mul_vec(vece, a, a, b);
4527 tcg_gen_sub_vec(vece, d, d, a);
4528 }
4529
4530 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4531 * these tables are shared with AArch64 which does support them.
4532 */
4533 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4534 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4535 {
4536 static const TCGOpcode vecop_list[] = {
4537 INDEX_op_mul_vec, INDEX_op_add_vec, 0
4538 };
4539 static const GVecGen3 ops[4] = {
4540 { .fni4 = gen_mla8_i32,
4541 .fniv = gen_mla_vec,
4542 .load_dest = true,
4543 .opt_opc = vecop_list,
4544 .vece = MO_8 },
4545 { .fni4 = gen_mla16_i32,
4546 .fniv = gen_mla_vec,
4547 .load_dest = true,
4548 .opt_opc = vecop_list,
4549 .vece = MO_16 },
4550 { .fni4 = gen_mla32_i32,
4551 .fniv = gen_mla_vec,
4552 .load_dest = true,
4553 .opt_opc = vecop_list,
4554 .vece = MO_32 },
4555 { .fni8 = gen_mla64_i64,
4556 .fniv = gen_mla_vec,
4557 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4558 .load_dest = true,
4559 .opt_opc = vecop_list,
4560 .vece = MO_64 },
4561 };
4562 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4563 }
4564
4565 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4566 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4567 {
4568 static const TCGOpcode vecop_list[] = {
4569 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4570 };
4571 static const GVecGen3 ops[4] = {
4572 { .fni4 = gen_mls8_i32,
4573 .fniv = gen_mls_vec,
4574 .load_dest = true,
4575 .opt_opc = vecop_list,
4576 .vece = MO_8 },
4577 { .fni4 = gen_mls16_i32,
4578 .fniv = gen_mls_vec,
4579 .load_dest = true,
4580 .opt_opc = vecop_list,
4581 .vece = MO_16 },
4582 { .fni4 = gen_mls32_i32,
4583 .fniv = gen_mls_vec,
4584 .load_dest = true,
4585 .opt_opc = vecop_list,
4586 .vece = MO_32 },
4587 { .fni8 = gen_mls64_i64,
4588 .fniv = gen_mls_vec,
4589 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4590 .load_dest = true,
4591 .opt_opc = vecop_list,
4592 .vece = MO_64 },
4593 };
4594 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4595 }
4596
4597 /* CMTST : test is "if ((X & Y) != 0)". */
4598 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4599 {
4600 tcg_gen_and_i32(d, a, b);
4601 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4602 tcg_gen_neg_i32(d, d);
4603 }
4604
4605 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4606 {
4607 tcg_gen_and_i64(d, a, b);
4608 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4609 tcg_gen_neg_i64(d, d);
4610 }
4611
4612 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4613 {
4614 tcg_gen_and_vec(vece, d, a, b);
4615 tcg_gen_dupi_vec(vece, a, 0);
4616 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4617 }
4618
4619 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4620 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4621 {
4622 static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
4623 static const GVecGen3 ops[4] = {
4624 { .fni4 = gen_helper_neon_tst_u8,
4625 .fniv = gen_cmtst_vec,
4626 .opt_opc = vecop_list,
4627 .vece = MO_8 },
4628 { .fni4 = gen_helper_neon_tst_u16,
4629 .fniv = gen_cmtst_vec,
4630 .opt_opc = vecop_list,
4631 .vece = MO_16 },
4632 { .fni4 = gen_cmtst_i32,
4633 .fniv = gen_cmtst_vec,
4634 .opt_opc = vecop_list,
4635 .vece = MO_32 },
4636 { .fni8 = gen_cmtst_i64,
4637 .fniv = gen_cmtst_vec,
4638 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4639 .opt_opc = vecop_list,
4640 .vece = MO_64 },
4641 };
4642 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4643 }
4644
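/*
 * USHL takes a signed, per-element shift count: a positive count
 * shifts left, a negative count shifts right, and a magnitude of
 * esize or more produces zero. Only the low byte of the count is
 * significant, hence the 8-bit sign extension below.
 */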
4645 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4646 {
4647 TCGv_i32 lval = tcg_temp_new_i32();
4648 TCGv_i32 rval = tcg_temp_new_i32();
4649 TCGv_i32 lsh = tcg_temp_new_i32();
4650 TCGv_i32 rsh = tcg_temp_new_i32();
4651 TCGv_i32 zero = tcg_const_i32(0);
4652 TCGv_i32 max = tcg_const_i32(32);
4653
4654 /*
4655 * Rely on the TCG guarantee that out of range shifts produce
4656 * unspecified results, not undefined behaviour (i.e. no trap).
4657 * Discard out-of-range results after the fact.
4658 */
4659 tcg_gen_ext8s_i32(lsh, shift);
4660 tcg_gen_neg_i32(rsh, lsh);
4661 tcg_gen_shl_i32(lval, src, lsh);
4662 tcg_gen_shr_i32(rval, src, rsh);
4663 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4664 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4665
4666 tcg_temp_free_i32(lval);
4667 tcg_temp_free_i32(rval);
4668 tcg_temp_free_i32(lsh);
4669 tcg_temp_free_i32(rsh);
4670 tcg_temp_free_i32(zero);
4671 tcg_temp_free_i32(max);
4672 }
4673
4674 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4675 {
4676 TCGv_i64 lval = tcg_temp_new_i64();
4677 TCGv_i64 rval = tcg_temp_new_i64();
4678 TCGv_i64 lsh = tcg_temp_new_i64();
4679 TCGv_i64 rsh = tcg_temp_new_i64();
4680 TCGv_i64 zero = tcg_const_i64(0);
4681 TCGv_i64 max = tcg_const_i64(64);
4682
4683 /*
4684 * Rely on the TCG guarantee that out of range shifts produce
4685 * unspecified results, not undefined behaviour (i.e. no trap).
4686 * Discard out-of-range results after the fact.
4687 */
4688 tcg_gen_ext8s_i64(lsh, shift);
4689 tcg_gen_neg_i64(rsh, lsh);
4690 tcg_gen_shl_i64(lval, src, lsh);
4691 tcg_gen_shr_i64(rval, src, rsh);
4692 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4693 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4694
4695 tcg_temp_free_i64(lval);
4696 tcg_temp_free_i64(rval);
4697 tcg_temp_free_i64(lsh);
4698 tcg_temp_free_i64(rsh);
4699 tcg_temp_free_i64(zero);
4700 tcg_temp_free_i64(max);
4701 }
4702
4703 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4704 TCGv_vec src, TCGv_vec shift)
4705 {
4706 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4707 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4708 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4709 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4710 TCGv_vec msk, max;
4711
4712 tcg_gen_neg_vec(vece, rsh, shift);
4713 if (vece == MO_8) {
4714 tcg_gen_mov_vec(lsh, shift);
4715 } else {
4716 msk = tcg_temp_new_vec_matching(dst);
4717 tcg_gen_dupi_vec(vece, msk, 0xff);
4718 tcg_gen_and_vec(vece, lsh, shift, msk);
4719 tcg_gen_and_vec(vece, rsh, rsh, msk);
4720 tcg_temp_free_vec(msk);
4721 }
4722
4723 /*
4724 * Rely on the TCG guarantee that out of range shifts produce
4725 * unspecified results, not undefined behaviour (i.e. no trap).
4726 * Discard out-of-range results after the fact.
4727 */
4728 tcg_gen_shlv_vec(vece, lval, src, lsh);
4729 tcg_gen_shrv_vec(vece, rval, src, rsh);
4730
4731 max = tcg_temp_new_vec_matching(dst);
4732 tcg_gen_dupi_vec(vece, max, 8 << vece);
4733
4734 /*
4735 * The choice of LT (signed) and GEU (unsigned) is biased toward
4736 * the instructions of the x86_64 host. For MO_8, the whole byte
4737 * is significant so we must use an unsigned compare; otherwise we
4738 * have already masked to a byte and so a signed compare works.
4739 * Other tcg hosts have a full set of comparisons and do not care.
4740 */
4741 if (vece == MO_8) {
4742 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4743 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4744 tcg_gen_andc_vec(vece, lval, lval, lsh);
4745 tcg_gen_andc_vec(vece, rval, rval, rsh);
4746 } else {
4747 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4748 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4749 tcg_gen_and_vec(vece, lval, lval, lsh);
4750 tcg_gen_and_vec(vece, rval, rval, rsh);
4751 }
4752 tcg_gen_or_vec(vece, dst, lval, rval);
4753
4754 tcg_temp_free_vec(max);
4755 tcg_temp_free_vec(lval);
4756 tcg_temp_free_vec(rval);
4757 tcg_temp_free_vec(lsh);
4758 tcg_temp_free_vec(rsh);
4759 }
4760
4761 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4762 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4763 {
4764 static const TCGOpcode vecop_list[] = {
4765 INDEX_op_neg_vec, INDEX_op_shlv_vec,
4766 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4767 };
4768 static const GVecGen3 ops[4] = {
4769 { .fniv = gen_ushl_vec,
4770 .fno = gen_helper_gvec_ushl_b,
4771 .opt_opc = vecop_list,
4772 .vece = MO_8 },
4773 { .fniv = gen_ushl_vec,
4774 .fno = gen_helper_gvec_ushl_h,
4775 .opt_opc = vecop_list,
4776 .vece = MO_16 },
4777 { .fni4 = gen_ushl_i32,
4778 .fniv = gen_ushl_vec,
4779 .opt_opc = vecop_list,
4780 .vece = MO_32 },
4781 { .fni8 = gen_ushl_i64,
4782 .fniv = gen_ushl_vec,
4783 .opt_opc = vecop_list,
4784 .vece = MO_64 },
4785 };
4786 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4787 }
4788
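/*
 * SSHL is the signed counterpart: a negative count performs an
 * arithmetic shift right, clamped so that right shifts of esize or
 * more replicate the sign bit rather than producing zero.
 */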
4789 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4790 {
4791 TCGv_i32 lval = tcg_temp_new_i32();
4792 TCGv_i32 rval = tcg_temp_new_i32();
4793 TCGv_i32 lsh = tcg_temp_new_i32();
4794 TCGv_i32 rsh = tcg_temp_new_i32();
4795 TCGv_i32 zero = tcg_const_i32(0);
4796 TCGv_i32 max = tcg_const_i32(31);
4797
4798 /*
4799 * Rely on the TCG guarantee that out of range shifts produce
4800 * unspecified results, not undefined behaviour (i.e. no trap).
4801 * Discard out-of-range results after the fact.
4802 */
4803 tcg_gen_ext8s_i32(lsh, shift);
4804 tcg_gen_neg_i32(rsh, lsh);
4805 tcg_gen_shl_i32(lval, src, lsh);
4806 tcg_gen_umin_i32(rsh, rsh, max);
4807 tcg_gen_sar_i32(rval, src, rsh);
4808 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4809 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4810
4811 tcg_temp_free_i32(lval);
4812 tcg_temp_free_i32(rval);
4813 tcg_temp_free_i32(lsh);
4814 tcg_temp_free_i32(rsh);
4815 tcg_temp_free_i32(zero);
4816 tcg_temp_free_i32(max);
4817 }
4818
4819 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4820 {
4821 TCGv_i64 lval = tcg_temp_new_i64();
4822 TCGv_i64 rval = tcg_temp_new_i64();
4823 TCGv_i64 lsh = tcg_temp_new_i64();
4824 TCGv_i64 rsh = tcg_temp_new_i64();
4825 TCGv_i64 zero = tcg_const_i64(0);
4826 TCGv_i64 max = tcg_const_i64(63);
4827
4828 /*
4829 * Rely on the TCG guarantee that out of range shifts produce
4830 * unspecified results, not undefined behaviour (i.e. no trap).
4831 * Discard out-of-range results after the fact.
4832 */
4833 tcg_gen_ext8s_i64(lsh, shift);
4834 tcg_gen_neg_i64(rsh, lsh);
4835 tcg_gen_shl_i64(lval, src, lsh);
4836 tcg_gen_umin_i64(rsh, rsh, max);
4837 tcg_gen_sar_i64(rval, src, rsh);
4838 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4839 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4840
4841 tcg_temp_free_i64(lval);
4842 tcg_temp_free_i64(rval);
4843 tcg_temp_free_i64(lsh);
4844 tcg_temp_free_i64(rsh);
4845 tcg_temp_free_i64(zero);
4846 tcg_temp_free_i64(max);
4847 }
4848
4849 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4850 TCGv_vec src, TCGv_vec shift)
4851 {
4852 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4853 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4854 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4855 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4856 TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4857
4858 /*
4859 * Rely on the TCG guarantee that out of range shifts produce
4860 * unspecified results, not undefined behaviour (i.e. no trap).
4861 * Discard out-of-range results after the fact.
4862 */
4863 tcg_gen_neg_vec(vece, rsh, shift);
4864 if (vece == MO_8) {
4865 tcg_gen_mov_vec(lsh, shift);
4866 } else {
4867 tcg_gen_dupi_vec(vece, tmp, 0xff);
4868 tcg_gen_and_vec(vece, lsh, shift, tmp);
4869 tcg_gen_and_vec(vece, rsh, rsh, tmp);
4870 }
4871
4872 /* Clamp rsh so an out-of-range right shift yields just the sign (0 or -1). */
4873 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4874 tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4875 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4876
4877 tcg_gen_shlv_vec(vece, lval, src, lsh);
4878 tcg_gen_sarv_vec(vece, rval, src, rsh);
4879
4880 /* Select in-bound left shift. */
4881 tcg_gen_andc_vec(vece, lval, lval, tmp);
4882
4883 /* Select between left and right shift. */
4884 if (vece == MO_8) {
4885 tcg_gen_dupi_vec(vece, tmp, 0);
4886 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4887 } else {
4888 tcg_gen_dupi_vec(vece, tmp, 0x80);
4889 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4890 }
4891
4892 tcg_temp_free_vec(lval);
4893 tcg_temp_free_vec(rval);
4894 tcg_temp_free_vec(lsh);
4895 tcg_temp_free_vec(rsh);
4896 tcg_temp_free_vec(tmp);
4897 }
4898
4899 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4900 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4901 {
4902 static const TCGOpcode vecop_list[] = {
4903 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4904 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4905 };
4906 static const GVecGen3 ops[4] = {
4907 { .fniv = gen_sshl_vec,
4908 .fno = gen_helper_gvec_sshl_b,
4909 .opt_opc = vecop_list,
4910 .vece = MO_8 },
4911 { .fniv = gen_sshl_vec,
4912 .fno = gen_helper_gvec_sshl_h,
4913 .opt_opc = vecop_list,
4914 .vece = MO_16 },
4915 { .fni4 = gen_sshl_i32,
4916 .fniv = gen_sshl_vec,
4917 .opt_opc = vecop_list,
4918 .vece = MO_32 },
4919 { .fni8 = gen_sshl_i64,
4920 .fniv = gen_sshl_vec,
4921 .opt_opc = vecop_list,
4922 .vece = MO_64 },
4923 };
4924 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4925 }
4926
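/*
 * Saturating add/sub with QC: compute both the saturating and the
 * wrapping result, compare them, and OR any difference into the
 * cumulative saturation flag (vfp.qc), passed as the extra 'sat'
 * operand via write_aofs.
 */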
4927 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4928 TCGv_vec a, TCGv_vec b)
4929 {
4930 TCGv_vec x = tcg_temp_new_vec_matching(t);
4931 tcg_gen_add_vec(vece, x, a, b);
4932 tcg_gen_usadd_vec(vece, t, a, b);
4933 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4934 tcg_gen_or_vec(vece, sat, sat, x);
4935 tcg_temp_free_vec(x);
4936 }
4937
4938 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4939 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4940 {
4941 static const TCGOpcode vecop_list[] = {
4942 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4943 };
4944 static const GVecGen4 ops[4] = {
4945 { .fniv = gen_uqadd_vec,
4946 .fno = gen_helper_gvec_uqadd_b,
4947 .write_aofs = true,
4948 .opt_opc = vecop_list,
4949 .vece = MO_8 },
4950 { .fniv = gen_uqadd_vec,
4951 .fno = gen_helper_gvec_uqadd_h,
4952 .write_aofs = true,
4953 .opt_opc = vecop_list,
4954 .vece = MO_16 },
4955 { .fniv = gen_uqadd_vec,
4956 .fno = gen_helper_gvec_uqadd_s,
4957 .write_aofs = true,
4958 .opt_opc = vecop_list,
4959 .vece = MO_32 },
4960 { .fniv = gen_uqadd_vec,
4961 .fno = gen_helper_gvec_uqadd_d,
4962 .write_aofs = true,
4963 .opt_opc = vecop_list,
4964 .vece = MO_64 },
4965 };
4966 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4967 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4968 }
4969
4970 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4971 TCGv_vec a, TCGv_vec b)
4972 {
4973 TCGv_vec x = tcg_temp_new_vec_matching(t);
4974 tcg_gen_add_vec(vece, x, a, b);
4975 tcg_gen_ssadd_vec(vece, t, a, b);
4976 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4977 tcg_gen_or_vec(vece, sat, sat, x);
4978 tcg_temp_free_vec(x);
4979 }
4980
4981 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4982 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4983 {
4984 static const TCGOpcode vecop_list[] = {
4985 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4986 };
4987 static const GVecGen4 ops[4] = {
4988 { .fniv = gen_sqadd_vec,
4989 .fno = gen_helper_gvec_sqadd_b,
4990 .opt_opc = vecop_list,
4991 .write_aofs = true,
4992 .vece = MO_8 },
4993 { .fniv = gen_sqadd_vec,
4994 .fno = gen_helper_gvec_sqadd_h,
4995 .opt_opc = vecop_list,
4996 .write_aofs = true,
4997 .vece = MO_16 },
4998 { .fniv = gen_sqadd_vec,
4999 .fno = gen_helper_gvec_sqadd_s,
5000 .opt_opc = vecop_list,
5001 .write_aofs = true,
5002 .vece = MO_32 },
5003 { .fniv = gen_sqadd_vec,
5004 .fno = gen_helper_gvec_sqadd_d,
5005 .opt_opc = vecop_list,
5006 .write_aofs = true,
5007 .vece = MO_64 },
5008 };
5009 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5010 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5011 }
5012
5013 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5014 TCGv_vec a, TCGv_vec b)
5015 {
5016 TCGv_vec x = tcg_temp_new_vec_matching(t);
5017 tcg_gen_sub_vec(vece, x, a, b);
5018 tcg_gen_ussub_vec(vece, t, a, b);
5019 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5020 tcg_gen_or_vec(vece, sat, sat, x);
5021 tcg_temp_free_vec(x);
5022 }
5023
5024 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5025 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5026 {
5027 static const TCGOpcode vecop_list[] = {
5028 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
5029 };
5030 static const GVecGen4 ops[4] = {
5031 { .fniv = gen_uqsub_vec,
5032 .fno = gen_helper_gvec_uqsub_b,
5033 .opt_opc = vecop_list,
5034 .write_aofs = true,
5035 .vece = MO_8 },
5036 { .fniv = gen_uqsub_vec,
5037 .fno = gen_helper_gvec_uqsub_h,
5038 .opt_opc = vecop_list,
5039 .write_aofs = true,
5040 .vece = MO_16 },
5041 { .fniv = gen_uqsub_vec,
5042 .fno = gen_helper_gvec_uqsub_s,
5043 .opt_opc = vecop_list,
5044 .write_aofs = true,
5045 .vece = MO_32 },
5046 { .fniv = gen_uqsub_vec,
5047 .fno = gen_helper_gvec_uqsub_d,
5048 .opt_opc = vecop_list,
5049 .write_aofs = true,
5050 .vece = MO_64 },
5051 };
5052 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5053 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5054 }
5055
5056 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5057 TCGv_vec a, TCGv_vec b)
5058 {
5059 TCGv_vec x = tcg_temp_new_vec_matching(t);
5060 tcg_gen_sub_vec(vece, x, a, b);
5061 tcg_gen_sssub_vec(vece, t, a, b);
5062 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5063 tcg_gen_or_vec(vece, sat, sat, x);
5064 tcg_temp_free_vec(x);
5065 }
5066
5067 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5068 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5069 {
5070 static const TCGOpcode vecop_list[] = {
5071 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
5072 };
5073 static const GVecGen4 ops[4] = {
5074 { .fniv = gen_sqsub_vec,
5075 .fno = gen_helper_gvec_sqsub_b,
5076 .opt_opc = vecop_list,
5077 .write_aofs = true,
5078 .vece = MO_8 },
5079 { .fniv = gen_sqsub_vec,
5080 .fno = gen_helper_gvec_sqsub_h,
5081 .opt_opc = vecop_list,
5082 .write_aofs = true,
5083 .vece = MO_16 },
5084 { .fniv = gen_sqsub_vec,
5085 .fno = gen_helper_gvec_sqsub_s,
5086 .opt_opc = vecop_list,
5087 .write_aofs = true,
5088 .vece = MO_32 },
5089 { .fniv = gen_sqsub_vec,
5090 .fno = gen_helper_gvec_sqsub_d,
5091 .opt_opc = vecop_list,
5092 .write_aofs = true,
5093 .vece = MO_64 },
5094 };
5095 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5096 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5097 }
5098
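/*
 * SABD (signed absolute difference): each element computes |a - b|.
 * The vector form uses smax(a, b) - smin(a, b); the scalar forms
 * select between (a - b) and (b - a) with a movcond.
 */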
5099 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5100 {
5101 TCGv_i32 t = tcg_temp_new_i32();
5102
5103 tcg_gen_sub_i32(t, a, b);
5104 tcg_gen_sub_i32(d, b, a);
5105 tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
5106 tcg_temp_free_i32(t);
5107 }
5108
5109 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5110 {
5111 TCGv_i64 t = tcg_temp_new_i64();
5112
5113 tcg_gen_sub_i64(t, a, b);
5114 tcg_gen_sub_i64(d, b, a);
5115 tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
5116 tcg_temp_free_i64(t);
5117 }
5118
5119 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5120 {
5121 TCGv_vec t = tcg_temp_new_vec_matching(d);
5122
5123 tcg_gen_smin_vec(vece, t, a, b);
5124 tcg_gen_smax_vec(vece, d, a, b);
5125 tcg_gen_sub_vec(vece, d, d, t);
5126 tcg_temp_free_vec(t);
5127 }
5128
5129 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5130 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5131 {
5132 static const TCGOpcode vecop_list[] = {
5133 INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
5134 };
5135 static const GVecGen3 ops[4] = {
5136 { .fniv = gen_sabd_vec,
5137 .fno = gen_helper_gvec_sabd_b,
5138 .opt_opc = vecop_list,
5139 .vece = MO_8 },
5140 { .fniv = gen_sabd_vec,
5141 .fno = gen_helper_gvec_sabd_h,
5142 .opt_opc = vecop_list,
5143 .vece = MO_16 },
5144 { .fni4 = gen_sabd_i32,
5145 .fniv = gen_sabd_vec,
5146 .fno = gen_helper_gvec_sabd_s,
5147 .opt_opc = vecop_list,
5148 .vece = MO_32 },
5149 { .fni8 = gen_sabd_i64,
5150 .fniv = gen_sabd_vec,
5151 .fno = gen_helper_gvec_sabd_d,
5152 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5153 .opt_opc = vecop_list,
5154 .vece = MO_64 },
5155 };
5156 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5157 }
5158
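/*
 * UABD is the unsigned counterpart of SABD above, using unsigned
 * compares and umin/umax.
 */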
5159 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5160 {
5161 TCGv_i32 t = tcg_temp_new_i32();
5162
5163 tcg_gen_sub_i32(t, a, b);
5164 tcg_gen_sub_i32(d, b, a);
5165 tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
5166 tcg_temp_free_i32(t);
5167 }
5168
5169 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5170 {
5171 TCGv_i64 t = tcg_temp_new_i64();
5172
5173 tcg_gen_sub_i64(t, a, b);
5174 tcg_gen_sub_i64(d, b, a);
5175 tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
5176 tcg_temp_free_i64(t);
5177 }
5178
5179 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5180 {
5181 TCGv_vec t = tcg_temp_new_vec_matching(d);
5182
5183 tcg_gen_umin_vec(vece, t, a, b);
5184 tcg_gen_umax_vec(vece, d, a, b);
5185 tcg_gen_sub_vec(vece, d, d, t);
5186 tcg_temp_free_vec(t);
5187 }
5188
5189 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5190 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5191 {
5192 static const TCGOpcode vecop_list[] = {
5193 INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5194 };
5195 static const GVecGen3 ops[4] = {
5196 { .fniv = gen_uabd_vec,
5197 .fno = gen_helper_gvec_uabd_b,
5198 .opt_opc = vecop_list,
5199 .vece = MO_8 },
5200 { .fniv = gen_uabd_vec,
5201 .fno = gen_helper_gvec_uabd_h,
5202 .opt_opc = vecop_list,
5203 .vece = MO_16 },
5204 { .fni4 = gen_uabd_i32,
5205 .fniv = gen_uabd_vec,
5206 .fno = gen_helper_gvec_uabd_s,
5207 .opt_opc = vecop_list,
5208 .vece = MO_32 },
5209 { .fni8 = gen_uabd_i64,
5210 .fniv = gen_uabd_vec,
5211 .fno = gen_helper_gvec_uabd_d,
5212 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5213 .opt_opc = vecop_list,
5214 .vece = MO_64 },
5215 };
5216 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5217 }
5218
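/*
 * SABA/UABA (absolute difference and accumulate): d = d + |a - b|,
 * built from the SABD/UABD helpers above plus an add.
 */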
5219 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5220 {
5221 TCGv_i32 t = tcg_temp_new_i32();
5222 gen_sabd_i32(t, a, b);
5223 tcg_gen_add_i32(d, d, t);
5224 tcg_temp_free_i32(t);
5225 }
5226
5227 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5228 {
5229 TCGv_i64 t = tcg_temp_new_i64();
5230 gen_sabd_i64(t, a, b);
5231 tcg_gen_add_i64(d, d, t);
5232 tcg_temp_free_i64(t);
5233 }
5234
5235 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5236 {
5237 TCGv_vec t = tcg_temp_new_vec_matching(d);
5238 gen_sabd_vec(vece, t, a, b);
5239 tcg_gen_add_vec(vece, d, d, t);
5240 tcg_temp_free_vec(t);
5241 }
5242
5243 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5244 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5245 {
5246 static const TCGOpcode vecop_list[] = {
5247 INDEX_op_sub_vec, INDEX_op_add_vec,
5248 INDEX_op_smin_vec, INDEX_op_smax_vec, 0
5249 };
5250 static const GVecGen3 ops[4] = {
5251 { .fniv = gen_saba_vec,
5252 .fno = gen_helper_gvec_saba_b,
5253 .opt_opc = vecop_list,
5254 .load_dest = true,
5255 .vece = MO_8 },
5256 { .fniv = gen_saba_vec,
5257 .fno = gen_helper_gvec_saba_h,
5258 .opt_opc = vecop_list,
5259 .load_dest = true,
5260 .vece = MO_16 },
5261 { .fni4 = gen_saba_i32,
5262 .fniv = gen_saba_vec,
5263 .fno = gen_helper_gvec_saba_s,
5264 .opt_opc = vecop_list,
5265 .load_dest = true,
5266 .vece = MO_32 },
5267 { .fni8 = gen_saba_i64,
5268 .fniv = gen_saba_vec,
5269 .fno = gen_helper_gvec_saba_d,
5270 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5271 .opt_opc = vecop_list,
5272 .load_dest = true,
5273 .vece = MO_64 },
5274 };
5275 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5276 }
5277
5278 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5279 {
5280 TCGv_i32 t = tcg_temp_new_i32();
5281 gen_uabd_i32(t, a, b);
5282 tcg_gen_add_i32(d, d, t);
5283 tcg_temp_free_i32(t);
5284 }
5285
5286 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5287 {
5288 TCGv_i64 t = tcg_temp_new_i64();
5289 gen_uabd_i64(t, a, b);
5290 tcg_gen_add_i64(d, d, t);
5291 tcg_temp_free_i64(t);
5292 }
5293
5294 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5295 {
5296 TCGv_vec t = tcg_temp_new_vec_matching(d);
5297 gen_uabd_vec(vece, t, a, b);
5298 tcg_gen_add_vec(vece, d, d, t);
5299 tcg_temp_free_vec(t);
5300 }
5301
5302 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5303 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5304 {
5305 static const TCGOpcode vecop_list[] = {
5306 INDEX_op_sub_vec, INDEX_op_add_vec,
5307 INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5308 };
5309 static const GVecGen3 ops[4] = {
5310 { .fniv = gen_uaba_vec,
5311 .fno = gen_helper_gvec_uaba_b,
5312 .opt_opc = vecop_list,
5313 .load_dest = true,
5314 .vece = MO_8 },
5315 { .fniv = gen_uaba_vec,
5316 .fno = gen_helper_gvec_uaba_h,
5317 .opt_opc = vecop_list,
5318 .load_dest = true,
5319 .vece = MO_16 },
5320 { .fni4 = gen_uaba_i32,
5321 .fniv = gen_uaba_vec,
5322 .fno = gen_helper_gvec_uaba_s,
5323 .opt_opc = vecop_list,
5324 .load_dest = true,
5325 .vece = MO_32 },
5326 { .fni8 = gen_uaba_i64,
5327 .fniv = gen_uaba_vec,
5328 .fno = gen_helper_gvec_uaba_d,
5329 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5330 .opt_opc = vecop_list,
5331 .load_dest = true,
5332 .vece = MO_64 },
5333 };
5334 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5335 }
5336
5337 /* Translate a NEON data processing instruction. Return nonzero if the
5338 instruction is invalid.
5339 We process data in a mixture of 32-bit and 64-bit chunks.
5340 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
5341
5342 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5343 {
5344 int op;
5345 int q;
5346 int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
5347 int size;
5348 int shift;
5349 int pass;
5350 int count;
5351 int u;
5352 int vec_size;
5353 uint32_t imm;
5354 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5355 TCGv_ptr ptr1, ptr2;
5356 TCGv_i64 tmp64;
5357
5358 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
5359 return 1;
5360 }
5361
5362 /* FIXME: this access check should not take precedence over UNDEF
5363 * for invalid encodings; we will generate incorrect syndrome information
5364 * for attempts to execute invalid vfp/neon encodings with FP disabled.
5365 */
5366 if (s->fp_excp_el) {
5367 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
5368 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
5369 return 0;
5370 }
5371
5372 if (!s->vfp_enabled)
5373 return 1;
5374 q = (insn & (1 << 6)) != 0;
5375 u = (insn >> 24) & 1;
5376 VFP_DREG_D(rd, insn);
5377 VFP_DREG_N(rn, insn);
5378 VFP_DREG_M(rm, insn);
5379 size = (insn >> 20) & 3;
5380 vec_size = q ? 16 : 8;
5381 rd_ofs = neon_reg_offset(rd, 0);
5382 rn_ofs = neon_reg_offset(rn, 0);
5383 rm_ofs = neon_reg_offset(rm, 0);
5384
5385 if ((insn & (1 << 23)) == 0) {
5386 /* Three register same length. */
5387 op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
5388 /* Catch invalid op and bad size combinations: UNDEF */
5389 if ((neon_3r_sizes[op] & (1 << size)) == 0) {
5390 return 1;
5391 }
5392 /* All insns of this form UNDEF for either this condition or the
5393 * superset of cases "Q==1"; we catch the latter later.
5394 */
5395 if (q && ((rd | rn | rm) & 1)) {
5396 return 1;
5397 }
5398 switch (op) {
5399 case NEON_3R_VFM_VQRDMLSH:
5400 if (!u) {
5401 /* VFM, VFMS */
5402 if (size == 1) {
5403 return 1;
5404 }
5405 break;
5406 }
5407 /* VQRDMLSH : handled by decodetree */
5408 return 1;
5409
5410 case NEON_3R_VADD_VSUB:
5411 case NEON_3R_LOGIC:
5412 case NEON_3R_VMAX:
5413 case NEON_3R_VMIN:
5414 case NEON_3R_VTST_VCEQ:
5415 case NEON_3R_VCGT:
5416 case NEON_3R_VCGE:
5417 case NEON_3R_VQADD:
5418 case NEON_3R_VQSUB:
5419 case NEON_3R_VMUL:
5420 case NEON_3R_VML:
5421 case NEON_3R_VSHL:
5422 case NEON_3R_SHA:
5423 case NEON_3R_VHADD:
5424 case NEON_3R_VRHADD:
5425 case NEON_3R_VHSUB:
5426 case NEON_3R_VABD:
5427 case NEON_3R_VABA:
5428 case NEON_3R_VQSHL:
5429 case NEON_3R_VRSHL:
5430 case NEON_3R_VQRSHL:
5431 case NEON_3R_VPMAX:
5432 case NEON_3R_VPMIN:
5433 case NEON_3R_VPADD_VQRDMLAH:
5434 case NEON_3R_VQDMULH_VQRDMULH:
5435 case NEON_3R_FLOAT_ARITH:
5436 case NEON_3R_FLOAT_MULTIPLY:
5437 case NEON_3R_FLOAT_CMP:
5438 case NEON_3R_FLOAT_ACMP:
5439 /* Already handled by decodetree */
5440 return 1;
5441 }
5442
5443 if (size == 3) {
5444 /* 64-bit element instructions: handled by decodetree */
5445 return 1;
5446 }
5447 switch (op) {
5448 case NEON_3R_FLOAT_MINMAX:
5449 if (u) {
5450 return 1; /* VPMIN/VPMAX handled by decodetree */
5451 }
5452 break;
5453 case NEON_3R_FLOAT_MISC:
5454 /* VMAXNM/VMINNM in ARMv8 */
5455 if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
5456 return 1;
5457 }
5458 break;
5459 case NEON_3R_VFM_VQRDMLSH:
5460 if (!dc_isar_feature(aa32_simdfmac, s)) {
5461 return 1;
5462 }
5463 break;
5464 default:
5465 break;
5466 }
5467
5468 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5469
5470 /* Elementwise. */
5471 tmp = neon_load_reg(rn, pass);
5472 tmp2 = neon_load_reg(rm, pass);
5473 switch (op) {
5474 case NEON_3R_FLOAT_MINMAX:
5475 {
5476 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5477 if (size == 0) {
5478 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5479 } else {
5480 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5481 }
5482 tcg_temp_free_ptr(fpstatus);
5483 break;
5484 }
5485 case NEON_3R_FLOAT_MISC:
5486 if (u) {
5487 /* VMAXNM/VMINNM */
5488 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5489 if (size == 0) {
5490 gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5491 } else {
5492 gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5493 }
5494 tcg_temp_free_ptr(fpstatus);
5495 } else {
5496 if (size == 0) {
5497 gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5498 } else {
5499 gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5500 }
5501 }
5502 break;
5503 case NEON_3R_VFM_VQRDMLSH:
5504 {
5505 /* VFMA, VFMS: fused multiply-add */
5506 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5507 TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5508 if (size) {
5509 /* VFMS */
5510 gen_helper_vfp_negs(tmp, tmp);
5511 }
5512 gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5513 tcg_temp_free_i32(tmp3);
5514 tcg_temp_free_ptr(fpstatus);
5515 break;
5516 }
5517 default:
5518 abort();
5519 }
5520 tcg_temp_free_i32(tmp2);
5521
5522 neon_store_reg(rd, pass, tmp);
5523
5524 } /* for pass */
5525 /* End of 3 register same size operations. */
5526 } else if (insn & (1 << 4)) {
5527 if ((insn & 0x00380080) != 0) {
5528 /* Two registers and shift. */
5529 op = (insn >> 8) & 0xf;
5530 if (insn & (1 << 7)) {
5531 /* 64-bit shift. */
5532 if (op > 7) {
5533 return 1;
5534 }
5535 size = 3;
5536 } else {
5537 size = 2;
5538 while ((insn & (1 << (size + 19))) == 0)
5539 size--;
5540 }
5541 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5542 if (op < 8) {
5543 /* Shift by immediate:
5544 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
5545 if (q && ((rd | rm) & 1)) {
5546 return 1;
5547 }
5548 if (!u && (op == 4 || op == 6)) {
5549 return 1;
5550 }
5551 /* Right shifts are encoded as N - shift, where N is the
5552 element size in bits. */
5553 if (op <= 4) {
5554 shift = shift - (1 << (size + 3));
5555 }
5556
5557 switch (op) {
5558 case 0: /* VSHR */
5559 /* Right shift comes here negative. */
5560 shift = -shift;
5561 /* Shifts larger than the element size are architecturally
5562 * valid. Unsigned results in all zeros; signed results
5563 * in all sign bits.
5564 */
5565 if (!u) {
5566 tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
5567 MIN(shift, (8 << size) - 1),
5568 vec_size, vec_size);
5569 } else if (shift >= 8 << size) {
5570 tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size,
5571 vec_size, 0);
5572 } else {
5573 tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
5574 vec_size, vec_size);
5575 }
5576 return 0;
5577
5578 case 1: /* VSRA */
5579 /* Right shift comes here negative. */
5580 shift = -shift;
5581 if (u) {
5582 gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
5583 vec_size, vec_size);
5584 } else {
5585 gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
5586 vec_size, vec_size);
5587 }
5588 return 0;
5589
5590 case 2: /* VRSHR */
5591 /* Right shift comes here negative. */
5592 shift = -shift;
5593 if (u) {
5594 gen_gvec_urshr(size, rd_ofs, rm_ofs, shift,
5595 vec_size, vec_size);
5596 } else {
5597 gen_gvec_srshr(size, rd_ofs, rm_ofs, shift,
5598 vec_size, vec_size);
5599 }
5600 return 0;
5601
5602 case 3: /* VRSRA */
5603 /* Right shift comes here negative. */
5604 shift = -shift;
5605 if (u) {
5606 gen_gvec_ursra(size, rd_ofs, rm_ofs, shift,
5607 vec_size, vec_size);
5608 } else {
5609 gen_gvec_srsra(size, rd_ofs, rm_ofs, shift,
5610 vec_size, vec_size);
5611 }
5612 return 0;
5613
5614 case 4: /* VSRI */
5615 if (!u) {
5616 return 1;
5617 }
5618 /* Right shift comes here negative. */
5619 shift = -shift;
5620 gen_gvec_sri(size, rd_ofs, rm_ofs, shift,
5621 vec_size, vec_size);
5622 return 0;
5623
5624 case 5: /* VSHL, VSLI */
5625 if (u) { /* VSLI */
5626 gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
5627 vec_size, vec_size);
5628 } else { /* VSHL */
5629 tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
5630 vec_size, vec_size);
5631 }
5632 return 0;
5633 }
5634
5635 if (size == 3) {
5636 count = q + 1;
5637 } else {
5638 count = q ? 4 : 2;
5639 }
5640
5641 /* To avoid excessive duplication of ops we implement shift
5642 * by immediate using the variable shift operations.
5643 */
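/* As a concrete sketch: for size == 0 (bytes), dup_const replicates
 * the shift count into every byte lane (a count of 3 becomes
 * 0x03030303), so each lane of the variable-shift helper sees the
 * same immediate.
 */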
5644 imm = dup_const(size, shift);
5645
5646 for (pass = 0; pass < count; pass++) {
5647 if (size == 3) {
5648 neon_load_reg64(cpu_V0, rm + pass);
5649 tcg_gen_movi_i64(cpu_V1, imm);
5650 switch (op) {
5651 case 6: /* VQSHLU */
5652 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5653 cpu_V0, cpu_V1);
5654 break;
5655 case 7: /* VQSHL */
5656 if (u) {
5657 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5658 cpu_V0, cpu_V1);
5659 } else {
5660 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5661 cpu_V0, cpu_V1);
5662 }
5663 break;
5664 default:
5665 g_assert_not_reached();
5666 }
5667 neon_store_reg64(cpu_V0, rd + pass);
5668 } else { /* size < 3 */
5669 /* Operands: value in tmp, shift immediate in tmp2. */
5670 tmp = neon_load_reg(rm, pass);
5671 tmp2 = tcg_temp_new_i32();
5672 tcg_gen_movi_i32(tmp2, imm);
5673 switch (op) {
5674 case 6: /* VQSHLU */
5675 switch (size) {
5676 case 0:
5677 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5678 tmp, tmp2);
5679 break;
5680 case 1:
5681 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5682 tmp, tmp2);
5683 break;
5684 case 2:
5685 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5686 tmp, tmp2);
5687 break;
5688 default:
5689 abort();
5690 }
5691 break;
5692 case 7: /* VQSHL */
5693 GEN_NEON_INTEGER_OP_ENV(qshl);
5694 break;
5695 default:
5696 g_assert_not_reached();
5697 }
5698 tcg_temp_free_i32(tmp2);
5699 neon_store_reg(rd, pass, tmp);
5700 }
5701 } /* for pass */
5702 } else if (op < 10) {
5703 /* Shift by immediate and narrow:
5704 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
5705 int input_unsigned = (op == 8) ? !u : u;
5706 if (rm & 1) {
5707 return 1;
5708 }
5709 shift = shift - (1 << (size + 3));
5710 size++;
5711 if (size == 3) {
5712 tmp64 = tcg_const_i64(shift);
5713 neon_load_reg64(cpu_V0, rm);
5714 neon_load_reg64(cpu_V1, rm + 1);
5715 for (pass = 0; pass < 2; pass++) {
5716 TCGv_i64 in;
5717 if (pass == 0) {
5718 in = cpu_V0;
5719 } else {
5720 in = cpu_V1;
5721 }
5722 if (q) {
5723 if (input_unsigned) {
5724 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5725 } else {
5726 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5727 }
5728 } else {
5729 if (input_unsigned) {
5730 gen_ushl_i64(cpu_V0, in, tmp64);
5731 } else {
5732 gen_sshl_i64(cpu_V0, in, tmp64);
5733 }
5734 }
5735 tmp = tcg_temp_new_i32();
5736 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5737 neon_store_reg(rd, pass, tmp);
5738 } /* for pass */
5739 tcg_temp_free_i64(tmp64);
5740 } else {
5741 if (size == 1) {
5742 imm = (uint16_t)shift;
5743 imm |= imm << 16;
5744 } else {
5745 /* size == 2 */
5746 imm = (uint32_t)shift;
5747 }
5748 tmp2 = tcg_const_i32(imm);
5749 tmp4 = neon_load_reg(rm + 1, 0);
5750 tmp5 = neon_load_reg(rm + 1, 1);
5751 for (pass = 0; pass < 2; pass++) {
5752 if (pass == 0) {
5753 tmp = neon_load_reg(rm, 0);
5754 } else {
5755 tmp = tmp4;
5756 }
5757 gen_neon_shift_narrow(size, tmp, tmp2, q,
5758 input_unsigned);
5759 if (pass == 0) {
5760 tmp3 = neon_load_reg(rm, 1);
5761 } else {
5762 tmp3 = tmp5;
5763 }
5764 gen_neon_shift_narrow(size, tmp3, tmp2, q,
5765 input_unsigned);
5766 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5767 tcg_temp_free_i32(tmp);
5768 tcg_temp_free_i32(tmp3);
5769 tmp = tcg_temp_new_i32();
5770 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5771 neon_store_reg(rd, pass, tmp);
5772 } /* for pass */
5773 tcg_temp_free_i32(tmp2);
5774 }
5775 } else if (op == 10) {
5776 /* VSHLL, VMOVL */
5777 if (q || (rd & 1)) {
5778 return 1;
5779 }
5780 tmp = neon_load_reg(rm, 0);
5781 tmp2 = neon_load_reg(rm, 1);
5782 for (pass = 0; pass < 2; pass++) {
5783 if (pass == 1)
5784 tmp = tmp2;
5785
5786 gen_neon_widen(cpu_V0, tmp, size, u);
5787
5788 if (shift != 0) {
5789 /* The shift is less than the width of the source
5790 type, so we can just shift the whole register. */
5791 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5792 /* Widen the result of the shift: we need to clear
5793 * the potential overflow bits resulting from
5794 * left bits of the narrow input appearing as
5795 * right bits of the left-neighbouring narrow
5796 * input. */
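/* For example, VSHLL.S8 with shift == 3: imm becomes
 * 0xff >> 5 == 0x07, replicated to 0x0007000700070007, and the
 * AND below clears the three low bits of each 16-bit lane, which
 * after the whole-register shift hold bits spilled in from the
 * lane below.
 */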
5797 if (size < 2 || !u) {
5798 uint64_t imm64;
5799 if (size == 0) {
5800 imm = (0xffu >> (8 - shift));
5801 imm |= imm << 16;
5802 } else if (size == 1) {
5803 imm = 0xffff >> (16 - shift);
5804 } else {
5805 /* size == 2 */
5806 imm = 0xffffffff >> (32 - shift);
5807 }
5808 if (size < 2) {
5809 imm64 = imm | (((uint64_t)imm) << 32);
5810 } else {
5811 imm64 = imm;
5812 }
5813 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5814 }
5815 }
5816 neon_store_reg64(cpu_V0, rd + pass);
5817 }
5818 } else if (op >= 14) {
5819 /* VCVT fixed-point. */
5820 TCGv_ptr fpst;
5821 TCGv_i32 shiftv;
5822 VFPGenFixPointFn *fn;
5823
5824 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5825 return 1;
5826 }
5827
5828 if (!(op & 1)) {
5829 if (u) {
5830 fn = gen_helper_vfp_ultos;
5831 } else {
5832 fn = gen_helper_vfp_sltos;
5833 }
5834 } else {
5835 if (u) {
5836 fn = gen_helper_vfp_touls_round_to_zero;
5837 } else {
5838 fn = gen_helper_vfp_tosls_round_to_zero;
5839 }
5840 }
5841
5842 /* We have already masked out the must-be-1 top bit of imm6,
5843 * hence this 32-shift where the ARM ARM has 64-imm6.
5844 */
5845 shift = 32 - shift;
5846 fpst = get_fpstatus_ptr(1);
5847 shiftv = tcg_const_i32(shift);
5848 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5849 TCGv_i32 tmpf = neon_load_reg(rm, pass);
5850 fn(tmpf, tmpf, shiftv, fpst);
5851 neon_store_reg(rd, pass, tmpf);
5852 }
5853 tcg_temp_free_ptr(fpst);
5854 tcg_temp_free_i32(shiftv);
5855 } else {
5856 return 1;
5857 }
5858 } else { /* (insn & 0x00380080) == 0 */
5859 int invert, reg_ofs, vec_size;
5860
5861 if (q && (rd & 1)) {
5862 return 1;
5863 }
5864
5865 op = (insn >> 8) & 0xf;
5866 /* One register and immediate. */
5867 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5868 invert = (insn & (1 << 5)) != 0;
5869 /* Note that op = 2,3,4,5,6,7,10,11,12,13 with imm = 0 is UNPREDICTABLE.
5870 * We choose not to special-case this and will behave as if a
5871 * valid constant encoding of 0 had been given.
5872 */
5873 switch (op) {
5874 case 0: case 1:
5875 /* no-op */
5876 break;
5877 case 2: case 3:
5878 imm <<= 8;
5879 break;
5880 case 4: case 5:
5881 imm <<= 16;
5882 break;
5883 case 6: case 7:
5884 imm <<= 24;
5885 break;
5886 case 8: case 9:
5887 imm |= imm << 16;
5888 break;
5889 case 10: case 11:
5890 imm = (imm << 8) | (imm << 24);
5891 break;
5892 case 12:
5893 imm = (imm << 8) | 0xff;
5894 break;
5895 case 13:
5896 imm = (imm << 16) | 0xffff;
5897 break;
5898 case 14:
5899 imm |= (imm << 8) | (imm << 16) | (imm << 24);
5900 if (invert) {
5901 imm = ~imm;
5902 }
5903 break;
5904 case 15:
5905 if (invert) {
5906 return 1;
5907 }
5908 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
5909 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
5910 break;
5911 }
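/* imm now holds the expanded 32-bit AdvSIMD constant before any
 * inversion; e.g. op == 12 with an 8-bit value of 0x12 expands
 * to 0x12ff.
 */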
5912 if (invert) {
5913 imm = ~imm;
5914 }
5915
5916 reg_ofs = neon_reg_offset(rd, 0);
5917 vec_size = q ? 16 : 8;
5918
5919 if (op & 1 && op < 12) {
5920 if (invert) {
5921 /* The immediate value has already been inverted,
5922 * so BIC becomes AND.
5923 */
5924 tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
5925 vec_size, vec_size);
5926 } else {
5927 tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
5928 vec_size, vec_size);
5929 }
5930 } else {
5931 /* VMOV, VMVN. */
5932 if (op == 14 && invert) {
5933 TCGv_i64 t64 = tcg_temp_new_i64();
5934
5935 for (pass = 0; pass <= q; ++pass) {
5936 uint64_t val = 0;
5937 int n;
5938
5939 for (n = 0; n < 8; n++) {
5940 if (imm & (1 << (n + pass * 8))) {
5941 val |= 0xffull << (n * 8);
5942 }
5943 }
5944 tcg_gen_movi_i64(t64, val);
5945 neon_store_reg64(t64, rd + pass);
5946 }
5947 tcg_temp_free_i64(t64);
5948 } else {
5949 tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size,
5950 vec_size, imm);
5951 }
5952 }
5953 }
5954 } else { /* (insn & 0x00800010 == 0x00800000) */
5955 if (size != 3) {
5956 op = (insn >> 8) & 0xf;
5957 if ((insn & (1 << 6)) == 0) {
5958 /* Three registers of different lengths. */
5959 int src1_wide;
5960 int src2_wide;
5961 int prewiden;
5962 /* undefreq: bit 0 : UNDEF if size == 0
5963 * bit 1 : UNDEF if size == 1
5964 * bit 2 : UNDEF if size == 2
5965 * bit 3 : UNDEF if U == 1
5966 * Note that [2:0] set implies 'always UNDEF'
5967 */
5968 int undefreq;
5969 /* prewiden, src1_wide, src2_wide, undefreq */
5970 static const int neon_3reg_wide[16][4] = {
5971 {1, 0, 0, 0}, /* VADDL */
5972 {1, 1, 0, 0}, /* VADDW */
5973 {1, 0, 0, 0}, /* VSUBL */
5974 {1, 1, 0, 0}, /* VSUBW */
5975 {0, 1, 1, 0}, /* VADDHN */
5976 {0, 0, 0, 0}, /* VABAL */
5977 {0, 1, 1, 0}, /* VSUBHN */
5978 {0, 0, 0, 0}, /* VABDL */
5979 {0, 0, 0, 0}, /* VMLAL */
5980 {0, 0, 0, 9}, /* VQDMLAL */
5981 {0, 0, 0, 0}, /* VMLSL */
5982 {0, 0, 0, 9}, /* VQDMLSL */
5983 {0, 0, 0, 0}, /* Integer VMULL */
5984 {0, 0, 0, 9}, /* VQDMULL */
5985 {0, 0, 0, 0xa}, /* Polynomial VMULL */
5986 {0, 0, 0, 7}, /* Reserved: always UNDEF */
5987 };
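/* For example, {0, 0, 0, 9} for VQDMLAL means undefreq == 9,
 * i.e. UNDEF if size == 0 (bit 0) or if U == 1 (bit 3).
 */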
5988
5989 prewiden = neon_3reg_wide[op][0];
5990 src1_wide = neon_3reg_wide[op][1];
5991 src2_wide = neon_3reg_wide[op][2];
5992 undefreq = neon_3reg_wide[op][3];
5993
5994 if ((undefreq & (1 << size)) ||
5995 ((undefreq & 8) && u)) {
5996 return 1;
5997 }
5998 if ((src1_wide && (rn & 1)) ||
5999 (src2_wide && (rm & 1)) ||
6000 (!src2_wide && (rd & 1))) {
6001 return 1;
6002 }
6003
6004 /* Handle polynomial VMULL in a single pass. */
6005 if (op == 14) {
6006 if (size == 0) {
6007 /* VMULL.P8 */
6008 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
6009 0, gen_helper_neon_pmull_h);
6010 } else {
6011 /* VMULL.P64 */
6012 if (!dc_isar_feature(aa32_pmull, s)) {
6013 return 1;
6014 }
6015 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
6016 0, gen_helper_gvec_pmull_q);
6017 }
6018 return 0;
6019 }
6020
6021 /* Avoid overlapping operands. Wide source operands are
6022 always aligned so will never overlap with wide
6023 destinations in problematic ways. */
6024 if (rd == rm && !src2_wide) {
6025 tmp = neon_load_reg(rm, 1);
6026 neon_store_scratch(2, tmp);
6027 } else if (rd == rn && !src1_wide) {
6028 tmp = neon_load_reg(rn, 1);
6029 neon_store_scratch(2, tmp);
6030 }
6031 tmp3 = NULL;
6032 for (pass = 0; pass < 2; pass++) {
6033 if (src1_wide) {
6034 neon_load_reg64(cpu_V0, rn + pass);
6035 tmp = NULL;
6036 } else {
6037 if (pass == 1 && rd == rn) {
6038 tmp = neon_load_scratch(2);
6039 } else {
6040 tmp = neon_load_reg(rn, pass);
6041 }
6042 if (prewiden) {
6043 gen_neon_widen(cpu_V0, tmp, size, u);
6044 }
6045 }
6046 if (src2_wide) {
6047 neon_load_reg64(cpu_V1, rm + pass);
6048 tmp2 = NULL;
6049 } else {
6050 if (pass == 1 && rd == rm) {
6051 tmp2 = neon_load_scratch(2);
6052 } else {
6053 tmp2 = neon_load_reg(rm, pass);
6054 }
6055 if (prewiden) {
6056 gen_neon_widen(cpu_V1, tmp2, size, u);
6057 }
6058 }
6059 switch (op) {
6060 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
6061 gen_neon_addl(size);
6062 break;
6063 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
6064 gen_neon_subl(size);
6065 break;
6066 case 5: case 7: /* VABAL, VABDL */
6067 switch ((size << 1) | u) {
6068 case 0:
6069 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
6070 break;
6071 case 1:
6072 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
6073 break;
6074 case 2:
6075 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
6076 break;
6077 case 3:
6078 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
6079 break;
6080 case 4:
6081 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
6082 break;
6083 case 5:
6084 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
6085 break;
6086 default: abort();
6087 }
6088 tcg_temp_free_i32(tmp2);
6089 tcg_temp_free_i32(tmp);
6090 break;
6091 case 8: case 9: case 10: case 11: case 12: case 13:
6092 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
6093 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6094 break;
6095 default: /* 15 is RESERVED: caught earlier */
6096 abort();
6097 }
6098 if (op == 13) {
6099 /* VQDMULL */
6100 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6101 neon_store_reg64(cpu_V0, rd + pass);
6102 } else if (op == 5 || (op >= 8 && op <= 11)) {
6103 /* Accumulate. */
6104 neon_load_reg64(cpu_V1, rd + pass);
6105 switch (op) {
6106 case 10: /* VMLSL */
6107 gen_neon_negl(cpu_V0, size);
6108 /* Fall through */
6109 case 5: case 8: /* VABAL, VMLAL */
6110 gen_neon_addl(size);
6111 break;
6112 case 9: case 11: /* VQDMLAL, VQDMLSL */
6113 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6114 if (op == 11) {
6115 gen_neon_negl(cpu_V0, size);
6116 }
6117 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6118 break;
6119 default:
6120 abort();
6121 }
6122 neon_store_reg64(cpu_V0, rd + pass);
6123 } else if (op == 4 || op == 6) {
6124 /* Narrowing operation. */
6125 tmp = tcg_temp_new_i32();
6126 if (!u) {
6127 switch (size) {
6128 case 0:
6129 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
6130 break;
6131 case 1:
6132 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
6133 break;
6134 case 2:
6135 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
6136 break;
6137 default: abort();
6138 }
6139 } else {
6140 switch (size) {
6141 case 0:
6142 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
6143 break;
6144 case 1:
6145 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
6146 break;
6147 case 2:
6148 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
6149 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
6150 break;
6151 default: abort();
6152 }
6153 }
6154 if (pass == 0) {
6155 tmp3 = tmp;
6156 } else {
6157 neon_store_reg(rd, 0, tmp3);
6158 neon_store_reg(rd, 1, tmp);
6159 }
6160 } else {
6161 /* Write back the result. */
6162 neon_store_reg64(cpu_V0, rd + pass);
6163 }
6164 }
6165 } else {
6166 /* Two registers and a scalar. NB that for ops of this form
6167 * the ARM ARM labels bit 24 as Q, but it is in our variable
6168 * 'u', not 'q'.
6169 */
6170 if (size == 0) {
6171 return 1;
6172 }
6173 switch (op) {
6174 case 1: /* Floating point VMLA scalar */
6175 case 5: /* Floating point VMLS scalar */
6176 case 9: /* Floating point VMUL scalar */
6177 if (size == 1) {
6178 return 1;
6179 }
6180 /* fall through */
6181 case 0: /* Integer VMLA scalar */
6182 case 4: /* Integer VMLS scalar */
6183 case 8: /* Integer VMUL scalar */
6184 case 12: /* VQDMULH scalar */
6185 case 13: /* VQRDMULH scalar */
6186 if (u && ((rd | rn) & 1)) {
6187 return 1;
6188 }
6189 tmp = neon_get_scalar(size, rm);
6190 neon_store_scratch(0, tmp);
6191 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6192 tmp = neon_load_scratch(0);
6193 tmp2 = neon_load_reg(rn, pass);
6194 if (op == 12) {
6195 if (size == 1) {
6196 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6197 } else {
6198 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6199 }
6200 } else if (op == 13) {
6201 if (size == 1) {
6202 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6203 } else {
6204 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6205 }
6206 } else if (op & 1) {
6207 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6208 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6209 tcg_temp_free_ptr(fpstatus);
6210 } else {
6211 switch (size) {
6212 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6213 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6214 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6215 default: abort();
6216 }
6217 }
6218 tcg_temp_free_i32(tmp2);
6219 if (op < 8) {
6220 /* Accumulate. */
6221 tmp2 = neon_load_reg(rd, pass);
6222 switch (op) {
6223 case 0:
6224 gen_neon_add(size, tmp, tmp2);
6225 break;
6226 case 1:
6227 {
6228 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6229 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6230 tcg_temp_free_ptr(fpstatus);
6231 break;
6232 }
6233 case 4:
6234 gen_neon_rsb(size, tmp, tmp2);
6235 break;
6236 case 5:
6237 {
6238 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6239 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6240 tcg_temp_free_ptr(fpstatus);
6241 break;
6242 }
6243 default:
6244 abort();
6245 }
6246 tcg_temp_free_i32(tmp2);
6247 }
6248 neon_store_reg(rd, pass, tmp);
6249 }
6250 break;
6251 case 3: /* VQDMLAL scalar */
6252 case 7: /* VQDMLSL scalar */
6253 case 11: /* VQDMULL scalar */
6254 if (u == 1) {
6255 return 1;
6256 }
6257 /* fall through */
6258 case 2: /* VMLAL scalar */
6259 case 6: /* VMLSL scalar */
6260 case 10: /* VMULL scalar */
6261 if (rd & 1) {
6262 return 1;
6263 }
6264 tmp2 = neon_get_scalar(size, rm);
6265 /* We need a copy of tmp2 because gen_neon_mull
6266 * frees it during pass 0. */
6267 tmp4 = tcg_temp_new_i32();
6268 tcg_gen_mov_i32(tmp4, tmp2);
6269 tmp3 = neon_load_reg(rn, 1);
6270
6271 for (pass = 0; pass < 2; pass++) {
6272 if (pass == 0) {
6273 tmp = neon_load_reg(rn, 0);
6274 } else {
6275 tmp = tmp3;
6276 tmp2 = tmp4;
6277 }
6278 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6279 if (op != 11) {
6280 neon_load_reg64(cpu_V1, rd + pass);
6281 }
6282 switch (op) {
6283 case 6:
6284 gen_neon_negl(cpu_V0, size);
6285 /* Fall through */
6286 case 2:
6287 gen_neon_addl(size);
6288 break;
6289 case 3: case 7:
6290 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6291 if (op == 7) {
6292 gen_neon_negl(cpu_V0, size);
6293 }
6294 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6295 break;
6296 case 10:
6297 /* no-op */
6298 break;
6299 case 11:
6300 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6301 break;
6302 default:
6303 abort();
6304 }
6305 neon_store_reg64(cpu_V0, rd + pass);
6306 }
6307 break;
6308 case 14: /* VQRDMLAH scalar */
6309 case 15: /* VQRDMLSH scalar */
6310 {
6311 NeonGenThreeOpEnvFn *fn;
6312
6313 if (!dc_isar_feature(aa32_rdm, s)) {
6314 return 1;
6315 }
6316 if (u && ((rd | rn) & 1)) {
6317 return 1;
6318 }
6319 if (op == 14) {
6320 if (size == 1) {
6321 fn = gen_helper_neon_qrdmlah_s16;
6322 } else {
6323 fn = gen_helper_neon_qrdmlah_s32;
6324 }
6325 } else {
6326 if (size == 1) {
6327 fn = gen_helper_neon_qrdmlsh_s16;
6328 } else {
6329 fn = gen_helper_neon_qrdmlsh_s32;
6330 }
6331 }
6332
6333 tmp2 = neon_get_scalar(size, rm);
6334 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6335 tmp = neon_load_reg(rn, pass);
6336 tmp3 = neon_load_reg(rd, pass);
6337 fn(tmp, cpu_env, tmp, tmp2, tmp3);
6338 tcg_temp_free_i32(tmp3);
6339 neon_store_reg(rd, pass, tmp);
6340 }
6341 tcg_temp_free_i32(tmp2);
6342 }
6343 break;
6344 default:
6345 g_assert_not_reached();
6346 }
6347 }
6348 } else { /* size == 3 */
6349 if (!u) {
6350 /* Extract. */
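/* VEXT: the result starts at byte offset imm of the concatenation
 * of Vn (low-order bytes) and Vm (high-order bytes).
 */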
6351 imm = (insn >> 8) & 0xf;
6352
6353 if (imm > 7 && !q)
6354 return 1;
6355
6356 if (q && ((rd | rn | rm) & 1)) {
6357 return 1;
6358 }
6359
6360 if (imm == 0) {
6361 neon_load_reg64(cpu_V0, rn);
6362 if (q) {
6363 neon_load_reg64(cpu_V1, rn + 1);
6364 }
6365 } else if (imm == 8) {
6366 neon_load_reg64(cpu_V0, rn + 1);
6367 if (q) {
6368 neon_load_reg64(cpu_V1, rm);
6369 }
6370 } else if (q) {
6371 tmp64 = tcg_temp_new_i64();
6372 if (imm < 8) {
6373 neon_load_reg64(cpu_V0, rn);
6374 neon_load_reg64(tmp64, rn + 1);
6375 } else {
6376 neon_load_reg64(cpu_V0, rn + 1);
6377 neon_load_reg64(tmp64, rm);
6378 }
6379 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6380 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6381 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6382 if (imm < 8) {
6383 neon_load_reg64(cpu_V1, rm);
6384 } else {
6385 neon_load_reg64(cpu_V1, rm + 1);
6386 imm -= 8;
6387 }
6388 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6389 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6390 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6391 tcg_temp_free_i64(tmp64);
6392 } else {
6393 /* Non-quadword VEXT: combine Dn and Dm directly. */
6394 neon_load_reg64(cpu_V0, rn);
6395 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6396 neon_load_reg64(cpu_V1, rm);
6397 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6398 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6399 }
6400 neon_store_reg64(cpu_V0, rd);
6401 if (q) {
6402 neon_store_reg64(cpu_V1, rd + 1);
6403 }
6404 } else if ((insn & (1 << 11)) == 0) {
6405 /* Two register misc. */
6406 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6407 size = (insn >> 18) & 3;
6408 /* UNDEF for unknown op values and bad op-size combinations */
6409 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6410 return 1;
6411 }
6412 if (neon_2rm_is_v8_op(op) &&
6413 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6414 return 1;
6415 }
6416 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6417 q && ((rm | rd) & 1)) {
6418 return 1;
6419 }
6420 switch (op) {
6421 case NEON_2RM_VREV64:
6422 for (pass = 0; pass < (q ? 2 : 1); pass++) {
6423 tmp = neon_load_reg(rm, pass * 2);
6424 tmp2 = neon_load_reg(rm, pass * 2 + 1);
6425 switch (size) {
6426 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6427 case 1: gen_swap_half(tmp); break;
6428 case 2: /* no-op */ break;
6429 default: abort();
6430 }
6431 neon_store_reg(rd, pass * 2 + 1, tmp);
6432 if (size == 2) {
6433 neon_store_reg(rd, pass * 2, tmp2);
6434 } else {
6435 switch (size) {
6436 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6437 case 1: gen_swap_half(tmp2); break;
6438 default: abort();
6439 }
6440 neon_store_reg(rd, pass * 2, tmp2);
6441 }
6442 }
6443 break;
6444 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6445 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6446 for (pass = 0; pass < q + 1; pass++) {
6447 tmp = neon_load_reg(rm, pass * 2);
6448 gen_neon_widen(cpu_V0, tmp, size, op & 1);
6449 tmp = neon_load_reg(rm, pass * 2 + 1);
6450 gen_neon_widen(cpu_V1, tmp, size, op & 1);
6451 switch (size) {
6452 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6453 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6454 case 2: tcg_gen_add_i64(CPU_V001); break;
6455 default: abort();
6456 }
6457 if (op >= NEON_2RM_VPADAL) {
6458 /* Accumulate. */
6459 neon_load_reg64(cpu_V1, rd + pass);
6460 gen_neon_addl(size);
6461 }
6462 neon_store_reg64(cpu_V0, rd + pass);
6463 }
6464 break;
6465 case NEON_2RM_VTRN:
6466 if (size == 2) {
6467 int n;
6468 for (n = 0; n < (q ? 4 : 2); n += 2) {
6469 tmp = neon_load_reg(rm, n);
6470 tmp2 = neon_load_reg(rd, n + 1);
6471 neon_store_reg(rm, n, tmp2);
6472 neon_store_reg(rd, n + 1, tmp);
6473 }
6474 } else {
6475 goto elementwise;
6476 }
6477 break;
6478 case NEON_2RM_VUZP:
6479 if (gen_neon_unzip(rd, rm, size, q)) {
6480 return 1;
6481 }
6482 break;
6483 case NEON_2RM_VZIP:
6484 if (gen_neon_zip(rd, rm, size, q)) {
6485 return 1;
6486 }
6487 break;
6488 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6489 /* also VQMOVUN; op field and mnemonics don't line up */
6490 if (rm & 1) {
6491 return 1;
6492 }
6493 tmp2 = NULL;
6494 for (pass = 0; pass < 2; pass++) {
6495 neon_load_reg64(cpu_V0, rm + pass);
6496 tmp = tcg_temp_new_i32();
6497 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6498 tmp, cpu_V0);
6499 if (pass == 0) {
6500 tmp2 = tmp;
6501 } else {
6502 neon_store_reg(rd, 0, tmp2);
6503 neon_store_reg(rd, 1, tmp);
6504 }
6505 }
6506 break;
6507 case NEON_2RM_VSHLL:
6508 if (q || (rd & 1)) {
6509 return 1;
6510 }
6511 tmp = neon_load_reg(rm, 0);
6512 tmp2 = neon_load_reg(rm, 1);
6513 for (pass = 0; pass < 2; pass++) {
6514 if (pass == 1)
6515 tmp = tmp2;
6516 gen_neon_widen(cpu_V0, tmp, size, 1);
6517 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6518 neon_store_reg64(cpu_V0, rd + pass);
6519 }
6520 break;
6521 case NEON_2RM_VCVT_F16_F32:
6522 {
6523 TCGv_ptr fpst;
6524 TCGv_i32 ahp;
6525
6526 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6527 q || (rm & 1)) {
6528 return 1;
6529 }
6530 fpst = get_fpstatus_ptr(true);
6531 ahp = get_ahp_flag();
6532 tmp = neon_load_reg(rm, 0);
6533 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6534 tmp2 = neon_load_reg(rm, 1);
6535 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
6536 tcg_gen_shli_i32(tmp2, tmp2, 16);
6537 tcg_gen_or_i32(tmp2, tmp2, tmp);
6538 tcg_temp_free_i32(tmp);
6539 tmp = neon_load_reg(rm, 2);
6540 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6541 tmp3 = neon_load_reg(rm, 3);
6542 neon_store_reg(rd, 0, tmp2);
6543 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
6544 tcg_gen_shli_i32(tmp3, tmp3, 16);
6545 tcg_gen_or_i32(tmp3, tmp3, tmp);
6546 neon_store_reg(rd, 1, tmp3);
6547 tcg_temp_free_i32(tmp);
6548 tcg_temp_free_i32(ahp);
6549 tcg_temp_free_ptr(fpst);
6550 break;
6551 }
6552 case NEON_2RM_VCVT_F32_F16:
6553 {
6554 TCGv_ptr fpst;
6555 TCGv_i32 ahp;
6556 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6557 q || (rd & 1)) {
6558 return 1;
6559 }
6560 fpst = get_fpstatus_ptr(true);
6561 ahp = get_ahp_flag();
6562 tmp3 = tcg_temp_new_i32();
6563 tmp = neon_load_reg(rm, 0);
6564 tmp2 = neon_load_reg(rm, 1);
6565 tcg_gen_ext16u_i32(tmp3, tmp);
6566 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6567 neon_store_reg(rd, 0, tmp3);
6568 tcg_gen_shri_i32(tmp, tmp, 16);
6569 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
6570 neon_store_reg(rd, 1, tmp);
6571 tmp3 = tcg_temp_new_i32();
6572 tcg_gen_ext16u_i32(tmp3, tmp2);
6573 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6574 neon_store_reg(rd, 2, tmp3);
6575 tcg_gen_shri_i32(tmp2, tmp2, 16);
6576 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
6577 neon_store_reg(rd, 3, tmp2);
6578 tcg_temp_free_i32(ahp);
6579 tcg_temp_free_ptr(fpst);
6580 break;
6581 }
6582 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6583 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
6584 return 1;
6585 }
6586 ptr1 = vfp_reg_ptr(true, rd);
6587 ptr2 = vfp_reg_ptr(true, rm);
6588
6589 /* Bit 6 is the lowest opcode bit; it distinguishes between
6590 * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
6591 */
6592 tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6593
6594 if (op == NEON_2RM_AESE) {
6595 gen_helper_crypto_aese(ptr1, ptr2, tmp3);
6596 } else {
6597 gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
6598 }
6599 tcg_temp_free_ptr(ptr1);
6600 tcg_temp_free_ptr(ptr2);
6601 tcg_temp_free_i32(tmp3);
6602 break;
6603 case NEON_2RM_SHA1H:
6604 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
6605 return 1;
6606 }
6607 ptr1 = vfp_reg_ptr(true, rd);
6608 ptr2 = vfp_reg_ptr(true, rm);
6609
6610 gen_helper_crypto_sha1h(ptr1, ptr2);
6611
6612 tcg_temp_free_ptr(ptr1);
6613 tcg_temp_free_ptr(ptr2);
6614 break;
6615 case NEON_2RM_SHA1SU1:
6616 if ((rm | rd) & 1) {
6617 return 1;
6618 }
6619 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6620 if (q) {
6621 if (!dc_isar_feature(aa32_sha2, s)) {
6622 return 1;
6623 }
6624 } else if (!dc_isar_feature(aa32_sha1, s)) {
6625 return 1;
6626 }
6627 ptr1 = vfp_reg_ptr(true, rd);
6628 ptr2 = vfp_reg_ptr(true, rm);
6629 if (q) {
6630 gen_helper_crypto_sha256su0(ptr1, ptr2);
6631 } else {
6632 gen_helper_crypto_sha1su1(ptr1, ptr2);
6633 }
6634 tcg_temp_free_ptr(ptr1);
6635 tcg_temp_free_ptr(ptr2);
6636 break;
6637
6638 case NEON_2RM_VMVN:
6639 tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
6640 break;
6641 case NEON_2RM_VNEG:
6642 tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
6643 break;
6644 case NEON_2RM_VABS:
6645 tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
6646 break;
6647
6648 case NEON_2RM_VCEQ0:
6649 gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6650 break;
6651 case NEON_2RM_VCGT0:
6652 gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6653 break;
6654 case NEON_2RM_VCLE0:
6655 gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6656 break;
6657 case NEON_2RM_VCGE0:
6658 gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6659 break;
6660 case NEON_2RM_VCLT0:
6661 gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6662 break;
6663
6664 default:
6665 elementwise:
6666 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6667 tmp = neon_load_reg(rm, pass);
6668 switch (op) {
6669 case NEON_2RM_VREV32:
6670 switch (size) {
6671 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6672 case 1: gen_swap_half(tmp); break;
6673 default: abort();
6674 }
6675 break;
6676 case NEON_2RM_VREV16:
6677 gen_rev16(tmp, tmp);
6678 break;
6679 case NEON_2RM_VCLS:
6680 switch (size) {
6681 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6682 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6683 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6684 default: abort();
6685 }
6686 break;
6687 case NEON_2RM_VCLZ:
6688 switch (size) {
6689 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6690 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6691 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
6692 default: abort();
6693 }
6694 break;
6695 case NEON_2RM_VCNT:
6696 gen_helper_neon_cnt_u8(tmp, tmp);
6697 break;
6698 case NEON_2RM_VQABS:
6699 switch (size) {
6700 case 0:
6701 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6702 break;
6703 case 1:
6704 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6705 break;
6706 case 2:
6707 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6708 break;
6709 default: abort();
6710 }
6711 break;
6712 case NEON_2RM_VQNEG:
6713 switch (size) {
6714 case 0:
6715 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6716 break;
6717 case 1:
6718 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6719 break;
6720 case 2:
6721 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6722 break;
6723 default: abort();
6724 }
6725 break;
6726 case NEON_2RM_VCGT0_F:
6727 {
6728 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6729 tmp2 = tcg_const_i32(0);
6730 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6731 tcg_temp_free_i32(tmp2);
6732 tcg_temp_free_ptr(fpstatus);
6733 break;
6734 }
6735 case NEON_2RM_VCGE0_F:
6736 {
6737 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6738 tmp2 = tcg_const_i32(0);
6739 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6740 tcg_temp_free_i32(tmp2);
6741 tcg_temp_free_ptr(fpstatus);
6742 break;
6743 }
6744 case NEON_2RM_VCEQ0_F:
6745 {
6746 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6747 tmp2 = tcg_const_i32(0);
6748 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6749 tcg_temp_free_i32(tmp2);
6750 tcg_temp_free_ptr(fpstatus);
6751 break;
6752 }
6753 case NEON_2RM_VCLE0_F:
6754 {
6755 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6756 tmp2 = tcg_const_i32(0);
6757 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6758 tcg_temp_free_i32(tmp2);
6759 tcg_temp_free_ptr(fpstatus);
6760 break;
6761 }
6762 case NEON_2RM_VCLT0_F:
6763 {
6764 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6765 tmp2 = tcg_const_i32(0);
6766 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6767 tcg_temp_free_i32(tmp2);
6768 tcg_temp_free_ptr(fpstatus);
6769 break;
6770 }
6771 case NEON_2RM_VABS_F:
6772 gen_helper_vfp_abss(tmp, tmp);
6773 break;
6774 case NEON_2RM_VNEG_F:
6775 gen_helper_vfp_negs(tmp, tmp);
6776 break;
6777 case NEON_2RM_VSWP:
6778 tmp2 = neon_load_reg(rd, pass);
6779 neon_store_reg(rm, pass, tmp2);
6780 break;
6781 case NEON_2RM_VTRN:
6782 tmp2 = neon_load_reg(rd, pass);
6783 switch (size) {
6784 case 0: gen_neon_trn_u8(tmp, tmp2); break;
6785 case 1: gen_neon_trn_u16(tmp, tmp2); break;
6786 default: abort();
6787 }
6788 neon_store_reg(rm, pass, tmp2);
6789 break;
6790 case NEON_2RM_VRINTN:
6791 case NEON_2RM_VRINTA:
6792 case NEON_2RM_VRINTM:
6793 case NEON_2RM_VRINTP:
6794 case NEON_2RM_VRINTZ:
6795 {
6796 TCGv_i32 tcg_rmode;
6797 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6798 int rmode;
6799
6800 if (op == NEON_2RM_VRINTZ) {
6801 rmode = FPROUNDING_ZERO;
6802 } else {
6803 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6804 }
6805
6806 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6807 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6808 cpu_env);
6809 gen_helper_rints(tmp, tmp, fpstatus);
6810 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6811 cpu_env);
6812 tcg_temp_free_ptr(fpstatus);
6813 tcg_temp_free_i32(tcg_rmode);
6814 break;
6815 }
6816 case NEON_2RM_VRINTX:
6817 {
6818 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6819 gen_helper_rints_exact(tmp, tmp, fpstatus);
6820 tcg_temp_free_ptr(fpstatus);
6821 break;
6822 }
6823 case NEON_2RM_VCVTAU:
6824 case NEON_2RM_VCVTAS:
6825 case NEON_2RM_VCVTNU:
6826 case NEON_2RM_VCVTNS:
6827 case NEON_2RM_VCVTPU:
6828 case NEON_2RM_VCVTPS:
6829 case NEON_2RM_VCVTMU:
6830 case NEON_2RM_VCVTMS:
6831 {
6832 bool is_signed = !extract32(insn, 7, 1);
6833 TCGv_ptr fpst = get_fpstatus_ptr(1);
6834 TCGv_i32 tcg_rmode, tcg_shift;
6835 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6836
6837 tcg_shift = tcg_const_i32(0);
6838 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6839 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6840 cpu_env);
6841
6842 if (is_signed) {
6843 gen_helper_vfp_tosls(tmp, tmp,
6844 tcg_shift, fpst);
6845 } else {
6846 gen_helper_vfp_touls(tmp, tmp,
6847 tcg_shift, fpst);
6848 }
6849
6850 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6851 cpu_env);
6852 tcg_temp_free_i32(tcg_rmode);
6853 tcg_temp_free_i32(tcg_shift);
6854 tcg_temp_free_ptr(fpst);
6855 break;
6856 }
6857 case NEON_2RM_VRECPE:
6858 gen_helper_recpe_u32(tmp, tmp);
6859 break;
6860 case NEON_2RM_VRSQRTE:
6861 gen_helper_rsqrte_u32(tmp, tmp);
6862 break;
6863 case NEON_2RM_VRECPE_F:
6864 {
6865 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6866 gen_helper_recpe_f32(tmp, tmp, fpstatus);
6867 tcg_temp_free_ptr(fpstatus);
6868 break;
6869 }
6870 case NEON_2RM_VRSQRTE_F:
6871 {
6872 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6873 gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
6874 tcg_temp_free_ptr(fpstatus);
6875 break;
6876 }
6877 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6878 {
6879 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6880 gen_helper_vfp_sitos(tmp, tmp, fpstatus);
6881 tcg_temp_free_ptr(fpstatus);
6882 break;
6883 }
6884 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6885 {
6886 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6887 gen_helper_vfp_uitos(tmp, tmp, fpstatus);
6888 tcg_temp_free_ptr(fpstatus);
6889 break;
6890 }
6891 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6892 {
6893 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6894 gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
6895 tcg_temp_free_ptr(fpstatus);
6896 break;
6897 }
6898 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6899 {
6900 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6901 gen_helper_vfp_touizs(tmp, tmp, fpstatus);
6902 tcg_temp_free_ptr(fpstatus);
6903 break;
6904 }
6905 default:
6906 /* Reserved op values were caught by the
6907 * neon_2rm_sizes[] check earlier.
6908 */
6909 abort();
6910 }
6911 neon_store_reg(rd, pass, tmp);
6912 }
6913 break;
6914 }
6915 } else if ((insn & (1 << 10)) == 0) {
6916 /* VTBL, VTBX. */
6917 int n = ((insn >> 8) & 3) + 1;
6918 if ((rn + n) > 32) {
6919 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6920 * helper function running off the end of the register file.
6921 */
6922 return 1;
6923 }
6924 n <<= 3;
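/* n is now the table length in bytes (8, 16, 24 or 32). */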
6925 if (insn & (1 << 6)) {
6926 tmp = neon_load_reg(rd, 0);
6927 } else {
6928 tmp = tcg_temp_new_i32();
6929 tcg_gen_movi_i32(tmp, 0);
6930 }
6931 tmp2 = neon_load_reg(rm, 0);
6932 ptr1 = vfp_reg_ptr(true, rn);
6933 tmp5 = tcg_const_i32(n);
6934 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
6935 tcg_temp_free_i32(tmp);
6936 if (insn & (1 << 6)) {
6937 tmp = neon_load_reg(rd, 1);
6938 } else {
6939 tmp = tcg_temp_new_i32();
6940 tcg_gen_movi_i32(tmp, 0);
6941 }
6942 tmp3 = neon_load_reg(rm, 1);
6943 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
6944 tcg_temp_free_i32(tmp5);
6945 tcg_temp_free_ptr(ptr1);
6946 neon_store_reg(rd, 0, tmp2);
6947 neon_store_reg(rd, 1, tmp3);
6948 tcg_temp_free_i32(tmp);
6949 } else if ((insn & 0x380) == 0) {
6950 /* VDUP */
6951 int element;
6952 MemOp size;
6953
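/* The imm4 field (insn[19:16]) encodes both element size and index:
 * xxx1 selects bytes, xx10 halfwords and x100 words.
 */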
6954 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
6955 return 1;
6956 }
6957 if (insn & (1 << 16)) {
6958 size = MO_8;
6959 element = (insn >> 17) & 7;
6960 } else if (insn & (1 << 17)) {
6961 size = MO_16;
6962 element = (insn >> 18) & 3;
6963 } else {
6964 size = MO_32;
6965 element = (insn >> 19) & 1;
6966 }
6967 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
6968 neon_element_offset(rm, element, size),
6969 q ? 16 : 8, q ? 16 : 8);
6970 } else {
6971 return 1;
6972 }
6973 }
6974 }
6975 return 0;
6976 }
6977
6978 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
6979 {
6980 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
6981 const ARMCPRegInfo *ri;
6982
6983 cpnum = (insn >> 8) & 0xf;
6984
6985 /* First check for coprocessor space used for XScale/iwMMXt insns */
6986 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
6987 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
6988 return 1;
6989 }
6990 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
6991 return disas_iwmmxt_insn(s, insn);
6992 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
6993 return disas_dsp_insn(s, insn);
6994 }
6995 return 1;
6996 }
6997
6998 /* Otherwise treat as a generic register access */
6999 is64 = (insn & (1 << 25)) == 0;
7000 if (!is64 && ((insn & (1 << 4)) == 0)) {
7001 /* cdp */
7002 return 1;
7003 }
7004
7005 crm = insn & 0xf;
7006 if (is64) {
7007 crn = 0;
7008 opc1 = (insn >> 4) & 0xf;
7009 opc2 = 0;
7010 rt2 = (insn >> 16) & 0xf;
7011 } else {
7012 crn = (insn >> 16) & 0xf;
7013 opc1 = (insn >> 21) & 7;
7014 opc2 = (insn >> 5) & 7;
7015 rt2 = 0;
7016 }
7017 isread = (insn >> 20) & 1;
7018 rt = (insn >> 12) & 0xf;
7019
7020 ri = get_arm_cp_reginfo(s->cp_regs,
7021 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
7022 if (ri) {
7023 bool need_exit_tb;
7024
7025 /* Check access permissions */
7026 if (!cp_access_ok(s->current_el, ri, isread)) {
7027 return 1;
7028 }
7029
7030 if (s->hstr_active || ri->accessfn ||
7031 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
7032 /* Emit code to perform further access permissions checks at
7033 * runtime; this may result in an exception.
7034 * Note that on XScale all cp0..cp13 registers do an access check
7035 * call in order to handle c15_cpar.
7036 */
7037 TCGv_ptr tmpptr;
7038 TCGv_i32 tcg_syn, tcg_isread;
7039 uint32_t syndrome;
7040
7041 /* Note that since we are an implementation which takes an
7042 * exception on a trapped conditional instruction only if the
7043 * instruction passes its condition code check, we can take
7044 * advantage of the clause in the ARM ARM that allows us to set
7045 * the COND field in the instruction to 0xE in all cases.
7046 * We could fish the actual condition out of the insn (ARM)
7047 * or the condexec bits (Thumb) but it isn't necessary.
7048 */
7049 switch (cpnum) {
7050 case 14:
7051 if (is64) {
7052 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7053 isread, false);
7054 } else {
7055 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7056 rt, isread, false);
7057 }
7058 break;
7059 case 15:
7060 if (is64) {
7061 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7062 isread, false);
7063 } else {
7064 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7065 rt, isread, false);
7066 }
7067 break;
7068 default:
7069 /* ARMv8 defines that only coprocessors 14 and 15 exist,
7070 * so this can only happen if this is an ARMv7 or earlier CPU,
7071 * in which case the syndrome information won't actually be
7072 * guest visible.
7073 */
7074 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
7075 syndrome = syn_uncategorized();
7076 break;
7077 }
7078
7079 gen_set_condexec(s);
7080 gen_set_pc_im(s, s->pc_curr);
7081 tmpptr = tcg_const_ptr(ri);
7082 tcg_syn = tcg_const_i32(syndrome);
7083 tcg_isread = tcg_const_i32(isread);
7084 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
7085 tcg_isread);
7086 tcg_temp_free_ptr(tmpptr);
7087 tcg_temp_free_i32(tcg_syn);
7088 tcg_temp_free_i32(tcg_isread);
7089 } else if (ri->type & ARM_CP_RAISES_EXC) {
7090 /*
7091 * The readfn or writefn might raise an exception;
7092 * synchronize the CPU state in case it does.
7093 */
7094 gen_set_condexec(s);
7095 gen_set_pc_im(s, s->pc_curr);
7096 }
7097
7098 /* Handle special cases first */
7099 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
7100 case ARM_CP_NOP:
7101 return 0;
7102 case ARM_CP_WFI:
7103 if (isread) {
7104 return 1;
7105 }
7106 gen_set_pc_im(s, s->base.pc_next);
7107 s->base.is_jmp = DISAS_WFI;
7108 return 0;
7109 default:
7110 break;
7111 }
7112
7113 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7114 gen_io_start();
7115 }
7116
7117 if (isread) {
7118 /* Read */
7119 if (is64) {
7120 TCGv_i64 tmp64;
7121 TCGv_i32 tmp;
7122 if (ri->type & ARM_CP_CONST) {
7123 tmp64 = tcg_const_i64(ri->resetvalue);
7124 } else if (ri->readfn) {
7125 TCGv_ptr tmpptr;
7126 tmp64 = tcg_temp_new_i64();
7127 tmpptr = tcg_const_ptr(ri);
7128 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
7129 tcg_temp_free_ptr(tmpptr);
7130 } else {
7131 tmp64 = tcg_temp_new_i64();
7132 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7133 }
7134 tmp = tcg_temp_new_i32();
7135 tcg_gen_extrl_i64_i32(tmp, tmp64);
7136 store_reg(s, rt, tmp);
7137 tmp = tcg_temp_new_i32();
7138 tcg_gen_extrh_i64_i32(tmp, tmp64);
7139 tcg_temp_free_i64(tmp64);
7140 store_reg(s, rt2, tmp);
7141 } else {
7142 TCGv_i32 tmp;
7143 if (ri->type & ARM_CP_CONST) {
7144 tmp = tcg_const_i32(ri->resetvalue);
7145 } else if (ri->readfn) {
7146 TCGv_ptr tmpptr;
7147 tmp = tcg_temp_new_i32();
7148 tmpptr = tcg_const_ptr(ri);
7149 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7150 tcg_temp_free_ptr(tmpptr);
7151 } else {
7152 tmp = load_cpu_offset(ri->fieldoffset);
7153 }
7154 if (rt == 15) {
7155 /* A destination register of r15 for 32-bit loads sets
7156 * the condition codes from the high 4 bits of the value.
7157 */
7158 gen_set_nzcv(tmp);
7159 tcg_temp_free_i32(tmp);
7160 } else {
7161 store_reg(s, rt, tmp);
7162 }
7163 }
7164 } else {
7165 /* Write */
7166 if (ri->type & ARM_CP_CONST) {
7167 /* If not forbidden by access permissions, treat as WI */
7168 return 0;
7169 }
7170
7171 if (is64) {
7172 TCGv_i32 tmplo, tmphi;
7173 TCGv_i64 tmp64 = tcg_temp_new_i64();
7174 tmplo = load_reg(s, rt);
7175 tmphi = load_reg(s, rt2);
7176 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7177 tcg_temp_free_i32(tmplo);
7178 tcg_temp_free_i32(tmphi);
7179 if (ri->writefn) {
7180 TCGv_ptr tmpptr = tcg_const_ptr(ri);
7181 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7182 tcg_temp_free_ptr(tmpptr);
7183 } else {
7184 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7185 }
7186 tcg_temp_free_i64(tmp64);
7187 } else {
7188 if (ri->writefn) {
7189 TCGv_i32 tmp;
7190 TCGv_ptr tmpptr;
7191 tmp = load_reg(s, rt);
7192 tmpptr = tcg_const_ptr(ri);
7193 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7194 tcg_temp_free_ptr(tmpptr);
7195 tcg_temp_free_i32(tmp);
7196 } else {
7197 TCGv_i32 tmp = load_reg(s, rt);
7198 store_cpu_offset(tmp, ri->fieldoffset);
7199 }
7200 }
7201 }
7202
7203 /* I/O operations must end the TB here (whether read or write) */
7204 need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
7205 (ri->type & ARM_CP_IO));
7206
7207 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7208 /*
7209 * A write to any coprocessor register that ends a TB
7210 * must rebuild the hflags for the next TB.
7211 */
7212 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
7213 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7214 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
7215 } else {
7216 if (ri->type & ARM_CP_NEWEL) {
7217 gen_helper_rebuild_hflags_a32_newel(cpu_env);
7218 } else {
7219 gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
7220 }
7221 }
7222 tcg_temp_free_i32(tcg_el);
7223 /*
7224 * We default to ending the TB on a coprocessor register write,
7225 * but allow this to be suppressed by the register definition
7226 * (usually only necessary to work around guest bugs).
7227 */
7228 need_exit_tb = true;
7229 }
7230 if (need_exit_tb) {
7231 gen_lookup_tb(s);
7232 }
7233
7234 return 0;
7235 }
7236
7237 /* Unknown register; this might be a guest error or a QEMU
7238 * unimplemented feature.
7239 */
7240 if (is64) {
7241 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7242 "64 bit system register cp:%d opc1: %d crm:%d "
7243 "(%s)\n",
7244 isread ? "read" : "write", cpnum, opc1, crm,
7245 s->ns ? "non-secure" : "secure");
7246 } else {
7247 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7248 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7249 "(%s)\n",
7250 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7251 s->ns ? "non-secure" : "secure");
7252 }
7253
7254 return 1;
7255 }
7256
7257
7258 /* Store a 64-bit value to a register pair. Clobbers val. */
7259 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7260 {
7261 TCGv_i32 tmp;
7262 tmp = tcg_temp_new_i32();
7263 tcg_gen_extrl_i64_i32(tmp, val);
7264 store_reg(s, rlow, tmp);
7265 tmp = tcg_temp_new_i32();
7266 tcg_gen_extrh_i64_i32(tmp, val);
7267 store_reg(s, rhigh, tmp);
7268 }
7269
7270 /* Load and add a 64-bit value from a register pair. */
7271 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7272 {
7273 TCGv_i64 tmp;
7274 TCGv_i32 tmpl;
7275 TCGv_i32 tmph;
7276
7277 /* Load 64-bit value rd:rn. */
7278 tmpl = load_reg(s, rlow);
7279 tmph = load_reg(s, rhigh);
7280 tmp = tcg_temp_new_i64();
7281 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7282 tcg_temp_free_i32(tmpl);
7283 tcg_temp_free_i32(tmph);
7284 tcg_gen_add_i64(val, val, tmp);
7285 tcg_temp_free_i64(tmp);
7286 }
7287
7288 /* Set N and Z flags from hi|lo. */
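/* cpu_NF takes its sign from the high word; cpu_ZF holds a value
 * that is zero exactly when Z should be set, so OR-ing the two
 * halves is sufficient.
 */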
7289 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7290 {
7291 tcg_gen_mov_i32(cpu_NF, hi);
7292 tcg_gen_or_i32(cpu_ZF, lo, hi);
7293 }
7294
7295 /* Load/Store exclusive instructions are implemented by remembering
7296 the value/address loaded, and seeing if these are the same
7297 when the store is performed. This should be sufficient to implement
7298 the architecturally mandated semantics, and avoids having to monitor
7299 regular stores. The compare vs the remembered value is done during
7300 the cmpxchg operation, but we must compare the addresses manually. */
7301 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7302 TCGv_i32 addr, int size)
7303 {
7304 TCGv_i32 tmp = tcg_temp_new_i32();
7305 MemOp opc = size | MO_ALIGN | s->be_data;
7306
7307 s->is_ldex = true;
7308
7309 if (size == 3) {
7310 TCGv_i32 tmp2 = tcg_temp_new_i32();
7311 TCGv_i64 t64 = tcg_temp_new_i64();
7312
7313 /* For AArch32, architecturally the 32-bit word at the lowest
7314 * address is always Rt and the one at addr+4 is Rt2, even if
7315 * the CPU is big-endian. That means we don't want to do a
7316 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
7317 * for an architecturally 64-bit access, but instead do a
7318 * 64-bit access using MO_BE if appropriate and then split
7319 * the two halves.
7320 * This only makes a difference for BE32 user-mode, where
7321 * frob64() must not flip the two halves of the 64-bit data
7322 * but this code must treat BE32 user-mode like BE32 system.
7323 */
7324 TCGv taddr = gen_aa32_addr(s, addr, opc);
7325
7326 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
7327 tcg_temp_free(taddr);
7328 tcg_gen_mov_i64(cpu_exclusive_val, t64);
7329 if (s->be_data == MO_BE) {
7330 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
7331 } else {
7332 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
7333 }
7334 tcg_temp_free_i64(t64);
7335
7336 store_reg(s, rt2, tmp2);
7337 } else {
7338 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
7339 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7340 }
7341
7342 store_reg(s, rt, tmp);
7343 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7344 }
7345
7346 static void gen_clrex(DisasContext *s)
7347 {
7348 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7349 }
7350
7351 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7352 TCGv_i32 addr, int size)
7353 {
7354 TCGv_i32 t0, t1, t2;
7355 TCGv_i64 extaddr;
7356 TCGv taddr;
7357 TCGLabel *done_label;
7358 TCGLabel *fail_label;
7359 MemOp opc = size | MO_ALIGN | s->be_data;
7360
7361 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
7362 [addr] = {Rt};
7363 {Rd} = 0;
7364 } else {
7365 {Rd} = 1;
7366 } */
7367 fail_label = gen_new_label();
7368 done_label = gen_new_label();
7369 extaddr = tcg_temp_new_i64();
7370 tcg_gen_extu_i32_i64(extaddr, addr);
7371 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7372 tcg_temp_free_i64(extaddr);
7373
7374 taddr = gen_aa32_addr(s, addr, opc);
7375 t0 = tcg_temp_new_i32();
7376 t1 = load_reg(s, rt);
7377 if (size == 3) {
7378 TCGv_i64 o64 = tcg_temp_new_i64();
7379 TCGv_i64 n64 = tcg_temp_new_i64();
7380
7381 t2 = load_reg(s, rt2);
7382 /* For AArch32, architecturally the 32-bit word at the lowest
7383 * address is always Rt and the one at addr+4 is Rt2, even if
7384 * the CPU is big-endian. Since we're going to treat this as a
7385 * single 64-bit BE store, we need to put the two halves in the
7386 * opposite order for BE than for LE, so that they end up in the
7387 * right places.
7388 * We don't want gen_aa32_frob64() because that does the wrong
7389 * thing for BE32 usermode.
7390 */
7391 if (s->be_data == MO_BE) {
7392 tcg_gen_concat_i32_i64(n64, t2, t1);
7393 } else {
7394 tcg_gen_concat_i32_i64(n64, t1, t2);
7395 }
7396 tcg_temp_free_i32(t2);
7397
7398 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
7399 get_mem_index(s), opc);
7400 tcg_temp_free_i64(n64);
7401
7402 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
7403 tcg_gen_extrl_i64_i32(t0, o64);
7404
7405 tcg_temp_free_i64(o64);
7406 } else {
7407 t2 = tcg_temp_new_i32();
7408 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
7409 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
7410 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
7411 tcg_temp_free_i32(t2);
7412 }
7413 tcg_temp_free_i32(t1);
7414 tcg_temp_free(taddr);
7415 tcg_gen_mov_i32(cpu_R[rd], t0);
7416 tcg_temp_free_i32(t0);
7417 tcg_gen_br(done_label);
7418
7419 gen_set_label(fail_label);
7420 tcg_gen_movi_i32(cpu_R[rd], 1);
7421 gen_set_label(done_label);
7422 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7423 }
7424
7425 /* gen_srs:
7426 * @env: CPUARMState
7427 * @s: DisasContext
7428 * @mode: mode field from insn (which stack to store to)
7429 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7430 * @writeback: true if writeback bit set
7431 *
7432 * Generate code for the SRS (Store Return State) insn.
7433 */
7434 static void gen_srs(DisasContext *s,
7435 uint32_t mode, uint32_t amode, bool writeback)
7436 {
7437 int32_t offset;
7438 TCGv_i32 addr, tmp;
7439 bool undef = false;
7440
7441 /* SRS is:
7442 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
7443 * and specified mode is monitor mode
7444 * - UNDEFINED in Hyp mode
7445 * - UNPREDICTABLE in User or System mode
7446 * - UNPREDICTABLE if the specified mode is:
7447 * -- not implemented
7448 * -- not a valid mode number
7449 * -- a mode that's at a higher exception level
7450 * -- Monitor, if we are Non-secure
7451 * For the UNPREDICTABLE cases we choose to UNDEF.
7452 */
7453 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
7454 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
7455 return;
7456 }
7457
7458 if (s->current_el == 0 || s->current_el == 2) {
7459 undef = true;
7460 }
7461
7462 switch (mode) {
7463 case ARM_CPU_MODE_USR:
7464 case ARM_CPU_MODE_FIQ:
7465 case ARM_CPU_MODE_IRQ:
7466 case ARM_CPU_MODE_SVC:
7467 case ARM_CPU_MODE_ABT:
7468 case ARM_CPU_MODE_UND:
7469 case ARM_CPU_MODE_SYS:
7470 break;
7471 case ARM_CPU_MODE_HYP:
7472 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
7473 undef = true;
7474 }
7475 break;
7476 case ARM_CPU_MODE_MON:
7477 /* No need to check specifically for "are we non-secure" because
7478 * we've already made EL0 UNDEF and handled the trap for S-EL1;
7479 * so if this isn't EL3 then we must be non-secure.
7480 */
7481 if (s->current_el != 3) {
7482 undef = true;
7483 }
7484 break;
7485 default:
7486 undef = true;
7487 }
7488
7489 if (undef) {
7490 unallocated_encoding(s);
7491 return;
7492 }
7493
7494 addr = tcg_temp_new_i32();
7495 tmp = tcg_const_i32(mode);
7496 /* get_r13_banked() will raise an exception if called from System mode */
7497 gen_set_condexec(s);
7498 gen_set_pc_im(s, s->pc_curr);
7499 gen_helper_get_r13_banked(addr, cpu_env, tmp);
7500 tcg_temp_free_i32(tmp);
7501 switch (amode) {
7502 case 0: /* DA */
7503 offset = -4;
7504 break;
7505 case 1: /* IA */
7506 offset = 0;
7507 break;
7508 case 2: /* DB */
7509 offset = -8;
7510 break;
7511 case 3: /* IB */
7512 offset = 4;
7513 break;
7514 default:
7515 abort();
7516 }
7517 tcg_gen_addi_i32(addr, addr, offset);
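/* Store LR at the adjusted address; the SPSR goes in the word above it. */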
7518 tmp = load_reg(s, 14);
7519 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7520 tcg_temp_free_i32(tmp);
7521 tmp = load_cpu_field(spsr);
7522 tcg_gen_addi_i32(addr, addr, 4);
7523 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7524 tcg_temp_free_i32(tmp);
7525 if (writeback) {
7526 switch (amode) {
7527 case 0:
7528 offset = -8;
7529 break;
7530 case 1:
7531 offset = 4;
7532 break;
7533 case 2:
7534 offset = -4;
7535 break;
7536 case 3:
7537 offset = 0;
7538 break;
7539 default:
7540 abort();
7541 }
7542 tcg_gen_addi_i32(addr, addr, offset);
7543 tmp = tcg_const_i32(mode);
7544 gen_helper_set_r13_banked(cpu_env, tmp, addr);
7545 tcg_temp_free_i32(tmp);
7546 }
7547 tcg_temp_free_i32(addr);
7548 s->base.is_jmp = DISAS_UPDATE;
7549 }
7550
7551 /* Generate a label used for skipping this instruction */
7552 static void arm_gen_condlabel(DisasContext *s)
7553 {
7554 if (!s->condjmp) {
7555 s->condlabel = gen_new_label();
7556 s->condjmp = 1;
7557 }
7558 }
7559
7560 /* Skip this instruction if the ARM condition is false */
7561 static void arm_skip_unless(DisasContext *s, uint32_t cond)
7562 {
7563 arm_gen_condlabel(s);
7564 arm_gen_test_cc(cond ^ 1, s->condlabel);
7565 }
7566
7567
7568 /*
7569 * Constant expanders for the decoders.
7570 */
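/* These expanders are referenced (via !function) from the decodetree
 * (.decode) files that generate the decoders included further below.
 */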
7571
7572 static int negate(DisasContext *s, int x)
7573 {
7574 return -x;
7575 }
7576
7577 static int plus_2(DisasContext *s, int x)
7578 {
7579 return x + 2;
7580 }
7581
7582 static int times_2(DisasContext *s, int x)
7583 {
7584 return x * 2;
7585 }
7586
7587 static int times_4(DisasContext *s, int x)
7588 {
7589 return x * 4;
7590 }
7591
7592 /* Return only the rotation part of T32ExpandImm. */
7593 static int t32_expandimm_rot(DisasContext *s, int x)
7594 {
7595 return x & 0xc00 ? extract32(x, 7, 5) : 0;
7596 }
7597
7598 /* Return the unrotated immediate from T32ExpandImm. */
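/* For example, an encoded value of 0x3ab (pattern 3, byte 0xab)
 * expands to 0xabababab; encodings with bits [11:10] non-zero
 * instead return 0x80 | imm[6:0], which the caller rotates by
 * t32_expandimm_rot().
 */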
7599 static int t32_expandimm_imm(DisasContext *s, int x)
7600 {
7601 int imm = extract32(x, 0, 8);
7602
7603 switch (extract32(x, 8, 4)) {
7604 case 0: /* XY */
7605 /* Nothing to do. */
7606 break;
7607 case 1: /* 00XY00XY */
7608 imm *= 0x00010001;
7609 break;
7610 case 2: /* XY00XY00 */
7611 imm *= 0x01000100;
7612 break;
7613 case 3: /* XYXYXYXY */
7614 imm *= 0x01010101;
7615 break;
7616 default:
7617 /* Rotated constant. */
7618 imm |= 0x80;
7619 break;
7620 }
7621 return imm;
7622 }
7623
7624 static int t32_branch24(DisasContext *s, int x)
7625 {
7626 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
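/* x arrives sign-extended from the S bit, so when S == 0 (x >= 0)
 * both J bits must be flipped to recover I1:I2.
 */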
7627 x ^= !(x < 0) * (3 << 21);
7628 /* Append the final zero. */
7629 return x << 1;
7630 }
7631
7632 static int t16_setflags(DisasContext *s)
7633 {
7634 return s->condexec_mask == 0;
7635 }
7636
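/* T16 PUSH/POP register lists: bit 8 of the encoding adds LR (r14)
 * for PUSH and PC (r15) for POP to the r0-r7 set.
 */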
7637 static int t16_push_list(DisasContext *s, int x)
7638 {
7639 return (x & 0xff) | (x & 0x100) << (14 - 8);
7640 }
7641
7642 static int t16_pop_list(DisasContext *s, int x)
7643 {
7644 return (x & 0xff) | (x & 0x100) << (15 - 8);
7645 }
7646
7647 /*
7648 * Include the generated decoders.
7649 */
7650
7651 #include "decode-a32.inc.c"
7652 #include "decode-a32-uncond.inc.c"
7653 #include "decode-t32.inc.c"
7654 #include "decode-t16.inc.c"
7655
7656 /* Helpers to swap operands for reverse-subtract. */
7657 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7658 {
7659 tcg_gen_sub_i32(dst, b, a);
7660 }
7661
7662 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7663 {
7664 gen_sub_CC(dst, b, a);
7665 }
7666
7667 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7668 {
7669 gen_sub_carry(dest, b, a);
7670 }
7671
7672 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7673 {
7674 gen_sbc_CC(dest, b, a);
7675 }
7676
7677 /*
7678 * Helpers for the data processing routines.
7679 *
7680 * After the computation store the results back.
7681 * This may be suppressed altogether (STREG_NONE), require a runtime
7682 * check against the stack limits (STREG_SP_CHECK), or generate an
7683 * exception return. Oh, or store into a register.
7684 *
7685 * Always return true, indicating success for a trans_* function.
7686 */
7687 typedef enum {
7688 STREG_NONE,
7689 STREG_NORMAL,
7690 STREG_SP_CHECK,
7691 STREG_EXC_RET,
7692 } StoreRegKind;
7693
7694 static bool store_reg_kind(DisasContext *s, int rd,
7695 TCGv_i32 val, StoreRegKind kind)
7696 {
7697 switch (kind) {
7698 case STREG_NONE:
7699 tcg_temp_free_i32(val);
7700 return true;
7701 case STREG_NORMAL:
7702 /* See ALUWritePC: Interworking only from a32 mode. */
7703 if (s->thumb) {
7704 store_reg(s, rd, val);
7705 } else {
7706 store_reg_bx(s, rd, val);
7707 }
7708 return true;
7709 case STREG_SP_CHECK:
7710 store_sp_checked(s, val);
7711 return true;
7712 case STREG_EXC_RET:
7713 gen_exception_return(s, val);
7714 return true;
7715 }
7716 g_assert_not_reached();
7717 }
7718
7719 /*
7720 * Data-processing (register)
7721 *
7722 * Operate, with set flags, one register source,
7723 * one immediate shifted register source, and a destination.
7724 */
7725 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
7726 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7727 int logic_cc, StoreRegKind kind)
7728 {
7729 TCGv_i32 tmp1, tmp2;
7730
7731 tmp2 = load_reg(s, a->rm);
7732 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
7733 tmp1 = load_reg(s, a->rn);
7734
7735 gen(tmp1, tmp1, tmp2);
7736 tcg_temp_free_i32(tmp2);
7737
7738 if (logic_cc) {
7739 gen_logic_CC(tmp1);
7740 }
7741 return store_reg_kind(s, a->rd, tmp1, kind);
7742 }
7743
7744 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
7745 void (*gen)(TCGv_i32, TCGv_i32),
7746 int logic_cc, StoreRegKind kind)
7747 {
7748 TCGv_i32 tmp;
7749
7750 tmp = load_reg(s, a->rm);
7751 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
7752
7753 gen(tmp, tmp);
7754 if (logic_cc) {
7755 gen_logic_CC(tmp);
7756 }
7757 return store_reg_kind(s, a->rd, tmp, kind);
7758 }
7759
7760 /*
7761 * Data-processing (register-shifted register)
7762 *
7763 * Operate, with set flags, one register source,
7764 * one register shifted register source, and a destination.
7765 */
7766 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
7767 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7768 int logic_cc, StoreRegKind kind)
7769 {
7770 TCGv_i32 tmp1, tmp2;
7771
7772 tmp1 = load_reg(s, a->rs);
7773 tmp2 = load_reg(s, a->rm);
7774 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7775 tmp1 = load_reg(s, a->rn);
7776
7777 gen(tmp1, tmp1, tmp2);
7778 tcg_temp_free_i32(tmp2);
7779
7780 if (logic_cc) {
7781 gen_logic_CC(tmp1);
7782 }
7783 return store_reg_kind(s, a->rd, tmp1, kind);
7784 }
7785
7786 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
7787 void (*gen)(TCGv_i32, TCGv_i32),
7788 int logic_cc, StoreRegKind kind)
7789 {
7790 TCGv_i32 tmp1, tmp2;
7791
7792 tmp1 = load_reg(s, a->rs);
7793 tmp2 = load_reg(s, a->rm);
7794 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7795
7796 gen(tmp2, tmp2);
7797 if (logic_cc) {
7798 gen_logic_CC(tmp2);
7799 }
7800 return store_reg_kind(s, a->rd, tmp2, kind);
7801 }
7802
7803 /*
7804 * Data-processing (immediate)
7805 *
7806 * Operate, with set flags, one register source,
7807 * one rotated immediate, and a destination.
7808 *
7809 * Note that logic_cc && a->rot setting CF based on the msb of the
7810 * immediate is the reason why we must pass in the unrotated form
7811 * of the immediate.
7812 */
7813 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
7814 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7815 int logic_cc, StoreRegKind kind)
7816 {
7817 TCGv_i32 tmp1, tmp2;
7818 uint32_t imm;
7819
7820 imm = ror32(a->imm, a->rot);
7821 if (logic_cc && a->rot) {
7822 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7823 }
7824 tmp2 = tcg_const_i32(imm);
7825 tmp1 = load_reg(s, a->rn);
7826
7827 gen(tmp1, tmp1, tmp2);
7828 tcg_temp_free_i32(tmp2);
7829
7830 if (logic_cc) {
7831 gen_logic_CC(tmp1);
7832 }
7833 return store_reg_kind(s, a->rd, tmp1, kind);
7834 }
7835
7836 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
7837 void (*gen)(TCGv_i32, TCGv_i32),
7838 int logic_cc, StoreRegKind kind)
7839 {
7840 TCGv_i32 tmp;
7841 uint32_t imm;
7842
7843 imm = ror32(a->imm, a->rot);
7844 if (logic_cc && a->rot) {
7845 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7846 }
7847 tmp = tcg_const_i32(imm);
7848
7849 gen(tmp, tmp);
7850 if (logic_cc) {
7851 gen_logic_CC(tmp);
7852 }
7853 return store_reg_kind(s, a->rd, tmp, kind);
7854 }
7855
7856 #define DO_ANY3(NAME, OP, L, K) \
7857 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
7858 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
7859 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
7860 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
7861 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
7862 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
7863
7864 #define DO_ANY2(NAME, OP, L, K) \
7865 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
7866 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
7867 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
7868 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
7869 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
7870 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
7871
7872 #define DO_CMP2(NAME, OP, L) \
7873 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
7874 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
7875 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
7876 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
7877 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
7878 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
7879
7880 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
7881 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
7882 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
7883 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
7884
7885 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
7886 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
7887 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
7888 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
7889
7890 DO_CMP2(TST, tcg_gen_and_i32, true)
7891 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
7892 DO_CMP2(CMN, gen_add_CC, false)
7893 DO_CMP2(CMP, gen_sub_CC, false)
7894
7895 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
7896 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
7897
7898 /*
7899 * Note that for the computation of StoreRegKind we return out of the
7900 * middle of the functions that are expanded by DO_ANY3, and that
7901 * we modify a->s via that parameter before it is used by OP.
7902 */
7903 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
7904 ({
7905 StoreRegKind ret = STREG_NORMAL;
7906 if (a->rd == 15 && a->s) {
7907 /*
7908 * See ALUExceptionReturn:
7909 * In User mode, UNPREDICTABLE; we choose UNDEF.
7910 * In Hyp mode, UNDEFINED.
7911 */
7912 if (IS_USER(s) || s->current_el == 2) {
7913 unallocated_encoding(s);
7914 return true;
7915 }
7916 /* There is no writeback of nzcv to PSTATE. */
7917 a->s = 0;
7918 ret = STREG_EXC_RET;
7919 } else if (a->rd == 13 && a->rn == 13) {
7920 ret = STREG_SP_CHECK;
7921 }
7922 ret;
7923 }))
7924
7925 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
7926 ({
7927 StoreRegKind ret = STREG_NORMAL;
7928 if (a->rd == 15 && a->s) {
7929 /*
7930 * See ALUExceptionReturn:
7931 * In User mode, UNPREDICTABLE; we choose UNDEF.
7932 * In Hyp mode, UNDEFINED.
7933 */
7934 if (IS_USER(s) || s->current_el == 2) {
7935 unallocated_encoding(s);
7936 return true;
7937 }
7938 /* There is no writeback of nzcv to PSTATE. */
7939 a->s = 0;
7940 ret = STREG_EXC_RET;
7941 } else if (a->rd == 13) {
7942 ret = STREG_SP_CHECK;
7943 }
7944 ret;
7945 }))
7946
7947 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
7948
7949 /*
7950 * ORN is only available with T32, so there is no register-shifted-register
7951 * form of the insn. Using the DO_ANY3 macro would create an unused function.
7952 */
7953 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
7954 {
7955 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7956 }
7957
7958 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
7959 {
7960 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7961 }
7962
7963 #undef DO_ANY3
7964 #undef DO_ANY2
7965 #undef DO_CMP2
7966
7967 static bool trans_ADR(DisasContext *s, arg_ri *a)
7968 {
7969 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
7970 return true;
7971 }
7972
7973 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
7974 {
7975 TCGv_i32 tmp;
7976
7977 if (!ENABLE_ARCH_6T2) {
7978 return false;
7979 }
7980
7981 tmp = tcg_const_i32(a->imm);
7982 store_reg(s, a->rd, tmp);
7983 return true;
7984 }
7985
7986 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
7987 {
7988 TCGv_i32 tmp;
7989
7990 if (!ENABLE_ARCH_6T2) {
7991 return false;
7992 }
7993
7994 tmp = load_reg(s, a->rd);
7995 tcg_gen_ext16u_i32(tmp, tmp);
7996 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
7997 store_reg(s, a->rd, tmp);
7998 return true;
7999 }
8000
8001 /*
8002 * Multiply and multiply accumulate
8003 */
8004
8005 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
8006 {
8007 TCGv_i32 t1, t2;
8008
8009 t1 = load_reg(s, a->rn);
8010 t2 = load_reg(s, a->rm);
8011 tcg_gen_mul_i32(t1, t1, t2);
8012 tcg_temp_free_i32(t2);
8013 if (add) {
8014 t2 = load_reg(s, a->ra);
8015 tcg_gen_add_i32(t1, t1, t2);
8016 tcg_temp_free_i32(t2);
8017 }
8018 if (a->s) {
8019 gen_logic_CC(t1);
8020 }
8021 store_reg(s, a->rd, t1);
8022 return true;
8023 }
8024
8025 static bool trans_MUL(DisasContext *s, arg_MUL *a)
8026 {
8027 return op_mla(s, a, false);
8028 }
8029
8030 static bool trans_MLA(DisasContext *s, arg_MLA *a)
8031 {
8032 return op_mla(s, a, true);
8033 }
8034
8035 static bool trans_MLS(DisasContext *s, arg_MLS *a)
8036 {
8037 TCGv_i32 t1, t2;
8038
8039 if (!ENABLE_ARCH_6T2) {
8040 return false;
8041 }
8042 t1 = load_reg(s, a->rn);
8043 t2 = load_reg(s, a->rm);
8044 tcg_gen_mul_i32(t1, t1, t2);
8045 tcg_temp_free_i32(t2);
8046 t2 = load_reg(s, a->ra);
8047 tcg_gen_sub_i32(t1, t2, t1);
8048 tcg_temp_free_i32(t2);
8049 store_reg(s, a->rd, t1);
8050 return true;
8051 }
8052
8053 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
8054 {
8055 TCGv_i32 t0, t1, t2, t3;
8056
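/*
 * As used here, a->ra receives the low 32 bits of the result and
 * a->rd the high 32 bits; when accumulating, the existing
 * a->rd:a->ra pair is added in via a 64-bit add2.
 */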
8057 t0 = load_reg(s, a->rm);
8058 t1 = load_reg(s, a->rn);
8059 if (uns) {
8060 tcg_gen_mulu2_i32(t0, t1, t0, t1);
8061 } else {
8062 tcg_gen_muls2_i32(t0, t1, t0, t1);
8063 }
8064 if (add) {
8065 t2 = load_reg(s, a->ra);
8066 t3 = load_reg(s, a->rd);
8067 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
8068 tcg_temp_free_i32(t2);
8069 tcg_temp_free_i32(t3);
8070 }
8071 if (a->s) {
8072 gen_logicq_cc(t0, t1);
8073 }
8074 store_reg(s, a->ra, t0);
8075 store_reg(s, a->rd, t1);
8076 return true;
8077 }
8078
8079 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
8080 {
8081 return op_mlal(s, a, true, false);
8082 }
8083
8084 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
8085 {
8086 return op_mlal(s, a, false, false);
8087 }
8088
8089 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
8090 {
8091 return op_mlal(s, a, true, true);
8092 }
8093
8094 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
8095 {
8096 return op_mlal(s, a, false, true);
8097 }
8098
8099 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
8100 {
8101 TCGv_i32 t0, t1, t2, zero;
8102
8103 if (s->thumb
8104 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8105 : !ENABLE_ARCH_6) {
8106 return false;
8107 }
8108
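/*
 * UMAAL computes the unsigned 64-bit value Rn * Rm + Ra + Rd and
 * returns it in a->rd:a->ra. Each 32-bit addend is folded into the
 * 64-bit product with add2 against a zero high word; the total
 * (2^32-1)^2 + 2*(2^32-1) == 2^64 - 1 can never overflow 64 bits.
 */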
8109 t0 = load_reg(s, a->rm);
8110 t1 = load_reg(s, a->rn);
8111 tcg_gen_mulu2_i32(t0, t1, t0, t1);
8112 zero = tcg_const_i32(0);
8113 t2 = load_reg(s, a->ra);
8114 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
8115 tcg_temp_free_i32(t2);
8116 t2 = load_reg(s, a->rd);
8117 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
8118 tcg_temp_free_i32(t2);
8119 tcg_temp_free_i32(zero);
8120 store_reg(s, a->ra, t0);
8121 store_reg(s, a->rd, t1);
8122 return true;
8123 }
8124
8125 /*
8126 * Saturating addition and subtraction
8127 */
8128
8129 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
8130 {
8131 TCGv_i32 t0, t1;
8132
8133 if (s->thumb
8134 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8135 : !ENABLE_ARCH_5TE) {
8136 return false;
8137 }
8138
8139 t0 = load_reg(s, a->rm);
8140 t1 = load_reg(s, a->rn);
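/* For QDADD/QDSUB the doubling of Rn is itself saturating: SAT(Rn + Rn). */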
8141 if (doub) {
8142 gen_helper_add_saturate(t1, cpu_env, t1, t1);
8143 }
8144 if (add) {
8145 gen_helper_add_saturate(t0, cpu_env, t0, t1);
8146 } else {
8147 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
8148 }
8149 tcg_temp_free_i32(t1);
8150 store_reg(s, a->rd, t0);
8151 return true;
8152 }
8153
8154 #define DO_QADDSUB(NAME, ADD, DOUB) \
8155 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8156 { \
8157 return op_qaddsub(s, a, ADD, DOUB); \
8158 }
8159
8160 DO_QADDSUB(QADD, true, false)
8161 DO_QADDSUB(QSUB, false, false)
8162 DO_QADDSUB(QDADD, true, true)
8163 DO_QADDSUB(QDSUB, false, true)
8164
8165 #undef DO_QADDSUB
8166
8167 /*
8168 * Halfword multiply and multiply accumulate
8169 */
8170
8171 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
8172 int add_long, bool nt, bool mt)
8173 {
8174 TCGv_i32 t0, t1, tl, th;
8175
8176 if (s->thumb
8177 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8178 : !ENABLE_ARCH_5TE) {
8179 return false;
8180 }
8181
8182 t0 = load_reg(s, a->rn);
8183 t1 = load_reg(s, a->rm);
8184 gen_mulxy(t0, t1, nt, mt);
8185 tcg_temp_free_i32(t1);
8186
8187 switch (add_long) {
8188 case 0:
8189 store_reg(s, a->rd, t0);
8190 break;
8191 case 1:
8192 t1 = load_reg(s, a->ra);
8193 gen_helper_add_setq(t0, cpu_env, t0, t1);
8194 tcg_temp_free_i32(t1);
8195 store_reg(s, a->rd, t0);
8196 break;
8197 case 2:
8198 tl = load_reg(s, a->ra);
8199 th = load_reg(s, a->rd);
8200 /* Sign-extend the 32-bit product to 64 bits. */
8201 t1 = tcg_temp_new_i32();
8202 tcg_gen_sari_i32(t1, t0, 31);
8203 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
8204 tcg_temp_free_i32(t0);
8205 tcg_temp_free_i32(t1);
8206 store_reg(s, a->ra, tl);
8207 store_reg(s, a->rd, th);
8208 break;
8209 default:
8210 g_assert_not_reached();
8211 }
8212 return true;
8213 }
8214
8215 #define DO_SMLAX(NAME, add, nt, mt) \
8216 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8217 { \
8218 return op_smlaxxx(s, a, add, nt, mt); \
8219 }
8220
8221 DO_SMLAX(SMULBB, 0, 0, 0)
8222 DO_SMLAX(SMULBT, 0, 0, 1)
8223 DO_SMLAX(SMULTB, 0, 1, 0)
8224 DO_SMLAX(SMULTT, 0, 1, 1)
8225
8226 DO_SMLAX(SMLABB, 1, 0, 0)
8227 DO_SMLAX(SMLABT, 1, 0, 1)
8228 DO_SMLAX(SMLATB, 1, 1, 0)
8229 DO_SMLAX(SMLATT, 1, 1, 1)
8230
8231 DO_SMLAX(SMLALBB, 2, 0, 0)
8232 DO_SMLAX(SMLALBT, 2, 0, 1)
8233 DO_SMLAX(SMLALTB, 2, 1, 0)
8234 DO_SMLAX(SMLALTT, 2, 1, 1)
8235
8236 #undef DO_SMLAX
8237
8238 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
8239 {
8240 TCGv_i32 t0, t1;
8241
8242 if (!ENABLE_ARCH_5TE) {
8243 return false;
8244 }
8245
8246 t0 = load_reg(s, a->rn);
8247 t1 = load_reg(s, a->rm);
8248 /*
8249 * Since the nominal result is product<47:16>, shift the 16-bit
8250 * input up by 16 bits, so that the result is at product<63:32>.
8251 */
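/*
 * e.g. for SMULWB the architectural result is (Rn * Rm<15:0>)<47:16>;
 * multiplying by Rm<15:0> << 16 instead scales that product by 2^16,
 * so the wanted bits land in <63:32> and we can keep just the high
 * word of the 64-bit multiply below.
 */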
8252 if (mt) {
8253 tcg_gen_andi_i32(t1, t1, 0xffff0000);
8254 } else {
8255 tcg_gen_shli_i32(t1, t1, 16);
8256 }
8257 tcg_gen_muls2_i32(t0, t1, t0, t1);
8258 tcg_temp_free_i32(t0);
8259 if (add) {
8260 t0 = load_reg(s, a->ra);
8261 gen_helper_add_setq(t1, cpu_env, t1, t0);
8262 tcg_temp_free_i32(t0);
8263 }
8264 store_reg(s, a->rd, t1);
8265 return true;
8266 }
8267
8268 #define DO_SMLAWX(NAME, add, mt) \
8269 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8270 { \
8271 return op_smlawx(s, a, add, mt); \
8272 }
8273
8274 DO_SMLAWX(SMULWB, 0, 0)
8275 DO_SMLAWX(SMULWT, 0, 1)
8276 DO_SMLAWX(SMLAWB, 1, 0)
8277 DO_SMLAWX(SMLAWT, 1, 1)
8278
8279 #undef DO_SMLAWX
8280
8281 /*
8282 * MSR (immediate) and hints
8283 */
8284
8285 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
8286 {
8287 /*
8288 * When running single-threaded TCG code, use the helper to ensure that
8289 * the next round-robin scheduled vCPU gets a crack. When running in
8290 * MTTCG we don't generate jumps to the helper as it won't affect the
8291 * scheduling of other vCPUs.
8292 */
8293 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8294 gen_set_pc_im(s, s->base.pc_next);
8295 s->base.is_jmp = DISAS_YIELD;
8296 }
8297 return true;
8298 }
8299
8300 static bool trans_WFE(DisasContext *s, arg_WFE *a)
8301 {
8302 /*
8303 * When running single-threaded TCG code, use the helper to ensure that
8304 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
8305 * just skip this instruction. Currently the SEV/SEVL instructions,
8306 * which are only *one* of the many ways to wake the CPU from WFE, are
8307 * not implemented, so we can't sleep like WFI does.
8308 */
8309 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8310 gen_set_pc_im(s, s->base.pc_next);
8311 s->base.is_jmp = DISAS_WFE;
8312 }
8313 return true;
8314 }
8315
8316 static bool trans_WFI(DisasContext *s, arg_WFI *a)
8317 {
8318 /* For WFI, halt the vCPU until an IRQ. */
8319 gen_set_pc_im(s, s->base.pc_next);
8320 s->base.is_jmp = DISAS_WFI;
8321 return true;
8322 }
8323
8324 static bool trans_NOP(DisasContext *s, arg_NOP *a)
8325 {
8326 return true;
8327 }
8328
8329 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
8330 {
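/*
 * The A32 modified-immediate rule: an 8-bit immediate rotated right
 * by twice the 4-bit rotate field; a->rot holds the raw field here,
 * hence the * 2.
 */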
8331 uint32_t val = ror32(a->imm, a->rot * 2);
8332 uint32_t mask = msr_mask(s, a->mask, a->r);
8333
8334 if (gen_set_psr_im(s, mask, a->r, val)) {
8335 unallocated_encoding(s);
8336 }
8337 return true;
8338 }
8339
8340 /*
8341 * Cyclic Redundancy Check
8342 */
8343
8344 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
8345 {
8346 TCGv_i32 t1, t2, t3;
8347
8348 if (!dc_isar_feature(aa32_crc32, s)) {
8349 return false;
8350 }
8351
8352 t1 = load_reg(s, a->rn);
8353 t2 = load_reg(s, a->rm);
8354 switch (sz) {
8355 case MO_8:
8356 gen_uxtb(t2);
8357 break;
8358 case MO_16:
8359 gen_uxth(t2);
8360 break;
8361 case MO_32:
8362 break;
8363 default:
8364 g_assert_not_reached();
8365 }
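/* The helpers take the operand size in bytes: 1 << MO_8/MO_16/MO_32 is 1/2/4. */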
8366 t3 = tcg_const_i32(1 << sz);
8367 if (c) {
8368 gen_helper_crc32c(t1, t1, t2, t3);
8369 } else {
8370 gen_helper_crc32(t1, t1, t2, t3);
8371 }
8372 tcg_temp_free_i32(t2);
8373 tcg_temp_free_i32(t3);
8374 store_reg(s, a->rd, t1);
8375 return true;
8376 }
8377
8378 #define DO_CRC32(NAME, c, sz) \
8379 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8380 { return op_crc32(s, a, c, sz); }
8381
8382 DO_CRC32(CRC32B, false, MO_8)
8383 DO_CRC32(CRC32H, false, MO_16)
8384 DO_CRC32(CRC32W, false, MO_32)
8385 DO_CRC32(CRC32CB, true, MO_8)
8386 DO_CRC32(CRC32CH, true, MO_16)
8387 DO_CRC32(CRC32CW, true, MO_32)
8388
8389 #undef DO_CRC32
8390
8391 /*
8392 * Miscellaneous instructions
8393 */
8394
8395 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
8396 {
8397 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8398 return false;
8399 }
8400 gen_mrs_banked(s, a->r, a->sysm, a->rd);
8401 return true;
8402 }
8403
8404 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
8405 {
8406 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8407 return false;
8408 }
8409 gen_msr_banked(s, a->r, a->sysm, a->rn);
8410 return true;
8411 }
8412
8413 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
8414 {
8415 TCGv_i32 tmp;
8416
8417 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8418 return false;
8419 }
8420 if (a->r) {
8421 if (IS_USER(s)) {
8422 unallocated_encoding(s);
8423 return true;
8424 }
8425 tmp = load_cpu_field(spsr);
8426 } else {
8427 tmp = tcg_temp_new_i32();
8428 gen_helper_cpsr_read(tmp, cpu_env);
8429 }
8430 store_reg(s, a->rd, tmp);
8431 return true;
8432 }
8433
8434 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
8435 {
8436 TCGv_i32 tmp;
8437 uint32_t mask = msr_mask(s, a->mask, a->r);
8438
8439 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8440 return false;
8441 }
8442 tmp = load_reg(s, a->rn);
8443 if (gen_set_psr(s, mask, a->r, tmp)) {
8444 unallocated_encoding(s);
8445 }
8446 return true;
8447 }
8448
8449 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
8450 {
8451 TCGv_i32 tmp;
8452
8453 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8454 return false;
8455 }
8456 tmp = tcg_const_i32(a->sysm);
8457 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
8458 store_reg(s, a->rd, tmp);
8459 return true;
8460 }
8461
8462 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
8463 {
8464 TCGv_i32 addr, reg;
8465
8466 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8467 return false;
8468 }
8469 addr = tcg_const_i32((a->mask << 10) | a->sysm);
8470 reg = load_reg(s, a->rn);
8471 gen_helper_v7m_msr(cpu_env, addr, reg);
8472 tcg_temp_free_i32(addr);
8473 tcg_temp_free_i32(reg);
8474 /* If we wrote to CONTROL, the EL might have changed */
8475 gen_helper_rebuild_hflags_m32_newel(cpu_env);
8476 gen_lookup_tb(s);
8477 return true;
8478 }
8479
8480 static bool trans_BX(DisasContext *s, arg_BX *a)
8481 {
8482 if (!ENABLE_ARCH_4T) {
8483 return false;
8484 }
8485 gen_bx_excret(s, load_reg(s, a->rm));
8486 return true;
8487 }
8488
8489 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
8490 {
8491 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
8492 return false;
8493 }
8494 /* Trivial implementation equivalent to bx. */
8495 gen_bx(s, load_reg(s, a->rm));
8496 return true;
8497 }
8498
8499 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
8500 {
8501 TCGv_i32 tmp;
8502
8503 if (!ENABLE_ARCH_5) {
8504 return false;
8505 }
8506 tmp = load_reg(s, a->rm);
8507 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8508 gen_bx(s, tmp);
8509 return true;
8510 }
8511
8512 /*
8513 * BXNS/BLXNS: only exist for v8M with the security extensions,
8514 * and always UNDEF if NonSecure. We don't implement these in
8515 * user-only mode either (in theory you can use them from
8516 * Secure User mode, but they are too closely tied to system emulation).
8517 */
8518 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
8519 {
8520 if (!s->v8m_secure || IS_USER_ONLY) {
8521 unallocated_encoding(s);
8522 } else {
8523 gen_bxns(s, a->rm);
8524 }
8525 return true;
8526 }
8527
8528 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
8529 {
8530 if (!s->v8m_secure || IS_USER_ONLY) {
8531 unallocated_encoding(s);
8532 } else {
8533 gen_blxns(s, a->rm);
8534 }
8535 return true;
8536 }
8537
8538 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
8539 {
8540 TCGv_i32 tmp;
8541
8542 if (!ENABLE_ARCH_5) {
8543 return false;
8544 }
8545 tmp = load_reg(s, a->rm);
8546 tcg_gen_clzi_i32(tmp, tmp, 32);
8547 store_reg(s, a->rd, tmp);
8548 return true;
8549 }
8550
8551 static bool trans_ERET(DisasContext *s, arg_ERET *a)
8552 {
8553 TCGv_i32 tmp;
8554
8555 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
8556 return false;
8557 }
8558 if (IS_USER(s)) {
8559 unallocated_encoding(s);
8560 return true;
8561 }
8562 if (s->current_el == 2) {
8563 /* ERET from Hyp uses ELR_Hyp, not LR */
8564 tmp = load_cpu_field(elr_el[2]);
8565 } else {
8566 tmp = load_reg(s, 14);
8567 }
8568 gen_exception_return(s, tmp);
8569 return true;
8570 }
8571
8572 static bool trans_HLT(DisasContext *s, arg_HLT *a)
8573 {
8574 gen_hlt(s, a->imm);
8575 return true;
8576 }
8577
8578 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
8579 {
8580 if (!ENABLE_ARCH_5) {
8581 return false;
8582 }
8583 if (arm_dc_feature(s, ARM_FEATURE_M) &&
8584 semihosting_enabled() &&
8585 #ifndef CONFIG_USER_ONLY
8586 !IS_USER(s) &&
8587 #endif
8588 (a->imm == 0xab)) {
8589 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8590 } else {
8591 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
8592 }
8593 return true;
8594 }
8595
8596 static bool trans_HVC(DisasContext *s, arg_HVC *a)
8597 {
8598 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
8599 return false;
8600 }
8601 if (IS_USER(s)) {
8602 unallocated_encoding(s);
8603 } else {
8604 gen_hvc(s, a->imm);
8605 }
8606 return true;
8607 }
8608
8609 static bool trans_SMC(DisasContext *s, arg_SMC *a)
8610 {
8611 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
8612 return false;
8613 }
8614 if (IS_USER(s)) {
8615 unallocated_encoding(s);
8616 } else {
8617 gen_smc(s);
8618 }
8619 return true;
8620 }
8621
8622 static bool trans_SG(DisasContext *s, arg_SG *a)
8623 {
8624 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8625 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8626 return false;
8627 }
8628 /*
8629 * SG (v8M only)
8630 * The bulk of the behaviour for this instruction is implemented
8631 * in v7m_handle_execute_nsc(), which deals with the insn when
8632 * it is executed by a CPU in non-secure state from memory
8633 * which is Secure & NonSecure-Callable.
8634 * Here we only need to handle the remaining cases:
8635 * * in NS memory (including the "security extension not
8636 * implemented" case) : NOP
8637 * * in S memory but CPU already secure (clear IT bits)
8638 * We know that the attribute for the memory this insn is
8639 * in must match the current CPU state, because otherwise
8640 * get_phys_addr_pmsav8 would have generated an exception.
8641 */
8642 if (s->v8m_secure) {
8643 /* Like the IT insn, we don't need to generate any code */
8644 s->condexec_cond = 0;
8645 s->condexec_mask = 0;
8646 }
8647 return true;
8648 }
8649
8650 static bool trans_TT(DisasContext *s, arg_TT *a)
8651 {
8652 TCGv_i32 addr, tmp;
8653
8654 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8655 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8656 return false;
8657 }
8658 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
8659 /* We UNDEF for these UNPREDICTABLE cases */
8660 unallocated_encoding(s);
8661 return true;
8662 }
8663 if (a->A && !s->v8m_secure) {
8664 /* This case is UNDEFINED. */
8665 unallocated_encoding(s);
8666 return true;
8667 }
8668
8669 addr = load_reg(s, a->rn);
8670 tmp = tcg_const_i32((a->A << 1) | a->T);
8671 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
8672 tcg_temp_free_i32(addr);
8673 store_reg(s, a->rd, tmp);
8674 return true;
8675 }
8676
8677 /*
8678 * Load/store register index
8679 */
8680
8681 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
8682 {
8683 ISSInfo ret;
8684
8685 /* ISS not valid if writeback */
8686 if (p && !w) {
8687 ret = rd;
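/* A 2-byte insn length means this was a 16-bit Thumb encoding. */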
8688 if (s->base.pc_next - s->pc_curr == 2) {
8689 ret |= ISSIs16Bit;
8690 }
8691 } else {
8692 ret = ISSInvalid;
8693 }
8694 return ret;
8695 }
8696
8697 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
8698 {
8699 TCGv_i32 addr = load_reg(s, a->rn);
8700
8701 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8702 gen_helper_v8m_stackcheck(cpu_env, addr);
8703 }
8704
8705 if (a->p) {
8706 TCGv_i32 ofs = load_reg(s, a->rm);
8707 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8708 if (a->u) {
8709 tcg_gen_add_i32(addr, addr, ofs);
8710 } else {
8711 tcg_gen_sub_i32(addr, addr, ofs);
8712 }
8713 tcg_temp_free_i32(ofs);
8714 }
8715 return addr;
8716 }
8717
8718 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
8719 TCGv_i32 addr, int address_offset)
8720 {
8721 if (!a->p) {
8722 TCGv_i32 ofs = load_reg(s, a->rm);
8723 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8724 if (a->u) {
8725 tcg_gen_add_i32(addr, addr, ofs);
8726 } else {
8727 tcg_gen_sub_i32(addr, addr, ofs);
8728 }
8729 tcg_temp_free_i32(ofs);
8730 } else if (!a->w) {
8731 tcg_temp_free_i32(addr);
8732 return;
8733 }
8734 tcg_gen_addi_i32(addr, addr, address_offset);
8735 store_reg(s, a->rn, addr);
8736 }
8737
8738 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
8739 MemOp mop, int mem_idx)
8740 {
8741 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8742 TCGv_i32 addr, tmp;
8743
8744 addr = op_addr_rr_pre(s, a);
8745
8746 tmp = tcg_temp_new_i32();
8747 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8748 disas_set_da_iss(s, mop, issinfo);
8749
8750 /*
8751 * Perform base writeback before the loaded value to
8752 * ensure correct behavior with overlapping index registers.
8753 */
8754 op_addr_rr_post(s, a, addr, 0);
8755 store_reg_from_load(s, a->rt, tmp);
8756 return true;
8757 }
8758
8759 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
8760 MemOp mop, int mem_idx)
8761 {
8762 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8763 TCGv_i32 addr, tmp;
8764
8765 addr = op_addr_rr_pre(s, a);
8766
8767 tmp = load_reg(s, a->rt);
8768 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8769 disas_set_da_iss(s, mop, issinfo);
8770 tcg_temp_free_i32(tmp);
8771
8772 op_addr_rr_post(s, a, addr, 0);
8773 return true;
8774 }
8775
8776 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
8777 {
8778 int mem_idx = get_mem_index(s);
8779 TCGv_i32 addr, tmp;
8780
8781 if (!ENABLE_ARCH_5TE) {
8782 return false;
8783 }
8784 if (a->rt & 1) {
8785 unallocated_encoding(s);
8786 return true;
8787 }
8788 addr = op_addr_rr_pre(s, a);
8789
8790 tmp = tcg_temp_new_i32();
8791 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8792 store_reg(s, a->rt, tmp);
8793
8794 tcg_gen_addi_i32(addr, addr, 4);
8795
8796 tmp = tcg_temp_new_i32();
8797 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8798 store_reg(s, a->rt + 1, tmp);
8799
8800 /* LDRD w/ base writeback is undefined if the registers overlap. */
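/*
 * addr currently points at the second word; the -4 backs it off so
 * that any writeback is based on the address of the first word.
 */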
8801 op_addr_rr_post(s, a, addr, -4);
8802 return true;
8803 }
8804
8805 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
8806 {
8807 int mem_idx = get_mem_index(s);
8808 TCGv_i32 addr, tmp;
8809
8810 if (!ENABLE_ARCH_5TE) {
8811 return false;
8812 }
8813 if (a->rt & 1) {
8814 unallocated_encoding(s);
8815 return true;
8816 }
8817 addr = op_addr_rr_pre(s, a);
8818
8819 tmp = load_reg(s, a->rt);
8820 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8821 tcg_temp_free_i32(tmp);
8822
8823 tcg_gen_addi_i32(addr, addr, 4);
8824
8825 tmp = load_reg(s, a->rt + 1);
8826 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8827 tcg_temp_free_i32(tmp);
8828
8829 op_addr_rr_post(s, a, addr, -4);
8830 return true;
8831 }
8832
8833 /*
8834 * Load/store immediate index
8835 */
8836
8837 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
8838 {
8839 int ofs = a->imm;
8840
8841 if (!a->u) {
8842 ofs = -ofs;
8843 }
8844
8845 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8846 /*
8847 * Stackcheck. Here we know 'addr' is the current SP;
8848 * if U is set we're moving SP up, else down. It is
8849 * UNKNOWN whether the limit check triggers when SP starts
8850 * below the limit and ends up above it; we choose to do so.
8851 */
8852 if (!a->u) {
8853 TCGv_i32 newsp = tcg_temp_new_i32();
8854 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
8855 gen_helper_v8m_stackcheck(cpu_env, newsp);
8856 tcg_temp_free_i32(newsp);
8857 } else {
8858 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
8859 }
8860 }
8861
8862 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
8863 }
8864
8865 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
8866 TCGv_i32 addr, int address_offset)
8867 {
8868 if (!a->p) {
8869 if (a->u) {
8870 address_offset += a->imm;
8871 } else {
8872 address_offset -= a->imm;
8873 }
8874 } else if (!a->w) {
8875 tcg_temp_free_i32(addr);
8876 return;
8877 }
8878 tcg_gen_addi_i32(addr, addr, address_offset);
8879 store_reg(s, a->rn, addr);
8880 }
8881
8882 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
8883 MemOp mop, int mem_idx)
8884 {
8885 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8886 TCGv_i32 addr, tmp;
8887
8888 addr = op_addr_ri_pre(s, a);
8889
8890 tmp = tcg_temp_new_i32();
8891 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8892 disas_set_da_iss(s, mop, issinfo);
8893
8894 /*
8895 * Perform base writeback before the loaded value to
8896 * ensure correct behavior with overlapping index registers.
8897 */
8898 op_addr_ri_post(s, a, addr, 0);
8899 store_reg_from_load(s, a->rt, tmp);
8900 return true;
8901 }
8902
8903 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
8904 MemOp mop, int mem_idx)
8905 {
8906 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8907 TCGv_i32 addr, tmp;
8908
8909 addr = op_addr_ri_pre(s, a);
8910
8911 tmp = load_reg(s, a->rt);
8912 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8913 disas_set_da_iss(s, mop, issinfo);
8914 tcg_temp_free_i32(tmp);
8915
8916 op_addr_ri_post(s, a, addr, 0);
8917 return true;
8918 }
8919
8920 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8921 {
8922 int mem_idx = get_mem_index(s);
8923 TCGv_i32 addr, tmp;
8924
8925 addr = op_addr_ri_pre(s, a);
8926
8927 tmp = tcg_temp_new_i32();
8928 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8929 store_reg(s, a->rt, tmp);
8930
8931 tcg_gen_addi_i32(addr, addr, 4);
8932
8933 tmp = tcg_temp_new_i32();
8934 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8935 store_reg(s, rt2, tmp);
8936
8937 /* LDRD w/ base writeback is undefined if the registers overlap. */
8938 op_addr_ri_post(s, a, addr, -4);
8939 return true;
8940 }
8941
8942 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8943 {
8944 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8945 return false;
8946 }
8947 return op_ldrd_ri(s, a, a->rt + 1);
8948 }
8949
8950 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8951 {
8952 arg_ldst_ri b = {
8953 .u = a->u, .w = a->w, .p = a->p,
8954 .rn = a->rn, .rt = a->rt, .imm = a->imm
8955 };
8956 return op_ldrd_ri(s, &b, a->rt2);
8957 }
8958
8959 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8960 {
8961 int mem_idx = get_mem_index(s);
8962 TCGv_i32 addr, tmp;
8963
8964 addr = op_addr_ri_pre(s, a);
8965
8966 tmp = load_reg(s, a->rt);
8967 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8968 tcg_temp_free_i32(tmp);
8969
8970 tcg_gen_addi_i32(addr, addr, 4);
8971
8972 tmp = load_reg(s, rt2);
8973 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8974 tcg_temp_free_i32(tmp);
8975
8976 op_addr_ri_post(s, a, addr, -4);
8977 return true;
8978 }
8979
8980 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8981 {
8982 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8983 return false;
8984 }
8985 return op_strd_ri(s, a, a->rt + 1);
8986 }
8987
8988 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8989 {
8990 arg_ldst_ri b = {
8991 .u = a->u, .w = a->w, .p = a->p,
8992 .rn = a->rn, .rt = a->rt, .imm = a->imm
8993 };
8994 return op_strd_ri(s, &b, a->rt2);
8995 }
8996
8997 #define DO_LDST(NAME, WHICH, MEMOP) \
8998 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
8999 { \
9000 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
9001 } \
9002 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
9003 { \
9004 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
9005 } \
9006 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
9007 { \
9008 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
9009 } \
9010 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
9011 { \
9012 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
9013 }
9014
9015 DO_LDST(LDR, load, MO_UL)
9016 DO_LDST(LDRB, load, MO_UB)
9017 DO_LDST(LDRH, load, MO_UW)
9018 DO_LDST(LDRSB, load, MO_SB)
9019 DO_LDST(LDRSH, load, MO_SW)
9020
9021 DO_LDST(STR, store, MO_UL)
9022 DO_LDST(STRB, store, MO_UB)
9023 DO_LDST(STRH, store, MO_UW)
9024
9025 #undef DO_LDST
9026
9027 /*
9028 * Synchronization primitives
9029 */
9030
9031 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
9032 {
9033 TCGv_i32 addr, tmp;
9034 TCGv taddr;
9035
9036 opc |= s->be_data;
9037 addr = load_reg(s, a->rn);
9038 taddr = gen_aa32_addr(s, addr, opc);
9039 tcg_temp_free_i32(addr);
9040
9041 tmp = load_reg(s, a->rt2);
9042 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
9043 tcg_temp_free(taddr);
9044
9045 store_reg(s, a->rt, tmp);
9046 return true;
9047 }
9048
9049 static bool trans_SWP(DisasContext *s, arg_SWP *a)
9050 {
9051 return op_swp(s, a, MO_UL | MO_ALIGN);
9052 }
9053
9054 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
9055 {
9056 return op_swp(s, a, MO_UB);
9057 }
9058
9059 /*
9060 * Load/Store Exclusive and Load-Acquire/Store-Release
9061 */
9062
9063 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
9064 {
9065 TCGv_i32 addr;
9066 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
9067 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
9068
9069 /* We UNDEF for these UNPREDICTABLE cases. */
9070 if (a->rd == 15 || a->rn == 15 || a->rt == 15
9071 || a->rd == a->rn || a->rd == a->rt
9072 || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
9073 || (mop == MO_64
9074 && (a->rt2 == 15
9075 || a->rd == a->rt2
9076 || (!v8a && s->thumb && a->rt2 == 13)))) {
9077 unallocated_encoding(s);
9078 return true;
9079 }
9080
9081 if (rel) {
9082 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9083 }
9084
9085 addr = tcg_temp_local_new_i32();
9086 load_reg_var(s, addr, a->rn);
9087 tcg_gen_addi_i32(addr, addr, a->imm);
9088
9089 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
9090 tcg_temp_free_i32(addr);
9091 return true;
9092 }
9093
9094 static bool trans_STREX(DisasContext *s, arg_STREX *a)
9095 {
9096 if (!ENABLE_ARCH_6) {
9097 return false;
9098 }
9099 return op_strex(s, a, MO_32, false);
9100 }
9101
9102 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
9103 {
9104 if (!ENABLE_ARCH_6K) {
9105 return false;
9106 }
9107 /* We UNDEF for these UNPREDICTABLE cases. */
9108 if (a->rt & 1) {
9109 unallocated_encoding(s);
9110 return true;
9111 }
9112 a->rt2 = a->rt + 1;
9113 return op_strex(s, a, MO_64, false);
9114 }
9115
9116 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
9117 {
9118 return op_strex(s, a, MO_64, false);
9119 }
9120
9121 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
9122 {
9123 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9124 return false;
9125 }
9126 return op_strex(s, a, MO_8, false);
9127 }
9128
9129 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
9130 {
9131 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9132 return false;
9133 }
9134 return op_strex(s, a, MO_16, false);
9135 }
9136
9137 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
9138 {
9139 if (!ENABLE_ARCH_8) {
9140 return false;
9141 }
9142 return op_strex(s, a, MO_32, true);
9143 }
9144
9145 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
9146 {
9147 if (!ENABLE_ARCH_8) {
9148 return false;
9149 }
9150 /* We UNDEF for these UNPREDICTABLE cases. */
9151 if (a->rt & 1) {
9152 unallocated_encoding(s);
9153 return true;
9154 }
9155 a->rt2 = a->rt + 1;
9156 return op_strex(s, a, MO_64, true);
9157 }
9158
9159 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
9160 {
9161 if (!ENABLE_ARCH_8) {
9162 return false;
9163 }
9164 return op_strex(s, a, MO_64, true);
9165 }
9166
9167 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
9168 {
9169 if (!ENABLE_ARCH_8) {
9170 return false;
9171 }
9172 return op_strex(s, a, MO_8, true);
9173 }
9174
9175 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
9176 {
9177 if (!ENABLE_ARCH_8) {
9178 return false;
9179 }
9180 return op_strex(s, a, MO_16, true);
9181 }
9182
9183 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
9184 {
9185 TCGv_i32 addr, tmp;
9186
9187 if (!ENABLE_ARCH_8) {
9188 return false;
9189 }
9190 /* We UNDEF for these UNPREDICTABLE cases. */
9191 if (a->rn == 15 || a->rt == 15) {
9192 unallocated_encoding(s);
9193 return true;
9194 }
9195
9196 addr = load_reg(s, a->rn);
9197 tmp = load_reg(s, a->rt);
9198 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9199 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9200 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
9201
9202 tcg_temp_free_i32(tmp);
9203 tcg_temp_free_i32(addr);
9204 return true;
9205 }
9206
9207 static bool trans_STL(DisasContext *s, arg_STL *a)
9208 {
9209 return op_stl(s, a, MO_UL);
9210 }
9211
9212 static bool trans_STLB(DisasContext *s, arg_STL *a)
9213 {
9214 return op_stl(s, a, MO_UB);
9215 }
9216
9217 static bool trans_STLH(DisasContext *s, arg_STL *a)
9218 {
9219 return op_stl(s, a, MO_UW);
9220 }
9221
9222 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
9223 {
9224 TCGv_i32 addr;
9225 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
9226 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
9227
9228 /* We UNDEF for these UNPREDICTABLE cases. */
9229 if (a->rn == 15 || a->rt == 15
9230 || (!v8a && s->thumb && a->rt == 13)
9231 || (mop == MO_64
9232 && (a->rt2 == 15 || a->rt == a->rt2
9233 || (!v8a && s->thumb && a->rt2 == 13)))) {
9234 unallocated_encoding(s);
9235 return true;
9236 }
9237
9238 addr = tcg_temp_local_new_i32();
9239 load_reg_var(s, addr, a->rn);
9240 tcg_gen_addi_i32(addr, addr, a->imm);
9241
9242 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
9243 tcg_temp_free_i32(addr);
9244
9245 if (acq) {
9246 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
9247 }
9248 return true;
9249 }
9250
9251 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
9252 {
9253 if (!ENABLE_ARCH_6) {
9254 return false;
9255 }
9256 return op_ldrex(s, a, MO_32, false);
9257 }
9258
9259 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
9260 {
9261 if (!ENABLE_ARCH_6K) {
9262 return false;
9263 }
9264 /* We UNDEF for these UNPREDICTABLE cases. */
9265 if (a->rt & 1) {
9266 unallocated_encoding(s);
9267 return true;
9268 }
9269 a->rt2 = a->rt + 1;
9270 return op_ldrex(s, a, MO_64, false);
9271 }
9272
9273 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
9274 {
9275 return op_ldrex(s, a, MO_64, false);
9276 }
9277
9278 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
9279 {
9280 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9281 return false;
9282 }
9283 return op_ldrex(s, a, MO_8, false);
9284 }
9285
9286 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
9287 {
9288 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9289 return false;
9290 }
9291 return op_ldrex(s, a, MO_16, false);
9292 }
9293
9294 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
9295 {
9296 if (!ENABLE_ARCH_8) {
9297 return false;
9298 }
9299 return op_ldrex(s, a, MO_32, true);
9300 }
9301
9302 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
9303 {
9304 if (!ENABLE_ARCH_8) {
9305 return false;
9306 }
9307 /* We UNDEF for these UNPREDICTABLE cases. */
9308 if (a->rt & 1) {
9309 unallocated_encoding(s);
9310 return true;
9311 }
9312 a->rt2 = a->rt + 1;
9313 return op_ldrex(s, a, MO_64, true);
9314 }
9315
9316 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
9317 {
9318 if (!ENABLE_ARCH_8) {
9319 return false;
9320 }
9321 return op_ldrex(s, a, MO_64, true);
9322 }
9323
9324 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
9325 {
9326 if (!ENABLE_ARCH_8) {
9327 return false;
9328 }
9329 return op_ldrex(s, a, MO_8, true);
9330 }
9331
9332 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
9333 {
9334 if (!ENABLE_ARCH_8) {
9335 return false;
9336 }
9337 return op_ldrex(s, a, MO_16, true);
9338 }
9339
9340 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
9341 {
9342 TCGv_i32 addr, tmp;
9343
9344 if (!ENABLE_ARCH_8) {
9345 return false;
9346 }
9347 /* We UNDEF for these UNPREDICTABLE cases. */
9348 if (a->rn == 15 || a->rt == 15) {
9349 unallocated_encoding(s);
9350 return true;
9351 }
9352
9353 addr = load_reg(s, a->rn);
9354 tmp = tcg_temp_new_i32();
9355 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9356 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
9357 tcg_temp_free_i32(addr);
9358
9359 store_reg(s, a->rt, tmp);
9360 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9361 return true;
9362 }
9363
9364 static bool trans_LDA(DisasContext *s, arg_LDA *a)
9365 {
9366 return op_lda(s, a, MO_UL);
9367 }
9368
9369 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
9370 {
9371 return op_lda(s, a, MO_UB);
9372 }
9373
9374 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
9375 {
9376 return op_lda(s, a, MO_UW);
9377 }
9378
9379 /*
9380 * Media instructions
9381 */
9382
9383 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
9384 {
9385 TCGv_i32 t1, t2;
9386
9387 if (!ENABLE_ARCH_6) {
9388 return false;
9389 }
9390
9391 t1 = load_reg(s, a->rn);
9392 t2 = load_reg(s, a->rm);
9393 gen_helper_usad8(t1, t1, t2);
9394 tcg_temp_free_i32(t2);
9395 if (a->ra != 15) {
9396 t2 = load_reg(s, a->ra);
9397 tcg_gen_add_i32(t1, t1, t2);
9398 tcg_temp_free_i32(t2);
9399 }
9400 store_reg(s, a->rd, t1);
9401 return true;
9402 }
9403
9404 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
9405 {
9406 TCGv_i32 tmp;
9407 int width = a->widthm1 + 1;
9408 int shift = a->lsb;
9409
9410 if (!ENABLE_ARCH_6T2) {
9411 return false;
9412 }
9413 if (shift + width > 32) {
9414 /* UNPREDICTABLE; we choose to UNDEF */
9415 unallocated_encoding(s);
9416 return true;
9417 }
9418
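/*
 * e.g. lsb == 4 and widthm1 == 7 copies bits <11:4> of Rn into
 * Rd<7:0>, zero-extended for UBFX and sign-extended for SBFX.
 */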
9419 tmp = load_reg(s, a->rn);
9420 if (u) {
9421 tcg_gen_extract_i32(tmp, tmp, shift, width);
9422 } else {
9423 tcg_gen_sextract_i32(tmp, tmp, shift, width);
9424 }
9425 store_reg(s, a->rd, tmp);
9426 return true;
9427 }
9428
9429 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
9430 {
9431 return op_bfx(s, a, false);
9432 }
9433
9434 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
9435 {
9436 return op_bfx(s, a, true);
9437 }
9438
9439 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
9440 {
9441 TCGv_i32 tmp;
9442 int msb = a->msb, lsb = a->lsb;
9443 int width;
9444
9445 if (!ENABLE_ARCH_6T2) {
9446 return false;
9447 }
9448 if (msb < lsb) {
9449 /* UNPREDICTABLE; we choose to UNDEF */
9450 unallocated_encoding(s);
9451 return true;
9452 }
9453
9454 width = msb + 1 - lsb;
9455 if (a->rn == 15) {
9456 /* BFC */
9457 tmp = tcg_const_i32(0);
9458 } else {
9459 /* BFI */
9460 tmp = load_reg(s, a->rn);
9461 }
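/* A 32-bit wide field replaces Rd entirely, so no deposit is needed. */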
9462 if (width != 32) {
9463 TCGv_i32 tmp2 = load_reg(s, a->rd);
9464 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
9465 tcg_temp_free_i32(tmp2);
9466 }
9467 store_reg(s, a->rd, tmp);
9468 return true;
9469 }
9470
9471 static bool trans_UDF(DisasContext *s, arg_UDF *a)
9472 {
9473 unallocated_encoding(s);
9474 return true;
9475 }
9476
9477 /*
9478 * Parallel addition and subtraction
9479 */
9480
9481 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
9482 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
9483 {
9484 TCGv_i32 t0, t1;
9485
9486 if (s->thumb
9487 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9488 : !ENABLE_ARCH_6) {
9489 return false;
9490 }
9491
9492 t0 = load_reg(s, a->rn);
9493 t1 = load_reg(s, a->rm);
9494
9495 gen(t0, t0, t1);
9496
9497 tcg_temp_free_i32(t1);
9498 store_reg(s, a->rd, t0);
9499 return true;
9500 }
9501
9502 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
9503 void (*gen)(TCGv_i32, TCGv_i32,
9504 TCGv_i32, TCGv_ptr))
9505 {
9506 TCGv_i32 t0, t1;
9507 TCGv_ptr ge;
9508
9509 if (s->thumb
9510 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9511 : !ENABLE_ARCH_6) {
9512 return false;
9513 }
9514
9515 t0 = load_reg(s, a->rn);
9516 t1 = load_reg(s, a->rm);
9517
9518 ge = tcg_temp_new_ptr();
9519 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
9520 gen(t0, t0, t1, ge);
9521
9522 tcg_temp_free_ptr(ge);
9523 tcg_temp_free_i32(t1);
9524 store_reg(s, a->rd, t0);
9525 return true;
9526 }
9527
9528 #define DO_PAR_ADDSUB(NAME, helper) \
9529 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9530 { \
9531 return op_par_addsub(s, a, helper); \
9532 }
9533
9534 #define DO_PAR_ADDSUB_GE(NAME, helper) \
9535 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9536 { \
9537 return op_par_addsub_ge(s, a, helper); \
9538 }
9539
9540 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
9541 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
9542 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
9543 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
9544 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
9545 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
9546
9547 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
9548 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
9549 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
9550 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
9551 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
9552 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
9553
9554 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
9555 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
9556 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
9557 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
9558 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
9559 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
9560
9561 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
9562 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
9563 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
9564 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
9565 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
9566 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
9567
9568 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
9569 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
9570 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
9571 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
9572 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
9573 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
9574
9575 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
9576 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
9577 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
9578 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
9579 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
9580 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
9581
9582 #undef DO_PAR_ADDSUB
9583 #undef DO_PAR_ADDSUB_GE
9584
9585 /*
9586 * Packing, unpacking, saturation, and reversal
9587 */
9588
9589 static bool trans_PKH(DisasContext *s, arg_PKH *a)
9590 {
9591 TCGv_i32 tn, tm;
9592 int shift = a->imm;
9593
9594 if (s->thumb
9595 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9596 : !ENABLE_ARCH_6) {
9597 return false;
9598 }
9599
9600 tn = load_reg(s, a->rn);
9601 tm = load_reg(s, a->rm);
9602 if (a->tb) {
9603 /* PKHTB */
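/*
 * An immediate of 0 encodes ASR #32; ASR #31 gives the same
 * all-sign-bits value, and keeps the TCG shift count in range.
 */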
9604 if (shift == 0) {
9605 shift = 31;
9606 }
9607 tcg_gen_sari_i32(tm, tm, shift);
9608 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
9609 } else {
9610 /* PKHBT */
9611 tcg_gen_shli_i32(tm, tm, shift);
9612 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
9613 }
9614 tcg_temp_free_i32(tm);
9615 store_reg(s, a->rd, tn);
9616 return true;
9617 }
9618
9619 static bool op_sat(DisasContext *s, arg_sat *a,
9620 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
9621 {
9622 TCGv_i32 tmp, satimm;
9623 int shift = a->imm;
9624
9625 if (!ENABLE_ARCH_6) {
9626 return false;
9627 }
9628
9629 tmp = load_reg(s, a->rn);
9630 if (a->sh) {
9631 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
9632 } else {
9633 tcg_gen_shli_i32(tmp, tmp, shift);
9634 }
9635
9636 satimm = tcg_const_i32(a->satimm);
9637 gen(tmp, cpu_env, tmp, satimm);
9638 tcg_temp_free_i32(satimm);
9639
9640 store_reg(s, a->rd, tmp);
9641 return true;
9642 }
9643
9644 static bool trans_SSAT(DisasContext *s, arg_sat *a)
9645 {
9646 return op_sat(s, a, gen_helper_ssat);
9647 }
9648
9649 static bool trans_USAT(DisasContext *s, arg_sat *a)
9650 {
9651 return op_sat(s, a, gen_helper_usat);
9652 }
9653
9654 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
9655 {
9656 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9657 return false;
9658 }
9659 return op_sat(s, a, gen_helper_ssat16);
9660 }
9661
9662 static bool trans_USAT16(DisasContext *s, arg_sat *a)
9663 {
9664 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9665 return false;
9666 }
9667 return op_sat(s, a, gen_helper_usat16);
9668 }
9669
9670 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
9671 void (*gen_extract)(TCGv_i32, TCGv_i32),
9672 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
9673 {
9674 TCGv_i32 tmp;
9675
9676 if (!ENABLE_ARCH_6) {
9677 return false;
9678 }
9679
9680 tmp = load_reg(s, a->rm);
9681 /*
9682 * TODO: In many cases we could do a shift instead of a rotate.
9683 * Combined with a simple extend, that becomes an extract.
9684 */
9685 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
9686 gen_extract(tmp, tmp);
9687
9688 if (a->rn != 15) {
9689 TCGv_i32 tmp2 = load_reg(s, a->rn);
9690 gen_add(tmp, tmp, tmp2);
9691 tcg_temp_free_i32(tmp2);
9692 }
9693 store_reg(s, a->rd, tmp);
9694 return true;
9695 }
9696
9697 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
9698 {
9699 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
9700 }
9701
9702 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
9703 {
9704 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
9705 }
9706
9707 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
9708 {
9709 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9710 return false;
9711 }
9712 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
9713 }
9714
9715 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
9716 {
9717 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
9718 }
9719
9720 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
9721 {
9722 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
9723 }
9724
9725 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
9726 {
9727 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9728 return false;
9729 }
9730 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
9731 }
9732
9733 static bool trans_SEL(DisasContext *s, arg_rrr *a)
9734 {
9735 TCGv_i32 t1, t2, t3;
9736
9737 if (s->thumb
9738 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9739 : !ENABLE_ARCH_6) {
9740 return false;
9741 }
9742
9743 t1 = load_reg(s, a->rn);
9744 t2 = load_reg(s, a->rm);
9745 t3 = tcg_temp_new_i32();
9746 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
9747 gen_helper_sel_flags(t1, t3, t1, t2);
9748 tcg_temp_free_i32(t3);
9749 tcg_temp_free_i32(t2);
9750 store_reg(s, a->rd, t1);
9751 return true;
9752 }
9753
9754 static bool op_rr(DisasContext *s, arg_rr *a,
9755 void (*gen)(TCGv_i32, TCGv_i32))
9756 {
9757 TCGv_i32 tmp;
9758
9759 tmp = load_reg(s, a->rm);
9760 gen(tmp, tmp);
9761 store_reg(s, a->rd, tmp);
9762 return true;
9763 }
9764
9765 static bool trans_REV(DisasContext *s, arg_rr *a)
9766 {
9767 if (!ENABLE_ARCH_6) {
9768 return false;
9769 }
9770 return op_rr(s, a, tcg_gen_bswap32_i32);
9771 }
9772
9773 static bool trans_REV16(DisasContext *s, arg_rr *a)
9774 {
9775 if (!ENABLE_ARCH_6) {
9776 return false;
9777 }
9778 return op_rr(s, a, gen_rev16);
9779 }
9780
9781 static bool trans_REVSH(DisasContext *s, arg_rr *a)
9782 {
9783 if (!ENABLE_ARCH_6) {
9784 return false;
9785 }
9786 return op_rr(s, a, gen_revsh);
9787 }
9788
9789 static bool trans_RBIT(DisasContext *s, arg_rr *a)
9790 {
9791 if (!ENABLE_ARCH_6T2) {
9792 return false;
9793 }
9794 return op_rr(s, a, gen_helper_rbit);
9795 }
9796
9797 /*
9798 * Signed multiply, signed and unsigned divide
9799 */
9800
9801 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9802 {
9803 TCGv_i32 t1, t2;
9804
9805 if (!ENABLE_ARCH_6) {
9806 return false;
9807 }
9808
9809 t1 = load_reg(s, a->rn);
9810 t2 = load_reg(s, a->rm);
9811 if (m_swap) {
9812 gen_swap_half(t2);
9813 }
9814 gen_smul_dual(t1, t2);
9815
9816 if (sub) {
9817 /* This subtraction cannot overflow. */
9818 tcg_gen_sub_i32(t1, t1, t2);
9819 } else {
9820 /*
9821 * This addition cannot overflow 32 bits; however it may
9822 * overflow considered as a signed operation, in which case
9823 * we must set the Q flag.
9824 */
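/*
 * e.g. (-32768 * -32768) + (-32768 * -32768) == 0x80000000: it fits
 * in 32 bits, but as a signed addition it overflows, so Q is set.
 */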
9825 gen_helper_add_setq(t1, cpu_env, t1, t2);
9826 }
9827 tcg_temp_free_i32(t2);
9828
9829 if (a->ra != 15) {
9830 t2 = load_reg(s, a->ra);
9831 gen_helper_add_setq(t1, cpu_env, t1, t2);
9832 tcg_temp_free_i32(t2);
9833 }
9834 store_reg(s, a->rd, t1);
9835 return true;
9836 }
9837
9838 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
9839 {
9840 return op_smlad(s, a, false, false);
9841 }
9842
9843 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
9844 {
9845 return op_smlad(s, a, true, false);
9846 }
9847
9848 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
9849 {
9850 return op_smlad(s, a, false, true);
9851 }
9852
9853 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
9854 {
9855 return op_smlad(s, a, true, true);
9856 }
9857
9858 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9859 {
9860 TCGv_i32 t1, t2;
9861 TCGv_i64 l1, l2;
9862
9863 if (!ENABLE_ARCH_6) {
9864 return false;
9865 }
9866
9867 t1 = load_reg(s, a->rn);
9868 t2 = load_reg(s, a->rm);
9869 if (m_swap) {
9870 gen_swap_half(t2);
9871 }
9872 gen_smul_dual(t1, t2);
9873
9874 l1 = tcg_temp_new_i64();
9875 l2 = tcg_temp_new_i64();
9876 tcg_gen_ext_i32_i64(l1, t1);
9877 tcg_gen_ext_i32_i64(l2, t2);
9878 tcg_temp_free_i32(t1);
9879 tcg_temp_free_i32(t2);
9880
9881 if (sub) {
9882 tcg_gen_sub_i64(l1, l1, l2);
9883 } else {
9884 tcg_gen_add_i64(l1, l1, l2);
9885 }
9886 tcg_temp_free_i64(l2);
9887
9888 gen_addq(s, l1, a->ra, a->rd);
9889 gen_storeq_reg(s, a->ra, a->rd, l1);
9890 tcg_temp_free_i64(l1);
9891 return true;
9892 }
9893
9894 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
9895 {
9896 return op_smlald(s, a, false, false);
9897 }
9898
9899 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
9900 {
9901 return op_smlald(s, a, true, false);
9902 }
9903
9904 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
9905 {
9906 return op_smlald(s, a, false, true);
9907 }
9908
9909 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
9910 {
9911 return op_smlald(s, a, true, true);
9912 }
9913
9914 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
9915 {
9916 TCGv_i32 t1, t2;
9917
9918 if (s->thumb
9919 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9920 : !ENABLE_ARCH_6) {
9921 return false;
9922 }
9923
9924 t1 = load_reg(s, a->rn);
9925 t2 = load_reg(s, a->rm);
9926 tcg_gen_muls2_i32(t2, t1, t1, t2);
9927
9928 if (a->ra != 15) {
9929 TCGv_i32 t3 = load_reg(s, a->ra);
9930 if (sub) {
9931 /*
9932 * For SMMLS, we need a 64-bit subtract: it yields the borrow
9933 * caused by a non-zero multiplicand lowpart, and the correct
9934 * result lowpart for rounding.
9935 */
9936 TCGv_i32 zero = tcg_const_i32(0);
9937 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
9938 tcg_temp_free_i32(zero);
9939 } else {
9940 tcg_gen_add_i32(t1, t1, t3);
9941 }
9942 tcg_temp_free_i32(t3);
9943 }
9944 if (round) {
9945 /*
9946 * Adding 0x80000000 to the 64-bit quantity means that we have
9947 * a carry into the high word when the low word has the msb set.
9948 */
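/*
 * e.g. a low word of 0x80000000 or more contributes +1 to the high
 * word, anything smaller contributes 0.
 */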
9949 tcg_gen_shri_i32(t2, t2, 31);
9950 tcg_gen_add_i32(t1, t1, t2);
9951 }
9952 tcg_temp_free_i32(t2);
9953 store_reg(s, a->rd, t1);
9954 return true;
9955 }
9956
9957 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
9958 {
9959 return op_smmla(s, a, false, false);
9960 }
9961
9962 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
9963 {
9964 return op_smmla(s, a, true, false);
9965 }
9966
9967 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
9968 {
9969 return op_smmla(s, a, false, true);
9970 }
9971
9972 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
9973 {
9974 return op_smmla(s, a, true, true);
9975 }
9976
9977 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
9978 {
9979 TCGv_i32 t1, t2;
9980
9981 if (s->thumb
9982 ? !dc_isar_feature(aa32_thumb_div, s)
9983 : !dc_isar_feature(aa32_arm_div, s)) {
9984 return false;
9985 }
9986
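/*
 * The special cases are resolved in the helpers: division by zero
 * yields 0 and INT_MIN / -1 yields INT_MIN, as the architecture
 * specifies when no divide-by-zero trap is configured, so no extra
 * checks are generated here.
 */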
9987 t1 = load_reg(s, a->rn);
9988 t2 = load_reg(s, a->rm);
9989 if (u) {
9990 gen_helper_udiv(t1, t1, t2);
9991 } else {
9992 gen_helper_sdiv(t1, t1, t2);
9993 }
9994 tcg_temp_free_i32(t2);
9995 store_reg(s, a->rd, t1);
9996 return true;
9997 }
9998
9999 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
10000 {
10001 return op_div(s, a, false);
10002 }
10003
10004 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
10005 {
10006 return op_div(s, a, true);
10007 }
10008
10009 /*
10010 * Block data transfer
10011 */
10012
10013 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
10014 {
10015 TCGv_i32 addr = load_reg(s, a->rn);
10016
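/*
 * e.g. a decrement-before transfer of n words covers Rn - 4*n .. Rn - 4,
 * so the base is dropped by n*4 up front; a decrement-after transfer
 * covers Rn - 4*(n-1) .. Rn, hence the (n-1)*4 adjustment. The transfer
 * loop itself always walks upwards from 'addr'.
 */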
10017 if (a->b) {
10018 if (a->i) {
10019 /* pre increment */
10020 tcg_gen_addi_i32(addr, addr, 4);
10021 } else {
10022 /* pre decrement */
10023 tcg_gen_addi_i32(addr, addr, -(n * 4));
10024 }
10025 } else if (!a->i && n != 1) {
10026 /* post decrement */
10027 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
10028 }
10029
10030 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
10031 /*
10032 * If the writeback is incrementing SP rather than
10033 * decrementing it, and the initial SP is below the
10034 * stack limit but the final written-back SP would
10035 * be above, then we must not perform any memory
10036 * accesses, but it is IMPDEF whether we generate
10037 * an exception. We choose to do so in this case.
10038 * At this point 'addr' is the lowest address, so
10039 * either the original SP (if incrementing) or our
10040 * final SP (if decrementing), so that's what we check.
10041 */
10042 gen_helper_v8m_stackcheck(cpu_env, addr);
10043 }
10044
10045 return addr;
10046 }
10047
10048 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
10049 TCGv_i32 addr, int n)
10050 {
10051 if (a->w) {
10052 /* write back */
10053 if (!a->b) {
10054 if (a->i) {
10055 /* post increment */
10056 tcg_gen_addi_i32(addr, addr, 4);
10057 } else {
10058 /* post decrement */
10059 tcg_gen_addi_i32(addr, addr, -(n * 4));
10060 }
10061 } else if (!a->i && n != 1) {
10062 /* pre decrement */
10063 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
10064 }
10065 store_reg(s, a->rn, addr);
10066 } else {
10067 tcg_temp_free_i32(addr);
10068 }
10069 }
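
/*
 * Illustrative sketch, not used by the translator: the lowest address
 * accessed and the written-back base produced by op_addr_block_pre()
 * and op_addr_block_post() above for an n-register LDM/STM, where
 * 'inc' is a->i and 'before' is a->b.  The struct and helper names
 * are illustrative assumptions.
 */
typedef struct {
    uint32_t lowest;     /* address of the first (lowest) transfer */
    uint32_t writeback;  /* final base value when writeback is enabled */
} BlockAddrSketch;

static inline BlockAddrSketch block_addr_ref(uint32_t base, bool inc,
                                             bool before, int n)
{
    BlockAddrSketch r;

    if (inc) {
        r.lowest = before ? base + 4 : base;        /* IB : IA */
        r.writeback = base + n * 4;
    } else {
        r.lowest = before ? base - n * 4            /* DB */
                          : base - (n - 1) * 4;     /* DA */
        r.writeback = base - n * 4;
    }
    return r;
}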
10070
10071 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
10072 {
10073 int i, j, n, list, mem_idx;
10074 bool user = a->u;
10075 TCGv_i32 addr, tmp, tmp2;
10076
10077 if (user) {
10078 /* STM (user) */
10079 if (IS_USER(s)) {
10080 /* Only usable in supervisor mode. */
10081 unallocated_encoding(s);
10082 return true;
10083 }
10084 }
10085
10086 list = a->list;
10087 n = ctpop16(list);
10088 if (n < min_n || a->rn == 15) {
10089 unallocated_encoding(s);
10090 return true;
10091 }
10092
10093 addr = op_addr_block_pre(s, a, n);
10094 mem_idx = get_mem_index(s);
10095
10096 for (i = j = 0; i < 16; i++) {
10097 if (!(list & (1 << i))) {
10098 continue;
10099 }
10100
10101 if (user && i != 15) {
10102 tmp = tcg_temp_new_i32();
10103 tmp2 = tcg_const_i32(i);
10104 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
10105 tcg_temp_free_i32(tmp2);
10106 } else {
10107 tmp = load_reg(s, i);
10108 }
10109 gen_aa32_st32(s, tmp, addr, mem_idx);
10110 tcg_temp_free_i32(tmp);
10111
10112 /* No need to add after the last transfer. */
10113 if (++j != n) {
10114 tcg_gen_addi_i32(addr, addr, 4);
10115 }
10116 }
10117
10118 op_addr_block_post(s, a, addr, n);
10119 return true;
10120 }
10121
10122 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
10123 {
10124 /* BitCount(list) < 1 is UNPREDICTABLE */
10125 return op_stm(s, a, 1);
10126 }
10127
10128 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
10129 {
10130 /* Writeback register in register list is UNPREDICTABLE for T32. */
10131 if (a->w && (a->list & (1 << a->rn))) {
10132 unallocated_encoding(s);
10133 return true;
10134 }
10135 /* BitCount(list) < 2 is UNPREDICTABLE */
10136 return op_stm(s, a, 2);
10137 }
10138
10139 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
10140 {
10141 int i, j, n, list, mem_idx;
10142 bool loaded_base;
10143 bool user = a->u;
10144 bool exc_return = false;
10145 TCGv_i32 addr, tmp, tmp2, loaded_var;
10146
10147 if (user) {
10148 /* LDM (user), LDM (exception return) */
10149 if (IS_USER(s)) {
10150 /* Only usable in supervisor mode. */
10151 unallocated_encoding(s);
10152 return true;
10153 }
10154 if (extract32(a->list, 15, 1)) {
10155 exc_return = true;
10156 user = false;
10157 } else {
10158 /* LDM (user) does not allow writeback. */
10159 if (a->w) {
10160 unallocated_encoding(s);
10161 return true;
10162 }
10163 }
10164 }
10165
10166 list = a->list;
10167 n = ctpop16(list);
10168 if (n < min_n || a->rn == 15) {
10169 unallocated_encoding(s);
10170 return true;
10171 }
10172
10173 addr = op_addr_block_pre(s, a, n);
10174 mem_idx = get_mem_index(s);
10175 loaded_base = false;
10176 loaded_var = NULL;
10177
10178 for (i = j = 0; i < 16; i++) {
10179 if (!(list & (1 << i))) {
10180 continue;
10181 }
10182
10183 tmp = tcg_temp_new_i32();
10184 gen_aa32_ld32u(s, tmp, addr, mem_idx);
10185 if (user) {
10186 tmp2 = tcg_const_i32(i);
10187 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
10188 tcg_temp_free_i32(tmp2);
10189 tcg_temp_free_i32(tmp);
10190 } else if (i == a->rn) {
10191 loaded_var = tmp;
10192 loaded_base = true;
10193 } else if (i == 15 && exc_return) {
10194 store_pc_exc_ret(s, tmp);
10195 } else {
10196 store_reg_from_load(s, i, tmp);
10197 }
10198
10199 /* No need to add after the last transfer. */
10200 if (++j != n) {
10201 tcg_gen_addi_i32(addr, addr, 4);
10202 }
10203 }
10204
10205 op_addr_block_post(s, a, addr, n);
10206
10207 if (loaded_base) {
10208 /* Note that we reject base == pc above. */
10209 store_reg(s, a->rn, loaded_var);
10210 }
10211
10212 if (exc_return) {
10213 /* Restore CPSR from SPSR. */
10214 tmp = load_cpu_field(spsr);
10215 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10216 gen_io_start();
10217 }
10218 gen_helper_cpsr_write_eret(cpu_env, tmp);
10219 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10220 gen_io_end();
10221 }
10222 tcg_temp_free_i32(tmp);
10223 /* Must exit loop to check un-masked IRQs */
10224 s->base.is_jmp = DISAS_EXIT;
10225 }
10226 return true;
10227 }
10228
10229 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
10230 {
10231 /*
10232 * Writeback register in register list is UNPREDICTABLE
10233 * for ArchVersion() >= 7. Prior to v7, A32 would write
10234 * an UNKNOWN value to the base register.
10235 */
10236 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
10237 unallocated_encoding(s);
10238 return true;
10239 }
10240 /* BitCount(list) < 1 is UNPREDICTABLE */
10241 return do_ldm(s, a, 1);
10242 }
10243
10244 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
10245 {
10246 /* Writeback register in register list is UNPREDICTABLE for T32. */
10247 if (a->w && (a->list & (1 << a->rn))) {
10248 unallocated_encoding(s);
10249 return true;
10250 }
10251 /* BitCount(list) < 2 is UNPREDICTABLE */
10252 return do_ldm(s, a, 2);
10253 }
10254
10255 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
10256 {
10257 /* Writeback is conditional on the base register not being loaded. */
10258 a->w = !(a->list & (1 << a->rn));
10259 /* BitCount(list) < 1 is UNPREDICTABLE */
10260 return do_ldm(s, a, 1);
10261 }
10262
10263 /*
10264 * Branch, branch with link
10265 */
10266
10267 static bool trans_B(DisasContext *s, arg_i *a)
10268 {
10269 gen_jmp(s, read_pc(s) + a->imm);
10270 return true;
10271 }
10272
10273 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
10274 {
10275 /* This has cond from encoding, required to be outside IT block. */
10276 if (a->cond >= 0xe) {
10277 return false;
10278 }
10279 if (s->condexec_mask) {
10280 unallocated_encoding(s);
10281 return true;
10282 }
10283 arm_skip_unless(s, a->cond);
10284 gen_jmp(s, read_pc(s) + a->imm);
10285 return true;
10286 }
10287
10288 static bool trans_BL(DisasContext *s, arg_i *a)
10289 {
10290 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10291 gen_jmp(s, read_pc(s) + a->imm);
10292 return true;
10293 }
10294
10295 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
10296 {
10297 TCGv_i32 tmp;
10298
10299 /* For A32, ARCH(5) is checked near the start of the uncond block. */
10300 if (s->thumb && (a->imm & 2)) {
10301 return false;
10302 }
10303 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10304 tmp = tcg_const_i32(!s->thumb);
10305 store_cpu_field(tmp, thumb);
10306 gen_jmp(s, (read_pc(s) & ~3) + a->imm);
10307 return true;
10308 }
10309
10310 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
10311 {
10312 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10313 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
10314 return true;
10315 }
10316
10317 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
10318 {
10319 TCGv_i32 tmp = tcg_temp_new_i32();
10320
10321 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10322 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
10323 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10324 gen_bx(s, tmp);
10325 return true;
10326 }
10327
10328 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
10329 {
10330 TCGv_i32 tmp;
10331
10332 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10333 if (!ENABLE_ARCH_5) {
10334 return false;
10335 }
10336 tmp = tcg_temp_new_i32();
10337 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
10338 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
10339 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10340 gen_bx(s, tmp);
10341 return true;
10342 }
10343
10344 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
10345 {
10346 TCGv_i32 addr, tmp;
10347
10348 tmp = load_reg(s, a->rm);
10349 if (half) {
10350 tcg_gen_add_i32(tmp, tmp, tmp);
10351 }
10352 addr = load_reg(s, a->rn);
10353 tcg_gen_add_i32(addr, addr, tmp);
10354
10355 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
10356 half ? MO_UW | s->be_data : MO_UB);
10357 tcg_temp_free_i32(addr);
10358
10359 tcg_gen_add_i32(tmp, tmp, tmp);
10360 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
10361 store_reg(s, 15, tmp);
10362 return true;
10363 }
10364
10365 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
10366 {
10367 return op_tbranch(s, a, false);
10368 }
10369
10370 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
10371 {
10372 return op_tbranch(s, a, true);
10373 }
10374
10375 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
10376 {
10377 TCGv_i32 tmp = load_reg(s, a->rn);
10378
10379 arm_gen_condlabel(s);
10380 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
10381 tmp, 0, s->condlabel);
10382 tcg_temp_free_i32(tmp);
10383 gen_jmp(s, read_pc(s) + a->imm);
10384 return true;
10385 }
10386
10387 /*
10388 * Supervisor call - both T32 & A32 come here so we need to check
10389 * which mode we are in when checking for semihosting.
10390 */
10391
10392 static bool trans_SVC(DisasContext *s, arg_SVC *a)
10393 {
10394 const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
10395
10396 if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
10397 #ifndef CONFIG_USER_ONLY
10398 !IS_USER(s) &&
10399 #endif
10400 (a->imm == semihost_imm)) {
10401 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
10402 } else {
10403 gen_set_pc_im(s, s->base.pc_next);
10404 s->svc_imm = a->imm;
10405 s->base.is_jmp = DISAS_SWI;
10406 }
10407 return true;
10408 }
10409
10410 /*
10411 * Unconditional system instructions
10412 */
10413
10414 static bool trans_RFE(DisasContext *s, arg_RFE *a)
10415 {
10416 static const int8_t pre_offset[4] = {
10417 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
10418 };
10419 static const int8_t post_offset[4] = {
10420 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
10421 };
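/*
 * Worked example of the offset tables above (illustrative, and follows
 * directly from the code below): for RFEIA the PC is loaded from [Rn]
 * and the CPSR from [Rn + 4], with writeback of Rn + 8; for RFEDB the
 * PC comes from [Rn - 8] and the CPSR from [Rn - 4], with writeback
 * of Rn - 8.
 */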
10422 TCGv_i32 addr, t1, t2;
10423
10424 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10425 return false;
10426 }
10427 if (IS_USER(s)) {
10428 unallocated_encoding(s);
10429 return true;
10430 }
10431
10432 addr = load_reg(s, a->rn);
10433 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
10434
10435 /* Load PC into tmp and CPSR into tmp2. */
10436 t1 = tcg_temp_new_i32();
10437 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
10438 tcg_gen_addi_i32(addr, addr, 4);
10439 t2 = tcg_temp_new_i32();
10440 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
10441
10442 if (a->w) {
10443 /* Base writeback. */
10444 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
10445 store_reg(s, a->rn, addr);
10446 } else {
10447 tcg_temp_free_i32(addr);
10448 }
10449 gen_rfe(s, t1, t2);
10450 return true;
10451 }
10452
10453 static bool trans_SRS(DisasContext *s, arg_SRS *a)
10454 {
10455 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10456 return false;
10457 }
10458 gen_srs(s, a->mode, a->pu, a->w);
10459 return true;
10460 }
10461
10462 static bool trans_CPS(DisasContext *s, arg_CPS *a)
10463 {
10464 uint32_t mask, val;
10465
10466 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10467 return false;
10468 }
10469 if (IS_USER(s)) {
10470 /* Implemented as NOP in user mode. */
10471 return true;
10472 }
10473 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
10474
10475 mask = val = 0;
10476 if (a->imod & 2) {
10477 if (a->A) {
10478 mask |= CPSR_A;
10479 }
10480 if (a->I) {
10481 mask |= CPSR_I;
10482 }
10483 if (a->F) {
10484 mask |= CPSR_F;
10485 }
10486 if (a->imod & 1) {
10487 val |= mask;
10488 }
10489 }
10490 if (a->M) {
10491 mask |= CPSR_M;
10492 val |= a->mode;
10493 }
10494 if (mask) {
10495 gen_set_psr_im(s, mask, 0, val);
10496 }
10497 return true;
10498 }
10499
10500 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
10501 {
10502 TCGv_i32 tmp, addr, el;
10503
10504 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
10505 return false;
10506 }
10507 if (IS_USER(s)) {
10508 /* Implemented as NOP in user mode. */
10509 return true;
10510 }
10511
10512 tmp = tcg_const_i32(a->im);
10513 /* FAULTMASK */
10514 if (a->F) {
10515 addr = tcg_const_i32(19);
10516 gen_helper_v7m_msr(cpu_env, addr, tmp);
10517 tcg_temp_free_i32(addr);
10518 }
10519 /* PRIMASK */
10520 if (a->I) {
10521 addr = tcg_const_i32(16);
10522 gen_helper_v7m_msr(cpu_env, addr, tmp);
10523 tcg_temp_free_i32(addr);
10524 }
10525 el = tcg_const_i32(s->current_el);
10526 gen_helper_rebuild_hflags_m32(cpu_env, el);
10527 tcg_temp_free_i32(el);
10528 tcg_temp_free_i32(tmp);
10529 gen_lookup_tb(s);
10530 return true;
10531 }
10532
10533 /*
10534 * Clear-Exclusive, Barriers
10535 */
10536
10537 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
10538 {
10539 if (s->thumb
10540 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
10541 : !ENABLE_ARCH_6K) {
10542 return false;
10543 }
10544 gen_clrex(s);
10545 return true;
10546 }
10547
10548 static bool trans_DSB(DisasContext *s, arg_DSB *a)
10549 {
10550 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10551 return false;
10552 }
10553 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10554 return true;
10555 }
10556
10557 static bool trans_DMB(DisasContext *s, arg_DMB *a)
10558 {
10559 return trans_DSB(s, NULL);
10560 }
10561
10562 static bool trans_ISB(DisasContext *s, arg_ISB *a)
10563 {
10564 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10565 return false;
10566 }
10567 /*
10568 * We need to break the TB after this insn to execute
10569 * self-modifying code correctly and also to take
10570 * any pending interrupts immediately.
10571 */
10572 gen_goto_tb(s, 0, s->base.pc_next);
10573 return true;
10574 }
10575
10576 static bool trans_SB(DisasContext *s, arg_SB *a)
10577 {
10578 if (!dc_isar_feature(aa32_sb, s)) {
10579 return false;
10580 }
10581 /*
10582 * TODO: There is no speculation barrier opcode
10583 * for TCG; MB and end the TB instead.
10584 */
10585 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10586 gen_goto_tb(s, 0, s->base.pc_next);
10587 return true;
10588 }
10589
10590 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
10591 {
10592 if (!ENABLE_ARCH_6) {
10593 return false;
10594 }
10595 if (a->E != (s->be_data == MO_BE)) {
10596 gen_helper_setend(cpu_env);
10597 s->base.is_jmp = DISAS_UPDATE;
10598 }
10599 return true;
10600 }
10601
10602 /*
10603 * Preload instructions
10604 * All are nops, contingent on the appropriate arch level.
10605 */
10606
10607 static bool trans_PLD(DisasContext *s, arg_PLD *a)
10608 {
10609 return ENABLE_ARCH_5TE;
10610 }
10611
10612 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
10613 {
10614 return arm_dc_feature(s, ARM_FEATURE_V7MP);
10615 }
10616
10617 static bool trans_PLI(DisasContext *s, arg_PLD *a)
10618 {
10619 return ENABLE_ARCH_7;
10620 }
10621
10622 /*
10623 * If-then
10624 */
10625
10626 static bool trans_IT(DisasContext *s, arg_IT *a)
10627 {
10628 int cond_mask = a->cond_mask;
10629
10630 /*
10631 * No actual code generated for this insn, just setup state.
10632 *
10633 * Combinations of firstcond and mask which set up a 0b1111
10634 * condition are UNPREDICTABLE; we take the CONSTRAINED
10635 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
10636 * i.e. both meaning "execute always".
10637 */
10638 s->condexec_cond = (cond_mask >> 4) & 0xe;
10639 s->condexec_mask = cond_mask & 0x1f;
10640 return true;
10641 }
10642
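/*
 * Illustrative sketch, not used by the translator: how the per-insn
 * ITSTATE advance in thumb_tr_translate_insn() later in this file
 * evolves the (condexec_cond, condexec_mask) pair set up by trans_IT()
 * above.  The helper name is an illustrative assumption.
 */
static inline void it_advance_sketch(uint32_t *cond, uint32_t *mask)
{
    /* Bit 4 of the mask supplies the T/E bit for the next insn. */
    *cond = (*cond & 0xe) | ((*mask >> 4) & 1);
    *mask = (*mask << 1) & 0x1f;
    if (*mask == 0) {
        /* The IT block has finished; revert to "execute always". */
        *cond = 0;
    }
}
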
10643 /*
10644 * Legacy decoder.
10645 */
10646
10647 static void disas_arm_insn(DisasContext *s, unsigned int insn)
10648 {
10649 unsigned int cond = insn >> 28;
10650
10651 /* M variants do not implement ARM mode; this must raise the INVSTATE
10652 * UsageFault exception.
10653 */
10654 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10655 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
10656 default_exception_el(s));
10657 return;
10658 }
10659
10660 if (cond == 0xf) {
10661 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
10662 * choose to UNDEF. In ARMv5 and above the space is used
10663 * for miscellaneous unconditional instructions.
10664 */
10665 ARCH(5);
10666
10667 /* Unconditional instructions. */
10668 /* TODO: Perhaps merge these into one decodetree output file. */
10669 if (disas_a32_uncond(s, insn) ||
10670 disas_vfp_uncond(s, insn) ||
10671 disas_neon_dp(s, insn) ||
10672 disas_neon_ls(s, insn) ||
10673 disas_neon_shared(s, insn)) {
10674 return;
10675 }
10676 /* fall back to legacy decoder */
10677
10678 if (((insn >> 25) & 7) == 1) {
10679 /* NEON Data processing. */
10680 if (disas_neon_data_insn(s, insn)) {
10681 goto illegal_op;
10682 }
10683 return;
10684 }
10685 if ((insn & 0x0e000f00) == 0x0c000100) {
10686 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
10687 /* iWMMXt register transfer. */
10688 if (extract32(s->c15_cpar, 1, 1)) {
10689 if (!disas_iwmmxt_insn(s, insn)) {
10690 return;
10691 }
10692 }
10693 }
10694 }
10695 goto illegal_op;
10696 }
10697 if (cond != 0xe) {
10698 /* If the condition is not "always", generate a conditional
10699 jump to the next instruction. */
10700 arm_skip_unless(s, cond);
10701 }
10702
10703 /* TODO: Perhaps merge these into one decodetree output file. */
10704 if (disas_a32(s, insn) ||
10705 disas_vfp(s, insn)) {
10706 return;
10707 }
10708 /* fall back to legacy decoder */
10709
10710 switch ((insn >> 24) & 0xf) {
10711 case 0xc:
10712 case 0xd:
10713 case 0xe:
10714 if (((insn >> 8) & 0xe) == 10) {
10715 /* VFP, but failed disas_vfp. */
10716 goto illegal_op;
10717 }
10718 if (disas_coproc_insn(s, insn)) {
10719 /* Coprocessor. */
10720 goto illegal_op;
10721 }
10722 break;
10723 default:
10724 illegal_op:
10725 unallocated_encoding(s);
10726 break;
10727 }
10728 }
10729
10730 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
10731 {
10732 /*
10733 * Return true if this is a 16 bit instruction. We must be precise
10734 * about this (matching the decode).
10735 */
10736 if ((insn >> 11) < 0x1d) {
10737 /* Definitely a 16-bit instruction */
10738 return true;
10739 }
10740
10741 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
10742 * first half of a 32-bit Thumb insn. Thumb-1 cores might
10743 * end up actually treating this as two 16-bit insns, though,
10744 * if it's half of a bl/blx pair that might span a page boundary.
10745 */
10746 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
10747 arm_dc_feature(s, ARM_FEATURE_M)) {
10748 /* Thumb2 cores (including all M profile ones) always treat
10749 * 32-bit insns as 32-bit.
10750 */
10751 return false;
10752 }
10753
10754 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
10755 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
10756 * is not on the next page; we merge this into a 32-bit
10757 * insn.
10758 */
10759 return false;
10760 }
10761 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
10762 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
10763 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
10764 * -- handle as single 16 bit insn
10765 */
10766 return true;
10767 }
10768
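/*
 * Some illustrative examples of the classification above: 0x4770
 * (BX lr) has top five bits 0b01000 and is always 16-bit; 0xe92d
 * (first half of STMDB sp! / PUSH.W) has 0b11101 and 0xf7ff (first
 * half of a BL with a negative offset) has 0b11110, so both start a
 * 32-bit insn on Thumb-2 cores.
 */
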
10769 /* Translate a 32-bit thumb instruction. */
10770 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
10771 {
10772 /*
10773 * ARMv6-M supports a limited subset of Thumb2 instructions.
10774 * Other Thumb1 architectures allow only 32-bit
10775 * combined BL/BLX prefix and suffix.
10776 */
10777 if (arm_dc_feature(s, ARM_FEATURE_M) &&
10778 !arm_dc_feature(s, ARM_FEATURE_V7)) {
10779 int i;
10780 bool found = false;
10781 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
10782 0xf3b08040 /* dsb */,
10783 0xf3b08050 /* dmb */,
10784 0xf3b08060 /* isb */,
10785 0xf3e08000 /* mrs */,
10786 0xf000d000 /* bl */};
10787 static const uint32_t armv6m_mask[] = {0xffe0d000,
10788 0xfff0d0f0,
10789 0xfff0d0f0,
10790 0xfff0d0f0,
10791 0xffe0d000,
10792 0xf800d000};
10793
10794 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
10795 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
10796 found = true;
10797 break;
10798 }
10799 }
10800 if (!found) {
10801 goto illegal_op;
10802 }
10803 } else if ((insn & 0xf800e800) != 0xf000e800) {
10804 ARCH(6T2);
10805 }
10806
10807 if ((insn & 0xef000000) == 0xef000000) {
10808 /*
10809 * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10810 * transform into
10811 * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10812 */
10813 uint32_t a32_insn = (insn & 0xe2ffffff) |
10814 ((insn & (1 << 28)) >> 4) | (1 << 28);
10815
10816 if (disas_neon_dp(s, a32_insn)) {
10817 return;
10818 }
10819 }
10820
10821 if ((insn & 0xff100000) == 0xf9000000) {
10822 /*
10823 * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10824 * transform into
10825 * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10826 */
10827 uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
10828
10829 if (disas_neon_ls(s, a32_insn)) {
10830 return;
10831 }
10832 }
10833
10834 /*
10835 * TODO: Perhaps merge these into one decodetree output file.
10836 * Note disas_vfp is written for a32 with cond field in the
10837 * top nibble. The t32 encoding requires 0xe in the top nibble.
10838 */
10839 if (disas_t32(s, insn) ||
10840 disas_vfp_uncond(s, insn) ||
10841 disas_neon_shared(s, insn) ||
10842 ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
10843 return;
10844 }
10845 /* fall back to legacy decoder */
10846
10847 switch ((insn >> 25) & 0xf) {
10848 case 0: case 1: case 2: case 3:
10849 /* 16-bit instructions. Should never happen. */
10850 abort();
10851 case 6: case 7: case 14: case 15:
10852 /* Coprocessor. */
10853 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10854 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
10855 if (extract32(insn, 24, 2) == 3) {
10856 goto illegal_op; /* op0 = 0b11 : unallocated */
10857 }
10858
10859 if (((insn >> 8) & 0xe) == 10 &&
10860 dc_isar_feature(aa32_fpsp_v2, s)) {
10861 /* FP, and the CPU supports it */
10862 goto illegal_op;
10863 } else {
10864 /* All other insns: NOCP */
10865 gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
10866 syn_uncategorized(),
10867 default_exception_el(s));
10868 }
10869 break;
10870 }
10871 if (((insn >> 24) & 3) == 3) {
10872 /* Translate into the equivalent ARM encoding. */
10873 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
10874 if (disas_neon_data_insn(s, insn)) {
10875 goto illegal_op;
10876 }
10877 } else if (((insn >> 8) & 0xe) == 10) {
10878 /* VFP, but failed disas_vfp. */
10879 goto illegal_op;
10880 } else {
10881 if (insn & (1 << 28))
10882 goto illegal_op;
10883 if (disas_coproc_insn(s, insn)) {
10884 goto illegal_op;
10885 }
10886 }
10887 break;
10888 case 12:
10889 goto illegal_op;
10890 default:
10891 illegal_op:
10892 unallocated_encoding(s);
10893 }
10894 }
10895
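/*
 * Illustrative sketch, not used by the translator: the T32 -> A32 bit
 * rewrite applied to the Neon data-processing space in
 * disas_thumb2_insn() above.  It moves the 'p' bit from bit 28 down
 * to bit 24 and forces bits [31:25] to the fixed 0b1111001 pattern of
 * the A32 encoding.  The helper name is an illustrative assumption.
 */
static inline uint32_t t32_neon_dp_to_a32_sketch(uint32_t insn)
{
    return (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
}
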
10896 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
10897 {
10898 if (!disas_t16(s, insn)) {
10899 unallocated_encoding(s);
10900 }
10901 }
10902
10903 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
10904 {
10905 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
10906 * (False positives are OK, false negatives are not.)
10907 * We know this is a Thumb insn, and our caller ensures we are
10908 * only called if dc->base.pc_next is less than 4 bytes from the page
10909 * boundary, so we cross the page if the first 16 bits indicate
10910 * that this is a 32 bit insn.
10911 */
10912 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
10913
10914 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
10915 }
10916
10917 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
10918 {
10919 DisasContext *dc = container_of(dcbase, DisasContext, base);
10920 CPUARMState *env = cs->env_ptr;
10921 ARMCPU *cpu = env_archcpu(env);
10922 uint32_t tb_flags = dc->base.tb->flags;
10923 uint32_t condexec, core_mmu_idx;
10924
10925 dc->isar = &cpu->isar;
10926 dc->condjmp = 0;
10927
10928 dc->aarch64 = 0;
10929 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
10930 * there is no secure EL1, so we route exceptions to EL3.
10931 */
10932 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
10933 !arm_el_is_aa64(env, 3);
10934 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
10935 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10936 condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
10937 dc->condexec_mask = (condexec & 0xf) << 1;
10938 dc->condexec_cond = condexec >> 4;
10939
10940 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
10941 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
10942 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10943 #if !defined(CONFIG_USER_ONLY)
10944 dc->user = (dc->current_el == 0);
10945 #endif
10946 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
10947
10948 if (arm_feature(env, ARM_FEATURE_M)) {
10949 dc->vfp_enabled = 1;
10950 dc->be_data = MO_TE;
10951 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
10952 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
10953 regime_is_secure(env, dc->mmu_idx);
10954 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
10955 dc->v8m_fpccr_s_wrong =
10956 FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
10957 dc->v7m_new_fp_ctxt_needed =
10958 FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
10959 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
10960 } else {
10961 dc->be_data =
10962 FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10963 dc->debug_target_el =
10964 FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
10965 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
10966 dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
10967 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
10968 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
10969 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
10970 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
10971 } else {
10972 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
10973 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
10974 }
10975 }
10976 dc->cp_regs = cpu->cp_regs;
10977 dc->features = env->features;
10978
10979 /* Single step state. The code-generation logic here is:
10980 * SS_ACTIVE == 0:
10981 * generate code with no special handling for single-stepping (except
10982 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10983 * this happens anyway because those changes are all system register or
10984 * PSTATE writes).
10985 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10986 * emit code for one insn
10987 * emit code to clear PSTATE.SS
10988 * emit code to generate software step exception for completed step
10989 * end TB (as usual for having generated an exception)
10990 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10991 * emit code to generate a software step exception
10992 * end the TB
10993 */
10994 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
10995 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
10996 dc->is_ldex = false;
10997
10998 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
10999
11000 /* If architectural single step active, limit to 1. */
11001 if (is_singlestepping(dc)) {
11002 dc->base.max_insns = 1;
11003 }
11004
11005 /* ARM is a fixed-length ISA. Bound the number of insns to execute
11006 to those left on the page. */
11007 if (!dc->thumb) {
11008 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
11009 dc->base.max_insns = MIN(dc->base.max_insns, bound);
11010 }
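
/*
 * Worked example of the bound above (illustrative, assuming a 4KiB
 * target page): TARGET_PAGE_MASK is then 0xfffff000, so for
 * pc_first = 0x80000f80 the expression -(pc_first | TARGET_PAGE_MASK)
 * is 0x80, giving a bound of 32 four-byte insns, exactly the number
 * left before the page boundary.
 */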
11011
11012 cpu_V0 = tcg_temp_new_i64();
11013 cpu_V1 = tcg_temp_new_i64();
11014 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
11015 cpu_M0 = tcg_temp_new_i64();
11016 }
11017
11018 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
11019 {
11020 DisasContext *dc = container_of(dcbase, DisasContext, base);
11021
11022 /* A note on handling of the condexec (IT) bits:
11023 *
11024 * We want to avoid the overhead of having to write the updated condexec
11025 * bits back to the CPUARMState for every instruction in an IT block. So:
11026 * (1) if the condexec bits are not already zero then we write
11027 * zero back into the CPUARMState now. This avoids complications trying
11028 * to do it at the end of the block. (For example if we don't do this
11029 * it's hard to identify whether we can safely skip writing condexec
11030 * at the end of the TB, which we definitely want to do for the case
11031 * where a TB doesn't do anything with the IT state at all.)
11032 * (2) if we are going to leave the TB then we call gen_set_condexec()
11033 * which will write the correct value into CPUARMState if zero is wrong.
11034 * This is done both for leaving the TB at the end, and for leaving
11035 * it because of an exception we know will happen, which is done in
11036 * gen_exception_insn(). The latter is necessary because we need to
11037 * leave the TB with the PC/IT state just prior to execution of the
11038 * instruction which caused the exception.
11039 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
11040 * then the CPUARMState will be wrong and we need to reset it.
11041 * This is handled in the same way as restoration of the
11042 * PC in these situations; we save the value of the condexec bits
11043 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
11044 * then uses this to restore them after an exception.
11045 *
11046 * Note that there are no instructions which can read the condexec
11047 * bits, and none which can write non-static values to them, so
11048 * we don't need to care about whether CPUARMState is correct in the
11049 * middle of a TB.
11050 */
11051
11052 /* Reset the conditional execution bits immediately. This avoids
11053 complications trying to do it at the end of the block. */
11054 if (dc->condexec_mask || dc->condexec_cond) {
11055 TCGv_i32 tmp = tcg_temp_new_i32();
11056 tcg_gen_movi_i32(tmp, 0);
11057 store_cpu_field(tmp, condexec_bits);
11058 }
11059 }
11060
11061 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
11062 {
11063 DisasContext *dc = container_of(dcbase, DisasContext, base);
11064
11065 tcg_gen_insn_start(dc->base.pc_next,
11066 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
11067 0);
11068 dc->insn_start = tcg_last_op();
11069 }
11070
11071 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
11072 const CPUBreakpoint *bp)
11073 {
11074 DisasContext *dc = container_of(dcbase, DisasContext, base);
11075
11076 if (bp->flags & BP_CPU) {
11077 gen_set_condexec(dc);
11078 gen_set_pc_im(dc, dc->base.pc_next);
11079 gen_helper_check_breakpoints(cpu_env);
11080 /* End the TB early; it's likely not going to be executed */
11081 dc->base.is_jmp = DISAS_TOO_MANY;
11082 } else {
11083 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
11084 /* The address covered by the breakpoint must be
11085 included in [tb->pc, tb->pc + tb->size) in order
11086 for it to be properly cleared -- thus we
11087 increment the PC here so that the logic setting
11088 tb->size below does the right thing. */
11089 /* TODO: Advance PC by correct instruction length to
11090 * avoid disassembler error messages */
11091 dc->base.pc_next += 2;
11092 dc->base.is_jmp = DISAS_NORETURN;
11093 }
11094
11095 return true;
11096 }
11097
11098 static bool arm_pre_translate_insn(DisasContext *dc)
11099 {
11100 #ifdef CONFIG_USER_ONLY
11101 /* Intercept jump to the magic kernel page. */
11102 if (dc->base.pc_next >= 0xffff0000) {
11103 /* We always get here via a jump, so we know we are not in a
11104 conditional execution block. */
11105 gen_exception_internal(EXCP_KERNEL_TRAP);
11106 dc->base.is_jmp = DISAS_NORETURN;
11107 return true;
11108 }
11109 #endif
11110
11111 if (dc->ss_active && !dc->pstate_ss) {
11112 /* Singlestep state is Active-pending.
11113 * If we're in this state at the start of a TB then either
11114 * a) we just took an exception to an EL which is being debugged
11115 * and this is the first insn in the exception handler
11116 * b) debug exceptions were masked and we just unmasked them
11117 * without changing EL (eg by clearing PSTATE.D)
11118 * In either case we're going to take a swstep exception in the
11119 * "did not step an insn" case, and so the syndrome ISV and EX
11120 * bits should be zero.
11121 */
11122 assert(dc->base.num_insns == 1);
11123 gen_swstep_exception(dc, 0, 0);
11124 dc->base.is_jmp = DISAS_NORETURN;
11125 return true;
11126 }
11127
11128 return false;
11129 }
11130
11131 static void arm_post_translate_insn(DisasContext *dc)
11132 {
11133 if (dc->condjmp && !dc->base.is_jmp) {
11134 gen_set_label(dc->condlabel);
11135 dc->condjmp = 0;
11136 }
11137 translator_loop_temp_check(&dc->base);
11138 }
11139
11140 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11141 {
11142 DisasContext *dc = container_of(dcbase, DisasContext, base);
11143 CPUARMState *env = cpu->env_ptr;
11144 unsigned int insn;
11145
11146 if (arm_pre_translate_insn(dc)) {
11147 return;
11148 }
11149
11150 dc->pc_curr = dc->base.pc_next;
11151 insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
11152 dc->insn = insn;
11153 dc->base.pc_next += 4;
11154 disas_arm_insn(dc, insn);
11155
11156 arm_post_translate_insn(dc);
11157
11158 /* ARM is a fixed-length ISA. We performed the cross-page check
11159 in init_disas_context by adjusting max_insns. */
11160 }
11161
11162 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
11163 {
11164 /* Return true if this Thumb insn is always unconditional,
11165 * even inside an IT block. This is true of only a very few
11166 * instructions: BKPT, HLT, and SG.
11167 *
11168 * A larger class of instructions are UNPREDICTABLE if used
11169 * inside an IT block; we do not need to detect those here, because
11170 * what we do by default (perform the cc check and update the IT
11171 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
11172 * choice for those situations.
11173 *
11174 * insn is either a 16-bit or a 32-bit instruction; the two are
11175 * distinguishable because for the 16-bit case the top 16 bits
11176 * are zeroes, and that isn't a valid 32-bit encoding.
11177 */
11178 if ((insn & 0xffffff00) == 0xbe00) {
11179 /* BKPT */
11180 return true;
11181 }
11182
11183 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
11184 !arm_dc_feature(s, ARM_FEATURE_M)) {
11185 /* HLT: v8A only. This is unconditional even when it is going to
11186 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
11187 * For v7 cores this was a plain old undefined encoding and so
11188 * honours its cc check. (We might be using the encoding as
11189 * a semihosting trap, but we don't change the cc check behaviour
11190 * on that account, because a debugger connected to a real v7A
11191 * core and emulating semihosting traps by catching the UNDEF
11192 * exception would also only see cases where the cc check passed.
11193 * No guest code should be trying to do a HLT semihosting trap
11194 * in an IT block anyway.
11195 */
11196 return true;
11197 }
11198
11199 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
11200 arm_dc_feature(s, ARM_FEATURE_M)) {
11201 /* SG: v8M only */
11202 return true;
11203 }
11204
11205 return false;
11206 }
11207
11208 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11209 {
11210 DisasContext *dc = container_of(dcbase, DisasContext, base);
11211 CPUARMState *env = cpu->env_ptr;
11212 uint32_t insn;
11213 bool is_16bit;
11214
11215 if (arm_pre_translate_insn(dc)) {
11216 return;
11217 }
11218
11219 dc->pc_curr = dc->base.pc_next;
11220 insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11221 is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
11222 dc->base.pc_next += 2;
11223 if (!is_16bit) {
11224 uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11225
11226 insn = insn << 16 | insn2;
11227 dc->base.pc_next += 2;
11228 }
11229 dc->insn = insn;
11230
11231 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
11232 uint32_t cond = dc->condexec_cond;
11233
11234 /*
11235 * Conditionally skip the insn. Note that both 0xe and 0xf mean
11236 * "always"; 0xf is not "never".
11237 */
11238 if (cond < 0x0e) {
11239 arm_skip_unless(dc, cond);
11240 }
11241 }
11242
11243 if (is_16bit) {
11244 disas_thumb_insn(dc, insn);
11245 } else {
11246 disas_thumb2_insn(dc, insn);
11247 }
11248
11249 /* Advance the Thumb condexec condition. */
11250 if (dc->condexec_mask) {
11251 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
11252 ((dc->condexec_mask >> 4) & 1));
11253 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
11254 if (dc->condexec_mask == 0) {
11255 dc->condexec_cond = 0;
11256 }
11257 }
11258
11259 arm_post_translate_insn(dc);
11260
11261 /* Thumb is a variable-length ISA. Stop translation when the next insn
11262 * will touch a new page. This ensures that prefetch aborts occur at
11263 * the right place.
11264 *
11265 * We want to stop the TB if the next insn starts in a new page,
11266 * or if it spans between this page and the next. This means that
11267 * if we're looking at the last halfword in the page we need to
11268 * see if it's a 16-bit Thumb insn (which will fit in this TB)
11269 * or a 32-bit Thumb insn (which won't).
11270 * This is to avoid generating a silly TB with a single 16-bit insn
11271 * in it at the end of this page (which would execute correctly
11272 * but isn't very efficient).
11273 */
11274 if (dc->base.is_jmp == DISAS_NEXT
11275 && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
11276 || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
11277 && insn_crosses_page(env, dc)))) {
11278 dc->base.is_jmp = DISAS_TOO_MANY;
11279 }
11280 }
11281
11282 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
11283 {
11284 DisasContext *dc = container_of(dcbase, DisasContext, base);
11285
11286 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
11287 /* FIXME: This can theoretically happen with self-modifying code. */
11288 cpu_abort(cpu, "IO on conditional branch instruction");
11289 }
11290
11291 /* At this stage dc->condjmp will only be set when the skipped
11292 instruction was a conditional branch or trap, and the PC has
11293 already been written. */
11294 gen_set_condexec(dc);
11295 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
11296 /* Exception return branches need some special case code at the
11297 * end of the TB, which is complex enough that it has to
11298 * handle the single-step vs not and the condition-failed
11299 * insn codepath itself.
11300 */
11301 gen_bx_excret_final_code(dc);
11302 } else if (unlikely(is_singlestepping(dc))) {
11303 /* Unconditional and "condition passed" instruction codepath. */
11304 switch (dc->base.is_jmp) {
11305 case DISAS_SWI:
11306 gen_ss_advance(dc);
11307 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11308 default_exception_el(dc));
11309 break;
11310 case DISAS_HVC:
11311 gen_ss_advance(dc);
11312 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11313 break;
11314 case DISAS_SMC:
11315 gen_ss_advance(dc);
11316 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11317 break;
11318 case DISAS_NEXT:
11319 case DISAS_TOO_MANY:
11320 case DISAS_UPDATE:
11321 gen_set_pc_im(dc, dc->base.pc_next);
11322 /* fall through */
11323 default:
11324 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
11325 gen_singlestep_exception(dc);
11326 break;
11327 case DISAS_NORETURN:
11328 break;
11329 }
11330 } else {
11331 /* While branches must always occur at the end of an IT block,
11332 there are a few other things that can cause us to terminate
11333 the TB in the middle of an IT block:
11334 - Exception generating instructions (bkpt, swi, undefined).
11335 - Page boundaries.
11336 - Hardware watchpoints.
11337 Hardware breakpoints have already been handled and skip this code.
11338 */
11339 switch(dc->base.is_jmp) {
11340 case DISAS_NEXT:
11341 case DISAS_TOO_MANY:
11342 gen_goto_tb(dc, 1, dc->base.pc_next);
11343 break;
11344 case DISAS_JUMP:
11345 gen_goto_ptr();
11346 break;
11347 case DISAS_UPDATE:
11348 gen_set_pc_im(dc, dc->base.pc_next);
11349 /* fall through */
11350 default:
11351 /* indicate that the hash table must be used to find the next TB */
11352 tcg_gen_exit_tb(NULL, 0);
11353 break;
11354 case DISAS_NORETURN:
11355 /* nothing more to generate */
11356 break;
11357 case DISAS_WFI:
11358 {
11359 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
11360 !(dc->insn & (1U << 31))) ? 2 : 4);
11361
11362 gen_helper_wfi(cpu_env, tmp);
11363 tcg_temp_free_i32(tmp);
11364 /* The helper doesn't necessarily throw an exception, but we
11365 * must go back to the main loop to check for interrupts anyway.
11366 */
11367 tcg_gen_exit_tb(NULL, 0);
11368 break;
11369 }
11370 case DISAS_WFE:
11371 gen_helper_wfe(cpu_env);
11372 break;
11373 case DISAS_YIELD:
11374 gen_helper_yield(cpu_env);
11375 break;
11376 case DISAS_SWI:
11377 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11378 default_exception_el(dc));
11379 break;
11380 case DISAS_HVC:
11381 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11382 break;
11383 case DISAS_SMC:
11384 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11385 break;
11386 }
11387 }
11388
11389 if (dc->condjmp) {
11390 /* "Condition failed" instruction codepath for the branch/trap insn */
11391 gen_set_label(dc->condlabel);
11392 gen_set_condexec(dc);
11393 if (unlikely(is_singlestepping(dc))) {
11394 gen_set_pc_im(dc, dc->base.pc_next);
11395 gen_singlestep_exception(dc);
11396 } else {
11397 gen_goto_tb(dc, 1, dc->base.pc_next);
11398 }
11399 }
11400 }
11401
11402 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
11403 {
11404 DisasContext *dc = container_of(dcbase, DisasContext, base);
11405
11406 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
11407 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
11408 }
11409
11410 static const TranslatorOps arm_translator_ops = {
11411 .init_disas_context = arm_tr_init_disas_context,
11412 .tb_start = arm_tr_tb_start,
11413 .insn_start = arm_tr_insn_start,
11414 .breakpoint_check = arm_tr_breakpoint_check,
11415 .translate_insn = arm_tr_translate_insn,
11416 .tb_stop = arm_tr_tb_stop,
11417 .disas_log = arm_tr_disas_log,
11418 };
11419
11420 static const TranslatorOps thumb_translator_ops = {
11421 .init_disas_context = arm_tr_init_disas_context,
11422 .tb_start = arm_tr_tb_start,
11423 .insn_start = arm_tr_insn_start,
11424 .breakpoint_check = arm_tr_breakpoint_check,
11425 .translate_insn = thumb_tr_translate_insn,
11426 .tb_stop = arm_tr_tb_stop,
11427 .disas_log = arm_tr_disas_log,
11428 };
11429
11430 /* generate intermediate code for basic block 'tb'. */
11431 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
11432 {
11433 DisasContext dc = { };
11434 const TranslatorOps *ops = &arm_translator_ops;
11435
11436 if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
11437 ops = &thumb_translator_ops;
11438 }
11439 #ifdef TARGET_AARCH64
11440 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
11441 ops = &aarch64_translator_ops;
11442 }
11443 #endif
11444
11445 translator_loop(ops, &dc.base, cpu, tb, max_insns);
11446 }
11447
11448 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
11449 target_ulong *data)
11450 {
11451 if (is_a64(env)) {
11452 env->pc = data[0];
11453 env->condexec_bits = 0;
11454 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11455 } else {
11456 env->regs[15] = data[0];
11457 env->condexec_bits = data[1];
11458 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11459 }
11460 }