1 /*
2 * ARM translation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "qemu/osdep.h"
22
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "hw/semihosting/semihost.h"
33
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
36
37 #include "trace-tcg.h"
38 #include "exec/log.h"
39
40
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
51
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
53
54 #include "translate.h"
55
56 #if defined(CONFIG_USER_ONLY)
57 #define IS_USER(s) 1
58 #else
59 #define IS_USER(s) (s->user)
60 #endif
61
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
68
69 #include "exec/gen-icount.h"
70
71 static const char * const regnames[] =
72 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
73 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
74
75 /* Function prototypes for gen_ functions calling Neon helpers. */
76 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
77 TCGv_i32, TCGv_i32);
78 /* Function prototypes for gen_ functions for fixed-point conversions */
79 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
80
81 /* initialize TCG globals. */
82 void arm_translate_init(void)
83 {
84 int i;
85
86 for (i = 0; i < 16; i++) {
87 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
88 offsetof(CPUARMState, regs[i]),
89 regnames[i]);
90 }
91 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
92 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
93 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
94 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
95
96 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
97 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
98 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_val), "exclusive_val");
100
101 a64_translate_init();
102 }
103
104 /* Flags for the disas_set_da_iss info argument:
105 * lower bits hold the Rt register number, higher bits are flags.
106 */
107 typedef enum ISSInfo {
108 ISSNone = 0,
109 ISSRegMask = 0x1f,
110 ISSInvalid = (1 << 5),
111 ISSIsAcqRel = (1 << 6),
112 ISSIsWrite = (1 << 7),
113 ISSIs16Bit = (1 << 8),
114 } ISSInfo;
115
116 /* Save the syndrome information for a Data Abort */
117 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
118 {
119 uint32_t syn;
120 int sas = memop & MO_SIZE;
121 bool sse = memop & MO_SIGN;
122 bool is_acqrel = issinfo & ISSIsAcqRel;
123 bool is_write = issinfo & ISSIsWrite;
124 bool is_16bit = issinfo & ISSIs16Bit;
125 int srt = issinfo & ISSRegMask;
126
127 if (issinfo & ISSInvalid) {
128 /* Some callsites want to conditionally provide ISS info,
129 * eg "only if this was not a writeback"
130 */
131 return;
132 }
133
134 if (srt == 15) {
135 /* For AArch32, insns where the src/dest is R15 never generate
136 * ISS information. Catching that here saves checking at all
137 * the call sites.
138 */
139 return;
140 }
141
142 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
143 0, 0, 0, is_write, 0, is_16bit);
144 disas_set_insn_syndrome(s, syn);
145 }
146
147 static inline int get_a32_user_mem_index(DisasContext *s)
148 {
149 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
150 * insns:
151 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
152 * otherwise, access as if at PL0.
153 */
154 switch (s->mmu_idx) {
155 case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
156 case ARMMMUIdx_E10_0:
157 case ARMMMUIdx_E10_1:
158 case ARMMMUIdx_E10_1_PAN:
159 return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
160 case ARMMMUIdx_SE3:
161 case ARMMMUIdx_SE10_0:
162 case ARMMMUIdx_SE10_1:
163 case ARMMMUIdx_SE10_1_PAN:
164 return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
177 default:
178 g_assert_not_reached();
179 }
180 }
181
182 static inline TCGv_i32 load_cpu_offset(int offset)
183 {
184 TCGv_i32 tmp = tcg_temp_new_i32();
185 tcg_gen_ld_i32(tmp, cpu_env, offset);
186 return tmp;
187 }
188
189 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
190
191 static inline void store_cpu_offset(TCGv_i32 var, int offset)
192 {
193 tcg_gen_st_i32(var, cpu_env, offset);
194 tcg_temp_free_i32(var);
195 }
196
197 #define store_cpu_field(var, name) \
198 store_cpu_offset(var, offsetof(CPUARMState, name))
199
200 /* The architectural value of PC. */
201 static uint32_t read_pc(DisasContext *s)
202 {
203 return s->pc_curr + (s->thumb ? 4 : 8);
204 }
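/*
 * For example: with pc_curr == 0x8000 this returns 0x8004 in Thumb state
 * and 0x8008 in ARM state, matching the architectural rule that reads of
 * the PC see the current insn's address plus 4 (T32) or plus 8 (A32).
 */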
205
206 /* Set a variable to the value of a CPU register. */
207 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
208 {
209 if (reg == 15) {
210 tcg_gen_movi_i32(var, read_pc(s));
211 } else {
212 tcg_gen_mov_i32(var, cpu_R[reg]);
213 }
214 }
215
216 /* Create a new temporary and set it to the value of a CPU register. */
217 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
218 {
219 TCGv_i32 tmp = tcg_temp_new_i32();
220 load_reg_var(s, tmp, reg);
221 return tmp;
222 }
223
224 /*
225 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
226  * This is used for loads/stores where the use of PC implies the (literal)
227  * addressing form, and for ADD where it implies ADR.
228 */
229 static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
230 {
231 TCGv_i32 tmp = tcg_temp_new_i32();
232
233 if (reg == 15) {
234 tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
235 } else {
236 tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
237 }
238 return tmp;
239 }
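/*
 * A worked example (illustrative, not tied to a particular caller): for a
 * T32 literal load with pc_curr == 0x8002 and ofs == 8, the reg == 15
 * path computes (0x8006 & ~3) + 8 == 0x800c, i.e. Align(PC, 4) + ofs.
 */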
240
241 /* Set a CPU register. The source must be a temporary and will be
242 marked as dead. */
243 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
244 {
245 if (reg == 15) {
246 /* In Thumb mode, we must ignore bit 0.
247 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
248 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
249 * We choose to ignore [1:0] in ARM mode for all architecture versions.
250 */
251 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
252 s->base.is_jmp = DISAS_JUMP;
253 }
254 tcg_gen_mov_i32(cpu_R[reg], var);
255 tcg_temp_free_i32(var);
256 }
257
258 /*
259 * Variant of store_reg which applies v8M stack-limit checks before updating
260 * SP. If the check fails this will result in an exception being taken.
261 * We disable the stack checks for CONFIG_USER_ONLY because we have
262 * no idea what the stack limits should be in that case.
263 * If stack checking is not being done this just acts like store_reg().
264 */
265 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
266 {
267 #ifndef CONFIG_USER_ONLY
268 if (s->v8m_stackcheck) {
269 gen_helper_v8m_stackcheck(cpu_env, var);
270 }
271 #endif
272 store_reg(s, 13, var);
273 }
274
275 /* Value extensions. */
276 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
277 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
278 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
279 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
280
281 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
282 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
283
284
285 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
286 {
287 TCGv_i32 tmp_mask = tcg_const_i32(mask);
288 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
289 tcg_temp_free_i32(tmp_mask);
290 }
291 /* Set NZCV flags from the high 4 bits of var. */
292 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
293
294 static void gen_exception_internal(int excp)
295 {
296 TCGv_i32 tcg_excp = tcg_const_i32(excp);
297
298 assert(excp_is_internal(excp));
299 gen_helper_exception_internal(cpu_env, tcg_excp);
300 tcg_temp_free_i32(tcg_excp);
301 }
302
303 static void gen_step_complete_exception(DisasContext *s)
304 {
305     /* We just completed a step of an insn. Move from Active-not-pending
306 * to Active-pending, and then also take the swstep exception.
307 * This corresponds to making the (IMPDEF) choice to prioritize
308 * swstep exceptions over asynchronous exceptions taken to an exception
309 * level where debug is disabled. This choice has the advantage that
310 * we do not need to maintain internal state corresponding to the
311 * ISV/EX syndrome bits between completion of the step and generation
312 * of the exception, and our syndrome information is always correct.
313 */
314 gen_ss_advance(s);
315 gen_swstep_exception(s, 1, s->is_ldex);
316 s->base.is_jmp = DISAS_NORETURN;
317 }
318
319 static void gen_singlestep_exception(DisasContext *s)
320 {
321 /* Generate the right kind of exception for singlestep, which is
322 * either the architectural singlestep or EXCP_DEBUG for QEMU's
323 * gdb singlestepping.
324 */
325 if (s->ss_active) {
326 gen_step_complete_exception(s);
327 } else {
328 gen_exception_internal(EXCP_DEBUG);
329 }
330 }
331
332 static inline bool is_singlestepping(DisasContext *s)
333 {
334 /* Return true if we are singlestepping either because of
335 * architectural singlestep or QEMU gdbstub singlestep. This does
336 * not include the command line '-singlestep' mode which is rather
337 * misnamed as it only means "one instruction per TB" and doesn't
338 * affect the code we generate.
339 */
340 return s->base.singlestep_enabled || s->ss_active;
341 }
342
343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
344 {
345 TCGv_i32 tmp1 = tcg_temp_new_i32();
346 TCGv_i32 tmp2 = tcg_temp_new_i32();
347 tcg_gen_ext16s_i32(tmp1, a);
348 tcg_gen_ext16s_i32(tmp2, b);
349 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
350 tcg_temp_free_i32(tmp2);
351 tcg_gen_sari_i32(a, a, 16);
352 tcg_gen_sari_i32(b, b, 16);
353 tcg_gen_mul_i32(b, b, a);
354 tcg_gen_mov_i32(a, tmp1);
355 tcg_temp_free_i32(tmp1);
356 }
357
358 /* Byteswap each halfword. */
359 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
360 {
361 TCGv_i32 tmp = tcg_temp_new_i32();
362 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
363 tcg_gen_shri_i32(tmp, var, 8);
364 tcg_gen_and_i32(tmp, tmp, mask);
365 tcg_gen_and_i32(var, var, mask);
366 tcg_gen_shli_i32(var, var, 8);
367 tcg_gen_or_i32(dest, var, tmp);
368 tcg_temp_free_i32(mask);
369 tcg_temp_free_i32(tmp);
370 }
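/* i.e. dest = ((var >> 8) & 0x00ff00ff) | ((var & 0x00ff00ff) << 8). */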
371
372 /* Byteswap low halfword and sign extend. */
373 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
374 {
375 tcg_gen_ext16u_i32(var, var);
376 tcg_gen_bswap16_i32(var, var);
377 tcg_gen_ext16s_i32(dest, var);
378 }
379
380 /* Swap low and high halfwords. */
381 static void gen_swap_half(TCGv_i32 dest, TCGv_i32 var)
382 {
383 tcg_gen_rotri_i32(dest, var, 16);
384 }
385
386 /* Dual 16-bit add.  The result is written to dest; t0 and t1 are clobbered.
387     tmp = (t0 ^ t1) & 0x8000;
388     t0 &= ~0x8000;
389     t1 &= ~0x8000;
390     dest = (t0 + t1) ^ tmp;
391  */
392
393 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
394 {
395 TCGv_i32 tmp = tcg_temp_new_i32();
396 tcg_gen_xor_i32(tmp, t0, t1);
397 tcg_gen_andi_i32(tmp, tmp, 0x8000);
398 tcg_gen_andi_i32(t0, t0, ~0x8000);
399 tcg_gen_andi_i32(t1, t1, ~0x8000);
400 tcg_gen_add_i32(t0, t0, t1);
401 tcg_gen_xor_i32(dest, t0, tmp);
402 tcg_temp_free_i32(tmp);
403 }
404
405 /* Set N and Z flags from var. */
406 static inline void gen_logic_CC(TCGv_i32 var)
407 {
408 tcg_gen_mov_i32(cpu_NF, var);
409 tcg_gen_mov_i32(cpu_ZF, var);
410 }
411
412 /* dest = T0 + T1 + CF. */
413 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
414 {
415 tcg_gen_add_i32(dest, t0, t1);
416 tcg_gen_add_i32(dest, dest, cpu_CF);
417 }
418
419 /* dest = T0 - T1 + CF - 1. */
420 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
421 {
422 tcg_gen_sub_i32(dest, t0, t1);
423 tcg_gen_add_i32(dest, dest, cpu_CF);
424 tcg_gen_subi_i32(dest, dest, 1);
425 }
426
427 /* dest = T0 + T1. Compute C, N, V and Z flags */
428 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
429 {
430 TCGv_i32 tmp = tcg_temp_new_i32();
431 tcg_gen_movi_i32(tmp, 0);
432 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
433 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
434 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
435 tcg_gen_xor_i32(tmp, t0, t1);
436 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
437 tcg_temp_free_i32(tmp);
438 tcg_gen_mov_i32(dest, cpu_NF);
439 }
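/*
 * The V flag computation above is the usual signed-overflow identity:
 * overflow happened iff the operands had the same sign and the result's
 * sign differs, i.e. VF = (NF ^ t0) & ~(t0 ^ t1), with the flag held in
 * the sign bit.  gen_adc_CC below uses the same recipe; gen_sub_CC uses
 * AND instead of ANDC because subtraction can only overflow when the
 * operand signs differ.
 */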
440
441 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
442 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
443 {
444 TCGv_i32 tmp = tcg_temp_new_i32();
445 if (TCG_TARGET_HAS_add2_i32) {
446 tcg_gen_movi_i32(tmp, 0);
447 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
448 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
449 } else {
450 TCGv_i64 q0 = tcg_temp_new_i64();
451 TCGv_i64 q1 = tcg_temp_new_i64();
452 tcg_gen_extu_i32_i64(q0, t0);
453 tcg_gen_extu_i32_i64(q1, t1);
454 tcg_gen_add_i64(q0, q0, q1);
455 tcg_gen_extu_i32_i64(q1, cpu_CF);
456 tcg_gen_add_i64(q0, q0, q1);
457 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
458 tcg_temp_free_i64(q0);
459 tcg_temp_free_i64(q1);
460 }
461 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
462 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
463 tcg_gen_xor_i32(tmp, t0, t1);
464 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
465 tcg_temp_free_i32(tmp);
466 tcg_gen_mov_i32(dest, cpu_NF);
467 }
468
469 /* dest = T0 - T1. Compute C, N, V and Z flags */
470 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
471 {
472 TCGv_i32 tmp;
473 tcg_gen_sub_i32(cpu_NF, t0, t1);
474 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
475 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
476 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
477 tmp = tcg_temp_new_i32();
478 tcg_gen_xor_i32(tmp, t0, t1);
479 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
480 tcg_temp_free_i32(tmp);
481 tcg_gen_mov_i32(dest, cpu_NF);
482 }
483
484 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
485 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
486 {
487 TCGv_i32 tmp = tcg_temp_new_i32();
488 tcg_gen_not_i32(tmp, t1);
489 gen_adc_CC(dest, t0, tmp);
490 tcg_temp_free_i32(tmp);
491 }
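/*
 * Note on the carry convention used above: AArch32 defines C after a
 * subtraction as NOT borrow, which is why gen_sub_CC sets it with an
 * unsigned t0 >= t1 comparison and why SBC is computed as T0 + ~T1 + CF
 * (equivalently T0 - T1 + CF - 1, as gen_sub_carry spells out).
 */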
492
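/*
 * Variable LSL/LSR by register: AArch32 uses only the bottom byte of the
 * shift register, and any count above 31 yields zero.  The movcond below
 * substitutes a zero source for large counts so that the underlying TCG
 * shift only ever sees a count in the range 0..31.
 */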
493 #define GEN_SHIFT(name) \
494 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
495 { \
496 TCGv_i32 tmp1, tmp2, tmp3; \
497 tmp1 = tcg_temp_new_i32(); \
498 tcg_gen_andi_i32(tmp1, t1, 0xff); \
499 tmp2 = tcg_const_i32(0); \
500 tmp3 = tcg_const_i32(0x1f); \
501 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
502 tcg_temp_free_i32(tmp3); \
503 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
504 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
505 tcg_temp_free_i32(tmp2); \
506 tcg_temp_free_i32(tmp1); \
507 }
508 GEN_SHIFT(shl)
509 GEN_SHIFT(shr)
510 #undef GEN_SHIFT
511
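/*
 * Variable ASR by register: counts of 32 or more behave like a shift by
 * 31 (all bits become copies of the sign bit), so the count is clamped
 * rather than the source being zeroed as in the LSL/LSR helpers above.
 */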
512 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
513 {
514 TCGv_i32 tmp1, tmp2;
515 tmp1 = tcg_temp_new_i32();
516 tcg_gen_andi_i32(tmp1, t1, 0xff);
517 tmp2 = tcg_const_i32(0x1f);
518 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
519 tcg_temp_free_i32(tmp2);
520 tcg_gen_sar_i32(dest, t0, tmp1);
521 tcg_temp_free_i32(tmp1);
522 }
523
524 static void shifter_out_im(TCGv_i32 var, int shift)
525 {
526 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
527 }
528
529 /* Shift by immediate. Includes special handling for shift == 0. */
530 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
531 int shift, int flags)
532 {
533 switch (shiftop) {
534 case 0: /* LSL */
535 if (shift != 0) {
536 if (flags)
537 shifter_out_im(var, 32 - shift);
538 tcg_gen_shli_i32(var, var, shift);
539 }
540 break;
541 case 1: /* LSR */
542 if (shift == 0) {
543 if (flags) {
544 tcg_gen_shri_i32(cpu_CF, var, 31);
545 }
546 tcg_gen_movi_i32(var, 0);
547 } else {
548 if (flags)
549 shifter_out_im(var, shift - 1);
550 tcg_gen_shri_i32(var, var, shift);
551 }
552 break;
553 case 2: /* ASR */
554 if (shift == 0)
555 shift = 32;
556 if (flags)
557 shifter_out_im(var, shift - 1);
558 if (shift == 32)
559 shift = 31;
560 tcg_gen_sari_i32(var, var, shift);
561 break;
562 case 3: /* ROR/RRX */
563 if (shift != 0) {
564 if (flags)
565 shifter_out_im(var, shift - 1);
566 tcg_gen_rotri_i32(var, var, shift); break;
567 } else {
568 TCGv_i32 tmp = tcg_temp_new_i32();
569 tcg_gen_shli_i32(tmp, cpu_CF, 31);
570 if (flags)
571 shifter_out_im(var, 0);
572 tcg_gen_shri_i32(var, var, 1);
573 tcg_gen_or_i32(var, var, tmp);
574 tcg_temp_free_i32(tmp);
575 }
576 }
577 }
578
579 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
580 TCGv_i32 shift, int flags)
581 {
582 if (flags) {
583 switch (shiftop) {
584 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
585 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
586 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
587 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
588 }
589 } else {
590 switch (shiftop) {
591 case 0:
592 gen_shl(var, var, shift);
593 break;
594 case 1:
595 gen_shr(var, var, shift);
596 break;
597 case 2:
598 gen_sar(var, var, shift);
599 break;
600 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
601 tcg_gen_rotr_i32(var, var, shift); break;
602 }
603 }
604 tcg_temp_free_i32(shift);
605 }
606
607 /*
608 * Generate a conditional based on ARM condition code cc.
609  * This is common between ARM and AArch64 targets.
610 */
611 void arm_test_cc(DisasCompare *cmp, int cc)
612 {
613 TCGv_i32 value;
614 TCGCond cond;
615 bool global = true;
616
617 switch (cc) {
618 case 0: /* eq: Z */
619 case 1: /* ne: !Z */
620 cond = TCG_COND_EQ;
621 value = cpu_ZF;
622 break;
623
624 case 2: /* cs: C */
625 case 3: /* cc: !C */
626 cond = TCG_COND_NE;
627 value = cpu_CF;
628 break;
629
630 case 4: /* mi: N */
631 case 5: /* pl: !N */
632 cond = TCG_COND_LT;
633 value = cpu_NF;
634 break;
635
636 case 6: /* vs: V */
637 case 7: /* vc: !V */
638 cond = TCG_COND_LT;
639 value = cpu_VF;
640 break;
641
642 case 8: /* hi: C && !Z */
643 case 9: /* ls: !C || Z -> !(C && !Z) */
644 cond = TCG_COND_NE;
645 value = tcg_temp_new_i32();
646 global = false;
647 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
648 ZF is non-zero for !Z; so AND the two subexpressions. */
649 tcg_gen_neg_i32(value, cpu_CF);
650 tcg_gen_and_i32(value, value, cpu_ZF);
651 break;
652
653 case 10: /* ge: N == V -> N ^ V == 0 */
654 case 11: /* lt: N != V -> N ^ V != 0 */
655 /* Since we're only interested in the sign bit, == 0 is >= 0. */
656 cond = TCG_COND_GE;
657 value = tcg_temp_new_i32();
658 global = false;
659 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
660 break;
661
662 case 12: /* gt: !Z && N == V */
663 case 13: /* le: Z || N != V */
664 cond = TCG_COND_NE;
665 value = tcg_temp_new_i32();
666 global = false;
667 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
668 * the sign bit then AND with ZF to yield the result. */
669 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
670 tcg_gen_sari_i32(value, value, 31);
671 tcg_gen_andc_i32(value, cpu_ZF, value);
672 break;
673
674 case 14: /* always */
675 case 15: /* always */
676 /* Use the ALWAYS condition, which will fold early.
677 * It doesn't matter what we use for the value. */
678 cond = TCG_COND_ALWAYS;
679 value = cpu_ZF;
680 goto no_invert;
681
682 default:
683 fprintf(stderr, "Bad condition code 0x%x\n", cc);
684 abort();
685 }
686
687 if (cc & 1) {
688 cond = tcg_invert_cond(cond);
689 }
690
691 no_invert:
692 cmp->cond = cond;
693 cmp->value = value;
694 cmp->value_global = global;
695 }
696
697 void arm_free_cc(DisasCompare *cmp)
698 {
699 if (!cmp->value_global) {
700 tcg_temp_free_i32(cmp->value);
701 }
702 }
703
704 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
705 {
706 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
707 }
708
709 void arm_gen_test_cc(int cc, TCGLabel *label)
710 {
711 DisasCompare cmp;
712 arm_test_cc(&cmp, cc);
713 arm_jump_cc(&cmp, label);
714 arm_free_cc(&cmp);
715 }
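/*
 * Typical usage (a sketch, not a specific call site): to conditionalise a
 * generated fragment, branch around it when the condition fails, e.g.
 *
 *     TCGLabel *skip = gen_new_label();
 *     arm_gen_test_cc(cc ^ 1, skip);    // cc ^ 1 is the inverse condition
 *     ... emit the conditional body ...
 *     gen_set_label(skip);
 */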
716
717 static inline void gen_set_condexec(DisasContext *s)
718 {
719 if (s->condexec_mask) {
720 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
721 TCGv_i32 tmp = tcg_temp_new_i32();
722 tcg_gen_movi_i32(tmp, val);
723 store_cpu_field(tmp, condexec_bits);
724 }
725 }
726
727 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
728 {
729 tcg_gen_movi_i32(cpu_R[15], val);
730 }
731
732 /* Set PC and Thumb state from var. var is marked as dead. */
733 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
734 {
735 s->base.is_jmp = DISAS_JUMP;
736 tcg_gen_andi_i32(cpu_R[15], var, ~1);
737 tcg_gen_andi_i32(var, var, 1);
738 store_cpu_field(var, thumb);
739 }
740
741 /*
742 * Set PC and Thumb state from var. var is marked as dead.
743 * For M-profile CPUs, include logic to detect exception-return
744 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
745 * and BX reg, and no others, and happens only for code in Handler mode.
746 * The Security Extension also requires us to check for the FNC_RETURN
747 * which signals a function return from non-secure state; this can happen
748 * in both Handler and Thread mode.
749 * To avoid having to do multiple comparisons in inline generated code,
750 * we make the check we do here loose, so it will match for EXC_RETURN
751 * in Thread mode. For system emulation do_v7m_exception_exit() checks
752 * for these spurious cases and returns without doing anything (giving
753 * the same behaviour as for a branch to a non-magic address).
754 *
755 * In linux-user mode it is unclear what the right behaviour for an
756 * attempted FNC_RETURN should be, because in real hardware this will go
757 * directly to Secure code (ie not the Linux kernel) which will then treat
758 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
759 * attempt behave the way it would on a CPU without the security extension,
760 * which is to say "like a normal branch". That means we can simply treat
761 * all branches as normal with no magic address behaviour.
762 */
763 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
764 {
765 /* Generate the same code here as for a simple bx, but flag via
766 * s->base.is_jmp that we need to do the rest of the work later.
767 */
768 gen_bx(s, var);
769 #ifndef CONFIG_USER_ONLY
770 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
771 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
772 s->base.is_jmp = DISAS_BX_EXCRET;
773 }
774 #endif
775 }
776
777 static inline void gen_bx_excret_final_code(DisasContext *s)
778 {
779     /* Generate the code to finish a possible exception return and end the TB */
780 TCGLabel *excret_label = gen_new_label();
781 uint32_t min_magic;
782
783 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
784 /* Covers FNC_RETURN and EXC_RETURN magic */
785 min_magic = FNC_RETURN_MIN_MAGIC;
786 } else {
787 /* EXC_RETURN magic only */
788 min_magic = EXC_RETURN_MIN_MAGIC;
789 }
790
791 /* Is the new PC value in the magic range indicating exception return? */
792 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
793 /* No: end the TB as we would for a DISAS_JMP */
794 if (is_singlestepping(s)) {
795 gen_singlestep_exception(s);
796 } else {
797 tcg_gen_exit_tb(NULL, 0);
798 }
799 gen_set_label(excret_label);
800 /* Yes: this is an exception return.
801 * At this point in runtime env->regs[15] and env->thumb will hold
802 * the exception-return magic number, which do_v7m_exception_exit()
803 * will read. Nothing else will be able to see those values because
804 * the cpu-exec main loop guarantees that we will always go straight
805 * from raising the exception to the exception-handling code.
806 *
807 * gen_ss_advance(s) does nothing on M profile currently but
808 * calling it is conceptually the right thing as we have executed
809 * this instruction (compare SWI, HVC, SMC handling).
810 */
811 gen_ss_advance(s);
812 gen_exception_internal(EXCP_EXCEPTION_EXIT);
813 }
814
815 static inline void gen_bxns(DisasContext *s, int rm)
816 {
817 TCGv_i32 var = load_reg(s, rm);
818
819 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
820 * we need to sync state before calling it, but:
821 * - we don't need to do gen_set_pc_im() because the bxns helper will
822 * always set the PC itself
823 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
824 * unless it's outside an IT block or the last insn in an IT block,
825 * so we know that condexec == 0 (already set at the top of the TB)
826 * is correct in the non-UNPREDICTABLE cases, and we can choose
827 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
828 */
829 gen_helper_v7m_bxns(cpu_env, var);
830 tcg_temp_free_i32(var);
831 s->base.is_jmp = DISAS_EXIT;
832 }
833
834 static inline void gen_blxns(DisasContext *s, int rm)
835 {
836 TCGv_i32 var = load_reg(s, rm);
837
838 /* We don't need to sync condexec state, for the same reason as bxns.
839 * We do however need to set the PC, because the blxns helper reads it.
840 * The blxns helper may throw an exception.
841 */
842 gen_set_pc_im(s, s->base.pc_next);
843 gen_helper_v7m_blxns(cpu_env, var);
844 tcg_temp_free_i32(var);
845 s->base.is_jmp = DISAS_EXIT;
846 }
847
848 /* Variant of store_reg which uses branch&exchange logic when storing
849 to r15 in ARM architecture v7 and above. The source must be a temporary
850 and will be marked as dead. */
851 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
852 {
853 if (reg == 15 && ENABLE_ARCH_7) {
854 gen_bx(s, var);
855 } else {
856 store_reg(s, reg, var);
857 }
858 }
859
860 /* Variant of store_reg which uses branch&exchange logic when storing
861 * to r15 in ARM architecture v5T and above. This is used for storing
862 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
863 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
864 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
865 {
866 if (reg == 15 && ENABLE_ARCH_5) {
867 gen_bx_excret(s, var);
868 } else {
869 store_reg(s, reg, var);
870 }
871 }
872
873 #ifdef CONFIG_USER_ONLY
874 #define IS_USER_ONLY 1
875 #else
876 #define IS_USER_ONLY 0
877 #endif
878
879 /* Abstractions of "generate code to do a guest load/store for
880 * AArch32", where a vaddr is always 32 bits (and is zero
881 * extended if we're a 64 bit core) and data is also
882 * 32 bits unless specifically doing a 64 bit access.
883 * These functions work like tcg_gen_qemu_{ld,st}* except
884 * that the address argument is TCGv_i32 rather than TCGv.
885 */
886
887 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
888 {
889 TCGv addr = tcg_temp_new();
890 tcg_gen_extu_i32_tl(addr, a32);
891
892 /* Not needed for user-mode BE32, where we use MO_BE instead. */
893 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
894 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
895 }
896 return addr;
897 }
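/*
 * The XOR above implements BE32 (SCTLR.B) data accesses on top of QEMU's
 * little-endian view of memory: 4 - (1 << size) is 3 for byte accesses
 * and 2 for halfword accesses, so e.g. a byte access to address 0x1000
 * actually touches 0x1003, which is where that byte lives when the
 * containing word is stored big-endian.
 */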
898
899 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
900 int index, MemOp opc)
901 {
902 TCGv addr;
903
904 if (arm_dc_feature(s, ARM_FEATURE_M) &&
905 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
906 opc |= MO_ALIGN;
907 }
908
909 addr = gen_aa32_addr(s, a32, opc);
910 tcg_gen_qemu_ld_i32(val, addr, index, opc);
911 tcg_temp_free(addr);
912 }
913
914 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
915 int index, MemOp opc)
916 {
917 TCGv addr;
918
919 if (arm_dc_feature(s, ARM_FEATURE_M) &&
920 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
921 opc |= MO_ALIGN;
922 }
923
924 addr = gen_aa32_addr(s, a32, opc);
925 tcg_gen_qemu_st_i32(val, addr, index, opc);
926 tcg_temp_free(addr);
927 }
928
929 #define DO_GEN_LD(SUFF, OPC) \
930 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
931 TCGv_i32 a32, int index) \
932 { \
933 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
934 }
935
936 #define DO_GEN_ST(SUFF, OPC) \
937 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
938 TCGv_i32 a32, int index) \
939 { \
940 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
941 }
942
943 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
944 {
945 /* Not needed for user-mode BE32, where we use MO_BE instead. */
946 if (!IS_USER_ONLY && s->sctlr_b) {
947 tcg_gen_rotri_i64(val, val, 32);
948 }
949 }
950
951 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
952 int index, MemOp opc)
953 {
954 TCGv addr = gen_aa32_addr(s, a32, opc);
955 tcg_gen_qemu_ld_i64(val, addr, index, opc);
956 gen_aa32_frob64(s, val);
957 tcg_temp_free(addr);
958 }
959
960 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
961 TCGv_i32 a32, int index)
962 {
963 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
964 }
965
966 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
967 int index, MemOp opc)
968 {
969 TCGv addr = gen_aa32_addr(s, a32, opc);
970
971 /* Not needed for user-mode BE32, where we use MO_BE instead. */
972 if (!IS_USER_ONLY && s->sctlr_b) {
973 TCGv_i64 tmp = tcg_temp_new_i64();
974 tcg_gen_rotri_i64(tmp, val, 32);
975 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
976 tcg_temp_free_i64(tmp);
977 } else {
978 tcg_gen_qemu_st_i64(val, addr, index, opc);
979 }
980 tcg_temp_free(addr);
981 }
982
983 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
984 TCGv_i32 a32, int index)
985 {
986 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
987 }
988
989 DO_GEN_LD(8u, MO_UB)
990 DO_GEN_LD(16u, MO_UW)
991 DO_GEN_LD(32u, MO_UL)
992 DO_GEN_ST(8, MO_UB)
993 DO_GEN_ST(16, MO_UW)
994 DO_GEN_ST(32, MO_UL)
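/*
 * The expansions above provide gen_aa32_ld8u/ld16u/ld32u and
 * gen_aa32_st8/st16/st32: byte, halfword and word accessors in the
 * CPU's current data endianness, used for example by the iwMMXt
 * decoder further down.
 */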
995
996 static inline void gen_hvc(DisasContext *s, int imm16)
997 {
998 /* The pre HVC helper handles cases when HVC gets trapped
999 * as an undefined insn by runtime configuration (ie before
1000 * the insn really executes).
1001 */
1002 gen_set_pc_im(s, s->pc_curr);
1003 gen_helper_pre_hvc(cpu_env);
1004 /* Otherwise we will treat this as a real exception which
1005 * happens after execution of the insn. (The distinction matters
1006 * for the PC value reported to the exception handler and also
1007 * for single stepping.)
1008 */
1009 s->svc_imm = imm16;
1010 gen_set_pc_im(s, s->base.pc_next);
1011 s->base.is_jmp = DISAS_HVC;
1012 }
1013
1014 static inline void gen_smc(DisasContext *s)
1015 {
1016 /* As with HVC, we may take an exception either before or after
1017 * the insn executes.
1018 */
1019 TCGv_i32 tmp;
1020
1021 gen_set_pc_im(s, s->pc_curr);
1022 tmp = tcg_const_i32(syn_aa32_smc());
1023 gen_helper_pre_smc(cpu_env, tmp);
1024 tcg_temp_free_i32(tmp);
1025 gen_set_pc_im(s, s->base.pc_next);
1026 s->base.is_jmp = DISAS_SMC;
1027 }
1028
1029 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1030 {
1031 gen_set_condexec(s);
1032 gen_set_pc_im(s, pc);
1033 gen_exception_internal(excp);
1034 s->base.is_jmp = DISAS_NORETURN;
1035 }
1036
1037 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1038 int syn, uint32_t target_el)
1039 {
1040 gen_set_condexec(s);
1041 gen_set_pc_im(s, pc);
1042 gen_exception(excp, syn, target_el);
1043 s->base.is_jmp = DISAS_NORETURN;
1044 }
1045
1046 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1047 {
1048 TCGv_i32 tcg_syn;
1049
1050 gen_set_condexec(s);
1051 gen_set_pc_im(s, s->pc_curr);
1052 tcg_syn = tcg_const_i32(syn);
1053 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1054 tcg_temp_free_i32(tcg_syn);
1055 s->base.is_jmp = DISAS_NORETURN;
1056 }
1057
1058 static void unallocated_encoding(DisasContext *s)
1059 {
1060 /* Unallocated and reserved encodings are uncategorized */
1061 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1062 default_exception_el(s));
1063 }
1064
1065 /* Force a TB lookup after an instruction that changes the CPU state. */
1066 static inline void gen_lookup_tb(DisasContext *s)
1067 {
1068 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1069 s->base.is_jmp = DISAS_EXIT;
1070 }
1071
1072 static inline void gen_hlt(DisasContext *s, int imm)
1073 {
1074 /* HLT. This has two purposes.
1075 * Architecturally, it is an external halting debug instruction.
1076      * Since QEMU doesn't implement external debug, we treat this as
1077      * the architecture requires when halting debug is disabled: it will UNDEF.
1078 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1079 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1080 * must trigger semihosting even for ARMv7 and earlier, where
1081 * HLT was an undefined encoding.
1082 * In system mode, we don't allow userspace access to
1083 * semihosting, to provide some semblance of security
1084 * (and for consistency with our 32-bit semihosting).
1085 */
1086 if (semihosting_enabled() &&
1087 #ifndef CONFIG_USER_ONLY
1088 s->current_el != 0 &&
1089 #endif
1090 (imm == (s->thumb ? 0x3c : 0xf000))) {
1091 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1092 return;
1093 }
1094
1095 unallocated_encoding(s);
1096 }
1097
1098 static TCGv_ptr get_fpstatus_ptr(int neon)
1099 {
1100 TCGv_ptr statusptr = tcg_temp_new_ptr();
1101 int offset;
1102 if (neon) {
1103 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1104 } else {
1105 offset = offsetof(CPUARMState, vfp.fp_status);
1106 }
1107 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1108 return statusptr;
1109 }
1110
1111 static inline long vfp_reg_offset(bool dp, unsigned reg)
1112 {
1113 if (dp) {
1114 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1115 } else {
1116 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1117 if (reg & 1) {
1118 ofs += offsetof(CPU_DoubleU, l.upper);
1119 } else {
1120 ofs += offsetof(CPU_DoubleU, l.lower);
1121 }
1122 return ofs;
1123 }
1124 }
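/*
 * Mapping example: D registers live in zregs[n >> 1].d[n & 1], so D5 is
 * zregs[2].d[1]; S registers are 32-bit halves of the same storage, so
 * S5 is the upper word of zregs[1].d[0], i.e. the high half of D2.
 */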
1125
1126 /* Return the offset of a 32-bit piece of a NEON register.
1127    Zero is the least significant end of the register.  */
1128 static inline long
1129 neon_reg_offset (int reg, int n)
1130 {
1131 int sreg;
1132 sreg = reg * 2 + n;
1133 return vfp_reg_offset(0, sreg);
1134 }
1135
1136 static TCGv_i32 neon_load_reg(int reg, int pass)
1137 {
1138 TCGv_i32 tmp = tcg_temp_new_i32();
1139 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1140 return tmp;
1141 }
1142
1143 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1144 {
1145 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1146 tcg_temp_free_i32(var);
1147 }
1148
1149 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1150 {
1151 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1152 }
1153
1154 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1155 {
1156 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1157 }
1158
1159 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1160 {
1161 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1162 }
1163
1164 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1165 {
1166 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1167 }
1168
1169 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1170 {
1171 TCGv_ptr ret = tcg_temp_new_ptr();
1172 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1173 return ret;
1174 }
1175
1176 #define ARM_CP_RW_BIT (1 << 20)
1177
1178 /* Include the VFP and Neon decoders */
1179 #include "translate-vfp.c.inc"
1180 #include "translate-neon.c.inc"
1181
1182 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1183 {
1184 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1185 }
1186
1187 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1188 {
1189 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1190 }
1191
1192 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1193 {
1194 TCGv_i32 var = tcg_temp_new_i32();
1195 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1196 return var;
1197 }
1198
1199 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1200 {
1201 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1202 tcg_temp_free_i32(var);
1203 }
1204
1205 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1206 {
1207 iwmmxt_store_reg(cpu_M0, rn);
1208 }
1209
1210 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1211 {
1212 iwmmxt_load_reg(cpu_M0, rn);
1213 }
1214
1215 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1216 {
1217 iwmmxt_load_reg(cpu_V1, rn);
1218 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1219 }
1220
1221 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1222 {
1223 iwmmxt_load_reg(cpu_V1, rn);
1224 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1225 }
1226
1227 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1228 {
1229 iwmmxt_load_reg(cpu_V1, rn);
1230 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1231 }
1232
1233 #define IWMMXT_OP(name) \
1234 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1235 { \
1236 iwmmxt_load_reg(cpu_V1, rn); \
1237 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1238 }
1239
1240 #define IWMMXT_OP_ENV(name) \
1241 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1242 { \
1243 iwmmxt_load_reg(cpu_V1, rn); \
1244 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1245 }
1246
1247 #define IWMMXT_OP_ENV_SIZE(name) \
1248 IWMMXT_OP_ENV(name##b) \
1249 IWMMXT_OP_ENV(name##w) \
1250 IWMMXT_OP_ENV(name##l)
1251
1252 #define IWMMXT_OP_ENV1(name) \
1253 static inline void gen_op_iwmmxt_##name##_M0(void) \
1254 { \
1255 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1256 }
1257
1258 IWMMXT_OP(maddsq)
1259 IWMMXT_OP(madduq)
1260 IWMMXT_OP(sadb)
1261 IWMMXT_OP(sadw)
1262 IWMMXT_OP(mulslw)
1263 IWMMXT_OP(mulshw)
1264 IWMMXT_OP(mululw)
1265 IWMMXT_OP(muluhw)
1266 IWMMXT_OP(macsw)
1267 IWMMXT_OP(macuw)
1268
1269 IWMMXT_OP_ENV_SIZE(unpackl)
1270 IWMMXT_OP_ENV_SIZE(unpackh)
1271
1272 IWMMXT_OP_ENV1(unpacklub)
1273 IWMMXT_OP_ENV1(unpackluw)
1274 IWMMXT_OP_ENV1(unpacklul)
1275 IWMMXT_OP_ENV1(unpackhub)
1276 IWMMXT_OP_ENV1(unpackhuw)
1277 IWMMXT_OP_ENV1(unpackhul)
1278 IWMMXT_OP_ENV1(unpacklsb)
1279 IWMMXT_OP_ENV1(unpacklsw)
1280 IWMMXT_OP_ENV1(unpacklsl)
1281 IWMMXT_OP_ENV1(unpackhsb)
1282 IWMMXT_OP_ENV1(unpackhsw)
1283 IWMMXT_OP_ENV1(unpackhsl)
1284
1285 IWMMXT_OP_ENV_SIZE(cmpeq)
1286 IWMMXT_OP_ENV_SIZE(cmpgtu)
1287 IWMMXT_OP_ENV_SIZE(cmpgts)
1288
1289 IWMMXT_OP_ENV_SIZE(mins)
1290 IWMMXT_OP_ENV_SIZE(minu)
1291 IWMMXT_OP_ENV_SIZE(maxs)
1292 IWMMXT_OP_ENV_SIZE(maxu)
1293
1294 IWMMXT_OP_ENV_SIZE(subn)
1295 IWMMXT_OP_ENV_SIZE(addn)
1296 IWMMXT_OP_ENV_SIZE(subu)
1297 IWMMXT_OP_ENV_SIZE(addu)
1298 IWMMXT_OP_ENV_SIZE(subs)
1299 IWMMXT_OP_ENV_SIZE(adds)
1300
1301 IWMMXT_OP_ENV(avgb0)
1302 IWMMXT_OP_ENV(avgb1)
1303 IWMMXT_OP_ENV(avgw0)
1304 IWMMXT_OP_ENV(avgw1)
1305
1306 IWMMXT_OP_ENV(packuw)
1307 IWMMXT_OP_ENV(packul)
1308 IWMMXT_OP_ENV(packuq)
1309 IWMMXT_OP_ENV(packsw)
1310 IWMMXT_OP_ENV(packsl)
1311 IWMMXT_OP_ENV(packsq)
1312
1313 static void gen_op_iwmmxt_set_mup(void)
1314 {
1315 TCGv_i32 tmp;
1316 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1317 tcg_gen_ori_i32(tmp, tmp, 2);
1318 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1319 }
1320
1321 static void gen_op_iwmmxt_set_cup(void)
1322 {
1323 TCGv_i32 tmp;
1324 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1325 tcg_gen_ori_i32(tmp, tmp, 1);
1326 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1327 }
1328
1329 static void gen_op_iwmmxt_setpsr_nz(void)
1330 {
1331 TCGv_i32 tmp = tcg_temp_new_i32();
1332 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1333 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1334 }
1335
1336 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1337 {
1338 iwmmxt_load_reg(cpu_V1, rn);
1339 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1340 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1341 }
1342
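/*
 * Decode the LDC/STC-style addressing mode of an iwMMXt load/store:
 * the base register is in bits [19:16], the 8-bit immediate offset is
 * scaled by 4 when bit 8 is set, bit 23 gives the offset sign, bit 24
 * selects pre- versus post-indexing and bit 21 requests base writeback.
 * Returns nonzero for the forms we reject.
 */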
1343 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1344 TCGv_i32 dest)
1345 {
1346 int rd;
1347 uint32_t offset;
1348 TCGv_i32 tmp;
1349
1350 rd = (insn >> 16) & 0xf;
1351 tmp = load_reg(s, rd);
1352
1353 offset = (insn & 0xff) << ((insn >> 7) & 2);
1354 if (insn & (1 << 24)) {
1355 /* Pre indexed */
1356 if (insn & (1 << 23))
1357 tcg_gen_addi_i32(tmp, tmp, offset);
1358 else
1359 tcg_gen_addi_i32(tmp, tmp, -offset);
1360 tcg_gen_mov_i32(dest, tmp);
1361 if (insn & (1 << 21))
1362 store_reg(s, rd, tmp);
1363 else
1364 tcg_temp_free_i32(tmp);
1365 } else if (insn & (1 << 21)) {
1366 /* Post indexed */
1367 tcg_gen_mov_i32(dest, tmp);
1368 if (insn & (1 << 23))
1369 tcg_gen_addi_i32(tmp, tmp, offset);
1370 else
1371 tcg_gen_addi_i32(tmp, tmp, -offset);
1372 store_reg(s, rd, tmp);
1373 } else if (!(insn & (1 << 23)))
1374 return 1;
1375 return 0;
1376 }
1377
1378 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1379 {
1380 int rd = (insn >> 0) & 0xf;
1381 TCGv_i32 tmp;
1382
1383 if (insn & (1 << 8)) {
1384 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1385 return 1;
1386 } else {
1387 tmp = iwmmxt_load_creg(rd);
1388 }
1389 } else {
1390 tmp = tcg_temp_new_i32();
1391 iwmmxt_load_reg(cpu_V0, rd);
1392 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1393 }
1394 tcg_gen_andi_i32(tmp, tmp, mask);
1395 tcg_gen_mov_i32(dest, tmp);
1396 tcg_temp_free_i32(tmp);
1397 return 0;
1398 }
1399
1400 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1401    (i.e. an undefined instruction).  */
1402 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1403 {
1404 int rd, wrd;
1405 int rdhi, rdlo, rd0, rd1, i;
1406 TCGv_i32 addr;
1407 TCGv_i32 tmp, tmp2, tmp3;
1408
1409 if ((insn & 0x0e000e00) == 0x0c000000) {
1410 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1411 wrd = insn & 0xf;
1412 rdlo = (insn >> 12) & 0xf;
1413 rdhi = (insn >> 16) & 0xf;
1414 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1415 iwmmxt_load_reg(cpu_V0, wrd);
1416 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1417 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1418 } else { /* TMCRR */
1419 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1420 iwmmxt_store_reg(cpu_V0, wrd);
1421 gen_op_iwmmxt_set_mup();
1422 }
1423 return 0;
1424 }
1425
1426 wrd = (insn >> 12) & 0xf;
1427 addr = tcg_temp_new_i32();
1428 if (gen_iwmmxt_address(s, insn, addr)) {
1429 tcg_temp_free_i32(addr);
1430 return 1;
1431 }
1432 if (insn & ARM_CP_RW_BIT) {
1433 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1434 tmp = tcg_temp_new_i32();
1435 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1436 iwmmxt_store_creg(wrd, tmp);
1437 } else {
1438 i = 1;
1439 if (insn & (1 << 8)) {
1440 if (insn & (1 << 22)) { /* WLDRD */
1441 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1442 i = 0;
1443 } else { /* WLDRW wRd */
1444 tmp = tcg_temp_new_i32();
1445 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1446 }
1447 } else {
1448 tmp = tcg_temp_new_i32();
1449 if (insn & (1 << 22)) { /* WLDRH */
1450 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1451 } else { /* WLDRB */
1452 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1453 }
1454 }
1455 if (i) {
1456 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1457 tcg_temp_free_i32(tmp);
1458 }
1459 gen_op_iwmmxt_movq_wRn_M0(wrd);
1460 }
1461 } else {
1462 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1463 tmp = iwmmxt_load_creg(wrd);
1464 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1465 } else {
1466 gen_op_iwmmxt_movq_M0_wRn(wrd);
1467 tmp = tcg_temp_new_i32();
1468 if (insn & (1 << 8)) {
1469 if (insn & (1 << 22)) { /* WSTRD */
1470 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1471 } else { /* WSTRW wRd */
1472 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1473 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1474 }
1475 } else {
1476 if (insn & (1 << 22)) { /* WSTRH */
1477 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1478 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1479 } else { /* WSTRB */
1480 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1481 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1482 }
1483 }
1484 }
1485 tcg_temp_free_i32(tmp);
1486 }
1487 tcg_temp_free_i32(addr);
1488 return 0;
1489 }
1490
1491 if ((insn & 0x0f000000) != 0x0e000000)
1492 return 1;
1493
1494 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1495 case 0x000: /* WOR */
1496 wrd = (insn >> 12) & 0xf;
1497 rd0 = (insn >> 0) & 0xf;
1498 rd1 = (insn >> 16) & 0xf;
1499 gen_op_iwmmxt_movq_M0_wRn(rd0);
1500 gen_op_iwmmxt_orq_M0_wRn(rd1);
1501 gen_op_iwmmxt_setpsr_nz();
1502 gen_op_iwmmxt_movq_wRn_M0(wrd);
1503 gen_op_iwmmxt_set_mup();
1504 gen_op_iwmmxt_set_cup();
1505 break;
1506 case 0x011: /* TMCR */
1507 if (insn & 0xf)
1508 return 1;
1509 rd = (insn >> 12) & 0xf;
1510 wrd = (insn >> 16) & 0xf;
1511 switch (wrd) {
1512 case ARM_IWMMXT_wCID:
1513 case ARM_IWMMXT_wCASF:
1514 break;
1515 case ARM_IWMMXT_wCon:
1516 gen_op_iwmmxt_set_cup();
1517 /* Fall through. */
1518 case ARM_IWMMXT_wCSSF:
1519 tmp = iwmmxt_load_creg(wrd);
1520 tmp2 = load_reg(s, rd);
1521 tcg_gen_andc_i32(tmp, tmp, tmp2);
1522 tcg_temp_free_i32(tmp2);
1523 iwmmxt_store_creg(wrd, tmp);
1524 break;
1525 case ARM_IWMMXT_wCGR0:
1526 case ARM_IWMMXT_wCGR1:
1527 case ARM_IWMMXT_wCGR2:
1528 case ARM_IWMMXT_wCGR3:
1529 gen_op_iwmmxt_set_cup();
1530 tmp = load_reg(s, rd);
1531 iwmmxt_store_creg(wrd, tmp);
1532 break;
1533 default:
1534 return 1;
1535 }
1536 break;
1537 case 0x100: /* WXOR */
1538 wrd = (insn >> 12) & 0xf;
1539 rd0 = (insn >> 0) & 0xf;
1540 rd1 = (insn >> 16) & 0xf;
1541 gen_op_iwmmxt_movq_M0_wRn(rd0);
1542 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1543 gen_op_iwmmxt_setpsr_nz();
1544 gen_op_iwmmxt_movq_wRn_M0(wrd);
1545 gen_op_iwmmxt_set_mup();
1546 gen_op_iwmmxt_set_cup();
1547 break;
1548 case 0x111: /* TMRC */
1549 if (insn & 0xf)
1550 return 1;
1551 rd = (insn >> 12) & 0xf;
1552 wrd = (insn >> 16) & 0xf;
1553 tmp = iwmmxt_load_creg(wrd);
1554 store_reg(s, rd, tmp);
1555 break;
1556 case 0x300: /* WANDN */
1557 wrd = (insn >> 12) & 0xf;
1558 rd0 = (insn >> 0) & 0xf;
1559 rd1 = (insn >> 16) & 0xf;
1560 gen_op_iwmmxt_movq_M0_wRn(rd0);
1561 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1562 gen_op_iwmmxt_andq_M0_wRn(rd1);
1563 gen_op_iwmmxt_setpsr_nz();
1564 gen_op_iwmmxt_movq_wRn_M0(wrd);
1565 gen_op_iwmmxt_set_mup();
1566 gen_op_iwmmxt_set_cup();
1567 break;
1568 case 0x200: /* WAND */
1569 wrd = (insn >> 12) & 0xf;
1570 rd0 = (insn >> 0) & 0xf;
1571 rd1 = (insn >> 16) & 0xf;
1572 gen_op_iwmmxt_movq_M0_wRn(rd0);
1573 gen_op_iwmmxt_andq_M0_wRn(rd1);
1574 gen_op_iwmmxt_setpsr_nz();
1575 gen_op_iwmmxt_movq_wRn_M0(wrd);
1576 gen_op_iwmmxt_set_mup();
1577 gen_op_iwmmxt_set_cup();
1578 break;
1579 case 0x810: case 0xa10: /* WMADD */
1580 wrd = (insn >> 12) & 0xf;
1581 rd0 = (insn >> 0) & 0xf;
1582 rd1 = (insn >> 16) & 0xf;
1583 gen_op_iwmmxt_movq_M0_wRn(rd0);
1584 if (insn & (1 << 21))
1585 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1586 else
1587 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1588 gen_op_iwmmxt_movq_wRn_M0(wrd);
1589 gen_op_iwmmxt_set_mup();
1590 break;
1591 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1592 wrd = (insn >> 12) & 0xf;
1593 rd0 = (insn >> 16) & 0xf;
1594 rd1 = (insn >> 0) & 0xf;
1595 gen_op_iwmmxt_movq_M0_wRn(rd0);
1596 switch ((insn >> 22) & 3) {
1597 case 0:
1598 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1599 break;
1600 case 1:
1601 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1602 break;
1603 case 2:
1604 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1605 break;
1606 case 3:
1607 return 1;
1608 }
1609 gen_op_iwmmxt_movq_wRn_M0(wrd);
1610 gen_op_iwmmxt_set_mup();
1611 gen_op_iwmmxt_set_cup();
1612 break;
1613 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1614 wrd = (insn >> 12) & 0xf;
1615 rd0 = (insn >> 16) & 0xf;
1616 rd1 = (insn >> 0) & 0xf;
1617 gen_op_iwmmxt_movq_M0_wRn(rd0);
1618 switch ((insn >> 22) & 3) {
1619 case 0:
1620 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1621 break;
1622 case 1:
1623 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1624 break;
1625 case 2:
1626 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1627 break;
1628 case 3:
1629 return 1;
1630 }
1631 gen_op_iwmmxt_movq_wRn_M0(wrd);
1632 gen_op_iwmmxt_set_mup();
1633 gen_op_iwmmxt_set_cup();
1634 break;
1635 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1636 wrd = (insn >> 12) & 0xf;
1637 rd0 = (insn >> 16) & 0xf;
1638 rd1 = (insn >> 0) & 0xf;
1639 gen_op_iwmmxt_movq_M0_wRn(rd0);
1640 if (insn & (1 << 22))
1641 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1642 else
1643 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1644 if (!(insn & (1 << 20)))
1645 gen_op_iwmmxt_addl_M0_wRn(wrd);
1646 gen_op_iwmmxt_movq_wRn_M0(wrd);
1647 gen_op_iwmmxt_set_mup();
1648 break;
1649 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1650 wrd = (insn >> 12) & 0xf;
1651 rd0 = (insn >> 16) & 0xf;
1652 rd1 = (insn >> 0) & 0xf;
1653 gen_op_iwmmxt_movq_M0_wRn(rd0);
1654 if (insn & (1 << 21)) {
1655 if (insn & (1 << 20))
1656 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1657 else
1658 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1659 } else {
1660 if (insn & (1 << 20))
1661 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1662 else
1663 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1664 }
1665 gen_op_iwmmxt_movq_wRn_M0(wrd);
1666 gen_op_iwmmxt_set_mup();
1667 break;
1668 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1669 wrd = (insn >> 12) & 0xf;
1670 rd0 = (insn >> 16) & 0xf;
1671 rd1 = (insn >> 0) & 0xf;
1672 gen_op_iwmmxt_movq_M0_wRn(rd0);
1673 if (insn & (1 << 21))
1674 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1675 else
1676 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1677 if (!(insn & (1 << 20))) {
1678 iwmmxt_load_reg(cpu_V1, wrd);
1679 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1680 }
1681 gen_op_iwmmxt_movq_wRn_M0(wrd);
1682 gen_op_iwmmxt_set_mup();
1683 break;
1684 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1685 wrd = (insn >> 12) & 0xf;
1686 rd0 = (insn >> 16) & 0xf;
1687 rd1 = (insn >> 0) & 0xf;
1688 gen_op_iwmmxt_movq_M0_wRn(rd0);
1689 switch ((insn >> 22) & 3) {
1690 case 0:
1691 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1692 break;
1693 case 1:
1694 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1695 break;
1696 case 2:
1697 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1698 break;
1699 case 3:
1700 return 1;
1701 }
1702 gen_op_iwmmxt_movq_wRn_M0(wrd);
1703 gen_op_iwmmxt_set_mup();
1704 gen_op_iwmmxt_set_cup();
1705 break;
1706 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1707 wrd = (insn >> 12) & 0xf;
1708 rd0 = (insn >> 16) & 0xf;
1709 rd1 = (insn >> 0) & 0xf;
1710 gen_op_iwmmxt_movq_M0_wRn(rd0);
1711 if (insn & (1 << 22)) {
1712 if (insn & (1 << 20))
1713 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1714 else
1715 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1716 } else {
1717 if (insn & (1 << 20))
1718 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1719 else
1720 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1721 }
1722 gen_op_iwmmxt_movq_wRn_M0(wrd);
1723 gen_op_iwmmxt_set_mup();
1724 gen_op_iwmmxt_set_cup();
1725 break;
1726 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1727 wrd = (insn >> 12) & 0xf;
1728 rd0 = (insn >> 16) & 0xf;
1729 rd1 = (insn >> 0) & 0xf;
1730 gen_op_iwmmxt_movq_M0_wRn(rd0);
1731 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1732 tcg_gen_andi_i32(tmp, tmp, 7);
1733 iwmmxt_load_reg(cpu_V1, rd1);
1734 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1735 tcg_temp_free_i32(tmp);
1736 gen_op_iwmmxt_movq_wRn_M0(wrd);
1737 gen_op_iwmmxt_set_mup();
1738 break;
1739 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1740 if (((insn >> 6) & 3) == 3)
1741 return 1;
1742 rd = (insn >> 12) & 0xf;
1743 wrd = (insn >> 16) & 0xf;
1744 tmp = load_reg(s, rd);
1745 gen_op_iwmmxt_movq_M0_wRn(wrd);
1746 switch ((insn >> 6) & 3) {
1747 case 0:
1748 tmp2 = tcg_const_i32(0xff);
1749 tmp3 = tcg_const_i32((insn & 7) << 3);
1750 break;
1751 case 1:
1752 tmp2 = tcg_const_i32(0xffff);
1753 tmp3 = tcg_const_i32((insn & 3) << 4);
1754 break;
1755 case 2:
1756 tmp2 = tcg_const_i32(0xffffffff);
1757 tmp3 = tcg_const_i32((insn & 1) << 5);
1758 break;
1759 default:
1760 tmp2 = NULL;
1761 tmp3 = NULL;
1762 }
1763 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1764 tcg_temp_free_i32(tmp3);
1765 tcg_temp_free_i32(tmp2);
1766 tcg_temp_free_i32(tmp);
1767 gen_op_iwmmxt_movq_wRn_M0(wrd);
1768 gen_op_iwmmxt_set_mup();
1769 break;
1770 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1771 rd = (insn >> 12) & 0xf;
1772 wrd = (insn >> 16) & 0xf;
1773 if (rd == 15 || ((insn >> 22) & 3) == 3)
1774 return 1;
1775 gen_op_iwmmxt_movq_M0_wRn(wrd);
1776 tmp = tcg_temp_new_i32();
1777 switch ((insn >> 22) & 3) {
1778 case 0:
1779 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1780 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1781 if (insn & 8) {
1782 tcg_gen_ext8s_i32(tmp, tmp);
1783 } else {
1784 tcg_gen_andi_i32(tmp, tmp, 0xff);
1785 }
1786 break;
1787 case 1:
1788 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1789 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1790 if (insn & 8) {
1791 tcg_gen_ext16s_i32(tmp, tmp);
1792 } else {
1793 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1794 }
1795 break;
1796 case 2:
1797 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1798 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1799 break;
1800 }
1801 store_reg(s, rd, tmp);
1802 break;
1803 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1804 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1805 return 1;
1806 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1807 switch ((insn >> 22) & 3) {
1808 case 0:
1809 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1810 break;
1811 case 1:
1812 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1813 break;
1814 case 2:
1815 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1816 break;
1817 }
1818 tcg_gen_shli_i32(tmp, tmp, 28);
1819 gen_set_nzcv(tmp);
1820 tcg_temp_free_i32(tmp);
1821 break;
1822 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1823 if (((insn >> 6) & 3) == 3)
1824 return 1;
1825 rd = (insn >> 12) & 0xf;
1826 wrd = (insn >> 16) & 0xf;
1827 tmp = load_reg(s, rd);
1828 switch ((insn >> 6) & 3) {
1829 case 0:
1830 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1831 break;
1832 case 1:
1833 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1834 break;
1835 case 2:
1836 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1837 break;
1838 }
1839 tcg_temp_free_i32(tmp);
1840 gen_op_iwmmxt_movq_wRn_M0(wrd);
1841 gen_op_iwmmxt_set_mup();
1842 break;
1843 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1844 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1845 return 1;
1846 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1847 tmp2 = tcg_temp_new_i32();
1848 tcg_gen_mov_i32(tmp2, tmp);
1849 switch ((insn >> 22) & 3) {
1850 case 0:
1851 for (i = 0; i < 7; i ++) {
1852 tcg_gen_shli_i32(tmp2, tmp2, 4);
1853 tcg_gen_and_i32(tmp, tmp, tmp2);
1854 }
1855 break;
1856 case 1:
1857 for (i = 0; i < 3; i ++) {
1858 tcg_gen_shli_i32(tmp2, tmp2, 8);
1859 tcg_gen_and_i32(tmp, tmp, tmp2);
1860 }
1861 break;
1862 case 2:
1863 tcg_gen_shli_i32(tmp2, tmp2, 16);
1864 tcg_gen_and_i32(tmp, tmp, tmp2);
1865 break;
1866 }
1867 gen_set_nzcv(tmp);
1868 tcg_temp_free_i32(tmp2);
1869 tcg_temp_free_i32(tmp);
1870 break;
1871 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
1872 wrd = (insn >> 12) & 0xf;
1873 rd0 = (insn >> 16) & 0xf;
1874 gen_op_iwmmxt_movq_M0_wRn(rd0);
1875 switch ((insn >> 22) & 3) {
1876 case 0:
1877 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1878 break;
1879 case 1:
1880 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1881 break;
1882 case 2:
1883 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1884 break;
1885 case 3:
1886 return 1;
1887 }
1888 gen_op_iwmmxt_movq_wRn_M0(wrd);
1889 gen_op_iwmmxt_set_mup();
1890 break;
1891 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
1892 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1893 return 1;
1894 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1895 tmp2 = tcg_temp_new_i32();
1896 tcg_gen_mov_i32(tmp2, tmp);
1897 switch ((insn >> 22) & 3) {
1898 case 0:
1899 for (i = 0; i < 7; i ++) {
1900 tcg_gen_shli_i32(tmp2, tmp2, 4);
1901 tcg_gen_or_i32(tmp, tmp, tmp2);
1902 }
1903 break;
1904 case 1:
1905 for (i = 0; i < 3; i ++) {
1906 tcg_gen_shli_i32(tmp2, tmp2, 8);
1907 tcg_gen_or_i32(tmp, tmp, tmp2);
1908 }
1909 break;
1910 case 2:
1911 tcg_gen_shli_i32(tmp2, tmp2, 16);
1912 tcg_gen_or_i32(tmp, tmp, tmp2);
1913 break;
1914 }
1915 gen_set_nzcv(tmp);
1916 tcg_temp_free_i32(tmp2);
1917 tcg_temp_free_i32(tmp);
1918 break;
1919 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
1920 rd = (insn >> 12) & 0xf;
1921 rd0 = (insn >> 16) & 0xf;
1922 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1923 return 1;
1924 gen_op_iwmmxt_movq_M0_wRn(rd0);
1925 tmp = tcg_temp_new_i32();
1926 switch ((insn >> 22) & 3) {
1927 case 0:
1928 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1929 break;
1930 case 1:
1931 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
1932 break;
1933 case 2:
1934 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
1935 break;
1936 }
1937 store_reg(s, rd, tmp);
1938 break;
1939 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
1940 case 0x906: case 0xb06: case 0xd06: case 0xf06:
1941 wrd = (insn >> 12) & 0xf;
1942 rd0 = (insn >> 16) & 0xf;
1943 rd1 = (insn >> 0) & 0xf;
1944 gen_op_iwmmxt_movq_M0_wRn(rd0);
1945 switch ((insn >> 22) & 3) {
1946 case 0:
1947 if (insn & (1 << 21))
1948 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
1949 else
1950 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
1951 break;
1952 case 1:
1953 if (insn & (1 << 21))
1954 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
1955 else
1956 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
1957 break;
1958 case 2:
1959 if (insn & (1 << 21))
1960 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
1961 else
1962 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
1963 break;
1964 case 3:
1965 return 1;
1966 }
1967 gen_op_iwmmxt_movq_wRn_M0(wrd);
1968 gen_op_iwmmxt_set_mup();
1969 gen_op_iwmmxt_set_cup();
1970 break;
1971 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
1972 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
1973 wrd = (insn >> 12) & 0xf;
1974 rd0 = (insn >> 16) & 0xf;
1975 gen_op_iwmmxt_movq_M0_wRn(rd0);
1976 switch ((insn >> 22) & 3) {
1977 case 0:
1978 if (insn & (1 << 21))
1979 gen_op_iwmmxt_unpacklsb_M0();
1980 else
1981 gen_op_iwmmxt_unpacklub_M0();
1982 break;
1983 case 1:
1984 if (insn & (1 << 21))
1985 gen_op_iwmmxt_unpacklsw_M0();
1986 else
1987 gen_op_iwmmxt_unpackluw_M0();
1988 break;
1989 case 2:
1990 if (insn & (1 << 21))
1991 gen_op_iwmmxt_unpacklsl_M0();
1992 else
1993 gen_op_iwmmxt_unpacklul_M0();
1994 break;
1995 case 3:
1996 return 1;
1997 }
1998 gen_op_iwmmxt_movq_wRn_M0(wrd);
1999 gen_op_iwmmxt_set_mup();
2000 gen_op_iwmmxt_set_cup();
2001 break;
2002 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2003 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2004 wrd = (insn >> 12) & 0xf;
2005 rd0 = (insn >> 16) & 0xf;
2006 gen_op_iwmmxt_movq_M0_wRn(rd0);
2007 switch ((insn >> 22) & 3) {
2008 case 0:
2009 if (insn & (1 << 21))
2010 gen_op_iwmmxt_unpackhsb_M0();
2011 else
2012 gen_op_iwmmxt_unpackhub_M0();
2013 break;
2014 case 1:
2015 if (insn & (1 << 21))
2016 gen_op_iwmmxt_unpackhsw_M0();
2017 else
2018 gen_op_iwmmxt_unpackhuw_M0();
2019 break;
2020 case 2:
2021 if (insn & (1 << 21))
2022 gen_op_iwmmxt_unpackhsl_M0();
2023 else
2024 gen_op_iwmmxt_unpackhul_M0();
2025 break;
2026 case 3:
2027 return 1;
2028 }
2029 gen_op_iwmmxt_movq_wRn_M0(wrd);
2030 gen_op_iwmmxt_set_mup();
2031 gen_op_iwmmxt_set_cup();
2032 break;
2033 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2034 case 0x214: case 0x614: case 0xa14: case 0xe14:
2035 if (((insn >> 22) & 3) == 0)
2036 return 1;
2037 wrd = (insn >> 12) & 0xf;
2038 rd0 = (insn >> 16) & 0xf;
2039 gen_op_iwmmxt_movq_M0_wRn(rd0);
2040 tmp = tcg_temp_new_i32();
2041 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2042 tcg_temp_free_i32(tmp);
2043 return 1;
2044 }
2045 switch ((insn >> 22) & 3) {
2046 case 1:
2047 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2048 break;
2049 case 2:
2050 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2051 break;
2052 case 3:
2053 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2054 break;
2055 }
2056 tcg_temp_free_i32(tmp);
2057 gen_op_iwmmxt_movq_wRn_M0(wrd);
2058 gen_op_iwmmxt_set_mup();
2059 gen_op_iwmmxt_set_cup();
2060 break;
2061 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2062 case 0x014: case 0x414: case 0x814: case 0xc14:
2063 if (((insn >> 22) & 3) == 0)
2064 return 1;
2065 wrd = (insn >> 12) & 0xf;
2066 rd0 = (insn >> 16) & 0xf;
2067 gen_op_iwmmxt_movq_M0_wRn(rd0);
2068 tmp = tcg_temp_new_i32();
2069 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2070 tcg_temp_free_i32(tmp);
2071 return 1;
2072 }
2073 switch ((insn >> 22) & 3) {
2074 case 1:
2075 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2076 break;
2077 case 2:
2078 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2079 break;
2080 case 3:
2081 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2082 break;
2083 }
2084 tcg_temp_free_i32(tmp);
2085 gen_op_iwmmxt_movq_wRn_M0(wrd);
2086 gen_op_iwmmxt_set_mup();
2087 gen_op_iwmmxt_set_cup();
2088 break;
2089 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2090 case 0x114: case 0x514: case 0x914: case 0xd14:
2091 if (((insn >> 22) & 3) == 0)
2092 return 1;
2093 wrd = (insn >> 12) & 0xf;
2094 rd0 = (insn >> 16) & 0xf;
2095 gen_op_iwmmxt_movq_M0_wRn(rd0);
2096 tmp = tcg_temp_new_i32();
2097 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2098 tcg_temp_free_i32(tmp);
2099 return 1;
2100 }
2101 switch ((insn >> 22) & 3) {
2102 case 1:
2103 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2104 break;
2105 case 2:
2106 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2107 break;
2108 case 3:
2109 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2110 break;
2111 }
2112 tcg_temp_free_i32(tmp);
2113 gen_op_iwmmxt_movq_wRn_M0(wrd);
2114 gen_op_iwmmxt_set_mup();
2115 gen_op_iwmmxt_set_cup();
2116 break;
2117 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2118 case 0x314: case 0x714: case 0xb14: case 0xf14:
2119 if (((insn >> 22) & 3) == 0)
2120 return 1;
2121 wrd = (insn >> 12) & 0xf;
2122 rd0 = (insn >> 16) & 0xf;
2123 gen_op_iwmmxt_movq_M0_wRn(rd0);
2124 tmp = tcg_temp_new_i32();
2125 switch ((insn >> 22) & 3) {
2126 case 1:
2127 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2128 tcg_temp_free_i32(tmp);
2129 return 1;
2130 }
2131 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2132 break;
2133 case 2:
2134 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2135 tcg_temp_free_i32(tmp);
2136 return 1;
2137 }
2138 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2139 break;
2140 case 3:
2141 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2142 tcg_temp_free_i32(tmp);
2143 return 1;
2144 }
2145 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2146 break;
2147 }
2148 tcg_temp_free_i32(tmp);
2149 gen_op_iwmmxt_movq_wRn_M0(wrd);
2150 gen_op_iwmmxt_set_mup();
2151 gen_op_iwmmxt_set_cup();
2152 break;
2153 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2154 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2155 wrd = (insn >> 12) & 0xf;
2156 rd0 = (insn >> 16) & 0xf;
2157 rd1 = (insn >> 0) & 0xf;
2158 gen_op_iwmmxt_movq_M0_wRn(rd0);
2159 switch ((insn >> 22) & 3) {
2160 case 0:
2161 if (insn & (1 << 21))
2162 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2163 else
2164 gen_op_iwmmxt_minub_M0_wRn(rd1);
2165 break;
2166 case 1:
2167 if (insn & (1 << 21))
2168 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2169 else
2170 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2171 break;
2172 case 2:
2173 if (insn & (1 << 21))
2174 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2175 else
2176 gen_op_iwmmxt_minul_M0_wRn(rd1);
2177 break;
2178 case 3:
2179 return 1;
2180 }
2181 gen_op_iwmmxt_movq_wRn_M0(wrd);
2182 gen_op_iwmmxt_set_mup();
2183 break;
2184 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2185 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2186 wrd = (insn >> 12) & 0xf;
2187 rd0 = (insn >> 16) & 0xf;
2188 rd1 = (insn >> 0) & 0xf;
2189 gen_op_iwmmxt_movq_M0_wRn(rd0);
2190 switch ((insn >> 22) & 3) {
2191 case 0:
2192 if (insn & (1 << 21))
2193 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2194 else
2195 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2196 break;
2197 case 1:
2198 if (insn & (1 << 21))
2199 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2200 else
2201 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2202 break;
2203 case 2:
2204 if (insn & (1 << 21))
2205 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2206 else
2207 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2208 break;
2209 case 3:
2210 return 1;
2211 }
2212 gen_op_iwmmxt_movq_wRn_M0(wrd);
2213 gen_op_iwmmxt_set_mup();
2214 break;
2215 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2216 case 0x402: case 0x502: case 0x602: case 0x702:
2217 wrd = (insn >> 12) & 0xf;
2218 rd0 = (insn >> 16) & 0xf;
2219 rd1 = (insn >> 0) & 0xf;
2220 gen_op_iwmmxt_movq_M0_wRn(rd0);
2221 tmp = tcg_const_i32((insn >> 20) & 3);
2222 iwmmxt_load_reg(cpu_V1, rd1);
2223 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2224 tcg_temp_free_i32(tmp);
2225 gen_op_iwmmxt_movq_wRn_M0(wrd);
2226 gen_op_iwmmxt_set_mup();
2227 break;
2228 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2229 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2230 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2231 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2232 wrd = (insn >> 12) & 0xf;
2233 rd0 = (insn >> 16) & 0xf;
2234 rd1 = (insn >> 0) & 0xf;
2235 gen_op_iwmmxt_movq_M0_wRn(rd0);
2236 switch ((insn >> 20) & 0xf) {
2237 case 0x0:
2238 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2239 break;
2240 case 0x1:
2241 gen_op_iwmmxt_subub_M0_wRn(rd1);
2242 break;
2243 case 0x3:
2244 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2245 break;
2246 case 0x4:
2247 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2248 break;
2249 case 0x5:
2250 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2251 break;
2252 case 0x7:
2253 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2254 break;
2255 case 0x8:
2256 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2257 break;
2258 case 0x9:
2259 gen_op_iwmmxt_subul_M0_wRn(rd1);
2260 break;
2261 case 0xb:
2262 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2263 break;
2264 default:
2265 return 1;
2266 }
2267 gen_op_iwmmxt_movq_wRn_M0(wrd);
2268 gen_op_iwmmxt_set_mup();
2269 gen_op_iwmmxt_set_cup();
2270 break;
2271 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2272 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2273 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2274 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2275 wrd = (insn >> 12) & 0xf;
2276 rd0 = (insn >> 16) & 0xf;
2277 gen_op_iwmmxt_movq_M0_wRn(rd0);
2278 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2279 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2280 tcg_temp_free_i32(tmp);
2281 gen_op_iwmmxt_movq_wRn_M0(wrd);
2282 gen_op_iwmmxt_set_mup();
2283 gen_op_iwmmxt_set_cup();
2284 break;
2285 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2286 case 0x418: case 0x518: case 0x618: case 0x718:
2287 case 0x818: case 0x918: case 0xa18: case 0xb18:
2288 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2289 wrd = (insn >> 12) & 0xf;
2290 rd0 = (insn >> 16) & 0xf;
2291 rd1 = (insn >> 0) & 0xf;
2292 gen_op_iwmmxt_movq_M0_wRn(rd0);
2293 switch ((insn >> 20) & 0xf) {
2294 case 0x0:
2295 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2296 break;
2297 case 0x1:
2298 gen_op_iwmmxt_addub_M0_wRn(rd1);
2299 break;
2300 case 0x3:
2301 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2302 break;
2303 case 0x4:
2304 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2305 break;
2306 case 0x5:
2307 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2308 break;
2309 case 0x7:
2310 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2311 break;
2312 case 0x8:
2313 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2314 break;
2315 case 0x9:
2316 gen_op_iwmmxt_addul_M0_wRn(rd1);
2317 break;
2318 case 0xb:
2319 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2320 break;
2321 default:
2322 return 1;
2323 }
2324 gen_op_iwmmxt_movq_wRn_M0(wrd);
2325 gen_op_iwmmxt_set_mup();
2326 gen_op_iwmmxt_set_cup();
2327 break;
2328 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2329 case 0x408: case 0x508: case 0x608: case 0x708:
2330 case 0x808: case 0x908: case 0xa08: case 0xb08:
2331 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2332 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2333 return 1;
2334 wrd = (insn >> 12) & 0xf;
2335 rd0 = (insn >> 16) & 0xf;
2336 rd1 = (insn >> 0) & 0xf;
2337 gen_op_iwmmxt_movq_M0_wRn(rd0);
2338 switch ((insn >> 22) & 3) {
2339 case 1:
2340 if (insn & (1 << 21))
2341 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2342 else
2343 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2344 break;
2345 case 2:
2346 if (insn & (1 << 21))
2347 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2348 else
2349 gen_op_iwmmxt_packul_M0_wRn(rd1);
2350 break;
2351 case 3:
2352 if (insn & (1 << 21))
2353 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2354 else
2355 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2356 break;
2357 }
2358 gen_op_iwmmxt_movq_wRn_M0(wrd);
2359 gen_op_iwmmxt_set_mup();
2360 gen_op_iwmmxt_set_cup();
2361 break;
2362 case 0x201: case 0x203: case 0x205: case 0x207:
2363 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2364 case 0x211: case 0x213: case 0x215: case 0x217:
2365 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2366 wrd = (insn >> 5) & 0xf;
2367 rd0 = (insn >> 12) & 0xf;
2368 rd1 = (insn >> 0) & 0xf;
2369 if (rd0 == 0xf || rd1 == 0xf)
2370 return 1;
2371 gen_op_iwmmxt_movq_M0_wRn(wrd);
2372 tmp = load_reg(s, rd0);
2373 tmp2 = load_reg(s, rd1);
2374 switch ((insn >> 16) & 0xf) {
2375 case 0x0: /* TMIA */
2376 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2377 break;
2378 case 0x8: /* TMIAPH */
2379 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2380 break;
2381 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2382 if (insn & (1 << 16))
2383 tcg_gen_shri_i32(tmp, tmp, 16);
2384 if (insn & (1 << 17))
2385 tcg_gen_shri_i32(tmp2, tmp2, 16);
2386 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2387 break;
2388 default:
2389 tcg_temp_free_i32(tmp2);
2390 tcg_temp_free_i32(tmp);
2391 return 1;
2392 }
2393 tcg_temp_free_i32(tmp2);
2394 tcg_temp_free_i32(tmp);
2395 gen_op_iwmmxt_movq_wRn_M0(wrd);
2396 gen_op_iwmmxt_set_mup();
2397 break;
2398 default:
2399 return 1;
2400 }
2401
2402 return 0;
2403 }
2404
2405 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2406 (i.e. an undefined instruction). */
2407 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2408 {
2409 int acc, rd0, rd1, rdhi, rdlo;
2410 TCGv_i32 tmp, tmp2;
2411
2412 if ((insn & 0x0ff00f10) == 0x0e200010) {
2413 /* Multiply with Internal Accumulate Format */
2414 rd0 = (insn >> 12) & 0xf;
2415 rd1 = insn & 0xf;
2416 acc = (insn >> 5) & 7;
2417
2418 if (acc != 0)
2419 return 1;
2420
2421 tmp = load_reg(s, rd0);
2422 tmp2 = load_reg(s, rd1);
2423 switch ((insn >> 16) & 0xf) {
2424 case 0x0: /* MIA */
2425 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2426 break;
2427 case 0x8: /* MIAPH */
2428 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2429 break;
2430 case 0xc: /* MIABB */
2431 case 0xd: /* MIABT */
2432 case 0xe: /* MIATB */
2433 case 0xf: /* MIATT */
2434 if (insn & (1 << 16))
2435 tcg_gen_shri_i32(tmp, tmp, 16);
2436 if (insn & (1 << 17))
2437 tcg_gen_shri_i32(tmp2, tmp2, 16);
2438 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2439 break;
2440 default:
2441 return 1;
2442 }
2443 tcg_temp_free_i32(tmp2);
2444 tcg_temp_free_i32(tmp);
2445
2446 gen_op_iwmmxt_movq_wRn_M0(acc);
2447 return 0;
2448 }
2449
2450 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2451 /* Internal Accumulator Access Format */
2452 rdhi = (insn >> 16) & 0xf;
2453 rdlo = (insn >> 12) & 0xf;
2454 acc = insn & 7;
2455
2456 if (acc != 0)
2457 return 1;
2458
2459 if (insn & ARM_CP_RW_BIT) { /* MRA */
2460 iwmmxt_load_reg(cpu_V0, acc);
2461 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2462 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
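/* acc0 is a 40-bit accumulator, so only bits [39:32] are kept in rdhi. */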
2463 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2464 } else { /* MAR */
2465 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2466 iwmmxt_store_reg(cpu_V0, acc);
2467 }
2468 return 0;
2469 }
2470
2471 return 1;
2472 }
2473
2474 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2475 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2476 if (dc_isar_feature(aa32_simd_r32, s)) { \
2477 reg = (((insn) >> (bigbit)) & 0x0f) \
2478 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2479 } else { \
2480 if (insn & (1 << (smallbit))) \
2481 return 1; \
2482 reg = ((insn) >> (bigbit)) & 0x0f; \
2483 }} while (0)
2484
2485 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2486 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2487 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
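/*
 * Example: VFP_DREG_D yields insn[15:12] with insn[22] as bit 4 when the CPU
 * has the 32 double-register set (D0..D31); otherwise insn[22] must be zero
 * and only D0..D15 are addressable.
 */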
2488
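/*
 * Direct block chaining via goto_tb is only used when the destination stays
 * on the same guest page as either the start of this TB or the current
 * instruction, so a direct jump never escapes the pages the TB covers.
 */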
2489 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2490 {
2491 #ifndef CONFIG_USER_ONLY
2492 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2493 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2494 #else
2495 return true;
2496 #endif
2497 }
2498
2499 static void gen_goto_ptr(void)
2500 {
2501 tcg_gen_lookup_and_goto_ptr();
2502 }
2503
2504 /* This will end the TB but doesn't guarantee we'll return to
2505 * cpu_loop_exec. Any live exit_requests will be processed as we
2506 * enter the next TB.
2507 */
2508 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2509 {
2510 if (use_goto_tb(s, dest)) {
2511 tcg_gen_goto_tb(n);
2512 gen_set_pc_im(s, dest);
2513 tcg_gen_exit_tb(s->base.tb, n);
2514 } else {
2515 gen_set_pc_im(s, dest);
2516 gen_goto_ptr();
2517 }
2518 s->base.is_jmp = DISAS_NORETURN;
2519 }
2520
2521 static inline void gen_jmp (DisasContext *s, uint32_t dest)
2522 {
2523 if (unlikely(is_singlestepping(s))) {
2524 /* An indirect jump so that we still trigger the debug exception. */
2525 gen_set_pc_im(s, dest);
2526 s->base.is_jmp = DISAS_JUMP;
2527 } else {
2528 gen_goto_tb(s, 0, dest);
2529 }
2530 }
2531
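/* Signed 16x16->32 multiply for the SMULxy family: x and y select the top (1)
 * or bottom (0) halfword of t0 and t1 respectively. */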
2532 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2533 {
2534 if (x)
2535 tcg_gen_sari_i32(t0, t0, 16);
2536 else
2537 gen_sxth(t0);
2538 if (y)
2539 tcg_gen_sari_i32(t1, t1, 16);
2540 else
2541 gen_sxth(t1);
2542 tcg_gen_mul_i32(t0, t0, t1);
2543 }
2544
2545 /* Return the mask of PSR bits set by a MSR instruction. */
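/* For example, MSR CPSR_fc has a field mask of 0b1001, giving 0xff0000ff here
 * before the validity and privilege masking below. */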
2546 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2547 {
2548 uint32_t mask = 0;
2549
2550 if (flags & (1 << 0)) {
2551 mask |= 0xff;
2552 }
2553 if (flags & (1 << 1)) {
2554 mask |= 0xff00;
2555 }
2556 if (flags & (1 << 2)) {
2557 mask |= 0xff0000;
2558 }
2559 if (flags & (1 << 3)) {
2560 mask |= 0xff000000;
2561 }
2562
2563 /* Mask out undefined and reserved bits. */
2564 mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2565
2566 /* Mask out execution state. */
2567 if (!spsr) {
2568 mask &= ~CPSR_EXEC;
2569 }
2570
2571 /* Mask out privileged bits. */
2572 if (IS_USER(s)) {
2573 mask &= CPSR_USER;
2574 }
2575 return mask;
2576 }
2577
2578 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2579 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2580 {
2581 TCGv_i32 tmp;
2582 if (spsr) {
2583 /* ??? This is also undefined in system mode. */
2584 if (IS_USER(s))
2585 return 1;
2586
2587 tmp = load_cpu_field(spsr);
2588 tcg_gen_andi_i32(tmp, tmp, ~mask);
2589 tcg_gen_andi_i32(t0, t0, mask);
2590 tcg_gen_or_i32(tmp, tmp, t0);
2591 store_cpu_field(tmp, spsr);
2592 } else {
2593 gen_set_cpsr(t0, mask);
2594 }
2595 tcg_temp_free_i32(t0);
2596 gen_lookup_tb(s);
2597 return 0;
2598 }
2599
2600 /* Returns nonzero if access to the PSR is not permitted. */
2601 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2602 {
2603 TCGv_i32 tmp;
2604 tmp = tcg_temp_new_i32();
2605 tcg_gen_movi_i32(tmp, val);
2606 return gen_set_psr(s, mask, spsr, tmp);
2607 }
2608
2609 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2610 int *tgtmode, int *regno)
2611 {
2612 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2613 * the target mode and register number, and identify the various
2614 * unpredictable cases.
2615 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2616 * + executed in user mode
2617 * + using R15 as the src/dest register
2618 * + accessing an unimplemented register
2619 * + accessing a register that's inaccessible at the current PL/security state
2620 * + accessing a register that you could access with a different insn
2621 * We choose to UNDEF in all these cases.
2622 * Since we don't know which of the various AArch32 modes we are in
2623 * we have to defer some checks to runtime.
2624 * Accesses to Monitor mode registers from Secure EL1 (which implies
2625 * that EL3 is AArch64) must trap to EL3.
2626 *
2627 * If the access checks fail this function will emit code to take
2628 * an exception and return false. Otherwise it will return true,
2629 * and set *tgtmode and *regno appropriately.
2630 */
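/*
 * Example: MRS r0, SPSR_fiq has r == 1 and sysm == 0xe, and decodes to
 * tgtmode ARM_CPU_MODE_FIQ with regno 16.
 */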
2631 int exc_target = default_exception_el(s);
2632
2633 /* These instructions are present only in ARMv8, or in ARMv7 with the
2634 * Virtualization Extensions.
2635 */
2636 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2637 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2638 goto undef;
2639 }
2640
2641 if (IS_USER(s) || rn == 15) {
2642 goto undef;
2643 }
2644
2645 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2646 * of registers into (r, sysm).
2647 */
2648 if (r) {
2649 /* SPSRs for other modes */
2650 switch (sysm) {
2651 case 0xe: /* SPSR_fiq */
2652 *tgtmode = ARM_CPU_MODE_FIQ;
2653 break;
2654 case 0x10: /* SPSR_irq */
2655 *tgtmode = ARM_CPU_MODE_IRQ;
2656 break;
2657 case 0x12: /* SPSR_svc */
2658 *tgtmode = ARM_CPU_MODE_SVC;
2659 break;
2660 case 0x14: /* SPSR_abt */
2661 *tgtmode = ARM_CPU_MODE_ABT;
2662 break;
2663 case 0x16: /* SPSR_und */
2664 *tgtmode = ARM_CPU_MODE_UND;
2665 break;
2666 case 0x1c: /* SPSR_mon */
2667 *tgtmode = ARM_CPU_MODE_MON;
2668 break;
2669 case 0x1e: /* SPSR_hyp */
2670 *tgtmode = ARM_CPU_MODE_HYP;
2671 break;
2672 default: /* unallocated */
2673 goto undef;
2674 }
2675 /* We arbitrarily assign SPSR a register number of 16. */
2676 *regno = 16;
2677 } else {
2678 /* general purpose registers for other modes */
2679 switch (sysm) {
2680 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2681 *tgtmode = ARM_CPU_MODE_USR;
2682 *regno = sysm + 8;
2683 break;
2684 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2685 *tgtmode = ARM_CPU_MODE_FIQ;
2686 *regno = sysm;
2687 break;
2688 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2689 *tgtmode = ARM_CPU_MODE_IRQ;
2690 *regno = sysm & 1 ? 13 : 14;
2691 break;
2692 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2693 *tgtmode = ARM_CPU_MODE_SVC;
2694 *regno = sysm & 1 ? 13 : 14;
2695 break;
2696 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2697 *tgtmode = ARM_CPU_MODE_ABT;
2698 *regno = sysm & 1 ? 13 : 14;
2699 break;
2700 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2701 *tgtmode = ARM_CPU_MODE_UND;
2702 *regno = sysm & 1 ? 13 : 14;
2703 break;
2704 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2705 *tgtmode = ARM_CPU_MODE_MON;
2706 *regno = sysm & 1 ? 13 : 14;
2707 break;
2708 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2709 *tgtmode = ARM_CPU_MODE_HYP;
2710 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2711 *regno = sysm & 1 ? 13 : 17;
2712 break;
2713 default: /* unallocated */
2714 goto undef;
2715 }
2716 }
2717
2718 /* Catch the 'accessing inaccessible register' cases we can detect
2719 * at translate time.
2720 */
2721 switch (*tgtmode) {
2722 case ARM_CPU_MODE_MON:
2723 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2724 goto undef;
2725 }
2726 if (s->current_el == 1) {
2727 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2728 * then accesses to Mon registers trap to EL3
2729 */
2730 exc_target = 3;
2731 goto undef;
2732 }
2733 break;
2734 case ARM_CPU_MODE_HYP:
2735 /*
2736 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2737 * (and so we can forbid accesses from EL2 or below). elr_hyp
2738 * can be accessed also from Hyp mode, so forbid accesses from
2739 * EL0 or EL1.
2740 */
2741 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2742 (s->current_el < 3 && *regno != 17)) {
2743 goto undef;
2744 }
2745 break;
2746 default:
2747 break;
2748 }
2749
2750 return true;
2751
2752 undef:
2753 /* If we get here then some access check did not pass */
2754 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2755 syn_uncategorized(), exc_target);
2756 return false;
2757 }
2758
2759 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2760 {
2761 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2762 int tgtmode = 0, regno = 0;
2763
2764 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2765 return;
2766 }
2767
2768 /* Sync state because msr_banked() can raise exceptions */
2769 gen_set_condexec(s);
2770 gen_set_pc_im(s, s->pc_curr);
2771 tcg_reg = load_reg(s, rn);
2772 tcg_tgtmode = tcg_const_i32(tgtmode);
2773 tcg_regno = tcg_const_i32(regno);
2774 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2775 tcg_temp_free_i32(tcg_tgtmode);
2776 tcg_temp_free_i32(tcg_regno);
2777 tcg_temp_free_i32(tcg_reg);
2778 s->base.is_jmp = DISAS_UPDATE_EXIT;
2779 }
2780
2781 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2782 {
2783 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2784 int tgtmode = 0, regno = 0;
2785
2786 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2787 return;
2788 }
2789
2790 /* Sync state because mrs_banked() can raise exceptions */
2791 gen_set_condexec(s);
2792 gen_set_pc_im(s, s->pc_curr);
2793 tcg_reg = tcg_temp_new_i32();
2794 tcg_tgtmode = tcg_const_i32(tgtmode);
2795 tcg_regno = tcg_const_i32(regno);
2796 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2797 tcg_temp_free_i32(tcg_tgtmode);
2798 tcg_temp_free_i32(tcg_regno);
2799 store_reg(s, rn, tcg_reg);
2800 s->base.is_jmp = DISAS_UPDATE_EXIT;
2801 }
2802
2803 /* Store value to PC as for an exception return (i.e. don't
2804 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2805 * will do the masking based on the new value of the Thumb bit.
2806 */
2807 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2808 {
2809 tcg_gen_mov_i32(cpu_R[15], pc);
2810 tcg_temp_free_i32(pc);
2811 }
2812
2813 /* Generate a v6 exception return. Marks both values as dead. */
2814 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2815 {
2816 store_pc_exc_ret(s, pc);
2817 /* The cpsr_write_eret helper will mask the low bits of PC
2818 * appropriately depending on the new Thumb bit, so it must
2819 * be called after storing the new PC.
2820 */
2821 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2822 gen_io_start();
2823 }
2824 gen_helper_cpsr_write_eret(cpu_env, cpsr);
2825 tcg_temp_free_i32(cpsr);
2826 /* Must exit loop to check unmasked IRQs */
2827 s->base.is_jmp = DISAS_EXIT;
2828 }
2829
2830 /* Generate an old-style exception return. Marks pc as dead. */
2831 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2832 {
2833 gen_rfe(s, pc, load_cpu_field(spsr));
2834 }
2835
2836 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2837 uint32_t opr_sz, uint32_t max_sz,
2838 gen_helper_gvec_3_ptr *fn)
2839 {
2840 TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2841
2842 tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2843 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2844 opr_sz, max_sz, 0, fn);
2845 tcg_temp_free_ptr(qc_ptr);
2846 }
2847
2848 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2849 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2850 {
2851 static gen_helper_gvec_3_ptr * const fns[2] = {
2852 gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2853 };
2854 tcg_debug_assert(vece >= 1 && vece <= 2);
2855 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2856 }
2857
2858 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2859 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2860 {
2861 static gen_helper_gvec_3_ptr * const fns[2] = {
2862 gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2863 };
2864 tcg_debug_assert(vece >= 1 && vece <= 2);
2865 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2866 }
2867
2868 #define GEN_CMP0(NAME, COND) \
2869 static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
2870 { \
2871 tcg_gen_setcondi_i32(COND, d, a, 0); \
2872 tcg_gen_neg_i32(d, d); \
2873 } \
2874 static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
2875 { \
2876 tcg_gen_setcondi_i64(COND, d, a, 0); \
2877 tcg_gen_neg_i64(d, d); \
2878 } \
2879 static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
2880 { \
2881 TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
2882 tcg_gen_cmp_vec(COND, vece, d, a, zero); \
2883 tcg_temp_free_vec(zero); \
2884 } \
2885 void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
2886 uint32_t opr_sz, uint32_t max_sz) \
2887 { \
2888 const GVecGen2 op[4] = { \
2889 { .fno = gen_helper_gvec_##NAME##0_b, \
2890 .fniv = gen_##NAME##0_vec, \
2891 .opt_opc = vecop_list_cmp, \
2892 .vece = MO_8 }, \
2893 { .fno = gen_helper_gvec_##NAME##0_h, \
2894 .fniv = gen_##NAME##0_vec, \
2895 .opt_opc = vecop_list_cmp, \
2896 .vece = MO_16 }, \
2897 { .fni4 = gen_##NAME##0_i32, \
2898 .fniv = gen_##NAME##0_vec, \
2899 .opt_opc = vecop_list_cmp, \
2900 .vece = MO_32 }, \
2901 { .fni8 = gen_##NAME##0_i64, \
2902 .fniv = gen_##NAME##0_vec, \
2903 .opt_opc = vecop_list_cmp, \
2904 .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
2905 .vece = MO_64 }, \
2906 }; \
2907 tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
2908 }
2909
2910 static const TCGOpcode vecop_list_cmp[] = {
2911 INDEX_op_cmp_vec, 0
2912 };
2913
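/* Expand the integer and vector compare-against-zero helpers: each element of
 * the result is all ones (-1) when the condition holds and zero otherwise. */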
2914 GEN_CMP0(ceq, TCG_COND_EQ)
2915 GEN_CMP0(cle, TCG_COND_LE)
2916 GEN_CMP0(cge, TCG_COND_GE)
2917 GEN_CMP0(clt, TCG_COND_LT)
2918 GEN_CMP0(cgt, TCG_COND_GT)
2919
2920 #undef GEN_CMP0
2921
2922 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2923 {
2924 tcg_gen_vec_sar8i_i64(a, a, shift);
2925 tcg_gen_vec_add8_i64(d, d, a);
2926 }
2927
2928 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2929 {
2930 tcg_gen_vec_sar16i_i64(a, a, shift);
2931 tcg_gen_vec_add16_i64(d, d, a);
2932 }
2933
2934 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
2935 {
2936 tcg_gen_sari_i32(a, a, shift);
2937 tcg_gen_add_i32(d, d, a);
2938 }
2939
2940 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2941 {
2942 tcg_gen_sari_i64(a, a, shift);
2943 tcg_gen_add_i64(d, d, a);
2944 }
2945
2946 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
2947 {
2948 tcg_gen_sari_vec(vece, a, a, sh);
2949 tcg_gen_add_vec(vece, d, d, a);
2950 }
2951
2952 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
2953 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
2954 {
2955 static const TCGOpcode vecop_list[] = {
2956 INDEX_op_sari_vec, INDEX_op_add_vec, 0
2957 };
2958 static const GVecGen2i ops[4] = {
2959 { .fni8 = gen_ssra8_i64,
2960 .fniv = gen_ssra_vec,
2961 .fno = gen_helper_gvec_ssra_b,
2962 .load_dest = true,
2963 .opt_opc = vecop_list,
2964 .vece = MO_8 },
2965 { .fni8 = gen_ssra16_i64,
2966 .fniv = gen_ssra_vec,
2967 .fno = gen_helper_gvec_ssra_h,
2968 .load_dest = true,
2969 .opt_opc = vecop_list,
2970 .vece = MO_16 },
2971 { .fni4 = gen_ssra32_i32,
2972 .fniv = gen_ssra_vec,
2973 .fno = gen_helper_gvec_ssra_s,
2974 .load_dest = true,
2975 .opt_opc = vecop_list,
2976 .vece = MO_32 },
2977 { .fni8 = gen_ssra64_i64,
2978 .fniv = gen_ssra_vec,
2979 .fno = gen_helper_gvec_ssra_d,
2980 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
2981 .opt_opc = vecop_list,
2982 .load_dest = true,
2983 .vece = MO_64 },
2984 };
2985
2986 /* tszimm encoding produces immediates in the range [1..esize]. */
2987 tcg_debug_assert(shift > 0);
2988 tcg_debug_assert(shift <= (8 << vece));
2989
2990 /*
2991 * Shifts larger than the element size are architecturally valid.
2992 * For a signed value this results in all sign bits, so clamp the shift.
2993 */
2994 shift = MIN(shift, (8 << vece) - 1);
2995 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
2996 }
2997
2998 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2999 {
3000 tcg_gen_vec_shr8i_i64(a, a, shift);
3001 tcg_gen_vec_add8_i64(d, d, a);
3002 }
3003
3004 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3005 {
3006 tcg_gen_vec_shr16i_i64(a, a, shift);
3007 tcg_gen_vec_add16_i64(d, d, a);
3008 }
3009
3010 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3011 {
3012 tcg_gen_shri_i32(a, a, shift);
3013 tcg_gen_add_i32(d, d, a);
3014 }
3015
3016 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3017 {
3018 tcg_gen_shri_i64(a, a, shift);
3019 tcg_gen_add_i64(d, d, a);
3020 }
3021
3022 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3023 {
3024 tcg_gen_shri_vec(vece, a, a, sh);
3025 tcg_gen_add_vec(vece, d, d, a);
3026 }
3027
3028 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3029 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3030 {
3031 static const TCGOpcode vecop_list[] = {
3032 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3033 };
3034 static const GVecGen2i ops[4] = {
3035 { .fni8 = gen_usra8_i64,
3036 .fniv = gen_usra_vec,
3037 .fno = gen_helper_gvec_usra_b,
3038 .load_dest = true,
3039 .opt_opc = vecop_list,
3040 .vece = MO_8, },
3041 { .fni8 = gen_usra16_i64,
3042 .fniv = gen_usra_vec,
3043 .fno = gen_helper_gvec_usra_h,
3044 .load_dest = true,
3045 .opt_opc = vecop_list,
3046 .vece = MO_16, },
3047 { .fni4 = gen_usra32_i32,
3048 .fniv = gen_usra_vec,
3049 .fno = gen_helper_gvec_usra_s,
3050 .load_dest = true,
3051 .opt_opc = vecop_list,
3052 .vece = MO_32, },
3053 { .fni8 = gen_usra64_i64,
3054 .fniv = gen_usra_vec,
3055 .fno = gen_helper_gvec_usra_d,
3056 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3057 .load_dest = true,
3058 .opt_opc = vecop_list,
3059 .vece = MO_64, },
3060 };
3061
3062 /* tszimm encoding produces immediates in the range [1..esize]. */
3063 tcg_debug_assert(shift > 0);
3064 tcg_debug_assert(shift <= (8 << vece));
3065
3066 /*
3067 * Shifts larger than the element size are architecturally valid.
3068 * For an unsigned value this results in all zeros as input to the accumulate, i.e. a nop.
3069 */
3070 if (shift < (8 << vece)) {
3071 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3072 } else {
3073 /* Nop, but we do need to clear the tail. */
3074 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3075 }
3076 }
3077
3078 /*
3079 * Shift one less than the requested amount, and the low bit is
3080 * the rounding bit. For the 8 and 16-bit operations, because we
3081 * mask the low bit, we can perform a normal integer shift instead
3082 * of a vector shift.
3083 */
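/*
 * Worked example: an 8-bit SRSHR #3 of -20 has rounding bit 1 and gives
 * (-20 >> 3) + 1 == -3 + 1 == -2, i.e. -20/8 rounded to nearest.
 */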
3084 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3085 {
3086 TCGv_i64 t = tcg_temp_new_i64();
3087
3088 tcg_gen_shri_i64(t, a, sh - 1);
3089 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3090 tcg_gen_vec_sar8i_i64(d, a, sh);
3091 tcg_gen_vec_add8_i64(d, d, t);
3092 tcg_temp_free_i64(t);
3093 }
3094
3095 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3096 {
3097 TCGv_i64 t = tcg_temp_new_i64();
3098
3099 tcg_gen_shri_i64(t, a, sh - 1);
3100 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3101 tcg_gen_vec_sar16i_i64(d, a, sh);
3102 tcg_gen_vec_add16_i64(d, d, t);
3103 tcg_temp_free_i64(t);
3104 }
3105
3106 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3107 {
3108 TCGv_i32 t = tcg_temp_new_i32();
3109
3110 tcg_gen_extract_i32(t, a, sh - 1, 1);
3111 tcg_gen_sari_i32(d, a, sh);
3112 tcg_gen_add_i32(d, d, t);
3113 tcg_temp_free_i32(t);
3114 }
3115
3116 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3117 {
3118 TCGv_i64 t = tcg_temp_new_i64();
3119
3120 tcg_gen_extract_i64(t, a, sh - 1, 1);
3121 tcg_gen_sari_i64(d, a, sh);
3122 tcg_gen_add_i64(d, d, t);
3123 tcg_temp_free_i64(t);
3124 }
3125
3126 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3127 {
3128 TCGv_vec t = tcg_temp_new_vec_matching(d);
3129 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3130
3131 tcg_gen_shri_vec(vece, t, a, sh - 1);
3132 tcg_gen_dupi_vec(vece, ones, 1);
3133 tcg_gen_and_vec(vece, t, t, ones);
3134 tcg_gen_sari_vec(vece, d, a, sh);
3135 tcg_gen_add_vec(vece, d, d, t);
3136
3137 tcg_temp_free_vec(t);
3138 tcg_temp_free_vec(ones);
3139 }
3140
3141 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3142 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3143 {
3144 static const TCGOpcode vecop_list[] = {
3145 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3146 };
3147 static const GVecGen2i ops[4] = {
3148 { .fni8 = gen_srshr8_i64,
3149 .fniv = gen_srshr_vec,
3150 .fno = gen_helper_gvec_srshr_b,
3151 .opt_opc = vecop_list,
3152 .vece = MO_8 },
3153 { .fni8 = gen_srshr16_i64,
3154 .fniv = gen_srshr_vec,
3155 .fno = gen_helper_gvec_srshr_h,
3156 .opt_opc = vecop_list,
3157 .vece = MO_16 },
3158 { .fni4 = gen_srshr32_i32,
3159 .fniv = gen_srshr_vec,
3160 .fno = gen_helper_gvec_srshr_s,
3161 .opt_opc = vecop_list,
3162 .vece = MO_32 },
3163 { .fni8 = gen_srshr64_i64,
3164 .fniv = gen_srshr_vec,
3165 .fno = gen_helper_gvec_srshr_d,
3166 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3167 .opt_opc = vecop_list,
3168 .vece = MO_64 },
3169 };
3170
3171 /* tszimm encoding produces immediates in the range [1..esize] */
3172 tcg_debug_assert(shift > 0);
3173 tcg_debug_assert(shift <= (8 << vece));
3174
3175 if (shift == (8 << vece)) {
3176 /*
3177 * Shifts larger than the element size are architecturally valid.
3178 * A signed shift of that size yields all sign bits; with rounding, this produces
3179 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3180 * I.e. always zero.
3181 */
3182 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3183 } else {
3184 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3185 }
3186 }
3187
3188 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3189 {
3190 TCGv_i64 t = tcg_temp_new_i64();
3191
3192 gen_srshr8_i64(t, a, sh);
3193 tcg_gen_vec_add8_i64(d, d, t);
3194 tcg_temp_free_i64(t);
3195 }
3196
3197 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3198 {
3199 TCGv_i64 t = tcg_temp_new_i64();
3200
3201 gen_srshr16_i64(t, a, sh);
3202 tcg_gen_vec_add16_i64(d, d, t);
3203 tcg_temp_free_i64(t);
3204 }
3205
3206 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3207 {
3208 TCGv_i32 t = tcg_temp_new_i32();
3209
3210 gen_srshr32_i32(t, a, sh);
3211 tcg_gen_add_i32(d, d, t);
3212 tcg_temp_free_i32(t);
3213 }
3214
3215 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3216 {
3217 TCGv_i64 t = tcg_temp_new_i64();
3218
3219 gen_srshr64_i64(t, a, sh);
3220 tcg_gen_add_i64(d, d, t);
3221 tcg_temp_free_i64(t);
3222 }
3223
3224 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3225 {
3226 TCGv_vec t = tcg_temp_new_vec_matching(d);
3227
3228 gen_srshr_vec(vece, t, a, sh);
3229 tcg_gen_add_vec(vece, d, d, t);
3230 tcg_temp_free_vec(t);
3231 }
3232
3233 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3234 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3235 {
3236 static const TCGOpcode vecop_list[] = {
3237 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3238 };
3239 static const GVecGen2i ops[4] = {
3240 { .fni8 = gen_srsra8_i64,
3241 .fniv = gen_srsra_vec,
3242 .fno = gen_helper_gvec_srsra_b,
3243 .opt_opc = vecop_list,
3244 .load_dest = true,
3245 .vece = MO_8 },
3246 { .fni8 = gen_srsra16_i64,
3247 .fniv = gen_srsra_vec,
3248 .fno = gen_helper_gvec_srsra_h,
3249 .opt_opc = vecop_list,
3250 .load_dest = true,
3251 .vece = MO_16 },
3252 { .fni4 = gen_srsra32_i32,
3253 .fniv = gen_srsra_vec,
3254 .fno = gen_helper_gvec_srsra_s,
3255 .opt_opc = vecop_list,
3256 .load_dest = true,
3257 .vece = MO_32 },
3258 { .fni8 = gen_srsra64_i64,
3259 .fniv = gen_srsra_vec,
3260 .fno = gen_helper_gvec_srsra_d,
3261 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3262 .opt_opc = vecop_list,
3263 .load_dest = true,
3264 .vece = MO_64 },
3265 };
3266
3267 /* tszimm encoding produces immediates in the range [1..esize] */
3268 tcg_debug_assert(shift > 0);
3269 tcg_debug_assert(shift <= (8 << vece));
3270
3271 /*
3272 * Shifts larger than the element size are architecturally valid.
3273 * A signed shift of that size yields all sign bits; with rounding, this produces
3274 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3275 * I.e. always zero. With accumulation, this leaves D unchanged.
3276 */
3277 if (shift == (8 << vece)) {
3278 /* Nop, but we do need to clear the tail. */
3279 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3280 } else {
3281 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3282 }
3283 }
3284
3285 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3286 {
3287 TCGv_i64 t = tcg_temp_new_i64();
3288
3289 tcg_gen_shri_i64(t, a, sh - 1);
3290 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3291 tcg_gen_vec_shr8i_i64(d, a, sh);
3292 tcg_gen_vec_add8_i64(d, d, t);
3293 tcg_temp_free_i64(t);
3294 }
3295
3296 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3297 {
3298 TCGv_i64 t = tcg_temp_new_i64();
3299
3300 tcg_gen_shri_i64(t, a, sh - 1);
3301 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3302 tcg_gen_vec_shr16i_i64(d, a, sh);
3303 tcg_gen_vec_add16_i64(d, d, t);
3304 tcg_temp_free_i64(t);
3305 }
3306
3307 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3308 {
3309 TCGv_i32 t = tcg_temp_new_i32();
3310
3311 tcg_gen_extract_i32(t, a, sh - 1, 1);
3312 tcg_gen_shri_i32(d, a, sh);
3313 tcg_gen_add_i32(d, d, t);
3314 tcg_temp_free_i32(t);
3315 }
3316
3317 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3318 {
3319 TCGv_i64 t = tcg_temp_new_i64();
3320
3321 tcg_gen_extract_i64(t, a, sh - 1, 1);
3322 tcg_gen_shri_i64(d, a, sh);
3323 tcg_gen_add_i64(d, d, t);
3324 tcg_temp_free_i64(t);
3325 }
3326
3327 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3328 {
3329 TCGv_vec t = tcg_temp_new_vec_matching(d);
3330 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3331
3332 tcg_gen_shri_vec(vece, t, a, shift - 1);
3333 tcg_gen_dupi_vec(vece, ones, 1);
3334 tcg_gen_and_vec(vece, t, t, ones);
3335 tcg_gen_shri_vec(vece, d, a, shift);
3336 tcg_gen_add_vec(vece, d, d, t);
3337
3338 tcg_temp_free_vec(t);
3339 tcg_temp_free_vec(ones);
3340 }
3341
3342 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3343 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3344 {
3345 static const TCGOpcode vecop_list[] = {
3346 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3347 };
3348 static const GVecGen2i ops[4] = {
3349 { .fni8 = gen_urshr8_i64,
3350 .fniv = gen_urshr_vec,
3351 .fno = gen_helper_gvec_urshr_b,
3352 .opt_opc = vecop_list,
3353 .vece = MO_8 },
3354 { .fni8 = gen_urshr16_i64,
3355 .fniv = gen_urshr_vec,
3356 .fno = gen_helper_gvec_urshr_h,
3357 .opt_opc = vecop_list,
3358 .vece = MO_16 },
3359 { .fni4 = gen_urshr32_i32,
3360 .fniv = gen_urshr_vec,
3361 .fno = gen_helper_gvec_urshr_s,
3362 .opt_opc = vecop_list,
3363 .vece = MO_32 },
3364 { .fni8 = gen_urshr64_i64,
3365 .fniv = gen_urshr_vec,
3366 .fno = gen_helper_gvec_urshr_d,
3367 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3368 .opt_opc = vecop_list,
3369 .vece = MO_64 },
3370 };
3371
3372 /* tszimm encoding produces immediates in the range [1..esize] */
3373 tcg_debug_assert(shift > 0);
3374 tcg_debug_assert(shift <= (8 << vece));
3375
3376 if (shift == (8 << vece)) {
3377 /*
3378 * Shifts larger than the element size are architecturally valid.
3379 * An unsigned shift of that size yields zero; with rounding, this produces a
3380 * copy of the most significant bit.
3381 */
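/* e.g. an 8-bit URSHR #8 of 0x80 is (0x80 >> 7) == 1, the element's MSB. */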
3382 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3383 } else {
3384 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3385 }
3386 }
3387
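/*
 * For URSRA a rounded shift by esize is not a nop: the rounding bit is the
 * top bit of each element, and that is what gets accumulated below.
 */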
3388 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3389 {
3390 TCGv_i64 t = tcg_temp_new_i64();
3391
3392 if (sh == 8) {
3393 tcg_gen_vec_shr8i_i64(t, a, 7);
3394 } else {
3395 gen_urshr8_i64(t, a, sh);
3396 }
3397 tcg_gen_vec_add8_i64(d, d, t);
3398 tcg_temp_free_i64(t);
3399 }
3400
3401 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3402 {
3403 TCGv_i64 t = tcg_temp_new_i64();
3404
3405 if (sh == 16) {
3406 tcg_gen_vec_shr16i_i64(t, a, 15);
3407 } else {
3408 gen_urshr16_i64(t, a, sh);
3409 }
3410 tcg_gen_vec_add16_i64(d, d, t);
3411 tcg_temp_free_i64(t);
3412 }
3413
3414 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3415 {
3416 TCGv_i32 t = tcg_temp_new_i32();
3417
3418 if (sh == 32) {
3419 tcg_gen_shri_i32(t, a, 31);
3420 } else {
3421 gen_urshr32_i32(t, a, sh);
3422 }
3423 tcg_gen_add_i32(d, d, t);
3424 tcg_temp_free_i32(t);
3425 }
3426
3427 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3428 {
3429 TCGv_i64 t = tcg_temp_new_i64();
3430
3431 if (sh == 64) {
3432 tcg_gen_shri_i64(t, a, 63);
3433 } else {
3434 gen_urshr64_i64(t, a, sh);
3435 }
3436 tcg_gen_add_i64(d, d, t);
3437 tcg_temp_free_i64(t);
3438 }
3439
3440 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3441 {
3442 TCGv_vec t = tcg_temp_new_vec_matching(d);
3443
3444 if (sh == (8 << vece)) {
3445 tcg_gen_shri_vec(vece, t, a, sh - 1);
3446 } else {
3447 gen_urshr_vec(vece, t, a, sh);
3448 }
3449 tcg_gen_add_vec(vece, d, d, t);
3450 tcg_temp_free_vec(t);
3451 }
3452
3453 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3454 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3455 {
3456 static const TCGOpcode vecop_list[] = {
3457 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3458 };
3459 static const GVecGen2i ops[4] = {
3460 { .fni8 = gen_ursra8_i64,
3461 .fniv = gen_ursra_vec,
3462 .fno = gen_helper_gvec_ursra_b,
3463 .opt_opc = vecop_list,
3464 .load_dest = true,
3465 .vece = MO_8 },
3466 { .fni8 = gen_ursra16_i64,
3467 .fniv = gen_ursra_vec,
3468 .fno = gen_helper_gvec_ursra_h,
3469 .opt_opc = vecop_list,
3470 .load_dest = true,
3471 .vece = MO_16 },
3472 { .fni4 = gen_ursra32_i32,
3473 .fniv = gen_ursra_vec,
3474 .fno = gen_helper_gvec_ursra_s,
3475 .opt_opc = vecop_list,
3476 .load_dest = true,
3477 .vece = MO_32 },
3478 { .fni8 = gen_ursra64_i64,
3479 .fniv = gen_ursra_vec,
3480 .fno = gen_helper_gvec_ursra_d,
3481 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3482 .opt_opc = vecop_list,
3483 .load_dest = true,
3484 .vece = MO_64 },
3485 };
3486
3487 /* tszimm encoding produces immediates in the range [1..esize] */
3488 tcg_debug_assert(shift > 0);
3489 tcg_debug_assert(shift <= (8 << vece));
3490
3491 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3492 }
3493
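/*
 * SRI (shift right and insert): each element of the destination keeps its
 * top `shift` bits and takes the remaining bits from a >> shift.  E.g. for
 * MO_8 with shift 3 the preserved mask is 0xe0 in every byte lane.
 */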
3494 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3495 {
3496 uint64_t mask = dup_const(MO_8, 0xff >> shift);
3497 TCGv_i64 t = tcg_temp_new_i64();
3498
3499 tcg_gen_shri_i64(t, a, shift);
3500 tcg_gen_andi_i64(t, t, mask);
3501 tcg_gen_andi_i64(d, d, ~mask);
3502 tcg_gen_or_i64(d, d, t);
3503 tcg_temp_free_i64(t);
3504 }
3505
3506 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3507 {
3508 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3509 TCGv_i64 t = tcg_temp_new_i64();
3510
3511 tcg_gen_shri_i64(t, a, shift);
3512 tcg_gen_andi_i64(t, t, mask);
3513 tcg_gen_andi_i64(d, d, ~mask);
3514 tcg_gen_or_i64(d, d, t);
3515 tcg_temp_free_i64(t);
3516 }
3517
3518 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3519 {
3520 tcg_gen_shri_i32(a, a, shift);
3521 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3522 }
3523
3524 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3525 {
3526 tcg_gen_shri_i64(a, a, shift);
3527 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3528 }
3529
3530 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3531 {
3532 TCGv_vec t = tcg_temp_new_vec_matching(d);
3533 TCGv_vec m = tcg_temp_new_vec_matching(d);
3534
3535 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3536 tcg_gen_shri_vec(vece, t, a, sh);
3537 tcg_gen_and_vec(vece, d, d, m);
3538 tcg_gen_or_vec(vece, d, d, t);
3539
3540 tcg_temp_free_vec(t);
3541 tcg_temp_free_vec(m);
3542 }
3543
3544 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3545 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3546 {
3547 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3548 const GVecGen2i ops[4] = {
3549 { .fni8 = gen_shr8_ins_i64,
3550 .fniv = gen_shr_ins_vec,
3551 .fno = gen_helper_gvec_sri_b,
3552 .load_dest = true,
3553 .opt_opc = vecop_list,
3554 .vece = MO_8 },
3555 { .fni8 = gen_shr16_ins_i64,
3556 .fniv = gen_shr_ins_vec,
3557 .fno = gen_helper_gvec_sri_h,
3558 .load_dest = true,
3559 .opt_opc = vecop_list,
3560 .vece = MO_16 },
3561 { .fni4 = gen_shr32_ins_i32,
3562 .fniv = gen_shr_ins_vec,
3563 .fno = gen_helper_gvec_sri_s,
3564 .load_dest = true,
3565 .opt_opc = vecop_list,
3566 .vece = MO_32 },
3567 { .fni8 = gen_shr64_ins_i64,
3568 .fniv = gen_shr_ins_vec,
3569 .fno = gen_helper_gvec_sri_d,
3570 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3571 .load_dest = true,
3572 .opt_opc = vecop_list,
3573 .vece = MO_64 },
3574 };
3575
3576 /* tszimm encoding produces immediates in the range [1..esize]. */
3577 tcg_debug_assert(shift > 0);
3578 tcg_debug_assert(shift <= (8 << vece));
3579
3580 /* Shift of esize leaves destination unchanged. */
3581 if (shift < (8 << vece)) {
3582 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3583 } else {
3584 /* Nop, but we do need to clear the tail. */
3585 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3586 }
3587 }
3588
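/*
 * SLI (shift left and insert): each element of the destination keeps its
 * low `shift` bits and takes the rest from a << shift.
 */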
3589 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3590 {
3591 uint64_t mask = dup_const(MO_8, 0xff << shift);
3592 TCGv_i64 t = tcg_temp_new_i64();
3593
3594 tcg_gen_shli_i64(t, a, shift);
3595 tcg_gen_andi_i64(t, t, mask);
3596 tcg_gen_andi_i64(d, d, ~mask);
3597 tcg_gen_or_i64(d, d, t);
3598 tcg_temp_free_i64(t);
3599 }
3600
3601 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3602 {
3603 uint64_t mask = dup_const(MO_16, 0xffff << shift);
3604 TCGv_i64 t = tcg_temp_new_i64();
3605
3606 tcg_gen_shli_i64(t, a, shift);
3607 tcg_gen_andi_i64(t, t, mask);
3608 tcg_gen_andi_i64(d, d, ~mask);
3609 tcg_gen_or_i64(d, d, t);
3610 tcg_temp_free_i64(t);
3611 }
3612
3613 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3614 {
3615 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3616 }
3617
3618 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3619 {
3620 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3621 }
3622
3623 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3624 {
3625 TCGv_vec t = tcg_temp_new_vec_matching(d);
3626 TCGv_vec m = tcg_temp_new_vec_matching(d);
3627
3628 tcg_gen_shli_vec(vece, t, a, sh);
3629 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3630 tcg_gen_and_vec(vece, d, d, m);
3631 tcg_gen_or_vec(vece, d, d, t);
3632
3633 tcg_temp_free_vec(t);
3634 tcg_temp_free_vec(m);
3635 }
3636
3637 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3638 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3639 {
3640 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3641 const GVecGen2i ops[4] = {
3642 { .fni8 = gen_shl8_ins_i64,
3643 .fniv = gen_shl_ins_vec,
3644 .fno = gen_helper_gvec_sli_b,
3645 .load_dest = true,
3646 .opt_opc = vecop_list,
3647 .vece = MO_8 },
3648 { .fni8 = gen_shl16_ins_i64,
3649 .fniv = gen_shl_ins_vec,
3650 .fno = gen_helper_gvec_sli_h,
3651 .load_dest = true,
3652 .opt_opc = vecop_list,
3653 .vece = MO_16 },
3654 { .fni4 = gen_shl32_ins_i32,
3655 .fniv = gen_shl_ins_vec,
3656 .fno = gen_helper_gvec_sli_s,
3657 .load_dest = true,
3658 .opt_opc = vecop_list,
3659 .vece = MO_32 },
3660 { .fni8 = gen_shl64_ins_i64,
3661 .fniv = gen_shl_ins_vec,
3662 .fno = gen_helper_gvec_sli_d,
3663 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3664 .load_dest = true,
3665 .opt_opc = vecop_list,
3666 .vece = MO_64 },
3667 };
3668
3669 /* tszimm encoding produces immediates in the range [0..esize-1]. */
3670 tcg_debug_assert(shift >= 0);
3671 tcg_debug_assert(shift < (8 << vece));
3672
3673 if (shift == 0) {
3674 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3675 } else {
3676 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3677 }
3678 }
3679
3680 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3681 {
3682 gen_helper_neon_mul_u8(a, a, b);
3683 gen_helper_neon_add_u8(d, d, a);
3684 }
3685
3686 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3687 {
3688 gen_helper_neon_mul_u8(a, a, b);
3689 gen_helper_neon_sub_u8(d, d, a);
3690 }
3691
3692 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3693 {
3694 gen_helper_neon_mul_u16(a, a, b);
3695 gen_helper_neon_add_u16(d, d, a);
3696 }
3697
3698 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3699 {
3700 gen_helper_neon_mul_u16(a, a, b);
3701 gen_helper_neon_sub_u16(d, d, a);
3702 }
3703
3704 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3705 {
3706 tcg_gen_mul_i32(a, a, b);
3707 tcg_gen_add_i32(d, d, a);
3708 }
3709
3710 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3711 {
3712 tcg_gen_mul_i32(a, a, b);
3713 tcg_gen_sub_i32(d, d, a);
3714 }
3715
3716 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3717 {
3718 tcg_gen_mul_i64(a, a, b);
3719 tcg_gen_add_i64(d, d, a);
3720 }
3721
3722 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3723 {
3724 tcg_gen_mul_i64(a, a, b);
3725 tcg_gen_sub_i64(d, d, a);
3726 }
3727
3728 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3729 {
3730 tcg_gen_mul_vec(vece, a, a, b);
3731 tcg_gen_add_vec(vece, d, d, a);
3732 }
3733
3734 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3735 {
3736 tcg_gen_mul_vec(vece, a, a, b);
3737 tcg_gen_sub_vec(vece, d, d, a);
3738 }
3739
3740 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3741 * these tables are shared with AArch64 which does support them.
3742 */
3743 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3744 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3745 {
3746 static const TCGOpcode vecop_list[] = {
3747 INDEX_op_mul_vec, INDEX_op_add_vec, 0
3748 };
3749 static const GVecGen3 ops[4] = {
3750 { .fni4 = gen_mla8_i32,
3751 .fniv = gen_mla_vec,
3752 .load_dest = true,
3753 .opt_opc = vecop_list,
3754 .vece = MO_8 },
3755 { .fni4 = gen_mla16_i32,
3756 .fniv = gen_mla_vec,
3757 .load_dest = true,
3758 .opt_opc = vecop_list,
3759 .vece = MO_16 },
3760 { .fni4 = gen_mla32_i32,
3761 .fniv = gen_mla_vec,
3762 .load_dest = true,
3763 .opt_opc = vecop_list,
3764 .vece = MO_32 },
3765 { .fni8 = gen_mla64_i64,
3766 .fniv = gen_mla_vec,
3767 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3768 .load_dest = true,
3769 .opt_opc = vecop_list,
3770 .vece = MO_64 },
3771 };
3772 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3773 }
3774
3775 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3776 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3777 {
3778 static const TCGOpcode vecop_list[] = {
3779 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3780 };
3781 static const GVecGen3 ops[4] = {
3782 { .fni4 = gen_mls8_i32,
3783 .fniv = gen_mls_vec,
3784 .load_dest = true,
3785 .opt_opc = vecop_list,
3786 .vece = MO_8 },
3787 { .fni4 = gen_mls16_i32,
3788 .fniv = gen_mls_vec,
3789 .load_dest = true,
3790 .opt_opc = vecop_list,
3791 .vece = MO_16 },
3792 { .fni4 = gen_mls32_i32,
3793 .fniv = gen_mls_vec,
3794 .load_dest = true,
3795 .opt_opc = vecop_list,
3796 .vece = MO_32 },
3797 { .fni8 = gen_mls64_i64,
3798 .fniv = gen_mls_vec,
3799 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3800 .load_dest = true,
3801 .opt_opc = vecop_list,
3802 .vece = MO_64 },
3803 };
3804 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3805 }
3806
3807 /* CMTST : test is "if ((X & Y) != 0)". */
3808 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3809 {
3810 tcg_gen_and_i32(d, a, b);
3811 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3812 tcg_gen_neg_i32(d, d);
3813 }
3814
3815 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3816 {
3817 tcg_gen_and_i64(d, a, b);
3818 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3819 tcg_gen_neg_i64(d, d);
3820 }
3821
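/*
 * Vector CMTST: tcg_gen_cmp_vec already yields all-ones per lane for a
 * true comparison, so no explicit negation is needed, unlike the scalar
 * helpers above.
 */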
3822 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3823 {
3824 tcg_gen_and_vec(vece, d, a, b);
3825 tcg_gen_dupi_vec(vece, a, 0);
3826 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3827 }
3828
3829 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3830 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3831 {
3832 static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3833 static const GVecGen3 ops[4] = {
3834 { .fni4 = gen_helper_neon_tst_u8,
3835 .fniv = gen_cmtst_vec,
3836 .opt_opc = vecop_list,
3837 .vece = MO_8 },
3838 { .fni4 = gen_helper_neon_tst_u16,
3839 .fniv = gen_cmtst_vec,
3840 .opt_opc = vecop_list,
3841 .vece = MO_16 },
3842 { .fni4 = gen_cmtst_i32,
3843 .fniv = gen_cmtst_vec,
3844 .opt_opc = vecop_list,
3845 .vece = MO_32 },
3846 { .fni8 = gen_cmtst_i64,
3847 .fniv = gen_cmtst_vec,
3848 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3849 .opt_opc = vecop_list,
3850 .vece = MO_64 },
3851 };
3852 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3853 }
3854
3855 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3856 {
3857 TCGv_i32 lval = tcg_temp_new_i32();
3858 TCGv_i32 rval = tcg_temp_new_i32();
3859 TCGv_i32 lsh = tcg_temp_new_i32();
3860 TCGv_i32 rsh = tcg_temp_new_i32();
3861 TCGv_i32 zero = tcg_const_i32(0);
3862 TCGv_i32 max = tcg_const_i32(32);
3863
3864 /*
3865 * Rely on the TCG guarantee that out of range shifts produce
3866 * unspecified results, not undefined behaviour (i.e. no trap).
3867 * Discard out-of-range results after the fact.
3868 */
3869 tcg_gen_ext8s_i32(lsh, shift);
3870 tcg_gen_neg_i32(rsh, lsh);
3871 tcg_gen_shl_i32(lval, src, lsh);
3872 tcg_gen_shr_i32(rval, src, rsh);
3873 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3874 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3875
3876 tcg_temp_free_i32(lval);
3877 tcg_temp_free_i32(rval);
3878 tcg_temp_free_i32(lsh);
3879 tcg_temp_free_i32(rsh);
3880 tcg_temp_free_i32(zero);
3881 tcg_temp_free_i32(max);
3882 }
3883
3884 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3885 {
3886 TCGv_i64 lval = tcg_temp_new_i64();
3887 TCGv_i64 rval = tcg_temp_new_i64();
3888 TCGv_i64 lsh = tcg_temp_new_i64();
3889 TCGv_i64 rsh = tcg_temp_new_i64();
3890 TCGv_i64 zero = tcg_const_i64(0);
3891 TCGv_i64 max = tcg_const_i64(64);
3892
3893 /*
3894 * Rely on the TCG guarantee that out of range shifts produce
3895 * unspecified results, not undefined behaviour (i.e. no trap).
3896 * Discard out-of-range results after the fact.
3897 */
3898 tcg_gen_ext8s_i64(lsh, shift);
3899 tcg_gen_neg_i64(rsh, lsh);
3900 tcg_gen_shl_i64(lval, src, lsh);
3901 tcg_gen_shr_i64(rval, src, rsh);
3902 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3903 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3904
3905 tcg_temp_free_i64(lval);
3906 tcg_temp_free_i64(rval);
3907 tcg_temp_free_i64(lsh);
3908 tcg_temp_free_i64(rsh);
3909 tcg_temp_free_i64(zero);
3910 tcg_temp_free_i64(max);
3911 }
3912
3913 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
3914 TCGv_vec src, TCGv_vec shift)
3915 {
3916 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
3917 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
3918 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
3919 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
3920 TCGv_vec msk, max;
3921
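/*
 * The shift count for each element is the signed low byte of the
 * corresponding element of 'shift'; a negative count means shift right.
 * For MO_8 the element already is that byte, otherwise mask both the
 * left and right counts down to the low byte.
 */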
3922 tcg_gen_neg_vec(vece, rsh, shift);
3923 if (vece == MO_8) {
3924 tcg_gen_mov_vec(lsh, shift);
3925 } else {
3926 msk = tcg_temp_new_vec_matching(dst);
3927 tcg_gen_dupi_vec(vece, msk, 0xff);
3928 tcg_gen_and_vec(vece, lsh, shift, msk);
3929 tcg_gen_and_vec(vece, rsh, rsh, msk);
3930 tcg_temp_free_vec(msk);
3931 }
3932
3933 /*
3934 * Rely on the TCG guarantee that out of range shifts produce
3935 * unspecified results, not undefined behaviour (i.e. no trap).
3936 * Discard out-of-range results after the fact.
3937 */
3938 tcg_gen_shlv_vec(vece, lval, src, lsh);
3939 tcg_gen_shrv_vec(vece, rval, src, rsh);
3940
3941 max = tcg_temp_new_vec_matching(dst);
3942 tcg_gen_dupi_vec(vece, max, 8 << vece);
3943
3944 /*
3945 * The choice of LT (signed) and GEU (unsigned) is biased toward
3946 * the instructions of the x86_64 host. For MO_8, the whole byte
3947 * is significant so we must use an unsigned compare; otherwise we
3948 * have already masked to a byte and so a signed compare works.
3949 * Other TCG hosts have a full set of comparisons and do not care.
3950 */
3951 if (vece == MO_8) {
3952 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
3953 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
3954 tcg_gen_andc_vec(vece, lval, lval, lsh);
3955 tcg_gen_andc_vec(vece, rval, rval, rsh);
3956 } else {
3957 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
3958 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
3959 tcg_gen_and_vec(vece, lval, lval, lsh);
3960 tcg_gen_and_vec(vece, rval, rval, rsh);
3961 }
3962 tcg_gen_or_vec(vece, dst, lval, rval);
3963
3964 tcg_temp_free_vec(max);
3965 tcg_temp_free_vec(lval);
3966 tcg_temp_free_vec(rval);
3967 tcg_temp_free_vec(lsh);
3968 tcg_temp_free_vec(rsh);
3969 }
3970
3971 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3972 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3973 {
3974 static const TCGOpcode vecop_list[] = {
3975 INDEX_op_neg_vec, INDEX_op_shlv_vec,
3976 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
3977 };
3978 static const GVecGen3 ops[4] = {
3979 { .fniv = gen_ushl_vec,
3980 .fno = gen_helper_gvec_ushl_b,
3981 .opt_opc = vecop_list,
3982 .vece = MO_8 },
3983 { .fniv = gen_ushl_vec,
3984 .fno = gen_helper_gvec_ushl_h,
3985 .opt_opc = vecop_list,
3986 .vece = MO_16 },
3987 { .fni4 = gen_ushl_i32,
3988 .fniv = gen_ushl_vec,
3989 .opt_opc = vecop_list,
3990 .vece = MO_32 },
3991 { .fni8 = gen_ushl_i64,
3992 .fniv = gen_ushl_vec,
3993 .opt_opc = vecop_list,
3994 .vece = MO_64 },
3995 };
3996 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3997 }
3998
3999 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4000 {
4001 TCGv_i32 lval = tcg_temp_new_i32();
4002 TCGv_i32 rval = tcg_temp_new_i32();
4003 TCGv_i32 lsh = tcg_temp_new_i32();
4004 TCGv_i32 rsh = tcg_temp_new_i32();
4005 TCGv_i32 zero = tcg_const_i32(0);
4006 TCGv_i32 max = tcg_const_i32(31);
4007
4008 /*
4009 * Rely on the TCG guarantee that out of range shifts produce
4010 * unspecified results, not undefined behaviour (i.e. no trap).
4011 * Discard out-of-range results after the fact.
4012 */
4013 tcg_gen_ext8s_i32(lsh, shift);
4014 tcg_gen_neg_i32(rsh, lsh);
4015 tcg_gen_shl_i32(lval, src, lsh);
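/*
 * Clamp rsh to 31 so that an out-of-range right shift still yields
 * the sign fill (0 or -1) required by the architecture.
 */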
4016 tcg_gen_umin_i32(rsh, rsh, max);
4017 tcg_gen_sar_i32(rval, src, rsh);
4018 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4019 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4020
4021 tcg_temp_free_i32(lval);
4022 tcg_temp_free_i32(rval);
4023 tcg_temp_free_i32(lsh);
4024 tcg_temp_free_i32(rsh);
4025 tcg_temp_free_i32(zero);
4026 tcg_temp_free_i32(max);
4027 }
4028
4029 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4030 {
4031 TCGv_i64 lval = tcg_temp_new_i64();
4032 TCGv_i64 rval = tcg_temp_new_i64();
4033 TCGv_i64 lsh = tcg_temp_new_i64();
4034 TCGv_i64 rsh = tcg_temp_new_i64();
4035 TCGv_i64 zero = tcg_const_i64(0);
4036 TCGv_i64 max = tcg_const_i64(63);
4037
4038 /*
4039 * Rely on the TCG guarantee that out of range shifts produce
4040 * unspecified results, not undefined behaviour (i.e. no trap).
4041 * Discard out-of-range results after the fact.
4042 */
4043 tcg_gen_ext8s_i64(lsh, shift);
4044 tcg_gen_neg_i64(rsh, lsh);
4045 tcg_gen_shl_i64(lval, src, lsh);
4046 tcg_gen_umin_i64(rsh, rsh, max);
4047 tcg_gen_sar_i64(rval, src, rsh);
4048 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4049 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4050
4051 tcg_temp_free_i64(lval);
4052 tcg_temp_free_i64(rval);
4053 tcg_temp_free_i64(lsh);
4054 tcg_temp_free_i64(rsh);
4055 tcg_temp_free_i64(zero);
4056 tcg_temp_free_i64(max);
4057 }
4058
4059 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4060 TCGv_vec src, TCGv_vec shift)
4061 {
4062 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4063 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4064 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4065 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4066 TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4067
4068 /*
4069 * Rely on the TCG guarantee that out of range shifts produce
4070 * unspecified results, not undefined behaviour (i.e. no trap).
4071 * Discard out-of-range results after the fact.
4072 */
4073 tcg_gen_neg_vec(vece, rsh, shift);
4074 if (vece == MO_8) {
4075 tcg_gen_mov_vec(lsh, shift);
4076 } else {
4077 tcg_gen_dupi_vec(vece, tmp, 0xff);
4078 tcg_gen_and_vec(vece, lsh, shift, tmp);
4079 tcg_gen_and_vec(vece, rsh, rsh, tmp);
4080 }
4081
4082 /* Bound rsh so out of bound right shift gets -1. */
4083 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4084 tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4085 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4086
4087 tcg_gen_shlv_vec(vece, lval, src, lsh);
4088 tcg_gen_sarv_vec(vece, rval, src, rsh);
4089
4090 /* Select in-bound left shift. */
4091 tcg_gen_andc_vec(vece, lval, lval, tmp);
4092
4093 /* Select between left and right shift. */
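/*
 * For MO_8 the shift element is the signed count itself, so test
 * lsh < 0 directly. For wider elements lsh has been masked to
 * [0, 0xff], so a count >= 0x80 marks a negative shift; note that
 * the true/false operands are swapped between the two cases.
 */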
4094 if (vece == MO_8) {
4095 tcg_gen_dupi_vec(vece, tmp, 0);
4096 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4097 } else {
4098 tcg_gen_dupi_vec(vece, tmp, 0x80);
4099 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4100 }
4101
4102 tcg_temp_free_vec(lval);
4103 tcg_temp_free_vec(rval);
4104 tcg_temp_free_vec(lsh);
4105 tcg_temp_free_vec(rsh);
4106 tcg_temp_free_vec(tmp);
4107 }
4108
4109 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4110 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4111 {
4112 static const TCGOpcode vecop_list[] = {
4113 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4114 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4115 };
4116 static const GVecGen3 ops[4] = {
4117 { .fniv = gen_sshl_vec,
4118 .fno = gen_helper_gvec_sshl_b,
4119 .opt_opc = vecop_list,
4120 .vece = MO_8 },
4121 { .fniv = gen_sshl_vec,
4122 .fno = gen_helper_gvec_sshl_h,
4123 .opt_opc = vecop_list,
4124 .vece = MO_16 },
4125 { .fni4 = gen_sshl_i32,
4126 .fniv = gen_sshl_vec,
4127 .opt_opc = vecop_list,
4128 .vece = MO_32 },
4129 { .fni8 = gen_sshl_i64,
4130 .fniv = gen_sshl_vec,
4131 .opt_opc = vecop_list,
4132 .vece = MO_64 },
4133 };
4134 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4135 }
4136
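/*
 * Saturating add/sub helpers: compute both the wrapped and the saturated
 * result, and OR into 'sat' (the vfp.qc accumulator) every lane where the
 * two differ, i.e. where saturation occurred.
 */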
4137 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4138 TCGv_vec a, TCGv_vec b)
4139 {
4140 TCGv_vec x = tcg_temp_new_vec_matching(t);
4141 tcg_gen_add_vec(vece, x, a, b);
4142 tcg_gen_usadd_vec(vece, t, a, b);
4143 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4144 tcg_gen_or_vec(vece, sat, sat, x);
4145 tcg_temp_free_vec(x);
4146 }
4147
4148 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4149 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4150 {
4151 static const TCGOpcode vecop_list[] = {
4152 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4153 };
4154 static const GVecGen4 ops[4] = {
4155 { .fniv = gen_uqadd_vec,
4156 .fno = gen_helper_gvec_uqadd_b,
4157 .write_aofs = true,
4158 .opt_opc = vecop_list,
4159 .vece = MO_8 },
4160 { .fniv = gen_uqadd_vec,
4161 .fno = gen_helper_gvec_uqadd_h,
4162 .write_aofs = true,
4163 .opt_opc = vecop_list,
4164 .vece = MO_16 },
4165 { .fniv = gen_uqadd_vec,
4166 .fno = gen_helper_gvec_uqadd_s,
4167 .write_aofs = true,
4168 .opt_opc = vecop_list,
4169 .vece = MO_32 },
4170 { .fniv = gen_uqadd_vec,
4171 .fno = gen_helper_gvec_uqadd_d,
4172 .write_aofs = true,
4173 .opt_opc = vecop_list,
4174 .vece = MO_64 },
4175 };
4176 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4177 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4178 }
4179
4180 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4181 TCGv_vec a, TCGv_vec b)
4182 {
4183 TCGv_vec x = tcg_temp_new_vec_matching(t);
4184 tcg_gen_add_vec(vece, x, a, b);
4185 tcg_gen_ssadd_vec(vece, t, a, b);
4186 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4187 tcg_gen_or_vec(vece, sat, sat, x);
4188 tcg_temp_free_vec(x);
4189 }
4190
4191 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4192 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4193 {
4194 static const TCGOpcode vecop_list[] = {
4195 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4196 };
4197 static const GVecGen4 ops[4] = {
4198 { .fniv = gen_sqadd_vec,
4199 .fno = gen_helper_gvec_sqadd_b,
4200 .opt_opc = vecop_list,
4201 .write_aofs = true,
4202 .vece = MO_8 },
4203 { .fniv = gen_sqadd_vec,
4204 .fno = gen_helper_gvec_sqadd_h,
4205 .opt_opc = vecop_list,
4206 .write_aofs = true,
4207 .vece = MO_16 },
4208 { .fniv = gen_sqadd_vec,
4209 .fno = gen_helper_gvec_sqadd_s,
4210 .opt_opc = vecop_list,
4211 .write_aofs = true,
4212 .vece = MO_32 },
4213 { .fniv = gen_sqadd_vec,
4214 .fno = gen_helper_gvec_sqadd_d,
4215 .opt_opc = vecop_list,
4216 .write_aofs = true,
4217 .vece = MO_64 },
4218 };
4219 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4220 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4221 }
4222
4223 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4224 TCGv_vec a, TCGv_vec b)
4225 {
4226 TCGv_vec x = tcg_temp_new_vec_matching(t);
4227 tcg_gen_sub_vec(vece, x, a, b);
4228 tcg_gen_ussub_vec(vece, t, a, b);
4229 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4230 tcg_gen_or_vec(vece, sat, sat, x);
4231 tcg_temp_free_vec(x);
4232 }
4233
4234 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4235 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4236 {
4237 static const TCGOpcode vecop_list[] = {
4238 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4239 };
4240 static const GVecGen4 ops[4] = {
4241 { .fniv = gen_uqsub_vec,
4242 .fno = gen_helper_gvec_uqsub_b,
4243 .opt_opc = vecop_list,
4244 .write_aofs = true,
4245 .vece = MO_8 },
4246 { .fniv = gen_uqsub_vec,
4247 .fno = gen_helper_gvec_uqsub_h,
4248 .opt_opc = vecop_list,
4249 .write_aofs = true,
4250 .vece = MO_16 },
4251 { .fniv = gen_uqsub_vec,
4252 .fno = gen_helper_gvec_uqsub_s,
4253 .opt_opc = vecop_list,
4254 .write_aofs = true,
4255 .vece = MO_32 },
4256 { .fniv = gen_uqsub_vec,
4257 .fno = gen_helper_gvec_uqsub_d,
4258 .opt_opc = vecop_list,
4259 .write_aofs = true,
4260 .vece = MO_64 },
4261 };
4262 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4263 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4264 }
4265
4266 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4267 TCGv_vec a, TCGv_vec b)
4268 {
4269 TCGv_vec x = tcg_temp_new_vec_matching(t);
4270 tcg_gen_sub_vec(vece, x, a, b);
4271 tcg_gen_sssub_vec(vece, t, a, b);
4272 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4273 tcg_gen_or_vec(vece, sat, sat, x);
4274 tcg_temp_free_vec(x);
4275 }
4276
4277 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4278 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4279 {
4280 static const TCGOpcode vecop_list[] = {
4281 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4282 };
4283 static const GVecGen4 ops[4] = {
4284 { .fniv = gen_sqsub_vec,
4285 .fno = gen_helper_gvec_sqsub_b,
4286 .opt_opc = vecop_list,
4287 .write_aofs = true,
4288 .vece = MO_8 },
4289 { .fniv = gen_sqsub_vec,
4290 .fno = gen_helper_gvec_sqsub_h,
4291 .opt_opc = vecop_list,
4292 .write_aofs = true,
4293 .vece = MO_16 },
4294 { .fniv = gen_sqsub_vec,
4295 .fno = gen_helper_gvec_sqsub_s,
4296 .opt_opc = vecop_list,
4297 .write_aofs = true,
4298 .vece = MO_32 },
4299 { .fniv = gen_sqsub_vec,
4300 .fno = gen_helper_gvec_sqsub_d,
4301 .opt_opc = vecop_list,
4302 .write_aofs = true,
4303 .vece = MO_64 },
4304 };
4305 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4306 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4307 }
4308
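/* Signed absolute difference: select b - a when a < b, else a - b. */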
4309 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4310 {
4311 TCGv_i32 t = tcg_temp_new_i32();
4312
4313 tcg_gen_sub_i32(t, a, b);
4314 tcg_gen_sub_i32(d, b, a);
4315 tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4316 tcg_temp_free_i32(t);
4317 }
4318
4319 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4320 {
4321 TCGv_i64 t = tcg_temp_new_i64();
4322
4323 tcg_gen_sub_i64(t, a, b);
4324 tcg_gen_sub_i64(d, b, a);
4325 tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4326 tcg_temp_free_i64(t);
4327 }
4328
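/* Vector form: |a - b| computed as smax(a, b) - smin(a, b). */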
4329 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4330 {
4331 TCGv_vec t = tcg_temp_new_vec_matching(d);
4332
4333 tcg_gen_smin_vec(vece, t, a, b);
4334 tcg_gen_smax_vec(vece, d, a, b);
4335 tcg_gen_sub_vec(vece, d, d, t);
4336 tcg_temp_free_vec(t);
4337 }
4338
4339 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4340 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4341 {
4342 static const TCGOpcode vecop_list[] = {
4343 INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4344 };
4345 static const GVecGen3 ops[4] = {
4346 { .fniv = gen_sabd_vec,
4347 .fno = gen_helper_gvec_sabd_b,
4348 .opt_opc = vecop_list,
4349 .vece = MO_8 },
4350 { .fniv = gen_sabd_vec,
4351 .fno = gen_helper_gvec_sabd_h,
4352 .opt_opc = vecop_list,
4353 .vece = MO_16 },
4354 { .fni4 = gen_sabd_i32,
4355 .fniv = gen_sabd_vec,
4356 .fno = gen_helper_gvec_sabd_s,
4357 .opt_opc = vecop_list,
4358 .vece = MO_32 },
4359 { .fni8 = gen_sabd_i64,
4360 .fniv = gen_sabd_vec,
4361 .fno = gen_helper_gvec_sabd_d,
4362 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4363 .opt_opc = vecop_list,
4364 .vece = MO_64 },
4365 };
4366 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4367 }
4368
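/* Unsigned absolute difference: as above, with unsigned compare/min/max. */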
4369 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4370 {
4371 TCGv_i32 t = tcg_temp_new_i32();
4372
4373 tcg_gen_sub_i32(t, a, b);
4374 tcg_gen_sub_i32(d, b, a);
4375 tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4376 tcg_temp_free_i32(t);
4377 }
4378
4379 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4380 {
4381 TCGv_i64 t = tcg_temp_new_i64();
4382
4383 tcg_gen_sub_i64(t, a, b);
4384 tcg_gen_sub_i64(d, b, a);
4385 tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4386 tcg_temp_free_i64(t);
4387 }
4388
4389 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4390 {
4391 TCGv_vec t = tcg_temp_new_vec_matching(d);
4392
4393 tcg_gen_umin_vec(vece, t, a, b);
4394 tcg_gen_umax_vec(vece, d, a, b);
4395 tcg_gen_sub_vec(vece, d, d, t);
4396 tcg_temp_free_vec(t);
4397 }
4398
4399 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4400 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4401 {
4402 static const TCGOpcode vecop_list[] = {
4403 INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4404 };
4405 static const GVecGen3 ops[4] = {
4406 { .fniv = gen_uabd_vec,
4407 .fno = gen_helper_gvec_uabd_b,
4408 .opt_opc = vecop_list,
4409 .vece = MO_8 },
4410 { .fniv = gen_uabd_vec,
4411 .fno = gen_helper_gvec_uabd_h,
4412 .opt_opc = vecop_list,
4413 .vece = MO_16 },
4414 { .fni4 = gen_uabd_i32,
4415 .fniv = gen_uabd_vec,
4416 .fno = gen_helper_gvec_uabd_s,
4417 .opt_opc = vecop_list,
4418 .vece = MO_32 },
4419 { .fni8 = gen_uabd_i64,
4420 .fniv = gen_uabd_vec,
4421 .fno = gen_helper_gvec_uabd_d,
4422 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4423 .opt_opc = vecop_list,
4424 .vece = MO_64 },
4425 };
4426 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4427 }
4428
4429 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4430 {
4431 TCGv_i32 t = tcg_temp_new_i32();
4432 gen_sabd_i32(t, a, b);
4433 tcg_gen_add_i32(d, d, t);
4434 tcg_temp_free_i32(t);
4435 }
4436
4437 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4438 {
4439 TCGv_i64 t = tcg_temp_new_i64();
4440 gen_sabd_i64(t, a, b);
4441 tcg_gen_add_i64(d, d, t);
4442 tcg_temp_free_i64(t);
4443 }
4444
4445 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4446 {
4447 TCGv_vec t = tcg_temp_new_vec_matching(d);
4448 gen_sabd_vec(vece, t, a, b);
4449 tcg_gen_add_vec(vece, d, d, t);
4450 tcg_temp_free_vec(t);
4451 }
4452
4453 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4454 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4455 {
4456 static const TCGOpcode vecop_list[] = {
4457 INDEX_op_sub_vec, INDEX_op_add_vec,
4458 INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4459 };
4460 static const GVecGen3 ops[4] = {
4461 { .fniv = gen_saba_vec,
4462 .fno = gen_helper_gvec_saba_b,
4463 .opt_opc = vecop_list,
4464 .load_dest = true,
4465 .vece = MO_8 },
4466 { .fniv = gen_saba_vec,
4467 .fno = gen_helper_gvec_saba_h,
4468 .opt_opc = vecop_list,
4469 .load_dest = true,
4470 .vece = MO_16 },
4471 { .fni4 = gen_saba_i32,
4472 .fniv = gen_saba_vec,
4473 .fno = gen_helper_gvec_saba_s,
4474 .opt_opc = vecop_list,
4475 .load_dest = true,
4476 .vece = MO_32 },
4477 { .fni8 = gen_saba_i64,
4478 .fniv = gen_saba_vec,
4479 .fno = gen_helper_gvec_saba_d,
4480 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4481 .opt_opc = vecop_list,
4482 .load_dest = true,
4483 .vece = MO_64 },
4484 };
4485 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4486 }
4487
4488 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4489 {
4490 TCGv_i32 t = tcg_temp_new_i32();
4491 gen_uabd_i32(t, a, b);
4492 tcg_gen_add_i32(d, d, t);
4493 tcg_temp_free_i32(t);
4494 }
4495
4496 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4497 {
4498 TCGv_i64 t = tcg_temp_new_i64();
4499 gen_uabd_i64(t, a, b);
4500 tcg_gen_add_i64(d, d, t);
4501 tcg_temp_free_i64(t);
4502 }
4503
4504 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4505 {
4506 TCGv_vec t = tcg_temp_new_vec_matching(d);
4507 gen_uabd_vec(vece, t, a, b);
4508 tcg_gen_add_vec(vece, d, d, t);
4509 tcg_temp_free_vec(t);
4510 }
4511
4512 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4513 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4514 {
4515 static const TCGOpcode vecop_list[] = {
4516 INDEX_op_sub_vec, INDEX_op_add_vec,
4517 INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4518 };
4519 static const GVecGen3 ops[4] = {
4520 { .fniv = gen_uaba_vec,
4521 .fno = gen_helper_gvec_uaba_b,
4522 .opt_opc = vecop_list,
4523 .load_dest = true,
4524 .vece = MO_8 },
4525 { .fniv = gen_uaba_vec,
4526 .fno = gen_helper_gvec_uaba_h,
4527 .opt_opc = vecop_list,
4528 .load_dest = true,
4529 .vece = MO_16 },
4530 { .fni4 = gen_uaba_i32,
4531 .fniv = gen_uaba_vec,
4532 .fno = gen_helper_gvec_uaba_s,
4533 .opt_opc = vecop_list,
4534 .load_dest = true,
4535 .vece = MO_32 },
4536 { .fni8 = gen_uaba_i64,
4537 .fniv = gen_uaba_vec,
4538 .fno = gen_helper_gvec_uaba_d,
4539 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4540 .opt_opc = vecop_list,
4541 .load_dest = true,
4542 .vece = MO_64 },
4543 };
4544 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4545 }
4546
4547 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4548 int opc1, int crn, int crm, int opc2,
4549 bool isread, int rt, int rt2)
4550 {
4551 const ARMCPRegInfo *ri;
4552
4553 ri = get_arm_cp_reginfo(s->cp_regs,
4554 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
4555 if (ri) {
4556 bool need_exit_tb;
4557
4558 /* Check access permissions */
4559 if (!cp_access_ok(s->current_el, ri, isread)) {
4560 unallocated_encoding(s);
4561 return;
4562 }
4563
4564 if (s->hstr_active || ri->accessfn ||
4565 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4566 /* Emit code to perform further access permission checks at
4567 * runtime; this may result in an exception.
4568 * Note that on XScale all cp0..cp13 registers do an access check
4569 * call in order to handle c15_cpar.
4570 */
4571 TCGv_ptr tmpptr;
4572 TCGv_i32 tcg_syn, tcg_isread;
4573 uint32_t syndrome;
4574
4575 /* Note that since we are an implementation which takes an
4576 * exception on a trapped conditional instruction only if the
4577 * instruction passes its condition code check, we can take
4578 * advantage of the clause in the ARM ARM that allows us to set
4579 * the COND field in the instruction to 0xE in all cases.
4580 * We could fish the actual condition out of the insn (ARM)
4581 * or the condexec bits (Thumb) but it isn't necessary.
4582 */
4583 switch (cpnum) {
4584 case 14:
4585 if (is64) {
4586 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4587 isread, false);
4588 } else {
4589 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4590 rt, isread, false);
4591 }
4592 break;
4593 case 15:
4594 if (is64) {
4595 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4596 isread, false);
4597 } else {
4598 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4599 rt, isread, false);
4600 }
4601 break;
4602 default:
4603 /* ARMv8 defines that only coprocessors 14 and 15 exist,
4604 * so this can only happen if this is an ARMv7 or earlier CPU,
4605 * in which case the syndrome information won't actually be
4606 * guest visible.
4607 */
4608 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4609 syndrome = syn_uncategorized();
4610 break;
4611 }
4612
4613 gen_set_condexec(s);
4614 gen_set_pc_im(s, s->pc_curr);
4615 tmpptr = tcg_const_ptr(ri);
4616 tcg_syn = tcg_const_i32(syndrome);
4617 tcg_isread = tcg_const_i32(isread);
4618 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
4619 tcg_isread);
4620 tcg_temp_free_ptr(tmpptr);
4621 tcg_temp_free_i32(tcg_syn);
4622 tcg_temp_free_i32(tcg_isread);
4623 } else if (ri->type & ARM_CP_RAISES_EXC) {
4624 /*
4625 * The readfn or writefn might raise an exception;
4626 * synchronize the CPU state in case it does.
4627 */
4628 gen_set_condexec(s);
4629 gen_set_pc_im(s, s->pc_curr);
4630 }
4631
4632 /* Handle special cases first */
4633 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
4634 case ARM_CP_NOP:
4635 return;
4636 case ARM_CP_WFI:
4637 if (isread) {
4638 unallocated_encoding(s);
4639 return;
4640 }
4641 gen_set_pc_im(s, s->base.pc_next);
4642 s->base.is_jmp = DISAS_WFI;
4643 return;
4644 default:
4645 break;
4646 }
4647
4648 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
4649 gen_io_start();
4650 }
4651
4652 if (isread) {
4653 /* Read */
4654 if (is64) {
4655 TCGv_i64 tmp64;
4656 TCGv_i32 tmp;
4657 if (ri->type & ARM_CP_CONST) {
4658 tmp64 = tcg_const_i64(ri->resetvalue);
4659 } else if (ri->readfn) {
4660 TCGv_ptr tmpptr;
4661 tmp64 = tcg_temp_new_i64();
4662 tmpptr = tcg_const_ptr(ri);
4663 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
4664 tcg_temp_free_ptr(tmpptr);
4665 } else {
4666 tmp64 = tcg_temp_new_i64();
4667 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4668 }
4669 tmp = tcg_temp_new_i32();
4670 tcg_gen_extrl_i64_i32(tmp, tmp64);
4671 store_reg(s, rt, tmp);
4672 tmp = tcg_temp_new_i32();
4673 tcg_gen_extrh_i64_i32(tmp, tmp64);
4674 tcg_temp_free_i64(tmp64);
4675 store_reg(s, rt2, tmp);
4676 } else {
4677 TCGv_i32 tmp;
4678 if (ri->type & ARM_CP_CONST) {
4679 tmp = tcg_const_i32(ri->resetvalue);
4680 } else if (ri->readfn) {
4681 TCGv_ptr tmpptr;
4682 tmp = tcg_temp_new_i32();
4683 tmpptr = tcg_const_ptr(ri);
4684 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
4685 tcg_temp_free_ptr(tmpptr);
4686 } else {
4687 tmp = load_cpu_offset(ri->fieldoffset);
4688 }
4689 if (rt == 15) {
4690 /* A destination register of r15 for 32-bit loads sets
4691 * the condition codes from the high 4 bits of the value
4692 */
4693 gen_set_nzcv(tmp);
4694 tcg_temp_free_i32(tmp);
4695 } else {
4696 store_reg(s, rt, tmp);
4697 }
4698 }
4699 } else {
4700 /* Write */
4701 if (ri->type & ARM_CP_CONST) {
4702 /* If not forbidden by access permissions, treat as WI */
4703 return;
4704 }
4705
4706 if (is64) {
4707 TCGv_i32 tmplo, tmphi;
4708 TCGv_i64 tmp64 = tcg_temp_new_i64();
4709 tmplo = load_reg(s, rt);
4710 tmphi = load_reg(s, rt2);
4711 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4712 tcg_temp_free_i32(tmplo);
4713 tcg_temp_free_i32(tmphi);
4714 if (ri->writefn) {
4715 TCGv_ptr tmpptr = tcg_const_ptr(ri);
4716 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
4717 tcg_temp_free_ptr(tmpptr);
4718 } else {
4719 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4720 }
4721 tcg_temp_free_i64(tmp64);
4722 } else {
4723 if (ri->writefn) {
4724 TCGv_i32 tmp;
4725 TCGv_ptr tmpptr;
4726 tmp = load_reg(s, rt);
4727 tmpptr = tcg_const_ptr(ri);
4728 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
4729 tcg_temp_free_ptr(tmpptr);
4730 tcg_temp_free_i32(tmp);
4731 } else {
4732 TCGv_i32 tmp = load_reg(s, rt);
4733 store_cpu_offset(tmp, ri->fieldoffset);
4734 }
4735 }
4736 }
4737
4738 /* I/O operations must end the TB here (whether read or write) */
4739 need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
4740 (ri->type & ARM_CP_IO));
4741
4742 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4743 /*
4744 * A write to any coprocessor register that ends a TB
4745 * must rebuild the hflags for the next TB.
4746 */
4747 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
4748 if (arm_dc_feature(s, ARM_FEATURE_M)) {
4749 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
4750 } else {
4751 if (ri->type & ARM_CP_NEWEL) {
4752 gen_helper_rebuild_hflags_a32_newel(cpu_env);
4753 } else {
4754 gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
4755 }
4756 }
4757 tcg_temp_free_i32(tcg_el);
4758 /*
4759 * We default to ending the TB on a coprocessor register write,
4760 * but allow this to be suppressed by the register definition
4761 * (usually only necessary to work around guest bugs).
4762 */
4763 need_exit_tb = true;
4764 }
4765 if (need_exit_tb) {
4766 gen_lookup_tb(s);
4767 }
4768
4769 return;
4770 }
4771
4772 /* Unknown register; this might be a guest error or a QEMU
4773 * unimplemented feature.
4774 */
4775 if (is64) {
4776 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4777 "64 bit system register cp:%d opc1: %d crm:%d "
4778 "(%s)\n",
4779 isread ? "read" : "write", cpnum, opc1, crm,
4780 s->ns ? "non-secure" : "secure");
4781 } else {
4782 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4783 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
4784 "(%s)\n",
4785 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
4786 s->ns ? "non-secure" : "secure");
4787 }
4788
4789 unallocated_encoding(s);
4790 return;
4791 }
4792
4793 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
4794 {
4795 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
4796
4797 cpnum = (insn >> 8) & 0xf;
4798
4799 is64 = (insn & (1 << 25)) == 0;
4800 if (!is64 && ((insn & (1 << 4)) == 0)) {
4801 /* cdp */
4802 return 1;
4803 }
4804
4805 crm = insn & 0xf;
4806 if (is64) {
4807 crn = 0;
4808 opc1 = (insn >> 4) & 0xf;
4809 opc2 = 0;
4810 rt2 = (insn >> 16) & 0xf;
4811 } else {
4812 crn = (insn >> 16) & 0xf;
4813 opc1 = (insn >> 21) & 7;
4814 opc2 = (insn >> 5) & 7;
4815 rt2 = 0;
4816 }
4817 isread = (insn >> 20) & 1;
4818 rt = (insn >> 12) & 0xf;
4819
4820 do_coproc_insn(s, cpnum, is64, opc1, crn, crm, opc2, isread, rt, rt2);
4821 return 0;
4822 }
4823
4824 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4825 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4826 {
4827 int cpnum = (insn >> 8) & 0xf;
4828
4829 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4830 unallocated_encoding(s);
4831 } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4832 if (disas_iwmmxt_insn(s, insn)) {
4833 unallocated_encoding(s);
4834 }
4835 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4836 if (disas_dsp_insn(s, insn)) {
4837 unallocated_encoding(s);
4838 }
4839 }
4840 }
4841
4842 /* Store a 64-bit value to a register pair. Clobbers val. */
4843 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4844 {
4845 TCGv_i32 tmp;
4846 tmp = tcg_temp_new_i32();
4847 tcg_gen_extrl_i64_i32(tmp, val);
4848 store_reg(s, rlow, tmp);
4849 tmp = tcg_temp_new_i32();
4850 tcg_gen_extrh_i64_i32(tmp, val);
4851 store_reg(s, rhigh, tmp);
4852 }
4853
4854 /* Load and add a 64-bit value from a register pair. */
4855 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4856 {
4857 TCGv_i64 tmp;
4858 TCGv_i32 tmpl;
4859 TCGv_i32 tmph;
4860
4861 /* Load 64-bit value rd:rn. */
4862 tmpl = load_reg(s, rlow);
4863 tmph = load_reg(s, rhigh);
4864 tmp = tcg_temp_new_i64();
4865 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4866 tcg_temp_free_i32(tmpl);
4867 tcg_temp_free_i32(tmph);
4868 tcg_gen_add_i64(val, val, tmp);
4869 tcg_temp_free_i64(tmp);
4870 }
4871
4872 /* Set N and Z flags from hi|lo. */
4873 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4874 {
4875 tcg_gen_mov_i32(cpu_NF, hi);
4876 tcg_gen_or_i32(cpu_ZF, lo, hi);
4877 }
4878
4879 /* Load/Store exclusive instructions are implemented by remembering
4880 the value/address loaded, and seeing if these are the same
4881 when the store is performed. This should be sufficient to implement
4882 the architecturally mandated semantics, and avoids having to monitor
4883 regular stores. The compare vs the remembered value is done during
4884 the cmpxchg operation, but we must compare the addresses manually. */
4885 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4886 TCGv_i32 addr, int size)
4887 {
4888 TCGv_i32 tmp = tcg_temp_new_i32();
4889 MemOp opc = size | MO_ALIGN | s->be_data;
4890
4891 s->is_ldex = true;
4892
4893 if (size == 3) {
4894 TCGv_i32 tmp2 = tcg_temp_new_i32();
4895 TCGv_i64 t64 = tcg_temp_new_i64();
4896
4897 /* For AArch32, architecturally the 32-bit word at the lowest
4898 * address is always Rt and the one at addr+4 is Rt2, even if
4899 * the CPU is big-endian. That means we don't want to do a
4900 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
4901 * for an architecturally 64-bit access, but instead do a
4902 * 64-bit access using MO_BE if appropriate and then split
4903 * the two halves.
4904 * This only makes a difference for BE32 user-mode, where
4905 * frob64() must not flip the two halves of the 64-bit data,
4906 * but this code must treat BE32 user-mode like BE32 system.
4907 */
4908 TCGv taddr = gen_aa32_addr(s, addr, opc);
4909
4910 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4911 tcg_temp_free(taddr);
4912 tcg_gen_mov_i64(cpu_exclusive_val, t64);
4913 if (s->be_data == MO_BE) {
4914 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4915 } else {
4916 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4917 }
4918 tcg_temp_free_i64(t64);
4919
4920 store_reg(s, rt2, tmp2);
4921 } else {
4922 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
4923 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
4924 }
4925
4926 store_reg(s, rt, tmp);
4927 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
4928 }
4929
4930 static void gen_clrex(DisasContext *s)
4931 {
4932 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4933 }
4934
4935 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
4936 TCGv_i32 addr, int size)
4937 {
4938 TCGv_i32 t0, t1, t2;
4939 TCGv_i64 extaddr;
4940 TCGv taddr;
4941 TCGLabel *done_label;
4942 TCGLabel *fail_label;
4943 MemOp opc = size | MO_ALIGN | s->be_data;
4944
4945 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
4946 [addr] = {Rt};
4947 {Rd} = 0;
4948 } else {
4949 {Rd} = 1;
4950 } */
4951 fail_label = gen_new_label();
4952 done_label = gen_new_label();
4953 extaddr = tcg_temp_new_i64();
4954 tcg_gen_extu_i32_i64(extaddr, addr);
4955 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
4956 tcg_temp_free_i64(extaddr);
4957
4958 taddr = gen_aa32_addr(s, addr, opc);
4959 t0 = tcg_temp_new_i32();
4960 t1 = load_reg(s, rt);
4961 if (size == 3) {
4962 TCGv_i64 o64 = tcg_temp_new_i64();
4963 TCGv_i64 n64 = tcg_temp_new_i64();
4964
4965 t2 = load_reg(s, rt2);
4966 /* For AArch32, architecturally the 32-bit word at the lowest
4967 * address is always Rt and the one at addr+4 is Rt2, even if
4968 * the CPU is big-endian. Since we're going to treat this as a
4969 * single 64-bit BE store, we need to put the two halves in the
4970 * opposite order for BE to LE, so that they end up in the right
4971 * places.
4972 * We don't want gen_aa32_frob64() because that does the wrong
4973 * thing for BE32 usermode.
4974 */
4975 if (s->be_data == MO_BE) {
4976 tcg_gen_concat_i32_i64(n64, t2, t1);
4977 } else {
4978 tcg_gen_concat_i32_i64(n64, t1, t2);
4979 }
4980 tcg_temp_free_i32(t2);
4981
4982 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
4983 get_mem_index(s), opc);
4984 tcg_temp_free_i64(n64);
4985
4986 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
4987 tcg_gen_extrl_i64_i32(t0, o64);
4988
4989 tcg_temp_free_i64(o64);
4990 } else {
4991 t2 = tcg_temp_new_i32();
4992 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
4993 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
4994 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
4995 tcg_temp_free_i32(t2);
4996 }
4997 tcg_temp_free_i32(t1);
4998 tcg_temp_free(taddr);
4999 tcg_gen_mov_i32(cpu_R[rd], t0);
5000 tcg_temp_free_i32(t0);
5001 tcg_gen_br(done_label);
5002
5003 gen_set_label(fail_label);
5004 tcg_gen_movi_i32(cpu_R[rd], 1);
5005 gen_set_label(done_label);
5006 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5007 }
5008
5009 /* gen_srs:
5010 * @env: CPUARMState
5011 * @s: DisasContext
5012 * @mode: mode field from insn (which stack to store to)
5013 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
5014 * @writeback: true if writeback bit set
5015 *
5016 * Generate code for the SRS (Store Return State) insn.
5017 */
5018 static void gen_srs(DisasContext *s,
5019 uint32_t mode, uint32_t amode, bool writeback)
5020 {
5021 int32_t offset;
5022 TCGv_i32 addr, tmp;
5023 bool undef = false;
5024
5025 /* SRS is:
5026 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
5027 * and specified mode is monitor mode
5028 * - UNDEFINED in Hyp mode
5029 * - UNPREDICTABLE in User or System mode
5030 * - UNPREDICTABLE if the specified mode is:
5031 * -- not implemented
5032 * -- not a valid mode number
5033 * -- a mode that's at a higher exception level
5034 * -- Monitor, if we are Non-secure
5035 * For the UNPREDICTABLE cases we choose to UNDEF.
5036 */
5037 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5038 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
5039 return;
5040 }
5041
5042 if (s->current_el == 0 || s->current_el == 2) {
5043 undef = true;
5044 }
5045
5046 switch (mode) {
5047 case ARM_CPU_MODE_USR:
5048 case ARM_CPU_MODE_FIQ:
5049 case ARM_CPU_MODE_IRQ:
5050 case ARM_CPU_MODE_SVC:
5051 case ARM_CPU_MODE_ABT:
5052 case ARM_CPU_MODE_UND:
5053 case ARM_CPU_MODE_SYS:
5054 break;
5055 case ARM_CPU_MODE_HYP:
5056 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5057 undef = true;
5058 }
5059 break;
5060 case ARM_CPU_MODE_MON:
5061 /* No need to check specifically for "are we non-secure" because
5062 * we've already made EL0 UNDEF and handled the trap for S-EL1;
5063 * so if this isn't EL3 then we must be non-secure.
5064 */
5065 if (s->current_el != 3) {
5066 undef = true;
5067 }
5068 break;
5069 default:
5070 undef = true;
5071 }
5072
5073 if (undef) {
5074 unallocated_encoding(s);
5075 return;
5076 }
5077
5078 addr = tcg_temp_new_i32();
5079 tmp = tcg_const_i32(mode);
5080 /* get_r13_banked() will raise an exception if called from System mode */
5081 gen_set_condexec(s);
5082 gen_set_pc_im(s, s->pc_curr);
5083 gen_helper_get_r13_banked(addr, cpu_env, tmp);
5084 tcg_temp_free_i32(tmp);
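/*
 * Adjust the banked SP to the address of the lower of the two words to
 * be stored: LR goes there and the SPSR at the following word, according
 * to the DA/IA/DB/IB addressing mode.
 */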
5085 switch (amode) {
5086 case 0: /* DA */
5087 offset = -4;
5088 break;
5089 case 1: /* IA */
5090 offset = 0;
5091 break;
5092 case 2: /* DB */
5093 offset = -8;
5094 break;
5095 case 3: /* IB */
5096 offset = 4;
5097 break;
5098 default:
5099 abort();
5100 }
5101 tcg_gen_addi_i32(addr, addr, offset);
5102 tmp = load_reg(s, 14);
5103 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
5104 tcg_temp_free_i32(tmp);
5105 tmp = load_cpu_field(spsr);
5106 tcg_gen_addi_i32(addr, addr, 4);
5107 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
5108 tcg_temp_free_i32(tmp);
5109 if (writeback) {
5110 switch (amode) {
5111 case 0:
5112 offset = -8;
5113 break;
5114 case 1:
5115 offset = 4;
5116 break;
5117 case 2:
5118 offset = -4;
5119 break;
5120 case 3:
5121 offset = 0;
5122 break;
5123 default:
5124 abort();
5125 }
5126 tcg_gen_addi_i32(addr, addr, offset);
5127 tmp = tcg_const_i32(mode);
5128 gen_helper_set_r13_banked(cpu_env, tmp, addr);
5129 tcg_temp_free_i32(tmp);
5130 }
5131 tcg_temp_free_i32(addr);
5132 s->base.is_jmp = DISAS_UPDATE_EXIT;
5133 }
5134
5135 /* Generate a label used for skipping this instruction */
5136 static void arm_gen_condlabel(DisasContext *s)
5137 {
5138 if (!s->condjmp) {
5139 s->condlabel = gen_new_label();
5140 s->condjmp = 1;
5141 }
5142 }
5143
5144 /* Skip this instruction if the ARM condition is false */
5145 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5146 {
5147 arm_gen_condlabel(s);
5148 arm_gen_test_cc(cond ^ 1, s->condlabel);
5149 }
5150
5151
5152 /*
5153 * Constant expanders for the decoders.
5154 */
5155
5156 static int negate(DisasContext *s, int x)
5157 {
5158 return -x;
5159 }
5160
5161 static int plus_2(DisasContext *s, int x)
5162 {
5163 return x + 2;
5164 }
5165
5166 static int times_2(DisasContext *s, int x)
5167 {
5168 return x * 2;
5169 }
5170
5171 static int times_4(DisasContext *s, int x)
5172 {
5173 return x * 4;
5174 }
5175
5176 /* Return only the rotation part of T32ExpandImm. */
5177 static int t32_expandimm_rot(DisasContext *s, int x)
5178 {
5179 return x & 0xc00 ? extract32(x, 7, 5) : 0;
5180 }
5181
5182 /* Return the unrotated immediate from T32ExpandImm. */
5183 static int t32_expandimm_imm(DisasContext *s, int x)
5184 {
5185 int imm = extract32(x, 0, 8);
5186
5187 switch (extract32(x, 8, 4)) {
5188 case 0: /* XY */
5189 /* Nothing to do. */
5190 break;
5191 case 1: /* 00XY00XY */
5192 imm *= 0x00010001;
5193 break;
5194 case 2: /* XY00XY00 */
5195 imm *= 0x01000100;
5196 break;
5197 case 3: /* XYXYXYXY */
5198 imm *= 0x01010101;
5199 break;
5200 default:
5201 /* Rotated constant. */
5202 imm |= 0x80;
5203 break;
5204 }
5205 return imm;
5206 }
5207
5208 static int t32_branch24(DisasContext *s, int x)
5209 {
5210 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
5211 x ^= !(x < 0) * (3 << 21);
5212 /* Append the final zero. */
5213 return x << 1;
5214 }
5215
5216 static int t16_setflags(DisasContext *s)
5217 {
5218 return s->condexec_mask == 0;
5219 }
5220
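/*
 * For the T16 PUSH/POP register lists, bit 8 of the encoding selects
 * LR (r14) for PUSH and PC (r15) for POP; shift it into place.
 */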
5221 static int t16_push_list(DisasContext *s, int x)
5222 {
5223 return (x & 0xff) | (x & 0x100) << (14 - 8);
5224 }
5225
5226 static int t16_pop_list(DisasContext *s, int x)
5227 {
5228 return (x & 0xff) | (x & 0x100) << (15 - 8);
5229 }
5230
5231 /*
5232 * Include the generated decoders.
5233 */
5234
5235 #include "decode-a32.c.inc"
5236 #include "decode-a32-uncond.c.inc"
5237 #include "decode-t32.c.inc"
5238 #include "decode-t16.c.inc"
5239
5240 static bool valid_cp(DisasContext *s, int cp)
5241 {
5242 /*
5243 * Return true if this coprocessor field indicates something
5244 * that's really a possible coprocessor.
5245 * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5246 * and of those only cp14 and cp15 were used for registers.
5247 * cp10 and cp11 were used for VFP and Neon, whose decode is
5248 * dealt with elsewhere. With the advent of fp16, cp9 is also
5249 * now part of VFP.
5250 * For v8A and later, the encoding has been tightened so that
5251 * only cp14 and cp15 are valid, and other values aren't considered
5252 * to be in the coprocessor-instruction space at all. v8M still
5253 * permits coprocessors 0..7.
5254 */
5255 if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5256 !arm_dc_feature(s, ARM_FEATURE_M)) {
5257 return cp >= 14;
5258 }
5259 return cp < 8 || cp >= 14;
5260 }
5261
5262 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5263 {
5264 if (!valid_cp(s, a->cp)) {
5265 return false;
5266 }
5267 do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5268 false, a->rt, 0);
5269 return true;
5270 }
5271
5272 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5273 {
5274 if (!valid_cp(s, a->cp)) {
5275 return false;
5276 }
5277 do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5278 true, a->rt, 0);
5279 return true;
5280 }
5281
5282 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5283 {
5284 if (!valid_cp(s, a->cp)) {
5285 return false;
5286 }
5287 do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5288 false, a->rt, a->rt2);
5289 return true;
5290 }
5291
5292 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5293 {
5294 if (!valid_cp(s, a->cp)) {
5295 return false;
5296 }
5297 do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5298 true, a->rt, a->rt2);
5299 return true;
5300 }
5301
5302 /* Helpers to swap operands for reverse-subtract. */
5303 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5304 {
5305 tcg_gen_sub_i32(dst, b, a);
5306 }
5307
5308 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5309 {
5310 gen_sub_CC(dst, b, a);
5311 }
5312
5313 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5314 {
5315 gen_sub_carry(dest, b, a);
5316 }
5317
5318 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5319 {
5320 gen_sbc_CC(dest, b, a);
5321 }
5322
5323 /*
5324 * Helpers for the data processing routines.
5325 *
5326 * After the computation, store the results back.
5327 * This may be suppressed altogether (STREG_NONE), require a runtime
5328 * check against the stack limits (STREG_SP_CHECK), or generate an
5329 * exception return. Oh, or store into a register.
5330 *
5331 * Always return true, indicating success for a trans_* function.
5332 */
5333 typedef enum {
5334 STREG_NONE,
5335 STREG_NORMAL,
5336 STREG_SP_CHECK,
5337 STREG_EXC_RET,
5338 } StoreRegKind;
5339
5340 static bool store_reg_kind(DisasContext *s, int rd,
5341 TCGv_i32 val, StoreRegKind kind)
5342 {
5343 switch (kind) {
5344 case STREG_NONE:
5345 tcg_temp_free_i32(val);
5346 return true;
5347 case STREG_NORMAL:
5348 /* See ALUWritePC: Interworking only from a32 mode. */
5349 if (s->thumb) {
5350 store_reg(s, rd, val);
5351 } else {
5352 store_reg_bx(s, rd, val);
5353 }
5354 return true;
5355 case STREG_SP_CHECK:
5356 store_sp_checked(s, val);
5357 return true;
5358 case STREG_EXC_RET:
5359 gen_exception_return(s, val);
5360 return true;
5361 }
5362 g_assert_not_reached();
5363 }
5364
5365 /*
5366 * Data Processing (register)
5367 *
5368 * Operate, with set flags, one register source,
5369 * one immediate shifted register source, and a destination.
5370 */
5371 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5372 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5373 int logic_cc, StoreRegKind kind)
5374 {
5375 TCGv_i32 tmp1, tmp2;
5376
5377 tmp2 = load_reg(s, a->rm);
5378 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5379 tmp1 = load_reg(s, a->rn);
5380
5381 gen(tmp1, tmp1, tmp2);
5382 tcg_temp_free_i32(tmp2);
5383
5384 if (logic_cc) {
5385 gen_logic_CC(tmp1);
5386 }
5387 return store_reg_kind(s, a->rd, tmp1, kind);
5388 }
5389
5390 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5391 void (*gen)(TCGv_i32, TCGv_i32),
5392 int logic_cc, StoreRegKind kind)
5393 {
5394 TCGv_i32 tmp;
5395
5396 tmp = load_reg(s, a->rm);
5397 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5398
5399 gen(tmp, tmp);
5400 if (logic_cc) {
5401 gen_logic_CC(tmp);
5402 }
5403 return store_reg_kind(s, a->rd, tmp, kind);
5404 }
5405
5406 /*
5407 * Data-processing (register-shifted register)
5408 *
5409 * Operate, with set flags, one register source,
5410 * one register shifted register source, and a destination.
5411 */
5412 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5413 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5414 int logic_cc, StoreRegKind kind)
5415 {
5416 TCGv_i32 tmp1, tmp2;
5417
5418 tmp1 = load_reg(s, a->rs);
5419 tmp2 = load_reg(s, a->rm);
5420 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5421 tmp1 = load_reg(s, a->rn);
5422
5423 gen(tmp1, tmp1, tmp2);
5424 tcg_temp_free_i32(tmp2);
5425
5426 if (logic_cc) {
5427 gen_logic_CC(tmp1);
5428 }
5429 return store_reg_kind(s, a->rd, tmp1, kind);
5430 }
5431
5432 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5433 void (*gen)(TCGv_i32, TCGv_i32),
5434 int logic_cc, StoreRegKind kind)
5435 {
5436 TCGv_i32 tmp1, tmp2;
5437
5438 tmp1 = load_reg(s, a->rs);
5439 tmp2 = load_reg(s, a->rm);
5440 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5441
5442 gen(tmp2, tmp2);
5443 if (logic_cc) {
5444 gen_logic_CC(tmp2);
5445 }
5446 return store_reg_kind(s, a->rd, tmp2, kind);
5447 }
5448
5449 /*
5450 * Data-processing (immediate)
5451 *
5452 * Operate, with set flags, one register source,
5453 * one rotated immediate, and a destination.
5454 *
5455 * Note that logic_cc && a->rot setting CF based on the msb of the
5456 * immediate is the reason why we must pass in the unrotated form
5457 * of the immediate.
5458 */
5459 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5460 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5461 int logic_cc, StoreRegKind kind)
5462 {
5463 TCGv_i32 tmp1, tmp2;
5464 uint32_t imm;
5465
5466 imm = ror32(a->imm, a->rot);
5467 if (logic_cc && a->rot) {
5468 tcg_gen_movi_i32(cpu_CF, imm >> 31);
5469 }
5470 tmp2 = tcg_const_i32(imm);
5471 tmp1 = load_reg(s, a->rn);
5472
5473 gen(tmp1, tmp1, tmp2);
5474 tcg_temp_free_i32(tmp2);
5475
5476 if (logic_cc) {
5477 gen_logic_CC(tmp1);
5478 }
5479 return store_reg_kind(s, a->rd, tmp1, kind);
5480 }
5481
5482 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5483 void (*gen)(TCGv_i32, TCGv_i32),
5484 int logic_cc, StoreRegKind kind)
5485 {
5486 TCGv_i32 tmp;
5487 uint32_t imm;
5488
5489 imm = ror32(a->imm, a->rot);
5490 if (logic_cc && a->rot) {
5491 tcg_gen_movi_i32(cpu_CF, imm >> 31);
5492 }
5493 tmp = tcg_const_i32(imm);
5494
5495 gen(tmp, tmp);
5496 if (logic_cc) {
5497 gen_logic_CC(tmp);
5498 }
5499 return store_reg_kind(s, a->rd, tmp, kind);
5500 }
5501
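/*
 * Expand the three encoding forms (immediate-shifted register,
 * register-shifted register, rotated immediate) for each operation.
 * DO_ANY2 covers the single-source forms (MOV, MVN) and DO_CMP2 the
 * compare/test forms, which discard the result (STREG_NONE).
 */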
5502 #define DO_ANY3(NAME, OP, L, K) \
5503 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
5504 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
5505 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
5506 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
5507 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
5508 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5509
5510 #define DO_ANY2(NAME, OP, L, K) \
5511 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
5512 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
5513 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
5514 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
5515 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
5516 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5517
5518 #define DO_CMP2(NAME, OP, L) \
5519 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
5520 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
5521 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
5522 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
5523 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
5524 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
5525
5526 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5527 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5528 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5529 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5530
5531 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5532 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5533 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5534 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5535
5536 DO_CMP2(TST, tcg_gen_and_i32, true)
5537 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5538 DO_CMP2(CMN, gen_add_CC, false)
5539 DO_CMP2(CMP, gen_sub_CC, false)
5540
5541 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5542 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5543
5544 /*
5545 * Note that for the computation of StoreRegKind we return out of the
5546 * middle of the functions that are expanded by DO_ANY3, and that
5547 * we modify a->s via that parameter before it is used by OP.
5548 */
5549 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5550 ({
5551 StoreRegKind ret = STREG_NORMAL;
5552 if (a->rd == 15 && a->s) {
5553 /*
5554 * See ALUExceptionReturn:
5555 * In User mode, UNPREDICTABLE; we choose UNDEF.
5556 * In Hyp mode, UNDEFINED.
5557 */
5558 if (IS_USER(s) || s->current_el == 2) {
5559 unallocated_encoding(s);
5560 return true;
5561 }
5562 /* There is no writeback of nzcv to PSTATE. */
5563 a->s = 0;
5564 ret = STREG_EXC_RET;
5565 } else if (a->rd == 13 && a->rn == 13) {
5566 ret = STREG_SP_CHECK;
5567 }
5568 ret;
5569 }))
5570
5571 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5572 ({
5573 StoreRegKind ret = STREG_NORMAL;
5574 if (a->rd == 15 && a->s) {
5575 /*
5576 * See ALUExceptionReturn:
5577 * In User mode, UNPREDICTABLE; we choose UNDEF.
5578 * In Hyp mode, UNDEFINED.
5579 */
5580 if (IS_USER(s) || s->current_el == 2) {
5581 unallocated_encoding(s);
5582 return true;
5583 }
5584 /* There is no writeback of nzcv to PSTATE. */
5585 a->s = 0;
5586 ret = STREG_EXC_RET;
5587 } else if (a->rd == 13) {
5588 ret = STREG_SP_CHECK;
5589 }
5590 ret;
5591 }))
5592
5593 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5594
5595 /*
5596 * ORN is only available with T32, so there is no register-shifted-register
5597 * form of the insn. Using the DO_ANY3 macro would create an unused function.
5598 */
5599 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5600 {
5601 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5602 }
5603
5604 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5605 {
5606 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5607 }
5608
5609 #undef DO_ANY3
5610 #undef DO_ANY2
5611 #undef DO_CMP2
5612
5613 static bool trans_ADR(DisasContext *s, arg_ri *a)
5614 {
5615 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5616 return true;
5617 }
5618
5619 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5620 {
5621 TCGv_i32 tmp;
5622
5623 if (!ENABLE_ARCH_6T2) {
5624 return false;
5625 }
5626
5627 tmp = tcg_const_i32(a->imm);
5628 store_reg(s, a->rd, tmp);
5629 return true;
5630 }
5631
5632 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5633 {
5634 TCGv_i32 tmp;
5635
5636 if (!ENABLE_ARCH_6T2) {
5637 return false;
5638 }
5639
5640 tmp = load_reg(s, a->rd);
5641 tcg_gen_ext16u_i32(tmp, tmp);
5642 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5643 store_reg(s, a->rd, tmp);
5644 return true;
5645 }
5646
5647 /*
5648 * Multiply and multiply accumulate
5649 */
5650
5651 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5652 {
5653 TCGv_i32 t1, t2;
5654
5655 t1 = load_reg(s, a->rn);
5656 t2 = load_reg(s, a->rm);
5657 tcg_gen_mul_i32(t1, t1, t2);
5658 tcg_temp_free_i32(t2);
5659 if (add) {
5660 t2 = load_reg(s, a->ra);
5661 tcg_gen_add_i32(t1, t1, t2);
5662 tcg_temp_free_i32(t2);
5663 }
5664 if (a->s) {
5665 gen_logic_CC(t1);
5666 }
5667 store_reg(s, a->rd, t1);
5668 return true;
5669 }
5670
5671 static bool trans_MUL(DisasContext *s, arg_MUL *a)
5672 {
5673 return op_mla(s, a, false);
5674 }
5675
5676 static bool trans_MLA(DisasContext *s, arg_MLA *a)
5677 {
5678 return op_mla(s, a, true);
5679 }
5680
5681 static bool trans_MLS(DisasContext *s, arg_MLS *a)
5682 {
5683 TCGv_i32 t1, t2;
5684
5685 if (!ENABLE_ARCH_6T2) {
5686 return false;
5687 }
5688 t1 = load_reg(s, a->rn);
5689 t2 = load_reg(s, a->rm);
5690 tcg_gen_mul_i32(t1, t1, t2);
5691 tcg_temp_free_i32(t2);
5692 t2 = load_reg(s, a->ra);
5693 tcg_gen_sub_i32(t1, t2, t1);
5694 tcg_temp_free_i32(t2);
5695 store_reg(s, a->rd, t1);
5696 return true;
5697 }
5698
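/*
 * Long multiplies: the 64-bit product and accumulator live in t1:t0,
 * with a->ra holding the low half (RdLo) and a->rd the high half (RdHi).
 */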
5699 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5700 {
5701 TCGv_i32 t0, t1, t2, t3;
5702
5703 t0 = load_reg(s, a->rm);
5704 t1 = load_reg(s, a->rn);
5705 if (uns) {
5706 tcg_gen_mulu2_i32(t0, t1, t0, t1);
5707 } else {
5708 tcg_gen_muls2_i32(t0, t1, t0, t1);
5709 }
5710 if (add) {
5711 t2 = load_reg(s, a->ra);
5712 t3 = load_reg(s, a->rd);
5713 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5714 tcg_temp_free_i32(t2);
5715 tcg_temp_free_i32(t3);
5716 }
5717 if (a->s) {
5718 gen_logicq_cc(t0, t1);
5719 }
5720 store_reg(s, a->ra, t0);
5721 store_reg(s, a->rd, t1);
5722 return true;
5723 }
5724
5725 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5726 {
5727 return op_mlal(s, a, true, false);
5728 }
5729
5730 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5731 {
5732 return op_mlal(s, a, false, false);
5733 }
5734
5735 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5736 {
5737 return op_mlal(s, a, true, true);
5738 }
5739
5740 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5741 {
5742 return op_mlal(s, a, false, true);
5743 }
5744
5745 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5746 {
5747 TCGv_i32 t0, t1, t2, zero;
5748
5749 if (s->thumb
5750 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5751 : !ENABLE_ARCH_6) {
5752 return false;
5753 }
5754
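/*
 * The 64-bit result cannot overflow: the worst case is
 * (2**32 - 1)**2 + 2 * (2**32 - 1) == 2**64 - 1, so the two add2
 * steps below, each with a zero high-word addend, are sufficient.
 */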
5755 t0 = load_reg(s, a->rm);
5756 t1 = load_reg(s, a->rn);
5757 tcg_gen_mulu2_i32(t0, t1, t0, t1);
5758 zero = tcg_const_i32(0);
5759 t2 = load_reg(s, a->ra);
5760 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5761 tcg_temp_free_i32(t2);
5762 t2 = load_reg(s, a->rd);
5763 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5764 tcg_temp_free_i32(t2);
5765 tcg_temp_free_i32(zero);
5766 store_reg(s, a->ra, t0);
5767 store_reg(s, a->rd, t1);
5768 return true;
5769 }
5770
5771 /*
5772 * Saturating addition and subtraction
5773 */
5774
5775 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5776 {
5777 TCGv_i32 t0, t1;
5778
5779 if (s->thumb
5780 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5781 : !ENABLE_ARCH_5TE) {
5782 return false;
5783 }
5784
5785 t0 = load_reg(s, a->rm);
5786 t1 = load_reg(s, a->rn);
5787 if (doub) {
5788 gen_helper_add_saturate(t1, cpu_env, t1, t1);
5789 }
5790 if (add) {
5791 gen_helper_add_saturate(t0, cpu_env, t0, t1);
5792 } else {
5793 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
5794 }
5795 tcg_temp_free_i32(t1);
5796 store_reg(s, a->rd, t0);
5797 return true;
5798 }
5799
5800 #define DO_QADDSUB(NAME, ADD, DOUB) \
5801 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
5802 { \
5803 return op_qaddsub(s, a, ADD, DOUB); \
5804 }
5805
5806 DO_QADDSUB(QADD, true, false)
5807 DO_QADDSUB(QSUB, false, false)
5808 DO_QADDSUB(QDADD, true, true)
5809 DO_QADDSUB(QDSUB, false, true)
5810
5811 #undef DO_QADDSUB
5812
5813 /*
5814 * Halfword multiply and multiply accumulate
5815 */
5816
5817 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5818 int add_long, bool nt, bool mt)
5819 {
5820 TCGv_i32 t0, t1, tl, th;
5821
5822 if (s->thumb
5823 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5824 : !ENABLE_ARCH_5TE) {
5825 return false;
5826 }
5827
5828 t0 = load_reg(s, a->rn);
5829 t1 = load_reg(s, a->rm);
5830 gen_mulxy(t0, t1, nt, mt);
5831 tcg_temp_free_i32(t1);
5832
5833 switch (add_long) {
5834 case 0:
5835 store_reg(s, a->rd, t0);
5836 break;
5837 case 1:
5838 t1 = load_reg(s, a->ra);
5839 gen_helper_add_setq(t0, cpu_env, t0, t1);
5840 tcg_temp_free_i32(t1);
5841 store_reg(s, a->rd, t0);
5842 break;
5843 case 2:
5844 tl = load_reg(s, a->ra);
5845 th = load_reg(s, a->rd);
5846 /* Sign-extend the 32-bit product to 64 bits. */
5847 t1 = tcg_temp_new_i32();
5848 tcg_gen_sari_i32(t1, t0, 31);
5849 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
5850 tcg_temp_free_i32(t0);
5851 tcg_temp_free_i32(t1);
5852 store_reg(s, a->ra, tl);
5853 store_reg(s, a->rd, th);
5854 break;
5855 default:
5856 g_assert_not_reached();
5857 }
5858 return true;
5859 }
5860
5861 #define DO_SMLAX(NAME, add, nt, mt) \
5862 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
5863 { \
5864 return op_smlaxxx(s, a, add, nt, mt); \
5865 }
5866
5867 DO_SMLAX(SMULBB, 0, 0, 0)
5868 DO_SMLAX(SMULBT, 0, 0, 1)
5869 DO_SMLAX(SMULTB, 0, 1, 0)
5870 DO_SMLAX(SMULTT, 0, 1, 1)
5871
5872 DO_SMLAX(SMLABB, 1, 0, 0)
5873 DO_SMLAX(SMLABT, 1, 0, 1)
5874 DO_SMLAX(SMLATB, 1, 1, 0)
5875 DO_SMLAX(SMLATT, 1, 1, 1)
5876
5877 DO_SMLAX(SMLALBB, 2, 0, 0)
5878 DO_SMLAX(SMLALBT, 2, 0, 1)
5879 DO_SMLAX(SMLALTB, 2, 1, 0)
5880 DO_SMLAX(SMLALTT, 2, 1, 1)
5881
5882 #undef DO_SMLAX
5883
5884 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
5885 {
5886 TCGv_i32 t0, t1;
5887
5888 if (!ENABLE_ARCH_5TE) {
5889 return false;
5890 }
5891
5892 t0 = load_reg(s, a->rn);
5893 t1 = load_reg(s, a->rm);
5894 /*
5895 * Since the nominal result is product<47:16>, shift the 16-bit
5896 * input up by 16 bits, so that the result is at product<63:32>.
5897 */
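/*
 * E.g. with Rn == 0x00010000 and Rm<15:0> == 2 the 48-bit product
 * is 0x2_0000, so product<47:16> == 2; after the shift the 64-bit
 * product is 0x2_0000_0000, whose high word is likewise 2.
 */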
5898 if (mt) {
5899 tcg_gen_andi_i32(t1, t1, 0xffff0000);
5900 } else {
5901 tcg_gen_shli_i32(t1, t1, 16);
5902 }
5903 tcg_gen_muls2_i32(t0, t1, t0, t1);
5904 tcg_temp_free_i32(t0);
5905 if (add) {
5906 t0 = load_reg(s, a->ra);
5907 gen_helper_add_setq(t1, cpu_env, t1, t0);
5908 tcg_temp_free_i32(t0);
5909 }
5910 store_reg(s, a->rd, t1);
5911 return true;
5912 }
5913
5914 #define DO_SMLAWX(NAME, add, mt) \
5915 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
5916 { \
5917 return op_smlawx(s, a, add, mt); \
5918 }
5919
5920 DO_SMLAWX(SMULWB, 0, 0)
5921 DO_SMLAWX(SMULWT, 0, 1)
5922 DO_SMLAWX(SMLAWB, 1, 0)
5923 DO_SMLAWX(SMLAWT, 1, 1)
5924
5925 #undef DO_SMLAWX
5926
5927 /*
5928 * MSR (immediate) and hints
5929 */
5930
5931 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
5932 {
5933 /*
5934 * When running single-threaded TCG code, use the helper to ensure that
5935 * the next round-robin scheduled vCPU gets a crack. When running in
5936 * MTTCG we don't generate jumps to the helper as it won't affect the
5937 * scheduling of other vCPUs.
5938 */
5939 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
5940 gen_set_pc_im(s, s->base.pc_next);
5941 s->base.is_jmp = DISAS_YIELD;
5942 }
5943 return true;
5944 }
5945
5946 static bool trans_WFE(DisasContext *s, arg_WFE *a)
5947 {
5948 /*
5949 * When running single-threaded TCG code, use the helper to ensure that
5950 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
5951 * just skip this instruction. Currently the SEV/SEVL instructions,
5952 * which are among the many ways to wake the CPU from WFE, are not
5953 * implemented, so we can't sleep like WFI does.
5954 */
5955 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
5956 gen_set_pc_im(s, s->base.pc_next);
5957 s->base.is_jmp = DISAS_WFE;
5958 }
5959 return true;
5960 }
5961
5962 static bool trans_WFI(DisasContext *s, arg_WFI *a)
5963 {
5964 /* For WFI, halt the vCPU until an IRQ. */
5965 gen_set_pc_im(s, s->base.pc_next);
5966 s->base.is_jmp = DISAS_WFI;
5967 return true;
5968 }
5969
5970 static bool trans_NOP(DisasContext *s, arg_NOP *a)
5971 {
5972 return true;
5973 }
5974
5975 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
5976 {
5977 uint32_t val = ror32(a->imm, a->rot * 2);
5978 uint32_t mask = msr_mask(s, a->mask, a->r);
5979
5980 if (gen_set_psr_im(s, mask, a->r, val)) {
5981 unallocated_encoding(s);
5982 }
5983 return true;
5984 }
5985
5986 /*
5987 * Cyclic Redundancy Check
5988 */
5989
5990 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
5991 {
5992 TCGv_i32 t1, t2, t3;
5993
5994 if (!dc_isar_feature(aa32_crc32, s)) {
5995 return false;
5996 }
5997
5998 t1 = load_reg(s, a->rn);
5999 t2 = load_reg(s, a->rm);
6000 switch (sz) {
6001 case MO_8:
6002 gen_uxtb(t2);
6003 break;
6004 case MO_16:
6005 gen_uxth(t2);
6006 break;
6007 case MO_32:
6008 break;
6009 default:
6010 g_assert_not_reached();
6011 }
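/* The helper takes the operand size in bytes: 1 << sz is 1, 2 or 4 here. */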
6012 t3 = tcg_const_i32(1 << sz);
6013 if (c) {
6014 gen_helper_crc32c(t1, t1, t2, t3);
6015 } else {
6016 gen_helper_crc32(t1, t1, t2, t3);
6017 }
6018 tcg_temp_free_i32(t2);
6019 tcg_temp_free_i32(t3);
6020 store_reg(s, a->rd, t1);
6021 return true;
6022 }
6023
6024 #define DO_CRC32(NAME, c, sz) \
6025 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
6026 { return op_crc32(s, a, c, sz); }
6027
6028 DO_CRC32(CRC32B, false, MO_8)
6029 DO_CRC32(CRC32H, false, MO_16)
6030 DO_CRC32(CRC32W, false, MO_32)
6031 DO_CRC32(CRC32CB, true, MO_8)
6032 DO_CRC32(CRC32CH, true, MO_16)
6033 DO_CRC32(CRC32CW, true, MO_32)
6034
6035 #undef DO_CRC32
6036
6037 /*
6038 * Miscellaneous instructions
6039 */
6040
6041 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6042 {
6043 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6044 return false;
6045 }
6046 gen_mrs_banked(s, a->r, a->sysm, a->rd);
6047 return true;
6048 }
6049
6050 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6051 {
6052 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6053 return false;
6054 }
6055 gen_msr_banked(s, a->r, a->sysm, a->rn);
6056 return true;
6057 }
6058
6059 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6060 {
6061 TCGv_i32 tmp;
6062
6063 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6064 return false;
6065 }
6066 if (a->r) {
6067 if (IS_USER(s)) {
6068 unallocated_encoding(s);
6069 return true;
6070 }
6071 tmp = load_cpu_field(spsr);
6072 } else {
6073 tmp = tcg_temp_new_i32();
6074 gen_helper_cpsr_read(tmp, cpu_env);
6075 }
6076 store_reg(s, a->rd, tmp);
6077 return true;
6078 }
6079
6080 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6081 {
6082 TCGv_i32 tmp;
6083 uint32_t mask = msr_mask(s, a->mask, a->r);
6084
6085 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6086 return false;
6087 }
6088 tmp = load_reg(s, a->rn);
6089 if (gen_set_psr(s, mask, a->r, tmp)) {
6090 unallocated_encoding(s);
6091 }
6092 return true;
6093 }
6094
6095 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6096 {
6097 TCGv_i32 tmp;
6098
6099 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6100 return false;
6101 }
6102 tmp = tcg_const_i32(a->sysm);
6103 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
6104 store_reg(s, a->rd, tmp);
6105 return true;
6106 }
6107
6108 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6109 {
6110 TCGv_i32 addr, reg;
6111
6112 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6113 return false;
6114 }
6115 addr = tcg_const_i32((a->mask << 10) | a->sysm);
6116 reg = load_reg(s, a->rn);
6117 gen_helper_v7m_msr(cpu_env, addr, reg);
6118 tcg_temp_free_i32(addr);
6119 tcg_temp_free_i32(reg);
6120 /* If we wrote to CONTROL, the EL might have changed */
6121 gen_helper_rebuild_hflags_m32_newel(cpu_env);
6122 gen_lookup_tb(s);
6123 return true;
6124 }
6125
6126 static bool trans_BX(DisasContext *s, arg_BX *a)
6127 {
6128 if (!ENABLE_ARCH_4T) {
6129 return false;
6130 }
6131 gen_bx_excret(s, load_reg(s, a->rm));
6132 return true;
6133 }
6134
6135 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6136 {
6137 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6138 return false;
6139 }
6140 /* Trivial implementation equivalent to bx. */
6141 gen_bx(s, load_reg(s, a->rm));
6142 return true;
6143 }
6144
6145 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6146 {
6147 TCGv_i32 tmp;
6148
6149 if (!ENABLE_ARCH_5) {
6150 return false;
6151 }
6152 tmp = load_reg(s, a->rm);
6153 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
6154 gen_bx(s, tmp);
6155 return true;
6156 }
6157
6158 /*
6159 * BXNS/BLXNS: only exist for v8M with the security extensions,
6160 * and always UNDEF if NonSecure. We don't implement these in
6161 * user-only mode either (in theory you can use them from
6162 * Secure User mode but they are too tied into system emulation).
6163 */
6164 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6165 {
6166 if (!s->v8m_secure || IS_USER_ONLY) {
6167 unallocated_encoding(s);
6168 } else {
6169 gen_bxns(s, a->rm);
6170 }
6171 return true;
6172 }
6173
6174 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6175 {
6176 if (!s->v8m_secure || IS_USER_ONLY) {
6177 unallocated_encoding(s);
6178 } else {
6179 gen_blxns(s, a->rm);
6180 }
6181 return true;
6182 }
6183
6184 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6185 {
6186 TCGv_i32 tmp;
6187
6188 if (!ENABLE_ARCH_5) {
6189 return false;
6190 }
6191 tmp = load_reg(s, a->rm);
6192 tcg_gen_clzi_i32(tmp, tmp, 32);
6193 store_reg(s, a->rd, tmp);
6194 return true;
6195 }
6196
6197 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6198 {
6199 TCGv_i32 tmp;
6200
6201 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6202 return false;
6203 }
6204 if (IS_USER(s)) {
6205 unallocated_encoding(s);
6206 return true;
6207 }
6208 if (s->current_el == 2) {
6209 /* ERET from Hyp uses ELR_Hyp, not LR */
6210 tmp = load_cpu_field(elr_el[2]);
6211 } else {
6212 tmp = load_reg(s, 14);
6213 }
6214 gen_exception_return(s, tmp);
6215 return true;
6216 }
6217
6218 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6219 {
6220 gen_hlt(s, a->imm);
6221 return true;
6222 }
6223
6224 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6225 {
6226 if (!ENABLE_ARCH_5) {
6227 return false;
6228 }
6229 if (arm_dc_feature(s, ARM_FEATURE_M) &&
6230 semihosting_enabled() &&
6231 #ifndef CONFIG_USER_ONLY
6232 !IS_USER(s) &&
6233 #endif
6234 (a->imm == 0xab)) {
6235 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
6236 } else {
6237 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6238 }
6239 return true;
6240 }
6241
6242 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6243 {
6244 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6245 return false;
6246 }
6247 if (IS_USER(s)) {
6248 unallocated_encoding(s);
6249 } else {
6250 gen_hvc(s, a->imm);
6251 }
6252 return true;
6253 }
6254
6255 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6256 {
6257 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6258 return false;
6259 }
6260 if (IS_USER(s)) {
6261 unallocated_encoding(s);
6262 } else {
6263 gen_smc(s);
6264 }
6265 return true;
6266 }
6267
6268 static bool trans_SG(DisasContext *s, arg_SG *a)
6269 {
6270 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6271 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6272 return false;
6273 }
6274 /*
6275 * SG (v8M only)
6276 * The bulk of the behaviour for this instruction is implemented
6277 * in v7m_handle_execute_nsc(), which deals with the insn when
6278 * it is executed by a CPU in non-secure state from memory
6279 * which is Secure & NonSecure-Callable.
6280 * Here we only need to handle the remaining cases:
6281 * * in NS memory (including the "security extension not
6282 * implemented" case) : NOP
6283 * * in S memory but CPU already secure (clear IT bits)
6284 * We know that the attribute for the memory this insn is
6285 * in must match the current CPU state, because otherwise
6286 * get_phys_addr_pmsav8 would have generated an exception.
6287 */
6288 if (s->v8m_secure) {
6289 /* Like the IT insn, we don't need to generate any code */
6290 s->condexec_cond = 0;
6291 s->condexec_mask = 0;
6292 }
6293 return true;
6294 }
6295
6296 static bool trans_TT(DisasContext *s, arg_TT *a)
6297 {
6298 TCGv_i32 addr, tmp;
6299
6300 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6301 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6302 return false;
6303 }
6304 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6305 /* We UNDEF for these UNPREDICTABLE cases */
6306 unallocated_encoding(s);
6307 return true;
6308 }
6309 if (a->A && !s->v8m_secure) {
6310 /* This case is UNDEFINED. */
6311 unallocated_encoding(s);
6312 return true;
6313 }
6314
6315 addr = load_reg(s, a->rn);
6316 tmp = tcg_const_i32((a->A << 1) | a->T);
6317 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
6318 tcg_temp_free_i32(addr);
6319 store_reg(s, a->rd, tmp);
6320 return true;
6321 }
6322
6323 /*
6324 * Load/store register index
6325 */
6326
6327 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6328 {
6329 ISSInfo ret;
6330
6331 /* ISS not valid if writeback */
6332 if (p && !w) {
6333 ret = rd;
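/* A 2-byte insn length here means a 16-bit Thumb encoding. */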
6334 if (s->base.pc_next - s->pc_curr == 2) {
6335 ret |= ISSIs16Bit;
6336 }
6337 } else {
6338 ret = ISSInvalid;
6339 }
6340 return ret;
6341 }
6342
6343 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6344 {
6345 TCGv_i32 addr = load_reg(s, a->rn);
6346
6347 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6348 gen_helper_v8m_stackcheck(cpu_env, addr);
6349 }
6350
6351 if (a->p) {
6352 TCGv_i32 ofs = load_reg(s, a->rm);
6353 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6354 if (a->u) {
6355 tcg_gen_add_i32(addr, addr, ofs);
6356 } else {
6357 tcg_gen_sub_i32(addr, addr, ofs);
6358 }
6359 tcg_temp_free_i32(ofs);
6360 }
6361 return addr;
6362 }
6363
6364 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6365 TCGv_i32 addr, int address_offset)
6366 {
6367 if (!a->p) {
6368 TCGv_i32 ofs = load_reg(s, a->rm);
6369 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6370 if (a->u) {
6371 tcg_gen_add_i32(addr, addr, ofs);
6372 } else {
6373 tcg_gen_sub_i32(addr, addr, ofs);
6374 }
6375 tcg_temp_free_i32(ofs);
6376 } else if (!a->w) {
6377 tcg_temp_free_i32(addr);
6378 return;
6379 }
6380 tcg_gen_addi_i32(addr, addr, address_offset);
6381 store_reg(s, a->rn, addr);
6382 }
6383
6384 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6385 MemOp mop, int mem_idx)
6386 {
6387 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6388 TCGv_i32 addr, tmp;
6389
6390 addr = op_addr_rr_pre(s, a);
6391
6392 tmp = tcg_temp_new_i32();
6393 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6394 disas_set_da_iss(s, mop, issinfo);
6395
6396 /*
6397 * Perform base writeback before the loaded value to
6398 * ensure correct behavior with overlapping index registers.
6399 */
6400 op_addr_rr_post(s, a, addr, 0);
6401 store_reg_from_load(s, a->rt, tmp);
6402 return true;
6403 }
6404
6405 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6406 MemOp mop, int mem_idx)
6407 {
6408 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6409 TCGv_i32 addr, tmp;
6410
6411 addr = op_addr_rr_pre(s, a);
6412
6413 tmp = load_reg(s, a->rt);
6414 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6415 disas_set_da_iss(s, mop, issinfo);
6416 tcg_temp_free_i32(tmp);
6417
6418 op_addr_rr_post(s, a, addr, 0);
6419 return true;
6420 }
6421
6422 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6423 {
6424 int mem_idx = get_mem_index(s);
6425 TCGv_i32 addr, tmp;
6426
6427 if (!ENABLE_ARCH_5TE) {
6428 return false;
6429 }
6430 if (a->rt & 1) {
6431 unallocated_encoding(s);
6432 return true;
6433 }
6434 addr = op_addr_rr_pre(s, a);
6435
6436 tmp = tcg_temp_new_i32();
6437 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6438 store_reg(s, a->rt, tmp);
6439
6440 tcg_gen_addi_i32(addr, addr, 4);
6441
6442 tmp = tcg_temp_new_i32();
6443 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6444 store_reg(s, a->rt + 1, tmp);
6445
6446 /* LDRD w/ base writeback is undefined if the registers overlap. */
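/* The -4 compensates for the advance to the second word, so writeback is computed from the original address. */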
6447 op_addr_rr_post(s, a, addr, -4);
6448 return true;
6449 }
6450
6451 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6452 {
6453 int mem_idx = get_mem_index(s);
6454 TCGv_i32 addr, tmp;
6455
6456 if (!ENABLE_ARCH_5TE) {
6457 return false;
6458 }
6459 if (a->rt & 1) {
6460 unallocated_encoding(s);
6461 return true;
6462 }
6463 addr = op_addr_rr_pre(s, a);
6464
6465 tmp = load_reg(s, a->rt);
6466 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6467 tcg_temp_free_i32(tmp);
6468
6469 tcg_gen_addi_i32(addr, addr, 4);
6470
6471 tmp = load_reg(s, a->rt + 1);
6472 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6473 tcg_temp_free_i32(tmp);
6474
6475 op_addr_rr_post(s, a, addr, -4);
6476 return true;
6477 }
6478
6479 /*
6480 * Load/store immediate index
6481 */
6482
6483 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6484 {
6485 int ofs = a->imm;
6486
6487 if (!a->u) {
6488 ofs = -ofs;
6489 }
6490
6491 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6492 /*
6493 * Stackcheck. Here we know 'addr' is the current SP;
6494 * U is set if we're moving SP up, else down. It is
6495 * UNKNOWN whether the limit check triggers when SP starts
6496 * below the limit and ends up above it; we chose to do so.
6497 */
6498 if (!a->u) {
6499 TCGv_i32 newsp = tcg_temp_new_i32();
6500 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6501 gen_helper_v8m_stackcheck(cpu_env, newsp);
6502 tcg_temp_free_i32(newsp);
6503 } else {
6504 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6505 }
6506 }
6507
6508 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6509 }
6510
6511 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6512 TCGv_i32 addr, int address_offset)
6513 {
6514 if (!a->p) {
6515 if (a->u) {
6516 address_offset += a->imm;
6517 } else {
6518 address_offset -= a->imm;
6519 }
6520 } else if (!a->w) {
6521 tcg_temp_free_i32(addr);
6522 return;
6523 }
6524 tcg_gen_addi_i32(addr, addr, address_offset);
6525 store_reg(s, a->rn, addr);
6526 }
6527
6528 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6529 MemOp mop, int mem_idx)
6530 {
6531 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6532 TCGv_i32 addr, tmp;
6533
6534 addr = op_addr_ri_pre(s, a);
6535
6536 tmp = tcg_temp_new_i32();
6537 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6538 disas_set_da_iss(s, mop, issinfo);
6539
6540 /*
6541 * Perform base writeback before the loaded value to
6542 * ensure correct behavior with overlapping index registers.
6543 */
6544 op_addr_ri_post(s, a, addr, 0);
6545 store_reg_from_load(s, a->rt, tmp);
6546 return true;
6547 }
6548
6549 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6550 MemOp mop, int mem_idx)
6551 {
6552 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6553 TCGv_i32 addr, tmp;
6554
6555 addr = op_addr_ri_pre(s, a);
6556
6557 tmp = load_reg(s, a->rt);
6558 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6559 disas_set_da_iss(s, mop, issinfo);
6560 tcg_temp_free_i32(tmp);
6561
6562 op_addr_ri_post(s, a, addr, 0);
6563 return true;
6564 }
6565
6566 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6567 {
6568 int mem_idx = get_mem_index(s);
6569 TCGv_i32 addr, tmp;
6570
6571 addr = op_addr_ri_pre(s, a);
6572
6573 tmp = tcg_temp_new_i32();
6574 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6575 store_reg(s, a->rt, tmp);
6576
6577 tcg_gen_addi_i32(addr, addr, 4);
6578
6579 tmp = tcg_temp_new_i32();
6580 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6581 store_reg(s, rt2, tmp);
6582
6583 /* LDRD w/ base writeback is undefined if the registers overlap. */
6584 op_addr_ri_post(s, a, addr, -4);
6585 return true;
6586 }
6587
6588 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6589 {
6590 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6591 return false;
6592 }
6593 return op_ldrd_ri(s, a, a->rt + 1);
6594 }
6595
6596 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6597 {
6598 arg_ldst_ri b = {
6599 .u = a->u, .w = a->w, .p = a->p,
6600 .rn = a->rn, .rt = a->rt, .imm = a->imm
6601 };
6602 return op_ldrd_ri(s, &b, a->rt2);
6603 }
6604
6605 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6606 {
6607 int mem_idx = get_mem_index(s);
6608 TCGv_i32 addr, tmp;
6609
6610 addr = op_addr_ri_pre(s, a);
6611
6612 tmp = load_reg(s, a->rt);
6613 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6614 tcg_temp_free_i32(tmp);
6615
6616 tcg_gen_addi_i32(addr, addr, 4);
6617
6618 tmp = load_reg(s, rt2);
6619 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6620 tcg_temp_free_i32(tmp);
6621
6622 op_addr_ri_post(s, a, addr, -4);
6623 return true;
6624 }
6625
6626 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6627 {
6628 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6629 return false;
6630 }
6631 return op_strd_ri(s, a, a->rt + 1);
6632 }
6633
6634 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6635 {
6636 arg_ldst_ri b = {
6637 .u = a->u, .w = a->w, .p = a->p,
6638 .rn = a->rn, .rt = a->rt, .imm = a->imm
6639 };
6640 return op_strd_ri(s, &b, a->rt2);
6641 }
6642
6643 #define DO_LDST(NAME, WHICH, MEMOP) \
6644 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
6645 { \
6646 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
6647 } \
6648 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
6649 { \
6650 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
6651 } \
6652 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
6653 { \
6654 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
6655 } \
6656 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
6657 { \
6658 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
6659 }
6660
6661 DO_LDST(LDR, load, MO_UL)
6662 DO_LDST(LDRB, load, MO_UB)
6663 DO_LDST(LDRH, load, MO_UW)
6664 DO_LDST(LDRSB, load, MO_SB)
6665 DO_LDST(LDRSH, load, MO_SW)
6666
6667 DO_LDST(STR, store, MO_UL)
6668 DO_LDST(STRB, store, MO_UB)
6669 DO_LDST(STRH, store, MO_UW)
6670
6671 #undef DO_LDST
6672
6673 /*
6674 * Synchronization primitives
6675 */
6676
6677 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6678 {
6679 TCGv_i32 addr, tmp;
6680 TCGv taddr;
6681
6682 opc |= s->be_data;
6683 addr = load_reg(s, a->rn);
6684 taddr = gen_aa32_addr(s, addr, opc);
6685 tcg_temp_free_i32(addr);
6686
6687 tmp = load_reg(s, a->rt2);
6688 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6689 tcg_temp_free(taddr);
6690
6691 store_reg(s, a->rt, tmp);
6692 return true;
6693 }
6694
6695 static bool trans_SWP(DisasContext *s, arg_SWP *a)
6696 {
6697 return op_swp(s, a, MO_UL | MO_ALIGN);
6698 }
6699
6700 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6701 {
6702 return op_swp(s, a, MO_UB);
6703 }
6704
6705 /*
6706 * Load/Store Exclusive and Load-Acquire/Store-Release
6707 */
6708
6709 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6710 {
6711 TCGv_i32 addr;
6712 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6713 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6714
6715 /* We UNDEF for these UNPREDICTABLE cases. */
6716 if (a->rd == 15 || a->rn == 15 || a->rt == 15
6717 || a->rd == a->rn || a->rd == a->rt
6718 || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6719 || (mop == MO_64
6720 && (a->rt2 == 15
6721 || a->rd == a->rt2
6722 || (!v8a && s->thumb && a->rt2 == 13)))) {
6723 unallocated_encoding(s);
6724 return true;
6725 }
6726
6727 if (rel) {
6728 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6729 }
6730
6731 addr = tcg_temp_local_new_i32();
6732 load_reg_var(s, addr, a->rn);
6733 tcg_gen_addi_i32(addr, addr, a->imm);
6734
6735 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6736 tcg_temp_free_i32(addr);
6737 return true;
6738 }
6739
6740 static bool trans_STREX(DisasContext *s, arg_STREX *a)
6741 {
6742 if (!ENABLE_ARCH_6) {
6743 return false;
6744 }
6745 return op_strex(s, a, MO_32, false);
6746 }
6747
6748 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6749 {
6750 if (!ENABLE_ARCH_6K) {
6751 return false;
6752 }
6753 /* We UNDEF for these UNPREDICTABLE cases. */
6754 if (a->rt & 1) {
6755 unallocated_encoding(s);
6756 return true;
6757 }
6758 a->rt2 = a->rt + 1;
6759 return op_strex(s, a, MO_64, false);
6760 }
6761
6762 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6763 {
6764 return op_strex(s, a, MO_64, false);
6765 }
6766
6767 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6768 {
6769 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6770 return false;
6771 }
6772 return op_strex(s, a, MO_8, false);
6773 }
6774
6775 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6776 {
6777 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6778 return false;
6779 }
6780 return op_strex(s, a, MO_16, false);
6781 }
6782
6783 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6784 {
6785 if (!ENABLE_ARCH_8) {
6786 return false;
6787 }
6788 return op_strex(s, a, MO_32, true);
6789 }
6790
6791 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6792 {
6793 if (!ENABLE_ARCH_8) {
6794 return false;
6795 }
6796 /* We UNDEF for these UNPREDICTABLE cases. */
6797 if (a->rt & 1) {
6798 unallocated_encoding(s);
6799 return true;
6800 }
6801 a->rt2 = a->rt + 1;
6802 return op_strex(s, a, MO_64, true);
6803 }
6804
6805 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6806 {
6807 if (!ENABLE_ARCH_8) {
6808 return false;
6809 }
6810 return op_strex(s, a, MO_64, true);
6811 }
6812
6813 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
6814 {
6815 if (!ENABLE_ARCH_8) {
6816 return false;
6817 }
6818 return op_strex(s, a, MO_8, true);
6819 }
6820
6821 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
6822 {
6823 if (!ENABLE_ARCH_8) {
6824 return false;
6825 }
6826 return op_strex(s, a, MO_16, true);
6827 }
6828
6829 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
6830 {
6831 TCGv_i32 addr, tmp;
6832
6833 if (!ENABLE_ARCH_8) {
6834 return false;
6835 }
6836 /* We UNDEF for these UNPREDICTABLE cases. */
6837 if (a->rn == 15 || a->rt == 15) {
6838 unallocated_encoding(s);
6839 return true;
6840 }
6841
6842 addr = load_reg(s, a->rn);
6843 tmp = load_reg(s, a->rt);
6844 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6845 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
6846 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
6847
6848 tcg_temp_free_i32(tmp);
6849 tcg_temp_free_i32(addr);
6850 return true;
6851 }
6852
6853 static bool trans_STL(DisasContext *s, arg_STL *a)
6854 {
6855 return op_stl(s, a, MO_UL);
6856 }
6857
6858 static bool trans_STLB(DisasContext *s, arg_STL *a)
6859 {
6860 return op_stl(s, a, MO_UB);
6861 }
6862
6863 static bool trans_STLH(DisasContext *s, arg_STL *a)
6864 {
6865 return op_stl(s, a, MO_UW);
6866 }
6867
6868 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
6869 {
6870 TCGv_i32 addr;
6871 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6872 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6873
6874 /* We UNDEF for these UNPREDICTABLE cases. */
6875 if (a->rn == 15 || a->rt == 15
6876 || (!v8a && s->thumb && a->rt == 13)
6877 || (mop == MO_64
6878 && (a->rt2 == 15 || a->rt == a->rt2
6879 || (!v8a && s->thumb && a->rt2 == 13)))) {
6880 unallocated_encoding(s);
6881 return true;
6882 }
6883
6884 addr = tcg_temp_local_new_i32();
6885 load_reg_var(s, addr, a->rn);
6886 tcg_gen_addi_i32(addr, addr, a->imm);
6887
6888 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
6889 tcg_temp_free_i32(addr);
6890
6891 if (acq) {
6892 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
6893 }
6894 return true;
6895 }
6896
6897 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
6898 {
6899 if (!ENABLE_ARCH_6) {
6900 return false;
6901 }
6902 return op_ldrex(s, a, MO_32, false);
6903 }
6904
6905 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
6906 {
6907 if (!ENABLE_ARCH_6K) {
6908 return false;
6909 }
6910 /* We UNDEF for these UNPREDICTABLE cases. */
6911 if (a->rt & 1) {
6912 unallocated_encoding(s);
6913 return true;
6914 }
6915 a->rt2 = a->rt + 1;
6916 return op_ldrex(s, a, MO_64, false);
6917 }
6918
6919 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
6920 {
6921 return op_ldrex(s, a, MO_64, false);
6922 }
6923
6924 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
6925 {
6926 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6927 return false;
6928 }
6929 return op_ldrex(s, a, MO_8, false);
6930 }
6931
6932 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
6933 {
6934 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6935 return false;
6936 }
6937 return op_ldrex(s, a, MO_16, false);
6938 }
6939
6940 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
6941 {
6942 if (!ENABLE_ARCH_8) {
6943 return false;
6944 }
6945 return op_ldrex(s, a, MO_32, true);
6946 }
6947
6948 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
6949 {
6950 if (!ENABLE_ARCH_8) {
6951 return false;
6952 }
6953 /* We UNDEF for these UNPREDICTABLE cases. */
6954 if (a->rt & 1) {
6955 unallocated_encoding(s);
6956 return true;
6957 }
6958 a->rt2 = a->rt + 1;
6959 return op_ldrex(s, a, MO_64, true);
6960 }
6961
6962 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
6963 {
6964 if (!ENABLE_ARCH_8) {
6965 return false;
6966 }
6967 return op_ldrex(s, a, MO_64, true);
6968 }
6969
6970 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
6971 {
6972 if (!ENABLE_ARCH_8) {
6973 return false;
6974 }
6975 return op_ldrex(s, a, MO_8, true);
6976 }
6977
6978 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
6979 {
6980 if (!ENABLE_ARCH_8) {
6981 return false;
6982 }
6983 return op_ldrex(s, a, MO_16, true);
6984 }
6985
6986 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
6987 {
6988 TCGv_i32 addr, tmp;
6989
6990 if (!ENABLE_ARCH_8) {
6991 return false;
6992 }
6993 /* We UNDEF for these UNPREDICTABLE cases. */
6994 if (a->rn == 15 || a->rt == 15) {
6995 unallocated_encoding(s);
6996 return true;
6997 }
6998
6999 addr = load_reg(s, a->rn);
7000 tmp = tcg_temp_new_i32();
7001 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
7002 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7003 tcg_temp_free_i32(addr);
7004
7005 store_reg(s, a->rt, tmp);
7006 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7007 return true;
7008 }
7009
7010 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7011 {
7012 return op_lda(s, a, MO_UL);
7013 }
7014
7015 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7016 {
7017 return op_lda(s, a, MO_UB);
7018 }
7019
7020 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7021 {
7022 return op_lda(s, a, MO_UW);
7023 }
7024
7025 /*
7026 * Media instructions
7027 */
7028
7029 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7030 {
7031 TCGv_i32 t1, t2;
7032
7033 if (!ENABLE_ARCH_6) {
7034 return false;
7035 }
7036
7037 t1 = load_reg(s, a->rn);
7038 t2 = load_reg(s, a->rm);
7039 gen_helper_usad8(t1, t1, t2);
7040 tcg_temp_free_i32(t2);
7041 if (a->ra != 15) {
7042 t2 = load_reg(s, a->ra);
7043 tcg_gen_add_i32(t1, t1, t2);
7044 tcg_temp_free_i32(t2);
7045 }
7046 store_reg(s, a->rd, t1);
7047 return true;
7048 }
7049
7050 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7051 {
7052 TCGv_i32 tmp;
7053 int width = a->widthm1 + 1;
7054 int shift = a->lsb;
7055
7056 if (!ENABLE_ARCH_6T2) {
7057 return false;
7058 }
7059 if (shift + width > 32) {
7060 /* UNPREDICTABLE; we choose to UNDEF */
7061 unallocated_encoding(s);
7062 return true;
7063 }
7064
7065 tmp = load_reg(s, a->rn);
7066 if (u) {
7067 tcg_gen_extract_i32(tmp, tmp, shift, width);
7068 } else {
7069 tcg_gen_sextract_i32(tmp, tmp, shift, width);
7070 }
7071 store_reg(s, a->rd, tmp);
7072 return true;
7073 }
7074
7075 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7076 {
7077 return op_bfx(s, a, false);
7078 }
7079
7080 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7081 {
7082 return op_bfx(s, a, true);
7083 }
7084
7085 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7086 {
7087 TCGv_i32 tmp;
7088 int msb = a->msb, lsb = a->lsb;
7089 int width;
7090
7091 if (!ENABLE_ARCH_6T2) {
7092 return false;
7093 }
7094 if (msb < lsb) {
7095 /* UNPREDICTABLE; we choose to UNDEF */
7096 unallocated_encoding(s);
7097 return true;
7098 }
7099
7100 width = msb + 1 - lsb;
7101 if (a->rn == 15) {
7102 /* BFC */
7103 tmp = tcg_const_i32(0);
7104 } else {
7105 /* BFI */
7106 tmp = load_reg(s, a->rn);
7107 }
7108 if (width != 32) {
7109 TCGv_i32 tmp2 = load_reg(s, a->rd);
7110 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
7111 tcg_temp_free_i32(tmp2);
7112 }
7113 store_reg(s, a->rd, tmp);
7114 return true;
7115 }
7116
7117 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7118 {
7119 unallocated_encoding(s);
7120 return true;
7121 }
7122
7123 /*
7124 * Parallel addition and subtraction
7125 */
7126
7127 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7128 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7129 {
7130 TCGv_i32 t0, t1;
7131
7132 if (s->thumb
7133 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7134 : !ENABLE_ARCH_6) {
7135 return false;
7136 }
7137
7138 t0 = load_reg(s, a->rn);
7139 t1 = load_reg(s, a->rm);
7140
7141 gen(t0, t0, t1);
7142
7143 tcg_temp_free_i32(t1);
7144 store_reg(s, a->rd, t0);
7145 return true;
7146 }
7147
7148 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7149 void (*gen)(TCGv_i32, TCGv_i32,
7150 TCGv_i32, TCGv_ptr))
7151 {
7152 TCGv_i32 t0, t1;
7153 TCGv_ptr ge;
7154
7155 if (s->thumb
7156 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7157 : !ENABLE_ARCH_6) {
7158 return false;
7159 }
7160
7161 t0 = load_reg(s, a->rn);
7162 t1 = load_reg(s, a->rm);
7163
7164 ge = tcg_temp_new_ptr();
7165 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7166 gen(t0, t0, t1, ge);
7167
7168 tcg_temp_free_ptr(ge);
7169 tcg_temp_free_i32(t1);
7170 store_reg(s, a->rd, t0);
7171 return true;
7172 }
7173
7174 #define DO_PAR_ADDSUB(NAME, helper) \
7175 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
7176 { \
7177 return op_par_addsub(s, a, helper); \
7178 }
7179
7180 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7181 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
7182 { \
7183 return op_par_addsub_ge(s, a, helper); \
7184 }
7185
7186 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7187 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7188 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7189 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7190 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7191 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7192
7193 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7194 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7195 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7196 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7197 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7198 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7199
7200 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7201 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7202 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7203 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7204 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7205 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7206
7207 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7208 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7209 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7210 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7211 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7212 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7213
7214 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7215 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7216 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7217 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7218 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7219 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7220
7221 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7222 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7223 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7224 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7225 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7226 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7227
7228 #undef DO_PAR_ADDSUB
7229 #undef DO_PAR_ADDSUB_GE
7230
7231 /*
7232 * Packing, unpacking, saturation, and reversal
7233 */
7234
7235 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7236 {
7237 TCGv_i32 tn, tm;
7238 int shift = a->imm;
7239
7240 if (s->thumb
7241 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7242 : !ENABLE_ARCH_6) {
7243 return false;
7244 }
7245
7246 tn = load_reg(s, a->rn);
7247 tm = load_reg(s, a->rm);
7248 if (a->tb) {
7249 /* PKHTB */
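/* An encoded shift of 0 means ASR #32; ASR #31 yields the same sign-filled result. */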
7250 if (shift == 0) {
7251 shift = 31;
7252 }
7253 tcg_gen_sari_i32(tm, tm, shift);
7254 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7255 } else {
7256 /* PKHBT */
7257 tcg_gen_shli_i32(tm, tm, shift);
7258 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7259 }
7260 tcg_temp_free_i32(tm);
7261 store_reg(s, a->rd, tn);
7262 return true;
7263 }
7264
7265 static bool op_sat(DisasContext *s, arg_sat *a,
7266 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7267 {
7268 TCGv_i32 tmp, satimm;
7269 int shift = a->imm;
7270
7271 if (!ENABLE_ARCH_6) {
7272 return false;
7273 }
7274
7275 tmp = load_reg(s, a->rn);
7276 if (a->sh) {
7277 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7278 } else {
7279 tcg_gen_shli_i32(tmp, tmp, shift);
7280 }
7281
7282 satimm = tcg_const_i32(a->satimm);
7283 gen(tmp, cpu_env, tmp, satimm);
7284 tcg_temp_free_i32(satimm);
7285
7286 store_reg(s, a->rd, tmp);
7287 return true;
7288 }
7289
7290 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7291 {
7292 return op_sat(s, a, gen_helper_ssat);
7293 }
7294
7295 static bool trans_USAT(DisasContext *s, arg_sat *a)
7296 {
7297 return op_sat(s, a, gen_helper_usat);
7298 }
7299
7300 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7301 {
7302 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7303 return false;
7304 }
7305 return op_sat(s, a, gen_helper_ssat16);
7306 }
7307
7308 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7309 {
7310 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7311 return false;
7312 }
7313 return op_sat(s, a, gen_helper_usat16);
7314 }
7315
7316 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7317 void (*gen_extract)(TCGv_i32, TCGv_i32),
7318 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7319 {
7320 TCGv_i32 tmp;
7321
7322 if (!ENABLE_ARCH_6) {
7323 return false;
7324 }
7325
7326 tmp = load_reg(s, a->rm);
7327 /*
7328 * TODO: In many cases we could do a shift instead of a rotate.
7329 * Combined with a simple extend, that becomes an extract.
7330 */
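/*
 * E.g. for a->rot == 1 with an 8-bit sign extend, the rotate plus
 * extend below is equivalent to tcg_gen_sextract_i32(tmp, tmp, 8, 8).
 */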
7331 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7332 gen_extract(tmp, tmp);
7333
7334 if (a->rn != 15) {
7335 TCGv_i32 tmp2 = load_reg(s, a->rn);
7336 gen_add(tmp, tmp, tmp2);
7337 tcg_temp_free_i32(tmp2);
7338 }
7339 store_reg(s, a->rd, tmp);
7340 return true;
7341 }
7342
7343 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7344 {
7345 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7346 }
7347
7348 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7349 {
7350 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7351 }
7352
7353 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7354 {
7355 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7356 return false;
7357 }
7358 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7359 }
7360
7361 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7362 {
7363 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7364 }
7365
7366 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7367 {
7368 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7369 }
7370
7371 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7372 {
7373 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7374 return false;
7375 }
7376 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7377 }
7378
7379 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7380 {
7381 TCGv_i32 t1, t2, t3;
7382
7383 if (s->thumb
7384 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7385 : !ENABLE_ARCH_6) {
7386 return false;
7387 }
7388
7389 t1 = load_reg(s, a->rn);
7390 t2 = load_reg(s, a->rm);
7391 t3 = tcg_temp_new_i32();
7392 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7393 gen_helper_sel_flags(t1, t3, t1, t2);
7394 tcg_temp_free_i32(t3);
7395 tcg_temp_free_i32(t2);
7396 store_reg(s, a->rd, t1);
7397 return true;
7398 }
7399
7400 static bool op_rr(DisasContext *s, arg_rr *a,
7401 void (*gen)(TCGv_i32, TCGv_i32))
7402 {
7403 TCGv_i32 tmp;
7404
7405 tmp = load_reg(s, a->rm);
7406 gen(tmp, tmp);
7407 store_reg(s, a->rd, tmp);
7408 return true;
7409 }
7410
7411 static bool trans_REV(DisasContext *s, arg_rr *a)
7412 {
7413 if (!ENABLE_ARCH_6) {
7414 return false;
7415 }
7416 return op_rr(s, a, tcg_gen_bswap32_i32);
7417 }
7418
7419 static bool trans_REV16(DisasContext *s, arg_rr *a)
7420 {
7421 if (!ENABLE_ARCH_6) {
7422 return false;
7423 }
7424 return op_rr(s, a, gen_rev16);
7425 }
7426
7427 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7428 {
7429 if (!ENABLE_ARCH_6) {
7430 return false;
7431 }
7432 return op_rr(s, a, gen_revsh);
7433 }
7434
7435 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7436 {
7437 if (!ENABLE_ARCH_6T2) {
7438 return false;
7439 }
7440 return op_rr(s, a, gen_helper_rbit);
7441 }
7442
7443 /*
7444 * Signed multiply, signed and unsigned divide
7445 */
7446
7447 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7448 {
7449 TCGv_i32 t1, t2;
7450
7451 if (!ENABLE_ARCH_6) {
7452 return false;
7453 }
7454
7455 t1 = load_reg(s, a->rn);
7456 t2 = load_reg(s, a->rm);
7457 if (m_swap) {
7458 gen_swap_half(t2, t2);
7459 }
7460 gen_smul_dual(t1, t2);
7461
7462 if (sub) {
7463 /* This subtraction cannot overflow. */
7464 tcg_gen_sub_i32(t1, t1, t2);
7465 } else {
7466 /*
7467 * This addition cannot overflow 32 bits; however it may
7468 * overflow considered as a signed operation, in which case
7469 * we must set the Q flag.
7470 */
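/*
 * E.g. 0x8000 * 0x8000 + 0x8000 * 0x8000 == 0x40000000 + 0x40000000:
 * the sum 0x80000000 does not carry out of 32 bits but is negative
 * as a signed value, so the Q flag must be set.
 */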
7471 gen_helper_add_setq(t1, cpu_env, t1, t2);
7472 }
7473 tcg_temp_free_i32(t2);
7474
7475 if (a->ra != 15) {
7476 t2 = load_reg(s, a->ra);
7477 gen_helper_add_setq(t1, cpu_env, t1, t2);
7478 tcg_temp_free_i32(t2);
7479 }
7480 store_reg(s, a->rd, t1);
7481 return true;
7482 }
7483
7484 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7485 {
7486 return op_smlad(s, a, false, false);
7487 }
7488
7489 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7490 {
7491 return op_smlad(s, a, true, false);
7492 }
7493
7494 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7495 {
7496 return op_smlad(s, a, false, true);
7497 }
7498
7499 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7500 {
7501 return op_smlad(s, a, true, true);
7502 }
7503
7504 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7505 {
7506 TCGv_i32 t1, t2;
7507 TCGv_i64 l1, l2;
7508
7509 if (!ENABLE_ARCH_6) {
7510 return false;
7511 }
7512
7513 t1 = load_reg(s, a->rn);
7514 t2 = load_reg(s, a->rm);
7515 if (m_swap) {
7516 gen_swap_half(t2, t2);
7517 }
7518 gen_smul_dual(t1, t2);
7519
7520 l1 = tcg_temp_new_i64();
7521 l2 = tcg_temp_new_i64();
7522 tcg_gen_ext_i32_i64(l1, t1);
7523 tcg_gen_ext_i32_i64(l2, t2);
7524 tcg_temp_free_i32(t1);
7525 tcg_temp_free_i32(t2);
7526
7527 if (sub) {
7528 tcg_gen_sub_i64(l1, l1, l2);
7529 } else {
7530 tcg_gen_add_i64(l1, l1, l2);
7531 }
7532 tcg_temp_free_i64(l2);
7533
7534 gen_addq(s, l1, a->ra, a->rd);
7535 gen_storeq_reg(s, a->ra, a->rd, l1);
7536 tcg_temp_free_i64(l1);
7537 return true;
7538 }
7539
7540 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7541 {
7542 return op_smlald(s, a, false, false);
7543 }
7544
7545 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7546 {
7547 return op_smlald(s, a, true, false);
7548 }
7549
7550 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7551 {
7552 return op_smlald(s, a, false, true);
7553 }
7554
7555 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7556 {
7557 return op_smlald(s, a, true, true);
7558 }
7559
7560 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7561 {
7562 TCGv_i32 t1, t2;
7563
7564 if (s->thumb
7565 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7566 : !ENABLE_ARCH_6) {
7567 return false;
7568 }
7569
7570 t1 = load_reg(s, a->rn);
7571 t2 = load_reg(s, a->rm);
7572 tcg_gen_muls2_i32(t2, t1, t1, t2);
7573
7574 if (a->ra != 15) {
7575 TCGv_i32 t3 = load_reg(s, a->ra);
7576 if (sub) {
7577 /*
7578 * For SMMLS, we need a full 64-bit subtract: a non-zero
7579 * multiplicand lowpart can produce a borrow into the high word,
7580 * and we also need the correct result lowpart for rounding.
7581 */
7582 TCGv_i32 zero = tcg_const_i32(0);
7583 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
7584 tcg_temp_free_i32(zero);
7585 } else {
7586 tcg_gen_add_i32(t1, t1, t3);
7587 }
7588 tcg_temp_free_i32(t3);
7589 }
7590 if (round) {
7591 /*
7592 * Adding 0x80000000 to the 64-bit quantity means that we have
7593 * a carry into the high word when the low word has the msb set.
7594 */
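/*
 * E.g. a 64-bit value of 0x00000001_80000000 rounds up to a high
 * word of 2: low >> 31 is 1, which is added to the high word.
 */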
7595 tcg_gen_shri_i32(t2, t2, 31);
7596 tcg_gen_add_i32(t1, t1, t2);
7597 }
7598 tcg_temp_free_i32(t2);
7599 store_reg(s, a->rd, t1);
7600 return true;
7601 }
7602
7603 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7604 {
7605 return op_smmla(s, a, false, false);
7606 }
7607
7608 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7609 {
7610 return op_smmla(s, a, true, false);
7611 }
7612
7613 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7614 {
7615 return op_smmla(s, a, false, true);
7616 }
7617
7618 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7619 {
7620 return op_smmla(s, a, true, true);
7621 }
7622
7623 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7624 {
7625 TCGv_i32 t1, t2;
7626
7627 if (s->thumb
7628 ? !dc_isar_feature(aa32_thumb_div, s)
7629 : !dc_isar_feature(aa32_arm_div, s)) {
7630 return false;
7631 }
7632
7633 t1 = load_reg(s, a->rn);
7634 t2 = load_reg(s, a->rm);
7635 if (u) {
7636 gen_helper_udiv(t1, t1, t2);
7637 } else {
7638 gen_helper_sdiv(t1, t1, t2);
7639 }
7640 tcg_temp_free_i32(t2);
7641 store_reg(s, a->rd, t1);
7642 return true;
7643 }
7644
7645 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7646 {
7647 return op_div(s, a, false);
7648 }
7649
7650 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7651 {
7652 return op_div(s, a, true);
7653 }
7654
7655 /*
7656 * Block data transfer
7657 */
7658
7659 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7660 {
7661 TCGv_i32 addr = load_reg(s, a->rn);
7662
7663 if (a->b) {
7664 if (a->i) {
7665 /* pre increment */
7666 tcg_gen_addi_i32(addr, addr, 4);
7667 } else {
7668 /* pre decrement */
7669 tcg_gen_addi_i32(addr, addr, -(n * 4));
7670 }
7671 } else if (!a->i && n != 1) {
7672 /* post decrement */
7673 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7674 }
7675
7676 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7677 /*
7678 * If the writeback is incrementing SP rather than
7679 * decrementing it, and the initial SP is below the
7680 * stack limit but the final written-back SP would
7681 * be above, then we must not perform any memory
7682 * accesses, but it is IMPDEF whether we generate
7683 * an exception. We choose to do so in this case.
7684 * At this point 'addr' is the lowest address: either
7685 * the original SP (if incrementing) or our final SP
7686 * (if decrementing), so that's what we check.
7687 */
7688 gen_helper_v8m_stackcheck(cpu_env, addr);
7689 }
7690
7691 return addr;
7692 }
7693
7694 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7695 TCGv_i32 addr, int n)
7696 {
7697 if (a->w) {
7698 /* write back */
7699 if (!a->b) {
7700 if (a->i) {
7701 /* post increment */
7702 tcg_gen_addi_i32(addr, addr, 4);
7703 } else {
7704 /* post decrement */
7705 tcg_gen_addi_i32(addr, addr, -(n * 4));
7706 }
7707 } else if (!a->i && n != 1) {
7708 /* pre decrement */
7709 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7710 }
7711 store_reg(s, a->rn, addr);
7712 } else {
7713 tcg_temp_free_i32(addr);
7714 }
7715 }
7716
7717 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
7718 {
7719 int i, j, n, list, mem_idx;
7720 bool user = a->u;
7721 TCGv_i32 addr, tmp, tmp2;
7722
7723 if (user) {
7724 /* STM (user) */
7725 if (IS_USER(s)) {
7726 /* Only usable in supervisor mode. */
7727 unallocated_encoding(s);
7728 return true;
7729 }
7730 }
7731
7732 list = a->list;
7733 n = ctpop16(list);
7734 if (n < min_n || a->rn == 15) {
7735 unallocated_encoding(s);
7736 return true;
7737 }
7738
7739 addr = op_addr_block_pre(s, a, n);
7740 mem_idx = get_mem_index(s);
7741
7742 for (i = j = 0; i < 16; i++) {
7743 if (!(list & (1 << i))) {
7744 continue;
7745 }
7746
7747 if (user && i != 15) {
7748 tmp = tcg_temp_new_i32();
7749 tmp2 = tcg_const_i32(i);
7750 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
7751 tcg_temp_free_i32(tmp2);
7752 } else {
7753 tmp = load_reg(s, i);
7754 }
7755 gen_aa32_st32(s, tmp, addr, mem_idx);
7756 tcg_temp_free_i32(tmp);
7757
7758 /* No need to add after the last transfer. */
7759 if (++j != n) {
7760 tcg_gen_addi_i32(addr, addr, 4);
7761 }
7762 }
7763
7764 op_addr_block_post(s, a, addr, n);
7765 return true;
7766 }
7767
7768 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7769 {
7770 /* BitCount(list) < 1 is UNPREDICTABLE */
7771 return op_stm(s, a, 1);
7772 }
7773
7774 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7775 {
7776 /* Writeback register in register list is UNPREDICTABLE for T32. */
7777 if (a->w && (a->list & (1 << a->rn))) {
7778 unallocated_encoding(s);
7779 return true;
7780 }
7781 /* BitCount(list) < 2 is UNPREDICTABLE */
7782 return op_stm(s, a, 2);
7783 }
7784
7785 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
7786 {
7787 int i, j, n, list, mem_idx;
7788 bool loaded_base;
7789 bool user = a->u;
7790 bool exc_return = false;
7791 TCGv_i32 addr, tmp, tmp2, loaded_var;
7792
7793 if (user) {
7794 /* LDM (user), LDM (exception return) */
7795 if (IS_USER(s)) {
7796 /* Only usable in supervisor mode. */
7797 unallocated_encoding(s);
7798 return true;
7799 }
7800 if (extract32(a->list, 15, 1)) {
7801 exc_return = true;
7802 user = false;
7803 } else {
7804 /* LDM (user) does not allow writeback. */
7805 if (a->w) {
7806 unallocated_encoding(s);
7807 return true;
7808 }
7809 }
7810 }
7811
7812 list = a->list;
7813 n = ctpop16(list);
7814 if (n < min_n || a->rn == 15) {
7815 unallocated_encoding(s);
7816 return true;
7817 }
7818
7819 addr = op_addr_block_pre(s, a, n);
7820 mem_idx = get_mem_index(s);
7821 loaded_base = false;
7822 loaded_var = NULL;
7823
7824 for (i = j = 0; i < 16; i++) {
7825 if (!(list & (1 << i))) {
7826 continue;
7827 }
7828
7829 tmp = tcg_temp_new_i32();
7830 gen_aa32_ld32u(s, tmp, addr, mem_idx);
7831 if (user) {
7832 tmp2 = tcg_const_i32(i);
7833 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
7834 tcg_temp_free_i32(tmp2);
7835 tcg_temp_free_i32(tmp);
7836 } else if (i == a->rn) {
7837 loaded_var = tmp;
7838 loaded_base = true;
7839 } else if (i == 15 && exc_return) {
7840 store_pc_exc_ret(s, tmp);
7841 } else {
7842 store_reg_from_load(s, i, tmp);
7843 }
7844
7845 /* No need to add after the last transfer. */
7846 if (++j != n) {
7847 tcg_gen_addi_i32(addr, addr, 4);
7848 }
7849 }
7850
7851 op_addr_block_post(s, a, addr, n);
7852
7853 if (loaded_base) {
7854 /* Note that we reject base == pc above. */
7855 store_reg(s, a->rn, loaded_var);
7856 }
7857
7858 if (exc_return) {
7859 /* Restore CPSR from SPSR. */
7860 tmp = load_cpu_field(spsr);
7861 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7862 gen_io_start();
7863 }
7864 gen_helper_cpsr_write_eret(cpu_env, tmp);
7865 tcg_temp_free_i32(tmp);
7866 /* Must exit loop to check un-masked IRQs */
7867 s->base.is_jmp = DISAS_EXIT;
7868 }
7869 return true;
7870 }
7871
7872 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
7873 {
7874 /*
7875 * Writeback register in register list is UNPREDICTABLE
7876 * for ArchVersion() >= 7. Prior to v7, A32 would write
7877 * an UNKNOWN value to the base register.
7878 */
7879 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
7880 unallocated_encoding(s);
7881 return true;
7882 }
7883 /* BitCount(list) < 1 is UNPREDICTABLE */
7884 return do_ldm(s, a, 1);
7885 }
7886
7887 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
7888 {
7889 /* Writeback register in register list is UNPREDICTABLE for T32. */
7890 if (a->w && (a->list & (1 << a->rn))) {
7891 unallocated_encoding(s);
7892 return true;
7893 }
7894 /* BitCount(list) < 2 is UNPREDICTABLE */
7895 return do_ldm(s, a, 2);
7896 }
7897
7898 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
7899 {
7900 /* Writeback is conditional on the base register not being loaded. */
7901 a->w = !(a->list & (1 << a->rn));
7902 /* BitCount(list) < 1 is UNPREDICTABLE */
7903 return do_ldm(s, a, 1);
7904 }
7905
7906 /*
7907 * Branch, branch with link
7908 */
7909
7910 static bool trans_B(DisasContext *s, arg_i *a)
7911 {
7912 gen_jmp(s, read_pc(s) + a->imm);
7913 return true;
7914 }
7915
7916 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
7917 {
7918 /* This has cond from encoding, required to be outside IT block. */
7919 if (a->cond >= 0xe) {
7920 return false;
7921 }
7922 if (s->condexec_mask) {
7923 unallocated_encoding(s);
7924 return true;
7925 }
7926 arm_skip_unless(s, a->cond);
7927 gen_jmp(s, read_pc(s) + a->imm);
7928 return true;
7929 }
7930
7931 static bool trans_BL(DisasContext *s, arg_i *a)
7932 {
7933 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
7934 gen_jmp(s, read_pc(s) + a->imm);
7935 return true;
7936 }
7937
7938 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
7939 {
7940 TCGv_i32 tmp;
7941
7942 /* For A32, ARCH(5) is checked near the start of the uncond block. */
7943 if (s->thumb && (a->imm & 2)) {
7944 return false;
7945 }
7946 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
7947 tmp = tcg_const_i32(!s->thumb);
7948 store_cpu_field(tmp, thumb);
7949 gen_jmp(s, (read_pc(s) & ~3) + a->imm);
7950 return true;
7951 }
7952
7953 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
7954 {
7955 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
7956 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
7957 return true;
7958 }
7959
7960 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
7961 {
7962 TCGv_i32 tmp = tcg_temp_new_i32();
7963
7964 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
7965 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
7966 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
7967 gen_bx(s, tmp);
7968 return true;
7969 }
7970
7971 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
7972 {
7973 TCGv_i32 tmp;
7974
7975 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
7976 if (!ENABLE_ARCH_5) {
7977 return false;
7978 }
7979 tmp = tcg_temp_new_i32();
7980 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
7981 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
7982 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
7983 gen_bx(s, tmp);
7984 return true;
7985 }
7986
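/*
 * Table Branch (TBB/TBH): load an unsigned byte or halfword from the
 * table at Rn + Rm (Rm doubled for TBH) and branch forward from the PC
 * by twice the loaded value.
 */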
7987 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
7988 {
7989 TCGv_i32 addr, tmp;
7990
7991 tmp = load_reg(s, a->rm);
7992 if (half) {
7993 tcg_gen_add_i32(tmp, tmp, tmp);
7994 }
7995 addr = load_reg(s, a->rn);
7996 tcg_gen_add_i32(addr, addr, tmp);
7997
7998 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
7999 half ? MO_UW | s->be_data : MO_UB);
8000 tcg_temp_free_i32(addr);
8001
8002 tcg_gen_add_i32(tmp, tmp, tmp);
8003 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
8004 store_reg(s, 15, tmp);
8005 return true;
8006 }
8007
8008 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8009 {
8010 return op_tbranch(s, a, false);
8011 }
8012
8013 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8014 {
8015 return op_tbranch(s, a, true);
8016 }
8017
8018 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8019 {
8020 TCGv_i32 tmp = load_reg(s, a->rn);
8021
8022 arm_gen_condlabel(s);
8023 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8024 tmp, 0, s->condlabel);
8025 tcg_temp_free_i32(tmp);
8026 gen_jmp(s, read_pc(s) + a->imm);
8027 return true;
8028 }
8029
8030 /*
8031 * Supervisor call - both T32 & A32 come here so we need to check
8032 * which mode we are in when checking for semihosting.
8033 */
8034
8035 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8036 {
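/* The standard semihosting call is SVC 0xab in Thumb and SVC 0x123456 in Arm state. */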
8037 const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8038
8039 if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
8040 #ifndef CONFIG_USER_ONLY
8041 !IS_USER(s) &&
8042 #endif
8043 (a->imm == semihost_imm)) {
8044 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8045 } else {
8046 gen_set_pc_im(s, s->base.pc_next);
8047 s->svc_imm = a->imm;
8048 s->base.is_jmp = DISAS_SWI;
8049 }
8050 return true;
8051 }
8052
8053 /*
8054 * Unconditional system instructions
8055 */
8056
8057 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8058 {
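/*
 * a->pu selects the DA/IA/DB/IB addressing mode; RFE then loads the
 * return PC and the CPSR value from two consecutive words.
 */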
8059 static const int8_t pre_offset[4] = {
8060 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8061 };
8062 static const int8_t post_offset[4] = {
8063 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8064 };
8065 TCGv_i32 addr, t1, t2;
8066
8067 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8068 return false;
8069 }
8070 if (IS_USER(s)) {
8071 unallocated_encoding(s);
8072 return true;
8073 }
8074
8075 addr = load_reg(s, a->rn);
8076 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8077
8078 /* Load PC into tmp and CPSR into tmp2. */
8079 t1 = tcg_temp_new_i32();
8080 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
8081 tcg_gen_addi_i32(addr, addr, 4);
8082 t2 = tcg_temp_new_i32();
8083 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
8084
8085 if (a->w) {
8086 /* Base writeback. */
8087 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8088 store_reg(s, a->rn, addr);
8089 } else {
8090 tcg_temp_free_i32(addr);
8091 }
8092 gen_rfe(s, t1, t2);
8093 return true;
8094 }
8095
8096 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8097 {
8098 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8099 return false;
8100 }
8101 gen_srs(s, a->mode, a->pu, a->w);
8102 return true;
8103 }
8104
8105 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8106 {
8107 uint32_t mask, val;
8108
8109 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8110 return false;
8111 }
8112 if (IS_USER(s)) {
8113 /* Implemented as NOP in user mode. */
8114 return true;
8115 }
8116 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8117
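/*
 * imod bit 1 means "change the A/I/F masks" (CPSIE/CPSID); imod bit 0
 * set means disable, i.e. the selected CPSR mask bits are written as 1.
 */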
8118 mask = val = 0;
8119 if (a->imod & 2) {
8120 if (a->A) {
8121 mask |= CPSR_A;
8122 }
8123 if (a->I) {
8124 mask |= CPSR_I;
8125 }
8126 if (a->F) {
8127 mask |= CPSR_F;
8128 }
8129 if (a->imod & 1) {
8130 val |= mask;
8131 }
8132 }
8133 if (a->M) {
8134 mask |= CPSR_M;
8135 val |= a->mode;
8136 }
8137 if (mask) {
8138 gen_set_psr_im(s, mask, 0, val);
8139 }
8140 return true;
8141 }
8142
8143 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8144 {
8145 TCGv_i32 tmp, addr, el;
8146
8147 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8148 return false;
8149 }
8150 if (IS_USER(s)) {
8151 /* Implemented as NOP in user mode. */
8152 return true;
8153 }
8154
8155 tmp = tcg_const_i32(a->im);
8156 /* FAULTMASK */
8157 if (a->F) {
8158 addr = tcg_const_i32(19);
8159 gen_helper_v7m_msr(cpu_env, addr, tmp);
8160 tcg_temp_free_i32(addr);
8161 }
8162 /* PRIMASK */
8163 if (a->I) {
8164 addr = tcg_const_i32(16);
8165 gen_helper_v7m_msr(cpu_env, addr, tmp);
8166 tcg_temp_free_i32(addr);
8167 }
8168 el = tcg_const_i32(s->current_el);
8169 gen_helper_rebuild_hflags_m32(cpu_env, el);
8170 tcg_temp_free_i32(el);
8171 tcg_temp_free_i32(tmp);
8172 gen_lookup_tb(s);
8173 return true;
8174 }
8175
8176 /*
8177 * Clear-Exclusive, Barriers
8178 */
8179
8180 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8181 {
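/* The A32 encoding is ARMv6K; the T32 encoding requires v7 or M-profile. */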
8182 if (s->thumb
8183 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8184 : !ENABLE_ARCH_6K) {
8185 return false;
8186 }
8187 gen_clrex(s);
8188 return true;
8189 }
8190
8191 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8192 {
8193 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8194 return false;
8195 }
8196 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8197 return true;
8198 }
8199
8200 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8201 {
8202 return trans_DSB(s, NULL);
8203 }
8204
8205 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8206 {
8207 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8208 return false;
8209 }
8210 /*
8211 * We need to break the TB after this insn to execute
8212 * self-modifying code correctly and also to take
8213 * any pending interrupts immediately.
8214 */
8215 gen_goto_tb(s, 0, s->base.pc_next);
8216 return true;
8217 }
8218
8219 static bool trans_SB(DisasContext *s, arg_SB *a)
8220 {
8221 if (!dc_isar_feature(aa32_sb, s)) {
8222 return false;
8223 }
8224 /*
8225 * TODO: There is no speculation barrier opcode
8226 * for TCG; MB and end the TB instead.
8227 */
8228 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8229 gen_goto_tb(s, 0, s->base.pc_next);
8230 return true;
8231 }
8232
8233 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8234 {
8235 if (!ENABLE_ARCH_6) {
8236 return false;
8237 }
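/*
 * Only do anything if the endianness actually changes: the helper flips
 * CPSR.E, and we must then leave the TB because the data-endianness
 * TB flags no longer match.
 */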
8238 if (a->E != (s->be_data == MO_BE)) {
8239 gen_helper_setend(cpu_env);
8240 s->base.is_jmp = DISAS_UPDATE_EXIT;
8241 }
8242 return true;
8243 }
8244
8245 /*
8246 * Preload instructions
8247 * All are nops, contingent on the appropriate arch level.
8248 */
8249
8250 static bool trans_PLD(DisasContext *s, arg_PLD *a)
8251 {
8252 return ENABLE_ARCH_5TE;
8253 }
8254
8255 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8256 {
8257 return arm_dc_feature(s, ARM_FEATURE_V7MP);
8258 }
8259
8260 static bool trans_PLI(DisasContext *s, arg_PLD *a)
8261 {
8262 return ENABLE_ARCH_7;
8263 }
8264
8265 /*
8266 * If-then
8267 */
8268
8269 static bool trans_IT(DisasContext *s, arg_IT *a)
8270 {
8271 int cond_mask = a->cond_mask;
8272
8273 /*
8274 * No actual code generated for this insn, just setup state.
8275 *
8276 * Combinations of firstcond and mask which set up an 0b1111
8277 * condition are UNPREDICTABLE; we take the CONSTRAINED
8278 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8279 * i.e. both meaning "execute always".
8280 */
8281 s->condexec_cond = (cond_mask >> 4) & 0xe;
8282 s->condexec_mask = cond_mask & 0x1f;
8283 return true;
8284 }
8285
8286 /*
8287 * Legacy decoder.
8288 */
8289
8290 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8291 {
8292 unsigned int cond = insn >> 28;
8293
8294 /* M variants do not implement ARM mode; this must raise the INVSTATE
8295 * UsageFault exception.
8296 */
8297 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8298 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
8299 default_exception_el(s));
8300 return;
8301 }
8302
8303 if (cond == 0xf) {
8304 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8305 * choose to UNDEF. In ARMv5 and above the space is used
8306 * for miscellaneous unconditional instructions.
8307 */
8308 ARCH(5);
8309
8310 /* Unconditional instructions. */
8311 /* TODO: Perhaps merge these into one decodetree output file. */
8312 if (disas_a32_uncond(s, insn) ||
8313 disas_vfp_uncond(s, insn) ||
8314 disas_neon_dp(s, insn) ||
8315 disas_neon_ls(s, insn) ||
8316 disas_neon_shared(s, insn)) {
8317 return;
8318 }
8319 /* fall back to legacy decoder */
8320
8321 if ((insn & 0x0e000f00) == 0x0c000100) {
8322 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8323 /* iWMMXt register transfer. */
8324 if (extract32(s->c15_cpar, 1, 1)) {
8325 if (!disas_iwmmxt_insn(s, insn)) {
8326 return;
8327 }
8328 }
8329 }
8330 }
8331 goto illegal_op;
8332 }
8333 if (cond != 0xe) {
8334 /* If the condition is not "always execute", generate a conditional
8335 jump to the next instruction. */
8336 arm_skip_unless(s, cond);
8337 }
8338
8339 /* TODO: Perhaps merge these into one decodetree output file. */
8340 if (disas_a32(s, insn) ||
8341 disas_vfp(s, insn)) {
8342 return;
8343 }
8344 /* fall back to legacy decoder */
8345 /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8346 if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8347 if (((insn & 0x0c000e00) == 0x0c000000)
8348 && ((insn & 0x03000000) != 0x03000000)) {
8349 /* Coprocessor insn, coprocessor 0 or 1 */
8350 disas_xscale_insn(s, insn);
8351 return;
8352 }
8353 }
8354
8355 illegal_op:
8356 unallocated_encoding(s);
8357 }
8358
8359 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8360 {
8361 /*
8362 * Return true if this is a 16 bit instruction. We must be precise
8363 * about this (matching the decode).
8364 */
8365 if ((insn >> 11) < 0x1d) {
8366 /* Definitely a 16-bit instruction */
8367 return true;
8368 }
8369
8370 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8371 * first half of a 32-bit Thumb insn. Thumb-1 cores might
8372 * end up actually treating this as two 16-bit insns, though,
8373 * if it's half of a bl/blx pair that might span a page boundary.
8374 */
8375 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8376 arm_dc_feature(s, ARM_FEATURE_M)) {
8377 /* Thumb2 cores (including all M profile ones) always treat
8378 * 32-bit insns as 32-bit.
8379 */
8380 return false;
8381 }
8382
8383 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8384 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8385 * is not on the next page; we merge this into a 32-bit
8386 * insn.
8387 */
8388 return false;
8389 }
8390 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8391 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8392 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8393 * -- handle as single 16 bit insn
8394 */
8395 return true;
8396 }
8397
8398 /* Translate a 32-bit thumb instruction. */
8399 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8400 {
8401 /*
8402 * ARMv6-M supports a limited subset of Thumb2 instructions.
8403 * Other Thumb1 architectures allow only 32-bit
8404 * combined BL/BLX prefix and suffix.
8405 */
8406 if (arm_dc_feature(s, ARM_FEATURE_M) &&
8407 !arm_dc_feature(s, ARM_FEATURE_V7)) {
8408 int i;
8409 bool found = false;
8410 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
8411 0xf3b08040 /* dsb */,
8412 0xf3b08050 /* dmb */,
8413 0xf3b08060 /* isb */,
8414 0xf3e08000 /* mrs */,
8415 0xf000d000 /* bl */};
8416 static const uint32_t armv6m_mask[] = {0xffe0d000,
8417 0xfff0d0f0,
8418 0xfff0d0f0,
8419 0xfff0d0f0,
8420 0xffe0d000,
8421 0xf800d000};
8422
8423 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
8424 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
8425 found = true;
8426 break;
8427 }
8428 }
8429 if (!found) {
8430 goto illegal_op;
8431 }
8432 } else if ((insn & 0xf800e800) != 0xf000e800) {
8433 ARCH(6T2);
8434 }
8435
8436 if ((insn & 0xef000000) == 0xef000000) {
8437 /*
8438 * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
8439 * transform into
8440 * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
8441 */
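/* i.e. move bit 28 ('p') down to bit 24 and force bit 28 to 1. */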
8442 uint32_t a32_insn = (insn & 0xe2ffffff) |
8443 ((insn & (1 << 28)) >> 4) | (1 << 28);
8444
8445 if (disas_neon_dp(s, a32_insn)) {
8446 return;
8447 }
8448 }
8449
8450 if ((insn & 0xff100000) == 0xf9000000) {
8451 /*
8452 * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
8453 * transform into
8454 * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
8455 */
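/* i.e. keep the low 24 bits and replace the top byte with 0xf4. */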
8456 uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
8457
8458 if (disas_neon_ls(s, a32_insn)) {
8459 return;
8460 }
8461 }
8462
8463 /*
8464 * TODO: Perhaps merge these into one decodetree output file.
8465 * Note disas_vfp is written for a32 with cond field in the
8466 * top nibble. The t32 encoding requires 0xe in the top nibble.
8467 */
8468 if (disas_t32(s, insn) ||
8469 disas_vfp_uncond(s, insn) ||
8470 disas_neon_shared(s, insn) ||
8471 ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
8472 return;
8473 }
8474 /* fall back to legacy decoder */
8475
8476 switch ((insn >> 25) & 0xf) {
8477 case 0: case 1: case 2: case 3:
8478 /* 16-bit instructions. Should never happen. */
8479 abort();
8480 case 6: case 7: case 14: case 15:
8481 /* Coprocessor. */
8482 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8483 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
8484 if (extract32(insn, 24, 2) == 3) {
8485 goto illegal_op; /* op0 = 0b11 : unallocated */
8486 }
8487
8488 if (((insn >> 8) & 0xe) == 10 &&
8489 dc_isar_feature(aa32_fpsp_v2, s)) {
8490 /* FP, and the CPU supports it */
8491 goto illegal_op;
8492 } else {
8493 /* All other insns: NOCP */
8494 gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
8495 syn_uncategorized(),
8496 default_exception_el(s));
8497 }
8498 break;
8499 }
8500 if (((insn >> 24) & 3) == 3) {
8501 /* Neon DP, but failed disas_neon_dp() */
8502 goto illegal_op;
8503 } else if (((insn >> 8) & 0xe) == 10) {
8504 /* VFP, but failed disas_vfp. */
8505 goto illegal_op;
8506 } else {
8507 if (insn & (1 << 28))
8508 goto illegal_op;
8509 if (disas_coproc_insn(s, insn)) {
8510 goto illegal_op;
8511 }
8512 }
8513 break;
8514 case 12:
8515 goto illegal_op;
8516 default:
8517 illegal_op:
8518 unallocated_encoding(s);
8519 }
8520 }
8521
8522 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
8523 {
8524 if (!disas_t16(s, insn)) {
8525 unallocated_encoding(s);
8526 }
8527 }
8528
8529 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
8530 {
8531 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
8532 * (False positives are OK, false negatives are not.)
8533 * We know this is a Thumb insn, and our caller ensures we are
8534 * only called if dc->base.pc_next is less than 4 bytes from the page
8535 * boundary, so we cross the page if the first 16 bits indicate
8536 * that this is a 32 bit insn.
8537 */
8538 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
8539
8540 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
8541 }
8542
8543 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
8544 {
8545 DisasContext *dc = container_of(dcbase, DisasContext, base);
8546 CPUARMState *env = cs->env_ptr;
8547 ARMCPU *cpu = env_archcpu(env);
8548 uint32_t tb_flags = dc->base.tb->flags;
8549 uint32_t condexec, core_mmu_idx;
8550
8551 dc->isar = &cpu->isar;
8552 dc->condjmp = 0;
8553
8554 dc->aarch64 = 0;
8555 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
8556 * there is no secure EL1, so we route exceptions to EL3.
8557 */
8558 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
8559 !arm_el_is_aa64(env, 3);
8560 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
8561 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
8562 condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
8563 dc->condexec_mask = (condexec & 0xf) << 1;
8564 dc->condexec_cond = condexec >> 4;
8565
8566 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
8567 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
8568 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
8569 #if !defined(CONFIG_USER_ONLY)
8570 dc->user = (dc->current_el == 0);
8571 #endif
8572 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
8573
8574 if (arm_feature(env, ARM_FEATURE_M)) {
8575 dc->vfp_enabled = 1;
8576 dc->be_data = MO_TE;
8577 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
8578 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
8579 regime_is_secure(env, dc->mmu_idx);
8580 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
8581 dc->v8m_fpccr_s_wrong =
8582 FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
8583 dc->v7m_new_fp_ctxt_needed =
8584 FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
8585 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
8586 } else {
8587 dc->be_data =
8588 FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
8589 dc->debug_target_el =
8590 FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
8591 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
8592 dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
8593 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
8594 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
8595 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
8596 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
8597 } else {
8598 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
8599 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
8600 }
8601 }
8602 dc->cp_regs = cpu->cp_regs;
8603 dc->features = env->features;
8604
8605 /* Single step state. The code-generation logic here is:
8606 * SS_ACTIVE == 0:
8607 * generate code with no special handling for single-stepping (except
8608 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
8609 * this happens anyway because those changes are all system register or
8610 * PSTATE writes).
8611 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
8612 * emit code for one insn
8613 * emit code to clear PSTATE.SS
8614 * emit code to generate software step exception for completed step
8615 * end TB (as usual for having generated an exception)
8616 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
8617 * emit code to generate a software step exception
8618 * end the TB
8619 */
8620 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
8621 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
8622 dc->is_ldex = false;
8623
8624 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
8625
8626 /* If architectural single step active, limit to 1. */
8627 if (is_singlestepping(dc)) {
8628 dc->base.max_insns = 1;
8629 }
8630
8631 /* ARM is a fixed-length ISA. Bound the number of insns to execute
8632 to those left on the page. */
8633 if (!dc->thumb) {
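/*
 * -(pc_first | TARGET_PAGE_MASK) is the number of bytes from pc_first
 * to the end of its page; dividing by 4 gives the insn count.
 */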
8634 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
8635 dc->base.max_insns = MIN(dc->base.max_insns, bound);
8636 }
8637
8638 cpu_V0 = tcg_temp_new_i64();
8639 cpu_V1 = tcg_temp_new_i64();
8640 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
8641 cpu_M0 = tcg_temp_new_i64();
8642 }
8643
8644 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
8645 {
8646 DisasContext *dc = container_of(dcbase, DisasContext, base);
8647
8648 /* A note on handling of the condexec (IT) bits:
8649 *
8650 * We want to avoid the overhead of having to write the updated condexec
8651 * bits back to the CPUARMState for every instruction in an IT block. So:
8652 * (1) if the condexec bits are not already zero then we write
8653 * zero back into the CPUARMState now. This avoids complications trying
8654 * to do it at the end of the block. (For example if we don't do this
8655 * it's hard to identify whether we can safely skip writing condexec
8656 * at the end of the TB, which we definitely want to do for the case
8657 * where a TB doesn't do anything with the IT state at all.)
8658 * (2) if we are going to leave the TB then we call gen_set_condexec()
8659 * which will write the correct value into CPUARMState if zero is wrong.
8660 * This is done both for leaving the TB at the end, and for leaving
8661 * it because of an exception we know will happen, which is done in
8662 * gen_exception_insn(). The latter is necessary because we need to
8663 * leave the TB with the PC/IT state just prior to execution of the
8664 * instruction which caused the exception.
8665 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
8666 * then the CPUARMState will be wrong and we need to reset it.
8667 * This is handled in the same way as restoration of the
8668 * PC in these situations; we save the value of the condexec bits
8669 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
8670 * then uses this to restore them after an exception.
8671 *
8672 * Note that there are no instructions which can read the condexec
8673 * bits, and none which can write non-static values to them, so
8674 * we don't need to care about whether CPUARMState is correct in the
8675 * middle of a TB.
8676 */
8677
8678 /* Reset the conditional execution bits immediately. This avoids
8679 complications trying to do it at the end of the block. */
8680 if (dc->condexec_mask || dc->condexec_cond) {
8681 TCGv_i32 tmp = tcg_temp_new_i32();
8682 tcg_gen_movi_i32(tmp, 0);
8683 store_cpu_field(tmp, condexec_bits);
8684 }
8685 }
8686
8687 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8688 {
8689 DisasContext *dc = container_of(dcbase, DisasContext, base);
8690
8691 tcg_gen_insn_start(dc->base.pc_next,
8692 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
8693 0);
8694 dc->insn_start = tcg_last_op();
8695 }
8696
8697 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8698 const CPUBreakpoint *bp)
8699 {
8700 DisasContext *dc = container_of(dcbase, DisasContext, base);
8701
8702 if (bp->flags & BP_CPU) {
8703 gen_set_condexec(dc);
8704 gen_set_pc_im(dc, dc->base.pc_next);
8705 gen_helper_check_breakpoints(cpu_env);
8706 /* End the TB early; it's likely not going to be executed */
8707 dc->base.is_jmp = DISAS_TOO_MANY;
8708 } else {
8709 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
8710 /* The address covered by the breakpoint must be
8711 included in [tb->pc, tb->pc + tb->size) in order
8712 for it to be properly cleared -- thus we
8713 increment the PC here so that the logic setting
8714 tb->size below does the right thing. */
8715 /* TODO: Advance PC by correct instruction length to
8716 * avoid disassembler error messages */
8717 dc->base.pc_next += 2;
8718 dc->base.is_jmp = DISAS_NORETURN;
8719 }
8720
8721 return true;
8722 }
8723
8724 static bool arm_pre_translate_insn(DisasContext *dc)
8725 {
8726 #ifdef CONFIG_USER_ONLY
8727 /* Intercept jump to the magic kernel page (the linux-user commpage / kuser helpers at 0xffff0000). */
8728 if (dc->base.pc_next >= 0xffff0000) {
8729 /* We always get here via a jump, so we know we are not in a
8730 conditional execution block. */
8731 gen_exception_internal(EXCP_KERNEL_TRAP);
8732 dc->base.is_jmp = DISAS_NORETURN;
8733 return true;
8734 }
8735 #endif
8736
8737 if (dc->ss_active && !dc->pstate_ss) {
8738 /* Singlestep state is Active-pending.
8739 * If we're in this state at the start of a TB then either
8740 * a) we just took an exception to an EL which is being debugged
8741 * and this is the first insn in the exception handler
8742 * b) debug exceptions were masked and we just unmasked them
8743 * without changing EL (eg by clearing PSTATE.D)
8744 * In either case we're going to take a swstep exception in the
8745 * "did not step an insn" case, and so the syndrome ISV and EX
8746 * bits should be zero.
8747 */
8748 assert(dc->base.num_insns == 1);
8749 gen_swstep_exception(dc, 0, 0);
8750 dc->base.is_jmp = DISAS_NORETURN;
8751 return true;
8752 }
8753
8754 return false;
8755 }
8756
8757 static void arm_post_translate_insn(DisasContext *dc)
8758 {
8759 if (dc->condjmp && !dc->base.is_jmp) {
8760 gen_set_label(dc->condlabel);
8761 dc->condjmp = 0;
8762 }
8763 translator_loop_temp_check(&dc->base);
8764 }
8765
8766 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8767 {
8768 DisasContext *dc = container_of(dcbase, DisasContext, base);
8769 CPUARMState *env = cpu->env_ptr;
8770 unsigned int insn;
8771
8772 if (arm_pre_translate_insn(dc)) {
8773 return;
8774 }
8775
8776 dc->pc_curr = dc->base.pc_next;
8777 insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
8778 dc->insn = insn;
8779 dc->base.pc_next += 4;
8780 disas_arm_insn(dc, insn);
8781
8782 arm_post_translate_insn(dc);
8783
8784 /* ARM is a fixed-length ISA. We performed the cross-page check
8785 in init_disas_context by adjusting max_insns. */
8786 }
8787
8788 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
8789 {
8790 /* Return true if this Thumb insn is always unconditional,
8791 * even inside an IT block. This is true of only a very few
8792 * instructions: BKPT, HLT, and SG.
8793 *
8794 * A larger class of instructions are UNPREDICTABLE if used
8795 * inside an IT block; we do not need to detect those here, because
8796 * what we do by default (perform the cc check and update the IT
8797 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
8798 * choice for those situations.
8799 *
8800 * insn is either a 16-bit or a 32-bit instruction; the two are
8801 * distinguishable because for the 16-bit case the top 16 bits
8802 * are zeroes, and that isn't a valid 32-bit encoding.
8803 */
8804 if ((insn & 0xffffff00) == 0xbe00) {
8805 /* BKPT */
8806 return true;
8807 }
8808
8809 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
8810 !arm_dc_feature(s, ARM_FEATURE_M)) {
8811 /* HLT: v8A only. This is unconditional even when it is going to
8812 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
8813 * For v7 cores this was a plain old undefined encoding and so
8814 * honours its cc check. (We might be using the encoding as
8815 * a semihosting trap, but we don't change the cc check behaviour
8816 * on that account, because a debugger connected to a real v7A
8817 * core and emulating semihosting traps by catching the UNDEF
8818 * exception would also only see cases where the cc check passed.
8819 * No guest code should be trying to do a HLT semihosting trap
8820 * in an IT block anyway.
8821 */
8822 return true;
8823 }
8824
8825 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
8826 arm_dc_feature(s, ARM_FEATURE_M)) {
8827 /* SG: v8M only */
8828 return true;
8829 }
8830
8831 return false;
8832 }
8833
8834 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8835 {
8836 DisasContext *dc = container_of(dcbase, DisasContext, base);
8837 CPUARMState *env = cpu->env_ptr;
8838 uint32_t insn;
8839 bool is_16bit;
8840
8841 if (arm_pre_translate_insn(dc)) {
8842 return;
8843 }
8844
8845 dc->pc_curr = dc->base.pc_next;
8846 insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
8847 is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
8848 dc->base.pc_next += 2;
8849 if (!is_16bit) {
8850 uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
8851
8852 insn = insn << 16 | insn2;
8853 dc->base.pc_next += 2;
8854 }
8855 dc->insn = insn;
8856
8857 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
8858 uint32_t cond = dc->condexec_cond;
8859
8860 /*
8861 * Conditionally skip the insn. Note that both 0xe and 0xf mean
8862 * "always"; 0xf is not "never".
8863 */
8864 if (cond < 0x0e) {
8865 arm_skip_unless(dc, cond);
8866 }
8867 }
8868
8869 if (is_16bit) {
8870 disas_thumb_insn(dc, insn);
8871 } else {
8872 disas_thumb2_insn(dc, insn);
8873 }
8874
8875 /* Advance the Thumb condexec (IT) state: refill the condition's low bit from the top bit of the mask, then shift the mask up by one. */
8876 if (dc->condexec_mask) {
8877 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
8878 ((dc->condexec_mask >> 4) & 1));
8879 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
8880 if (dc->condexec_mask == 0) {
8881 dc->condexec_cond = 0;
8882 }
8883 }
8884
8885 arm_post_translate_insn(dc);
8886
8887 /* Thumb is a variable-length ISA. Stop translation when the next insn
8888 * will touch a new page. This ensures that prefetch aborts occur at
8889 * the right place.
8890 *
8891 * We want to stop the TB if the next insn starts in a new page,
8892 * or if it spans between this page and the next. This means that
8893 * if we're looking at the last halfword in the page we need to
8894 * see if it's a 16-bit Thumb insn (which will fit in this TB)
8895 * or a 32-bit Thumb insn (which won't).
8896 * This is to avoid generating a silly TB with a single 16-bit insn
8897 * in it at the end of this page (which would execute correctly
8898 * but isn't very efficient).
8899 */
8900 if (dc->base.is_jmp == DISAS_NEXT
8901 && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
8902 || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
8903 && insn_crosses_page(env, dc)))) {
8904 dc->base.is_jmp = DISAS_TOO_MANY;
8905 }
8906 }
8907
8908 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8909 {
8910 DisasContext *dc = container_of(dcbase, DisasContext, base);
8911
8912 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
8913 /* FIXME: This can theoretically happen with self-modifying code. */
8914 cpu_abort(cpu, "IO on conditional branch instruction");
8915 }
8916
8917 /* At this stage dc->condjmp will only be set when the skipped
8918 instruction was a conditional branch or trap, and the PC has
8919 already been written. */
8920 gen_set_condexec(dc);
8921 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
8922 /* Exception return branches need some special case code at the
8923 * end of the TB, which is complex enough that it has to
8924 * handle the single-step vs not and the condition-failed
8925 * insn codepath itself.
8926 */
8927 gen_bx_excret_final_code(dc);
8928 } else if (unlikely(is_singlestepping(dc))) {
8929 /* Unconditional and "condition passed" instruction codepath. */
8930 switch (dc->base.is_jmp) {
8931 case DISAS_SWI:
8932 gen_ss_advance(dc);
8933 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
8934 default_exception_el(dc));
8935 break;
8936 case DISAS_HVC:
8937 gen_ss_advance(dc);
8938 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
8939 break;
8940 case DISAS_SMC:
8941 gen_ss_advance(dc);
8942 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
8943 break;
8944 case DISAS_NEXT:
8945 case DISAS_TOO_MANY:
8946 case DISAS_UPDATE_EXIT:
8947 case DISAS_UPDATE_NOCHAIN:
8948 gen_set_pc_im(dc, dc->base.pc_next);
8949 /* fall through */
8950 default:
8951 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
8952 gen_singlestep_exception(dc);
8953 break;
8954 case DISAS_NORETURN:
8955 break;
8956 }
8957 } else {
8958 /* While branches must always occur at the end of an IT block,
8959 there are a few other things that can cause us to terminate
8960 the TB in the middle of an IT block:
8961 - Exception generating instructions (bkpt, swi, undefined).
8962 - Page boundaries.
8963 - Hardware watchpoints.
8964 Hardware breakpoints have already been handled and skip this code.
8965 */
8966 switch (dc->base.is_jmp) {
8967 case DISAS_NEXT:
8968 case DISAS_TOO_MANY:
8969 gen_goto_tb(dc, 1, dc->base.pc_next);
8970 break;
8971 case DISAS_UPDATE_NOCHAIN:
8972 gen_set_pc_im(dc, dc->base.pc_next);
8973 /* fall through */
8974 case DISAS_JUMP:
8975 gen_goto_ptr();
8976 break;
8977 case DISAS_UPDATE_EXIT:
8978 gen_set_pc_im(dc, dc->base.pc_next);
8979 /* fall through */
8980 default:
8981 /* indicate that the hash table must be used to find the next TB */
8982 tcg_gen_exit_tb(NULL, 0);
8983 break;
8984 case DISAS_NORETURN:
8985 /* nothing more to generate */
8986 break;
8987 case DISAS_WFI:
8988 {
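/*
 * Pass the insn length (2 for a 16-bit Thumb WFI, otherwise 4) to the
 * helper, which needs it if the WFI has to be trapped to a higher EL.
 */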
8989 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
8990 !(dc->insn & (1U << 31))) ? 2 : 4);
8991
8992 gen_helper_wfi(cpu_env, tmp);
8993 tcg_temp_free_i32(tmp);
8994 /* The helper doesn't necessarily throw an exception, but we
8995 * must go back to the main loop to check for interrupts anyway.
8996 */
8997 tcg_gen_exit_tb(NULL, 0);
8998 break;
8999 }
9000 case DISAS_WFE:
9001 gen_helper_wfe(cpu_env);
9002 break;
9003 case DISAS_YIELD:
9004 gen_helper_yield(cpu_env);
9005 break;
9006 case DISAS_SWI:
9007 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
9008 default_exception_el(dc));
9009 break;
9010 case DISAS_HVC:
9011 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9012 break;
9013 case DISAS_SMC:
9014 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
9015 break;
9016 }
9017 }
9018
9019 if (dc->condjmp) {
9020 /* "Condition failed" instruction codepath for the branch/trap insn */
9021 gen_set_label(dc->condlabel);
9022 gen_set_condexec(dc);
9023 if (unlikely(is_singlestepping(dc))) {
9024 gen_set_pc_im(dc, dc->base.pc_next);
9025 gen_singlestep_exception(dc);
9026 } else {
9027 gen_goto_tb(dc, 1, dc->base.pc_next);
9028 }
9029 }
9030 }
9031
9032 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
9033 {
9034 DisasContext *dc = container_of(dcbase, DisasContext, base);
9035
9036 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
9037 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
9038 }
9039
9040 static const TranslatorOps arm_translator_ops = {
9041 .init_disas_context = arm_tr_init_disas_context,
9042 .tb_start = arm_tr_tb_start,
9043 .insn_start = arm_tr_insn_start,
9044 .breakpoint_check = arm_tr_breakpoint_check,
9045 .translate_insn = arm_tr_translate_insn,
9046 .tb_stop = arm_tr_tb_stop,
9047 .disas_log = arm_tr_disas_log,
9048 };
9049
9050 static const TranslatorOps thumb_translator_ops = {
9051 .init_disas_context = arm_tr_init_disas_context,
9052 .tb_start = arm_tr_tb_start,
9053 .insn_start = arm_tr_insn_start,
9054 .breakpoint_check = arm_tr_breakpoint_check,
9055 .translate_insn = thumb_tr_translate_insn,
9056 .tb_stop = arm_tr_tb_stop,
9057 .disas_log = arm_tr_disas_log,
9058 };
9059
9060 /* generate intermediate code for basic block 'tb'. */
9061 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
9062 {
9063 DisasContext dc = { };
9064 const TranslatorOps *ops = &arm_translator_ops;
9065
9066 if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
9067 ops = &thumb_translator_ops;
9068 }
9069 #ifdef TARGET_AARCH64
9070 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
9071 ops = &aarch64_translator_ops;
9072 }
9073 #endif
9074
9075 translator_loop(ops, &dc.base, cpu, tb, max_insns);
9076 }
9077
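/*
 * The data[] layout matches the three words written at insn_start:
 * the PC, the packed condexec bits (AArch32 only), and syndrome
 * information for the insn.
 */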
9078 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
9079 target_ulong *data)
9080 {
9081 if (is_a64(env)) {
9082 env->pc = data[0];
9083 env->condexec_bits = 0;
9084 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
9085 } else {
9086 env->regs[15] = data[0];
9087 env->condexec_bits = data[1];
9088 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
9089 }
9090 }