target/arm/translate.c
1 /*
2 * ARM translation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "qemu/osdep.h"
22
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "hw/semihosting/semihost.h"
33
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
36
37 #include "trace-tcg.h"
38 #include "exec/log.h"
39
40
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
51
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
53
54 #include "translate.h"
55
56 #if defined(CONFIG_USER_ONLY)
57 #define IS_USER(s) 1
58 #else
59 #define IS_USER(s) (s->user)
60 #endif
61
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
68
69 #include "exec/gen-icount.h"
70
71 static const char * const regnames[] =
72 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
73 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
74
75 /* Function prototypes for gen_ functions calling Neon helpers. */
76 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
77 TCGv_i32, TCGv_i32);
78 /* Function prototypes for gen_ functions for fix point conversions */
79 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
80
81 /* initialize TCG globals. */
82 void arm_translate_init(void)
83 {
84 int i;
85
86 for (i = 0; i < 16; i++) {
87 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
88 offsetof(CPUARMState, regs[i]),
89 regnames[i]);
90 }
91 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
92 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
93 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
94 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
95
96 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
97 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
98 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_val), "exclusive_val");
100
101 a64_translate_init();
102 }
103
104 /* Flags for the disas_set_da_iss info argument:
105 * lower bits hold the Rt register number, higher bits are flags.
106 */
107 typedef enum ISSInfo {
108 ISSNone = 0,
109 ISSRegMask = 0x1f,
110 ISSInvalid = (1 << 5),
111 ISSIsAcqRel = (1 << 6),
112 ISSIsWrite = (1 << 7),
113 ISSIs16Bit = (1 << 8),
114 } ISSInfo;
115
116 /* Save the syndrome information for a Data Abort */
117 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
118 {
119 uint32_t syn;
120 int sas = memop & MO_SIZE;
121 bool sse = memop & MO_SIGN;
122 bool is_acqrel = issinfo & ISSIsAcqRel;
123 bool is_write = issinfo & ISSIsWrite;
124 bool is_16bit = issinfo & ISSIs16Bit;
125 int srt = issinfo & ISSRegMask;
126
127 if (issinfo & ISSInvalid) {
128 /* Some callsites want to conditionally provide ISS info,
129 * eg "only if this was not a writeback"
130 */
131 return;
132 }
133
134 if (srt == 15) {
135 /* For AArch32, insns where the src/dest is R15 never generate
136 * ISS information. Catching that here saves checking at all
137 * the call sites.
138 */
139 return;
140 }
141
142 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
143 0, 0, 0, is_write, 0, is_16bit);
144 disas_set_insn_syndrome(s, syn);
145 }
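/*
 * Illustrative example (register and insn chosen for this sketch, not taken
 * from a particular callsite): a 16-bit Thumb LDRH into r3 would pass
 * memop == MO_UW and issinfo == 3 | ISSIs16Bit, so the recorded syndrome
 * describes a halfword read into r3 by a 16-bit insn. A load into r15, or a
 * call with ISSInvalid set, records no syndrome at all, as the early returns
 * above show.
 */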
146
147 static inline int get_a32_user_mem_index(DisasContext *s)
148 {
149 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
150 * insns:
151 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
152 * otherwise, access as if at PL0.
153 */
154 switch (s->mmu_idx) {
155 case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
156 case ARMMMUIdx_E10_0:
157 case ARMMMUIdx_E10_1:
158 case ARMMMUIdx_E10_1_PAN:
159 return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
160 case ARMMMUIdx_SE3:
161 case ARMMMUIdx_SE10_0:
162 case ARMMMUIdx_SE10_1:
163 case ARMMMUIdx_SE10_1_PAN:
164 return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
177 default:
178 g_assert_not_reached();
179 }
180 }
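/*
 * For example, an LDRT executed while running with ARMMMUIdx_E10_1 (the
 * privileged EL1&0 regime) performs its access with the core mmu_idx for
 * ARMMMUIdx_E10_0, i.e. exactly as an unprivileged load at PL0 would.
 */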
181
182 static inline TCGv_i32 load_cpu_offset(int offset)
183 {
184 TCGv_i32 tmp = tcg_temp_new_i32();
185 tcg_gen_ld_i32(tmp, cpu_env, offset);
186 return tmp;
187 }
188
189 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
190
191 static inline void store_cpu_offset(TCGv_i32 var, int offset)
192 {
193 tcg_gen_st_i32(var, cpu_env, offset);
194 tcg_temp_free_i32(var);
195 }
196
197 #define store_cpu_field(var, name) \
198 store_cpu_offset(var, offsetof(CPUARMState, name))
199
200 /* The architectural value of PC. */
201 static uint32_t read_pc(DisasContext *s)
202 {
203 return s->pc_curr + (s->thumb ? 4 : 8);
204 }
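/*
 * Worked example (address chosen for illustration): for a Thumb insn at
 * pc_curr == 0x8000 this returns 0x8004, and for an A32 insn at the same
 * address it returns 0x8008, matching the architectural rule that reading
 * the PC yields the current insn address plus 4 (Thumb) or 8 (ARM).
 */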
205
206 /* Set a variable to the value of a CPU register. */
207 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
208 {
209 if (reg == 15) {
210 tcg_gen_movi_i32(var, read_pc(s));
211 } else {
212 tcg_gen_mov_i32(var, cpu_R[reg]);
213 }
214 }
215
216 /* Create a new temporary and set it to the value of a CPU register. */
217 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
218 {
219 TCGv_i32 tmp = tcg_temp_new_i32();
220 load_reg_var(s, tmp, reg);
221 return tmp;
222 }
223
224 /*
225 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
226 * This is used for load/store for which use of PC implies (literal),
227 * or ADD that implies ADR.
228 */
229 static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
230 {
231 TCGv_i32 tmp = tcg_temp_new_i32();
232
233 if (reg == 15) {
234 tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
235 } else {
236 tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
237 }
238 return tmp;
239 }
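/*
 * Worked example (values chosen for illustration): a Thumb
 * "LDR r0, [pc, #8]" at pc_curr == 0x8002 sees read_pc() == 0x8006, which
 * is aligned down to 0x8004 before the offset is added, giving a literal
 * address of 0x800c. For any base register other than the PC the offset is
 * simply added to the register value.
 */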
240
241 /* Set a CPU register. The source must be a temporary and will be
242 marked as dead. */
243 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
244 {
245 if (reg == 15) {
246 /* In Thumb mode, we must ignore bit 0.
247 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
248 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
249 * We choose to ignore [1:0] in ARM mode for all architecture versions.
250 */
251 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
252 s->base.is_jmp = DISAS_JUMP;
253 }
254 tcg_gen_mov_i32(cpu_R[reg], var);
255 tcg_temp_free_i32(var);
256 }
257
258 /*
259 * Variant of store_reg which applies v8M stack-limit checks before updating
260 * SP. If the check fails this will result in an exception being taken.
261 * We disable the stack checks for CONFIG_USER_ONLY because we have
262 * no idea what the stack limits should be in that case.
263 * If stack checking is not being done this just acts like store_reg().
264 */
265 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
266 {
267 #ifndef CONFIG_USER_ONLY
268 if (s->v8m_stackcheck) {
269 gen_helper_v8m_stackcheck(cpu_env, var);
270 }
271 #endif
272 store_reg(s, 13, var);
273 }
274
275 /* Value extensions. */
276 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
277 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
278 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
279 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
280
281 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
282 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
283
284
285 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
286 {
287 TCGv_i32 tmp_mask = tcg_const_i32(mask);
288 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
289 tcg_temp_free_i32(tmp_mask);
290 }
291 /* Set NZCV flags from the high 4 bits of var. */
292 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
293
294 static void gen_exception_internal(int excp)
295 {
296 TCGv_i32 tcg_excp = tcg_const_i32(excp);
297
298 assert(excp_is_internal(excp));
299 gen_helper_exception_internal(cpu_env, tcg_excp);
300 tcg_temp_free_i32(tcg_excp);
301 }
302
303 static void gen_step_complete_exception(DisasContext *s)
304 {
305 /* We have just completed a step of an insn. Move from Active-not-pending
306 * to Active-pending, and then also take the swstep exception.
307 * This corresponds to making the (IMPDEF) choice to prioritize
308 * swstep exceptions over asynchronous exceptions taken to an exception
309 * level where debug is disabled. This choice has the advantage that
310 * we do not need to maintain internal state corresponding to the
311 * ISV/EX syndrome bits between completion of the step and generation
312 * of the exception, and our syndrome information is always correct.
313 */
314 gen_ss_advance(s);
315 gen_swstep_exception(s, 1, s->is_ldex);
316 s->base.is_jmp = DISAS_NORETURN;
317 }
318
319 static void gen_singlestep_exception(DisasContext *s)
320 {
321 /* Generate the right kind of exception for singlestep, which is
322 * either the architectural singlestep or EXCP_DEBUG for QEMU's
323 * gdb singlestepping.
324 */
325 if (s->ss_active) {
326 gen_step_complete_exception(s);
327 } else {
328 gen_exception_internal(EXCP_DEBUG);
329 }
330 }
331
332 static inline bool is_singlestepping(DisasContext *s)
333 {
334 /* Return true if we are singlestepping either because of
335 * architectural singlestep or QEMU gdbstub singlestep. This does
336 * not include the command line '-singlestep' mode which is rather
337 * misnamed as it only means "one instruction per TB" and doesn't
338 * affect the code we generate.
339 */
340 return s->base.singlestep_enabled || s->ss_active;
341 }
342
343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
344 {
345 TCGv_i32 tmp1 = tcg_temp_new_i32();
346 TCGv_i32 tmp2 = tcg_temp_new_i32();
347 tcg_gen_ext16s_i32(tmp1, a);
348 tcg_gen_ext16s_i32(tmp2, b);
349 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
350 tcg_temp_free_i32(tmp2);
351 tcg_gen_sari_i32(a, a, 16);
352 tcg_gen_sari_i32(b, b, 16);
353 tcg_gen_mul_i32(b, b, a);
354 tcg_gen_mov_i32(a, tmp1);
355 tcg_temp_free_i32(tmp1);
356 }
357
358 /* Byteswap each halfword. */
359 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
360 {
361 TCGv_i32 tmp = tcg_temp_new_i32();
362 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
363 tcg_gen_shri_i32(tmp, var, 8);
364 tcg_gen_and_i32(tmp, tmp, mask);
365 tcg_gen_and_i32(var, var, mask);
366 tcg_gen_shli_i32(var, var, 8);
367 tcg_gen_or_i32(dest, var, tmp);
368 tcg_temp_free_i32(mask);
369 tcg_temp_free_i32(tmp);
370 }
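/*
 * Worked example (value chosen for illustration): var == 0x11223344 gives
 * tmp == 0x00110033 and the masked, shifted var == 0x22004400, so
 * dest == 0x22114433, i.e. each 16-bit half has had its two bytes swapped
 * as REV16 requires.
 */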
371
372 /* Byteswap low halfword and sign extend. */
373 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
374 {
375 tcg_gen_ext16u_i32(var, var);
376 tcg_gen_bswap16_i32(var, var);
377 tcg_gen_ext16s_i32(dest, var);
378 }
379
380 /* Swap low and high halfwords. */
381 static void gen_swap_half(TCGv_i32 var)
382 {
383 tcg_gen_rotri_i32(var, var, 16);
384 }
385
386 /* Dual 16-bit add. Result placed in t0 and t1 is marked as dead.
387 tmp = (t0 ^ t1) & 0x8000;
388 t0 &= ~0x8000;
389 t1 &= ~0x8000;
390 t0 = (t0 + t1) ^ tmp;
391 */
392
393 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
394 {
395 TCGv_i32 tmp = tcg_temp_new_i32();
396 tcg_gen_xor_i32(tmp, t0, t1);
397 tcg_gen_andi_i32(tmp, tmp, 0x8000);
398 tcg_gen_andi_i32(t0, t0, ~0x8000);
399 tcg_gen_andi_i32(t1, t1, ~0x8000);
400 tcg_gen_add_i32(t0, t0, t1);
401 tcg_gen_xor_i32(dest, t0, tmp);
402 tcg_temp_free_i32(tmp);
403 }
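/*
 * Worked example of the carry-suppression trick above, with illustrative
 * operands t0 == 0x00014000 and t1 == 0x0002c000: tmp == 0x8000, the masked
 * sum is 0x00038000, and XORing tmp back in gives 0x00030000. The low
 * halves wrapped to 0x0000 without their carry leaking into the high
 * halves, which add independently to 0x0003.
 */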
404
405 /* Set N and Z flags from var. */
406 static inline void gen_logic_CC(TCGv_i32 var)
407 {
408 tcg_gen_mov_i32(cpu_NF, var);
409 tcg_gen_mov_i32(cpu_ZF, var);
410 }
411
412 /* dest = T0 + T1 + CF. */
413 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
414 {
415 tcg_gen_add_i32(dest, t0, t1);
416 tcg_gen_add_i32(dest, dest, cpu_CF);
417 }
418
419 /* dest = T0 - T1 + CF - 1. */
420 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
421 {
422 tcg_gen_sub_i32(dest, t0, t1);
423 tcg_gen_add_i32(dest, dest, cpu_CF);
424 tcg_gen_subi_i32(dest, dest, 1);
425 }
426
427 /* dest = T0 + T1. Compute C, N, V and Z flags */
428 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
429 {
430 TCGv_i32 tmp = tcg_temp_new_i32();
431 tcg_gen_movi_i32(tmp, 0);
432 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
433 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
434 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
435 tcg_gen_xor_i32(tmp, t0, t1);
436 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
437 tcg_temp_free_i32(tmp);
438 tcg_gen_mov_i32(dest, cpu_NF);
439 }
440
441 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
442 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
443 {
444 TCGv_i32 tmp = tcg_temp_new_i32();
445 if (TCG_TARGET_HAS_add2_i32) {
446 tcg_gen_movi_i32(tmp, 0);
447 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
448 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
449 } else {
450 TCGv_i64 q0 = tcg_temp_new_i64();
451 TCGv_i64 q1 = tcg_temp_new_i64();
452 tcg_gen_extu_i32_i64(q0, t0);
453 tcg_gen_extu_i32_i64(q1, t1);
454 tcg_gen_add_i64(q0, q0, q1);
455 tcg_gen_extu_i32_i64(q1, cpu_CF);
456 tcg_gen_add_i64(q0, q0, q1);
457 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
458 tcg_temp_free_i64(q0);
459 tcg_temp_free_i64(q1);
460 }
461 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
462 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
463 tcg_gen_xor_i32(tmp, t0, t1);
464 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
465 tcg_temp_free_i32(tmp);
466 tcg_gen_mov_i32(dest, cpu_NF);
467 }
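/*
 * Flag computation sketch for the adders above: cpu_NF holds the 32-bit
 * result (its bit 31 is N), cpu_ZF holds a copy of the result (Z means
 * "ZF is zero"), cpu_CF holds the carry out of bit 31 as 0 or 1, and
 * cpu_VF's bit 31 is set when the operands have the same sign but the
 * result does not, which is what the two XORs plus the andc compute.
 */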
468
469 /* dest = T0 - T1. Compute C, N, V and Z flags */
470 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
471 {
472 TCGv_i32 tmp;
473 tcg_gen_sub_i32(cpu_NF, t0, t1);
474 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
475 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
476 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
477 tmp = tcg_temp_new_i32();
478 tcg_gen_xor_i32(tmp, t0, t1);
479 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
480 tcg_temp_free_i32(tmp);
481 tcg_gen_mov_i32(dest, cpu_NF);
482 }
483
484 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
485 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
486 {
487 TCGv_i32 tmp = tcg_temp_new_i32();
488 tcg_gen_not_i32(tmp, t1);
489 gen_adc_CC(dest, t0, tmp);
490 tcg_temp_free_i32(tmp);
491 }
492
493 #define GEN_SHIFT(name) \
494 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
495 { \
496 TCGv_i32 tmp1, tmp2, tmp3; \
497 tmp1 = tcg_temp_new_i32(); \
498 tcg_gen_andi_i32(tmp1, t1, 0xff); \
499 tmp2 = tcg_const_i32(0); \
500 tmp3 = tcg_const_i32(0x1f); \
501 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
502 tcg_temp_free_i32(tmp3); \
503 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
504 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
505 tcg_temp_free_i32(tmp2); \
506 tcg_temp_free_i32(tmp1); \
507 }
508 GEN_SHIFT(shl)
509 GEN_SHIFT(shr)
510 #undef GEN_SHIFT
511
512 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
513 {
514 TCGv_i32 tmp1, tmp2;
515 tmp1 = tcg_temp_new_i32();
516 tcg_gen_andi_i32(tmp1, t1, 0xff);
517 tmp2 = tcg_const_i32(0x1f);
518 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
519 tcg_temp_free_i32(tmp2);
520 tcg_gen_sar_i32(dest, t0, tmp1);
521 tcg_temp_free_i32(tmp1);
522 }
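/*
 * These register-controlled shifts follow the ARM rule that only the bottom
 * byte of the shift register is used and that counts of 32 or more are not
 * truncated modulo 32: for example, LSR by 40 yields 0 and ASR by 40 yields
 * 0 or 0xffffffff depending on the sign. That is why gen_shl/gen_shr
 * substitute 0 as the shifted value for large counts and gen_sar clamps the
 * count to 31, so TCG only ever sees a well-defined shift amount.
 */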
523
524 static void shifter_out_im(TCGv_i32 var, int shift)
525 {
526 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
527 }
528
529 /* Shift by immediate. Includes special handling for shift == 0. */
530 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
531 int shift, int flags)
532 {
533 switch (shiftop) {
534 case 0: /* LSL */
535 if (shift != 0) {
536 if (flags)
537 shifter_out_im(var, 32 - shift);
538 tcg_gen_shli_i32(var, var, shift);
539 }
540 break;
541 case 1: /* LSR */
542 if (shift == 0) {
543 if (flags) {
544 tcg_gen_shri_i32(cpu_CF, var, 31);
545 }
546 tcg_gen_movi_i32(var, 0);
547 } else {
548 if (flags)
549 shifter_out_im(var, shift - 1);
550 tcg_gen_shri_i32(var, var, shift);
551 }
552 break;
553 case 2: /* ASR */
554 if (shift == 0)
555 shift = 32;
556 if (flags)
557 shifter_out_im(var, shift - 1);
558 if (shift == 32)
559 shift = 31;
560 tcg_gen_sari_i32(var, var, shift);
561 break;
562 case 3: /* ROR/RRX */
563 if (shift != 0) {
564 if (flags)
565 shifter_out_im(var, shift - 1);
566 tcg_gen_rotri_i32(var, var, shift); break;
567 } else {
568 TCGv_i32 tmp = tcg_temp_new_i32();
569 tcg_gen_shli_i32(tmp, cpu_CF, 31);
570 if (flags)
571 shifter_out_im(var, 0);
572 tcg_gen_shri_i32(var, var, 1);
573 tcg_gen_or_i32(var, var, tmp);
574 tcg_temp_free_i32(tmp);
575 }
576 }
577 }
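/*
 * The shift == 0 special cases above implement the immediate-shift
 * encodings: LSR #0 and ASR #0 actually mean a shift by 32, and ROR #0
 * means RRX (rotate right by one through the carry flag), which is why that
 * path shifts cpu_CF into bit 31 instead of rotating.
 */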
578
579 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
580 TCGv_i32 shift, int flags)
581 {
582 if (flags) {
583 switch (shiftop) {
584 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
585 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
586 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
587 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
588 }
589 } else {
590 switch (shiftop) {
591 case 0:
592 gen_shl(var, var, shift);
593 break;
594 case 1:
595 gen_shr(var, var, shift);
596 break;
597 case 2:
598 gen_sar(var, var, shift);
599 break;
600 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
601 tcg_gen_rotr_i32(var, var, shift); break;
602 }
603 }
604 tcg_temp_free_i32(shift);
605 }
606
607 /*
608 * Generate a conditional based on ARM condition code cc.
609 * This is common between ARM and Aarch64 targets.
610 */
611 void arm_test_cc(DisasCompare *cmp, int cc)
612 {
613 TCGv_i32 value;
614 TCGCond cond;
615 bool global = true;
616
617 switch (cc) {
618 case 0: /* eq: Z */
619 case 1: /* ne: !Z */
620 cond = TCG_COND_EQ;
621 value = cpu_ZF;
622 break;
623
624 case 2: /* cs: C */
625 case 3: /* cc: !C */
626 cond = TCG_COND_NE;
627 value = cpu_CF;
628 break;
629
630 case 4: /* mi: N */
631 case 5: /* pl: !N */
632 cond = TCG_COND_LT;
633 value = cpu_NF;
634 break;
635
636 case 6: /* vs: V */
637 case 7: /* vc: !V */
638 cond = TCG_COND_LT;
639 value = cpu_VF;
640 break;
641
642 case 8: /* hi: C && !Z */
643 case 9: /* ls: !C || Z -> !(C && !Z) */
644 cond = TCG_COND_NE;
645 value = tcg_temp_new_i32();
646 global = false;
647 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
648 ZF is non-zero for !Z; so AND the two subexpressions. */
649 tcg_gen_neg_i32(value, cpu_CF);
650 tcg_gen_and_i32(value, value, cpu_ZF);
651 break;
652
653 case 10: /* ge: N == V -> N ^ V == 0 */
654 case 11: /* lt: N != V -> N ^ V != 0 */
655 /* Since we're only interested in the sign bit, == 0 is >= 0. */
656 cond = TCG_COND_GE;
657 value = tcg_temp_new_i32();
658 global = false;
659 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
660 break;
661
662 case 12: /* gt: !Z && N == V */
663 case 13: /* le: Z || N != V */
664 cond = TCG_COND_NE;
665 value = tcg_temp_new_i32();
666 global = false;
667 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
668 * the sign bit then AND with ZF to yield the result. */
669 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
670 tcg_gen_sari_i32(value, value, 31);
671 tcg_gen_andc_i32(value, cpu_ZF, value);
672 break;
673
674 case 14: /* always */
675 case 15: /* always */
676 /* Use the ALWAYS condition, which will fold early.
677 * It doesn't matter what we use for the value. */
678 cond = TCG_COND_ALWAYS;
679 value = cpu_ZF;
680 goto no_invert;
681
682 default:
683 fprintf(stderr, "Bad condition code 0x%x\n", cc);
684 abort();
685 }
686
687 if (cc & 1) {
688 cond = tcg_invert_cond(cond);
689 }
690
691 no_invert:
692 cmp->cond = cond;
693 cmp->value = value;
694 cmp->value_global = global;
695 }
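/*
 * The comparisons above rely on how the flags are stored: cpu_NF and cpu_VF
 * keep their flag in bit 31, cpu_CF holds 0 or 1, and cpu_ZF holds a value
 * that is zero exactly when Z is set. For example, cc == 0 (EQ) is
 * implemented as TCG_COND_EQ on cpu_ZF (taken when cpu_ZF == 0, i.e. Z
 * set), and cc == 1 (NE) is the same test with the condition inverted by
 * the final tcg_invert_cond() step.
 */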
696
697 void arm_free_cc(DisasCompare *cmp)
698 {
699 if (!cmp->value_global) {
700 tcg_temp_free_i32(cmp->value);
701 }
702 }
703
704 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
705 {
706 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
707 }
708
709 void arm_gen_test_cc(int cc, TCGLabel *label)
710 {
711 DisasCompare cmp;
712 arm_test_cc(&cmp, cc);
713 arm_jump_cc(&cmp, label);
714 arm_free_cc(&cmp);
715 }
716
717 static inline void gen_set_condexec(DisasContext *s)
718 {
719 if (s->condexec_mask) {
720 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
721 TCGv_i32 tmp = tcg_temp_new_i32();
722 tcg_gen_movi_i32(tmp, val);
723 store_cpu_field(tmp, condexec_bits);
724 }
725 }
726
727 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
728 {
729 tcg_gen_movi_i32(cpu_R[15], val);
730 }
731
732 /* Set PC and Thumb state from var. var is marked as dead. */
733 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
734 {
735 s->base.is_jmp = DISAS_JUMP;
736 tcg_gen_andi_i32(cpu_R[15], var, ~1);
737 tcg_gen_andi_i32(var, var, 1);
738 store_cpu_field(var, thumb);
739 }
740
741 /*
742 * Set PC and Thumb state from var. var is marked as dead.
743 * For M-profile CPUs, include logic to detect exception-return
744 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
745 * and BX reg, and no others, and happens only for code in Handler mode.
746 * The Security Extension also requires us to check for the FNC_RETURN
747 * which signals a function return from non-secure state; this can happen
748 * in both Handler and Thread mode.
749 * To avoid having to do multiple comparisons in inline generated code,
750 * we make the check we do here loose, so it will match for EXC_RETURN
751 * in Thread mode. For system emulation do_v7m_exception_exit() checks
752 * for these spurious cases and returns without doing anything (giving
753 * the same behaviour as for a branch to a non-magic address).
754 *
755 * In linux-user mode it is unclear what the right behaviour for an
756 * attempted FNC_RETURN should be, because in real hardware this will go
757 * directly to Secure code (ie not the Linux kernel) which will then treat
758 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
759 * attempt behave the way it would on a CPU without the security extension,
760 * which is to say "like a normal branch". That means we can simply treat
761 * all branches as normal with no magic address behaviour.
762 */
763 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
764 {
765 /* Generate the same code here as for a simple bx, but flag via
766 * s->base.is_jmp that we need to do the rest of the work later.
767 */
768 gen_bx(s, var);
769 #ifndef CONFIG_USER_ONLY
770 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
771 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
772 s->base.is_jmp = DISAS_BX_EXCRET;
773 }
774 #endif
775 }
776
777 static inline void gen_bx_excret_final_code(DisasContext *s)
778 {
779 /* Generate the code to finish possible exception return and end the TB */
780 TCGLabel *excret_label = gen_new_label();
781 uint32_t min_magic;
782
783 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
784 /* Covers FNC_RETURN and EXC_RETURN magic */
785 min_magic = FNC_RETURN_MIN_MAGIC;
786 } else {
787 /* EXC_RETURN magic only */
788 min_magic = EXC_RETURN_MIN_MAGIC;
789 }
790
791 /* Is the new PC value in the magic range indicating exception return? */
792 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
793 /* No: end the TB as we would for a DISAS_JMP */
794 if (is_singlestepping(s)) {
795 gen_singlestep_exception(s);
796 } else {
797 tcg_gen_exit_tb(NULL, 0);
798 }
799 gen_set_label(excret_label);
800 /* Yes: this is an exception return.
801 * At this point in runtime env->regs[15] and env->thumb will hold
802 * the exception-return magic number, which do_v7m_exception_exit()
803 * will read. Nothing else will be able to see those values because
804 * the cpu-exec main loop guarantees that we will always go straight
805 * from raising the exception to the exception-handling code.
806 *
807 * gen_ss_advance(s) does nothing on M profile currently but
808 * calling it is conceptually the right thing as we have executed
809 * this instruction (compare SWI, HVC, SMC handling).
810 */
811 gen_ss_advance(s);
812 gen_exception_internal(EXCP_EXCEPTION_EXIT);
813 }
814
815 static inline void gen_bxns(DisasContext *s, int rm)
816 {
817 TCGv_i32 var = load_reg(s, rm);
818
819 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
820 * we need to sync state before calling it, but:
821 * - we don't need to do gen_set_pc_im() because the bxns helper will
822 * always set the PC itself
823 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
824 * unless it's outside an IT block or the last insn in an IT block,
825 * so we know that condexec == 0 (already set at the top of the TB)
826 * is correct in the non-UNPREDICTABLE cases, and we can choose
827 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
828 */
829 gen_helper_v7m_bxns(cpu_env, var);
830 tcg_temp_free_i32(var);
831 s->base.is_jmp = DISAS_EXIT;
832 }
833
834 static inline void gen_blxns(DisasContext *s, int rm)
835 {
836 TCGv_i32 var = load_reg(s, rm);
837
838 /* We don't need to sync condexec state, for the same reason as bxns.
839 * We do however need to set the PC, because the blxns helper reads it.
840 * The blxns helper may throw an exception.
841 */
842 gen_set_pc_im(s, s->base.pc_next);
843 gen_helper_v7m_blxns(cpu_env, var);
844 tcg_temp_free_i32(var);
845 s->base.is_jmp = DISAS_EXIT;
846 }
847
848 /* Variant of store_reg which uses branch&exchange logic when storing
849 to r15 in ARM architecture v7 and above. The source must be a temporary
850 and will be marked as dead. */
851 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
852 {
853 if (reg == 15 && ENABLE_ARCH_7) {
854 gen_bx(s, var);
855 } else {
856 store_reg(s, reg, var);
857 }
858 }
859
860 /* Variant of store_reg which uses branch&exchange logic when storing
861 * to r15 in ARM architecture v5T and above. This is used for storing
862 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
863 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
864 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
865 {
866 if (reg == 15 && ENABLE_ARCH_5) {
867 gen_bx_excret(s, var);
868 } else {
869 store_reg(s, reg, var);
870 }
871 }
872
873 #ifdef CONFIG_USER_ONLY
874 #define IS_USER_ONLY 1
875 #else
876 #define IS_USER_ONLY 0
877 #endif
878
879 /* Abstractions of "generate code to do a guest load/store for
880 * AArch32", where a vaddr is always 32 bits (and is zero
881 * extended if we're a 64 bit core) and data is also
882 * 32 bits unless specifically doing a 64 bit access.
883 * These functions work like tcg_gen_qemu_{ld,st}* except
884 * that the address argument is TCGv_i32 rather than TCGv.
885 */
886
887 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
888 {
889 TCGv addr = tcg_temp_new();
890 tcg_gen_extu_i32_tl(addr, a32);
891
892 /* Not needed for user-mode BE32, where we use MO_BE instead. */
893 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
894 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
895 }
896 return addr;
897 }
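/*
 * Worked example of the BE32 (SCTLR.B) address munging above, with an
 * illustrative address: an 8-bit access has its address XORed with 3 and a
 * 16-bit access with 2, so a byte load from 0x1001 really reads offset
 * 0x1002 of the little-endian-ordered backing memory; 32-bit and larger
 * accesses are left unchanged.
 */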
898
899 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
900 int index, MemOp opc)
901 {
902 TCGv addr;
903
904 if (arm_dc_feature(s, ARM_FEATURE_M) &&
905 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
906 opc |= MO_ALIGN;
907 }
908
909 addr = gen_aa32_addr(s, a32, opc);
910 tcg_gen_qemu_ld_i32(val, addr, index, opc);
911 tcg_temp_free(addr);
912 }
913
914 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
915 int index, MemOp opc)
916 {
917 TCGv addr;
918
919 if (arm_dc_feature(s, ARM_FEATURE_M) &&
920 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
921 opc |= MO_ALIGN;
922 }
923
924 addr = gen_aa32_addr(s, a32, opc);
925 tcg_gen_qemu_st_i32(val, addr, index, opc);
926 tcg_temp_free(addr);
927 }
928
929 #define DO_GEN_LD(SUFF, OPC) \
930 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
931 TCGv_i32 a32, int index) \
932 { \
933 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
934 }
935
936 #define DO_GEN_ST(SUFF, OPC) \
937 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
938 TCGv_i32 a32, int index) \
939 { \
940 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
941 }
942
943 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
944 {
945 /* Not needed for user-mode BE32, where we use MO_BE instead. */
946 if (!IS_USER_ONLY && s->sctlr_b) {
947 tcg_gen_rotri_i64(val, val, 32);
948 }
949 }
950
951 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
952 int index, MemOp opc)
953 {
954 TCGv addr = gen_aa32_addr(s, a32, opc);
955 tcg_gen_qemu_ld_i64(val, addr, index, opc);
956 gen_aa32_frob64(s, val);
957 tcg_temp_free(addr);
958 }
959
960 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
961 TCGv_i32 a32, int index)
962 {
963 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
964 }
965
966 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
967 int index, MemOp opc)
968 {
969 TCGv addr = gen_aa32_addr(s, a32, opc);
970
971 /* Not needed for user-mode BE32, where we use MO_BE instead. */
972 if (!IS_USER_ONLY && s->sctlr_b) {
973 TCGv_i64 tmp = tcg_temp_new_i64();
974 tcg_gen_rotri_i64(tmp, val, 32);
975 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
976 tcg_temp_free_i64(tmp);
977 } else {
978 tcg_gen_qemu_st_i64(val, addr, index, opc);
979 }
980 tcg_temp_free(addr);
981 }
982
983 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
984 TCGv_i32 a32, int index)
985 {
986 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
987 }
988
989 DO_GEN_LD(8u, MO_UB)
990 DO_GEN_LD(16u, MO_UW)
991 DO_GEN_LD(32u, MO_UL)
992 DO_GEN_ST(8, MO_UB)
993 DO_GEN_ST(16, MO_UW)
994 DO_GEN_ST(32, MO_UL)
995
996 static inline void gen_hvc(DisasContext *s, int imm16)
997 {
998 /* The pre HVC helper handles cases when HVC gets trapped
999 * as an undefined insn by runtime configuration (ie before
1000 * the insn really executes).
1001 */
1002 gen_set_pc_im(s, s->pc_curr);
1003 gen_helper_pre_hvc(cpu_env);
1004 /* Otherwise we will treat this as a real exception which
1005 * happens after execution of the insn. (The distinction matters
1006 * for the PC value reported to the exception handler and also
1007 * for single stepping.)
1008 */
1009 s->svc_imm = imm16;
1010 gen_set_pc_im(s, s->base.pc_next);
1011 s->base.is_jmp = DISAS_HVC;
1012 }
1013
1014 static inline void gen_smc(DisasContext *s)
1015 {
1016 /* As with HVC, we may take an exception either before or after
1017 * the insn executes.
1018 */
1019 TCGv_i32 tmp;
1020
1021 gen_set_pc_im(s, s->pc_curr);
1022 tmp = tcg_const_i32(syn_aa32_smc());
1023 gen_helper_pre_smc(cpu_env, tmp);
1024 tcg_temp_free_i32(tmp);
1025 gen_set_pc_im(s, s->base.pc_next);
1026 s->base.is_jmp = DISAS_SMC;
1027 }
1028
1029 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1030 {
1031 gen_set_condexec(s);
1032 gen_set_pc_im(s, pc);
1033 gen_exception_internal(excp);
1034 s->base.is_jmp = DISAS_NORETURN;
1035 }
1036
1037 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1038 int syn, uint32_t target_el)
1039 {
1040 gen_set_condexec(s);
1041 gen_set_pc_im(s, pc);
1042 gen_exception(excp, syn, target_el);
1043 s->base.is_jmp = DISAS_NORETURN;
1044 }
1045
1046 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1047 {
1048 TCGv_i32 tcg_syn;
1049
1050 gen_set_condexec(s);
1051 gen_set_pc_im(s, s->pc_curr);
1052 tcg_syn = tcg_const_i32(syn);
1053 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1054 tcg_temp_free_i32(tcg_syn);
1055 s->base.is_jmp = DISAS_NORETURN;
1056 }
1057
1058 static void unallocated_encoding(DisasContext *s)
1059 {
1060 /* Unallocated and reserved encodings are uncategorized */
1061 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1062 default_exception_el(s));
1063 }
1064
1065 /* Force a TB lookup after an instruction that changes the CPU state. */
1066 static inline void gen_lookup_tb(DisasContext *s)
1067 {
1068 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1069 s->base.is_jmp = DISAS_EXIT;
1070 }
1071
1072 static inline void gen_hlt(DisasContext *s, int imm)
1073 {
1074 /* HLT. This has two purposes.
1075 * Architecturally, it is an external halting debug instruction.
1076 * Since QEMU doesn't implement external debug, we treat this as
1077 * the architecture requires when halting debug is disabled: it will UNDEF.
1078 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1079 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1080 * must trigger semihosting even for ARMv7 and earlier, where
1081 * HLT was an undefined encoding.
1082 * In system mode, we don't allow userspace access to
1083 * semihosting, to provide some semblance of security
1084 * (and for consistency with our 32-bit semihosting).
1085 */
1086 if (semihosting_enabled() &&
1087 #ifndef CONFIG_USER_ONLY
1088 s->current_el != 0 &&
1089 #endif
1090 (imm == (s->thumb ? 0x3c : 0xf000))) {
1091 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1092 return;
1093 }
1094
1095 unallocated_encoding(s);
1096 }
1097
1098 static TCGv_ptr get_fpstatus_ptr(int neon)
1099 {
1100 TCGv_ptr statusptr = tcg_temp_new_ptr();
1101 int offset;
1102 if (neon) {
1103 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1104 } else {
1105 offset = offsetof(CPUARMState, vfp.fp_status);
1106 }
1107 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1108 return statusptr;
1109 }
1110
1111 static inline long vfp_reg_offset(bool dp, unsigned reg)
1112 {
1113 if (dp) {
1114 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1115 } else {
1116 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1117 if (reg & 1) {
1118 ofs += offsetof(CPU_DoubleU, l.upper);
1119 } else {
1120 ofs += offsetof(CPU_DoubleU, l.lower);
1121 }
1122 return ofs;
1123 }
1124 }
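/*
 * Example of the mapping above: double reg D5 lives in zregs[2].d[1], while
 * single reg S5 is the upper 32-bit half of zregs[1].d[0] (i.e. the top
 * half of D2), matching the architectural overlap of the S and D register
 * banks.
 */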
1125
1126 /* Return the offset of a 32-bit piece of a NEON register.
1127 zero is the least significant end of the register. */
1128 static inline long
1129 neon_reg_offset(int reg, int n)
1130 {
1131 int sreg;
1132 sreg = reg * 2 + n;
1133 return vfp_reg_offset(0, sreg);
1134 }
1135
1136 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1137 * where 0 is the least significant end of the register.
1138 */
1139 static inline long
1140 neon_element_offset(int reg, int element, MemOp size)
1141 {
1142 int element_size = 1 << size;
1143 int ofs = element * element_size;
1144 #ifdef HOST_WORDS_BIGENDIAN
1145 /* Calculate the offset assuming fully little-endian,
1146 * then XOR to account for the order of the 8-byte units.
1147 */
1148 if (element_size < 8) {
1149 ofs ^= 8 - element_size;
1150 }
1151 #endif
1152 return neon_reg_offset(reg, 0) + ofs;
1153 }
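/*
 * Worked example: on a big-endian host, byte element 0 of a D register gets
 * ofs 0 ^ 7 == 7, so it still refers to the architecturally least
 * significant byte of the 8-byte unit; on a little-endian host the offset
 * is used as-is.
 */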
1154
1155 static TCGv_i32 neon_load_reg(int reg, int pass)
1156 {
1157 TCGv_i32 tmp = tcg_temp_new_i32();
1158 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1159 return tmp;
1160 }
1161
1162 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1163 {
1164 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1165
1166 switch (mop) {
1167 case MO_UB:
1168 tcg_gen_ld8u_i32(var, cpu_env, offset);
1169 break;
1170 case MO_UW:
1171 tcg_gen_ld16u_i32(var, cpu_env, offset);
1172 break;
1173 case MO_UL:
1174 tcg_gen_ld_i32(var, cpu_env, offset);
1175 break;
1176 default:
1177 g_assert_not_reached();
1178 }
1179 }
1180
1181 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1182 {
1183 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1184
1185 switch (mop) {
1186 case MO_UB:
1187 tcg_gen_ld8u_i64(var, cpu_env, offset);
1188 break;
1189 case MO_UW:
1190 tcg_gen_ld16u_i64(var, cpu_env, offset);
1191 break;
1192 case MO_UL:
1193 tcg_gen_ld32u_i64(var, cpu_env, offset);
1194 break;
1195 case MO_Q:
1196 tcg_gen_ld_i64(var, cpu_env, offset);
1197 break;
1198 default:
1199 g_assert_not_reached();
1200 }
1201 }
1202
1203 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1204 {
1205 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1206 tcg_temp_free_i32(var);
1207 }
1208
1209 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1210 {
1211 long offset = neon_element_offset(reg, ele, size);
1212
1213 switch (size) {
1214 case MO_8:
1215 tcg_gen_st8_i32(var, cpu_env, offset);
1216 break;
1217 case MO_16:
1218 tcg_gen_st16_i32(var, cpu_env, offset);
1219 break;
1220 case MO_32:
1221 tcg_gen_st_i32(var, cpu_env, offset);
1222 break;
1223 default:
1224 g_assert_not_reached();
1225 }
1226 }
1227
1228 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1229 {
1230 long offset = neon_element_offset(reg, ele, size);
1231
1232 switch (size) {
1233 case MO_8:
1234 tcg_gen_st8_i64(var, cpu_env, offset);
1235 break;
1236 case MO_16:
1237 tcg_gen_st16_i64(var, cpu_env, offset);
1238 break;
1239 case MO_32:
1240 tcg_gen_st32_i64(var, cpu_env, offset);
1241 break;
1242 case MO_64:
1243 tcg_gen_st_i64(var, cpu_env, offset);
1244 break;
1245 default:
1246 g_assert_not_reached();
1247 }
1248 }
1249
1250 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1251 {
1252 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1253 }
1254
1255 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1256 {
1257 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1258 }
1259
1260 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1261 {
1262 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1263 }
1264
1265 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1266 {
1267 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1268 }
1269
1270 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1271 {
1272 TCGv_ptr ret = tcg_temp_new_ptr();
1273 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1274 return ret;
1275 }
1276
1277 #define ARM_CP_RW_BIT (1 << 20)
1278
1279 /* Include the VFP and Neon decoders */
1280 #include "translate-vfp.inc.c"
1281 #include "translate-neon.inc.c"
1282
1283 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1284 {
1285 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1286 }
1287
1288 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1289 {
1290 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1291 }
1292
1293 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1294 {
1295 TCGv_i32 var = tcg_temp_new_i32();
1296 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1297 return var;
1298 }
1299
1300 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1301 {
1302 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1303 tcg_temp_free_i32(var);
1304 }
1305
1306 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1307 {
1308 iwmmxt_store_reg(cpu_M0, rn);
1309 }
1310
1311 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1312 {
1313 iwmmxt_load_reg(cpu_M0, rn);
1314 }
1315
1316 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1317 {
1318 iwmmxt_load_reg(cpu_V1, rn);
1319 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1320 }
1321
1322 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1323 {
1324 iwmmxt_load_reg(cpu_V1, rn);
1325 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1326 }
1327
1328 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1329 {
1330 iwmmxt_load_reg(cpu_V1, rn);
1331 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1332 }
1333
1334 #define IWMMXT_OP(name) \
1335 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1336 { \
1337 iwmmxt_load_reg(cpu_V1, rn); \
1338 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1339 }
1340
1341 #define IWMMXT_OP_ENV(name) \
1342 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1343 { \
1344 iwmmxt_load_reg(cpu_V1, rn); \
1345 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1346 }
1347
1348 #define IWMMXT_OP_ENV_SIZE(name) \
1349 IWMMXT_OP_ENV(name##b) \
1350 IWMMXT_OP_ENV(name##w) \
1351 IWMMXT_OP_ENV(name##l)
1352
1353 #define IWMMXT_OP_ENV1(name) \
1354 static inline void gen_op_iwmmxt_##name##_M0(void) \
1355 { \
1356 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1357 }
1358
1359 IWMMXT_OP(maddsq)
1360 IWMMXT_OP(madduq)
1361 IWMMXT_OP(sadb)
1362 IWMMXT_OP(sadw)
1363 IWMMXT_OP(mulslw)
1364 IWMMXT_OP(mulshw)
1365 IWMMXT_OP(mululw)
1366 IWMMXT_OP(muluhw)
1367 IWMMXT_OP(macsw)
1368 IWMMXT_OP(macuw)
1369
1370 IWMMXT_OP_ENV_SIZE(unpackl)
1371 IWMMXT_OP_ENV_SIZE(unpackh)
1372
1373 IWMMXT_OP_ENV1(unpacklub)
1374 IWMMXT_OP_ENV1(unpackluw)
1375 IWMMXT_OP_ENV1(unpacklul)
1376 IWMMXT_OP_ENV1(unpackhub)
1377 IWMMXT_OP_ENV1(unpackhuw)
1378 IWMMXT_OP_ENV1(unpackhul)
1379 IWMMXT_OP_ENV1(unpacklsb)
1380 IWMMXT_OP_ENV1(unpacklsw)
1381 IWMMXT_OP_ENV1(unpacklsl)
1382 IWMMXT_OP_ENV1(unpackhsb)
1383 IWMMXT_OP_ENV1(unpackhsw)
1384 IWMMXT_OP_ENV1(unpackhsl)
1385
1386 IWMMXT_OP_ENV_SIZE(cmpeq)
1387 IWMMXT_OP_ENV_SIZE(cmpgtu)
1388 IWMMXT_OP_ENV_SIZE(cmpgts)
1389
1390 IWMMXT_OP_ENV_SIZE(mins)
1391 IWMMXT_OP_ENV_SIZE(minu)
1392 IWMMXT_OP_ENV_SIZE(maxs)
1393 IWMMXT_OP_ENV_SIZE(maxu)
1394
1395 IWMMXT_OP_ENV_SIZE(subn)
1396 IWMMXT_OP_ENV_SIZE(addn)
1397 IWMMXT_OP_ENV_SIZE(subu)
1398 IWMMXT_OP_ENV_SIZE(addu)
1399 IWMMXT_OP_ENV_SIZE(subs)
1400 IWMMXT_OP_ENV_SIZE(adds)
1401
1402 IWMMXT_OP_ENV(avgb0)
1403 IWMMXT_OP_ENV(avgb1)
1404 IWMMXT_OP_ENV(avgw0)
1405 IWMMXT_OP_ENV(avgw1)
1406
1407 IWMMXT_OP_ENV(packuw)
1408 IWMMXT_OP_ENV(packul)
1409 IWMMXT_OP_ENV(packuq)
1410 IWMMXT_OP_ENV(packsw)
1411 IWMMXT_OP_ENV(packsl)
1412 IWMMXT_OP_ENV(packsq)
1413
1414 static void gen_op_iwmmxt_set_mup(void)
1415 {
1416 TCGv_i32 tmp;
1417 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1418 tcg_gen_ori_i32(tmp, tmp, 2);
1419 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1420 }
1421
1422 static void gen_op_iwmmxt_set_cup(void)
1423 {
1424 TCGv_i32 tmp;
1425 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1426 tcg_gen_ori_i32(tmp, tmp, 1);
1427 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1428 }
1429
1430 static void gen_op_iwmmxt_setpsr_nz(void)
1431 {
1432 TCGv_i32 tmp = tcg_temp_new_i32();
1433 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1434 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1435 }
1436
1437 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1438 {
1439 iwmmxt_load_reg(cpu_V1, rn);
1440 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1441 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1442 }
1443
1444 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1445 TCGv_i32 dest)
1446 {
1447 int rd;
1448 uint32_t offset;
1449 TCGv_i32 tmp;
1450
1451 rd = (insn >> 16) & 0xf;
1452 tmp = load_reg(s, rd);
1453
1454 offset = (insn & 0xff) << ((insn >> 7) & 2);
1455 if (insn & (1 << 24)) {
1456 /* Pre indexed */
1457 if (insn & (1 << 23))
1458 tcg_gen_addi_i32(tmp, tmp, offset);
1459 else
1460 tcg_gen_addi_i32(tmp, tmp, -offset);
1461 tcg_gen_mov_i32(dest, tmp);
1462 if (insn & (1 << 21))
1463 store_reg(s, rd, tmp);
1464 else
1465 tcg_temp_free_i32(tmp);
1466 } else if (insn & (1 << 21)) {
1467 /* Post indexed */
1468 tcg_gen_mov_i32(dest, tmp);
1469 if (insn & (1 << 23))
1470 tcg_gen_addi_i32(tmp, tmp, offset);
1471 else
1472 tcg_gen_addi_i32(tmp, tmp, -offset);
1473 store_reg(s, rd, tmp);
1474 } else if (!(insn & (1 << 23)))
1475 return 1;
1476 return 0;
1477 }
1478
1479 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1480 {
1481 int rd = (insn >> 0) & 0xf;
1482 TCGv_i32 tmp;
1483
1484 if (insn & (1 << 8)) {
1485 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1486 return 1;
1487 } else {
1488 tmp = iwmmxt_load_creg(rd);
1489 }
1490 } else {
1491 tmp = tcg_temp_new_i32();
1492 iwmmxt_load_reg(cpu_V0, rd);
1493 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1494 }
1495 tcg_gen_andi_i32(tmp, tmp, mask);
1496 tcg_gen_mov_i32(dest, tmp);
1497 tcg_temp_free_i32(tmp);
1498 return 0;
1499 }
1500
1501 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1502 (ie. an undefined instruction). */
1503 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1504 {
1505 int rd, wrd;
1506 int rdhi, rdlo, rd0, rd1, i;
1507 TCGv_i32 addr;
1508 TCGv_i32 tmp, tmp2, tmp3;
1509
1510 if ((insn & 0x0e000e00) == 0x0c000000) {
1511 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1512 wrd = insn & 0xf;
1513 rdlo = (insn >> 12) & 0xf;
1514 rdhi = (insn >> 16) & 0xf;
1515 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1516 iwmmxt_load_reg(cpu_V0, wrd);
1517 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1518 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1519 } else { /* TMCRR */
1520 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1521 iwmmxt_store_reg(cpu_V0, wrd);
1522 gen_op_iwmmxt_set_mup();
1523 }
1524 return 0;
1525 }
1526
1527 wrd = (insn >> 12) & 0xf;
1528 addr = tcg_temp_new_i32();
1529 if (gen_iwmmxt_address(s, insn, addr)) {
1530 tcg_temp_free_i32(addr);
1531 return 1;
1532 }
1533 if (insn & ARM_CP_RW_BIT) {
1534 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1535 tmp = tcg_temp_new_i32();
1536 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1537 iwmmxt_store_creg(wrd, tmp);
1538 } else {
1539 i = 1;
1540 if (insn & (1 << 8)) {
1541 if (insn & (1 << 22)) { /* WLDRD */
1542 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1543 i = 0;
1544 } else { /* WLDRW wRd */
1545 tmp = tcg_temp_new_i32();
1546 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1547 }
1548 } else {
1549 tmp = tcg_temp_new_i32();
1550 if (insn & (1 << 22)) { /* WLDRH */
1551 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1552 } else { /* WLDRB */
1553 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1554 }
1555 }
1556 if (i) {
1557 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1558 tcg_temp_free_i32(tmp);
1559 }
1560 gen_op_iwmmxt_movq_wRn_M0(wrd);
1561 }
1562 } else {
1563 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1564 tmp = iwmmxt_load_creg(wrd);
1565 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1566 } else {
1567 gen_op_iwmmxt_movq_M0_wRn(wrd);
1568 tmp = tcg_temp_new_i32();
1569 if (insn & (1 << 8)) {
1570 if (insn & (1 << 22)) { /* WSTRD */
1571 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1572 } else { /* WSTRW wRd */
1573 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1574 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1575 }
1576 } else {
1577 if (insn & (1 << 22)) { /* WSTRH */
1578 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1579 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1580 } else { /* WSTRB */
1581 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1582 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1583 }
1584 }
1585 }
1586 tcg_temp_free_i32(tmp);
1587 }
1588 tcg_temp_free_i32(addr);
1589 return 0;
1590 }
1591
1592 if ((insn & 0x0f000000) != 0x0e000000)
1593 return 1;
1594
1595 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1596 case 0x000: /* WOR */
1597 wrd = (insn >> 12) & 0xf;
1598 rd0 = (insn >> 0) & 0xf;
1599 rd1 = (insn >> 16) & 0xf;
1600 gen_op_iwmmxt_movq_M0_wRn(rd0);
1601 gen_op_iwmmxt_orq_M0_wRn(rd1);
1602 gen_op_iwmmxt_setpsr_nz();
1603 gen_op_iwmmxt_movq_wRn_M0(wrd);
1604 gen_op_iwmmxt_set_mup();
1605 gen_op_iwmmxt_set_cup();
1606 break;
1607 case 0x011: /* TMCR */
1608 if (insn & 0xf)
1609 return 1;
1610 rd = (insn >> 12) & 0xf;
1611 wrd = (insn >> 16) & 0xf;
1612 switch (wrd) {
1613 case ARM_IWMMXT_wCID:
1614 case ARM_IWMMXT_wCASF:
1615 break;
1616 case ARM_IWMMXT_wCon:
1617 gen_op_iwmmxt_set_cup();
1618 /* Fall through. */
1619 case ARM_IWMMXT_wCSSF:
1620 tmp = iwmmxt_load_creg(wrd);
1621 tmp2 = load_reg(s, rd);
1622 tcg_gen_andc_i32(tmp, tmp, tmp2);
1623 tcg_temp_free_i32(tmp2);
1624 iwmmxt_store_creg(wrd, tmp);
1625 break;
1626 case ARM_IWMMXT_wCGR0:
1627 case ARM_IWMMXT_wCGR1:
1628 case ARM_IWMMXT_wCGR2:
1629 case ARM_IWMMXT_wCGR3:
1630 gen_op_iwmmxt_set_cup();
1631 tmp = load_reg(s, rd);
1632 iwmmxt_store_creg(wrd, tmp);
1633 break;
1634 default:
1635 return 1;
1636 }
1637 break;
1638 case 0x100: /* WXOR */
1639 wrd = (insn >> 12) & 0xf;
1640 rd0 = (insn >> 0) & 0xf;
1641 rd1 = (insn >> 16) & 0xf;
1642 gen_op_iwmmxt_movq_M0_wRn(rd0);
1643 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1644 gen_op_iwmmxt_setpsr_nz();
1645 gen_op_iwmmxt_movq_wRn_M0(wrd);
1646 gen_op_iwmmxt_set_mup();
1647 gen_op_iwmmxt_set_cup();
1648 break;
1649 case 0x111: /* TMRC */
1650 if (insn & 0xf)
1651 return 1;
1652 rd = (insn >> 12) & 0xf;
1653 wrd = (insn >> 16) & 0xf;
1654 tmp = iwmmxt_load_creg(wrd);
1655 store_reg(s, rd, tmp);
1656 break;
1657 case 0x300: /* WANDN */
1658 wrd = (insn >> 12) & 0xf;
1659 rd0 = (insn >> 0) & 0xf;
1660 rd1 = (insn >> 16) & 0xf;
1661 gen_op_iwmmxt_movq_M0_wRn(rd0);
1662 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1663 gen_op_iwmmxt_andq_M0_wRn(rd1);
1664 gen_op_iwmmxt_setpsr_nz();
1665 gen_op_iwmmxt_movq_wRn_M0(wrd);
1666 gen_op_iwmmxt_set_mup();
1667 gen_op_iwmmxt_set_cup();
1668 break;
1669 case 0x200: /* WAND */
1670 wrd = (insn >> 12) & 0xf;
1671 rd0 = (insn >> 0) & 0xf;
1672 rd1 = (insn >> 16) & 0xf;
1673 gen_op_iwmmxt_movq_M0_wRn(rd0);
1674 gen_op_iwmmxt_andq_M0_wRn(rd1);
1675 gen_op_iwmmxt_setpsr_nz();
1676 gen_op_iwmmxt_movq_wRn_M0(wrd);
1677 gen_op_iwmmxt_set_mup();
1678 gen_op_iwmmxt_set_cup();
1679 break;
1680 case 0x810: case 0xa10: /* WMADD */
1681 wrd = (insn >> 12) & 0xf;
1682 rd0 = (insn >> 0) & 0xf;
1683 rd1 = (insn >> 16) & 0xf;
1684 gen_op_iwmmxt_movq_M0_wRn(rd0);
1685 if (insn & (1 << 21))
1686 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1687 else
1688 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1689 gen_op_iwmmxt_movq_wRn_M0(wrd);
1690 gen_op_iwmmxt_set_mup();
1691 break;
1692 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1693 wrd = (insn >> 12) & 0xf;
1694 rd0 = (insn >> 16) & 0xf;
1695 rd1 = (insn >> 0) & 0xf;
1696 gen_op_iwmmxt_movq_M0_wRn(rd0);
1697 switch ((insn >> 22) & 3) {
1698 case 0:
1699 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1700 break;
1701 case 1:
1702 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1703 break;
1704 case 2:
1705 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1706 break;
1707 case 3:
1708 return 1;
1709 }
1710 gen_op_iwmmxt_movq_wRn_M0(wrd);
1711 gen_op_iwmmxt_set_mup();
1712 gen_op_iwmmxt_set_cup();
1713 break;
1714 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1715 wrd = (insn >> 12) & 0xf;
1716 rd0 = (insn >> 16) & 0xf;
1717 rd1 = (insn >> 0) & 0xf;
1718 gen_op_iwmmxt_movq_M0_wRn(rd0);
1719 switch ((insn >> 22) & 3) {
1720 case 0:
1721 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1722 break;
1723 case 1:
1724 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1725 break;
1726 case 2:
1727 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1728 break;
1729 case 3:
1730 return 1;
1731 }
1732 gen_op_iwmmxt_movq_wRn_M0(wrd);
1733 gen_op_iwmmxt_set_mup();
1734 gen_op_iwmmxt_set_cup();
1735 break;
1736 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1737 wrd = (insn >> 12) & 0xf;
1738 rd0 = (insn >> 16) & 0xf;
1739 rd1 = (insn >> 0) & 0xf;
1740 gen_op_iwmmxt_movq_M0_wRn(rd0);
1741 if (insn & (1 << 22))
1742 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1743 else
1744 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1745 if (!(insn & (1 << 20)))
1746 gen_op_iwmmxt_addl_M0_wRn(wrd);
1747 gen_op_iwmmxt_movq_wRn_M0(wrd);
1748 gen_op_iwmmxt_set_mup();
1749 break;
1750 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1751 wrd = (insn >> 12) & 0xf;
1752 rd0 = (insn >> 16) & 0xf;
1753 rd1 = (insn >> 0) & 0xf;
1754 gen_op_iwmmxt_movq_M0_wRn(rd0);
1755 if (insn & (1 << 21)) {
1756 if (insn & (1 << 20))
1757 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1758 else
1759 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1760 } else {
1761 if (insn & (1 << 20))
1762 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1763 else
1764 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1765 }
1766 gen_op_iwmmxt_movq_wRn_M0(wrd);
1767 gen_op_iwmmxt_set_mup();
1768 break;
1769 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1770 wrd = (insn >> 12) & 0xf;
1771 rd0 = (insn >> 16) & 0xf;
1772 rd1 = (insn >> 0) & 0xf;
1773 gen_op_iwmmxt_movq_M0_wRn(rd0);
1774 if (insn & (1 << 21))
1775 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1776 else
1777 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1778 if (!(insn & (1 << 20))) {
1779 iwmmxt_load_reg(cpu_V1, wrd);
1780 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1781 }
1782 gen_op_iwmmxt_movq_wRn_M0(wrd);
1783 gen_op_iwmmxt_set_mup();
1784 break;
1785 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1786 wrd = (insn >> 12) & 0xf;
1787 rd0 = (insn >> 16) & 0xf;
1788 rd1 = (insn >> 0) & 0xf;
1789 gen_op_iwmmxt_movq_M0_wRn(rd0);
1790 switch ((insn >> 22) & 3) {
1791 case 0:
1792 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1793 break;
1794 case 1:
1795 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1796 break;
1797 case 2:
1798 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1799 break;
1800 case 3:
1801 return 1;
1802 }
1803 gen_op_iwmmxt_movq_wRn_M0(wrd);
1804 gen_op_iwmmxt_set_mup();
1805 gen_op_iwmmxt_set_cup();
1806 break;
1807 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1808 wrd = (insn >> 12) & 0xf;
1809 rd0 = (insn >> 16) & 0xf;
1810 rd1 = (insn >> 0) & 0xf;
1811 gen_op_iwmmxt_movq_M0_wRn(rd0);
1812 if (insn & (1 << 22)) {
1813 if (insn & (1 << 20))
1814 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1815 else
1816 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1817 } else {
1818 if (insn & (1 << 20))
1819 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1820 else
1821 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1822 }
1823 gen_op_iwmmxt_movq_wRn_M0(wrd);
1824 gen_op_iwmmxt_set_mup();
1825 gen_op_iwmmxt_set_cup();
1826 break;
1827 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1828 wrd = (insn >> 12) & 0xf;
1829 rd0 = (insn >> 16) & 0xf;
1830 rd1 = (insn >> 0) & 0xf;
1831 gen_op_iwmmxt_movq_M0_wRn(rd0);
1832 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1833 tcg_gen_andi_i32(tmp, tmp, 7);
1834 iwmmxt_load_reg(cpu_V1, rd1);
1835 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1836 tcg_temp_free_i32(tmp);
1837 gen_op_iwmmxt_movq_wRn_M0(wrd);
1838 gen_op_iwmmxt_set_mup();
1839 break;
1840 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1841 if (((insn >> 6) & 3) == 3)
1842 return 1;
1843 rd = (insn >> 12) & 0xf;
1844 wrd = (insn >> 16) & 0xf;
1845 tmp = load_reg(s, rd);
1846 gen_op_iwmmxt_movq_M0_wRn(wrd);
1847 switch ((insn >> 6) & 3) {
1848 case 0:
1849 tmp2 = tcg_const_i32(0xff);
1850 tmp3 = tcg_const_i32((insn & 7) << 3);
1851 break;
1852 case 1:
1853 tmp2 = tcg_const_i32(0xffff);
1854 tmp3 = tcg_const_i32((insn & 3) << 4);
1855 break;
1856 case 2:
1857 tmp2 = tcg_const_i32(0xffffffff);
1858 tmp3 = tcg_const_i32((insn & 1) << 5);
1859 break;
1860 default:
1861 tmp2 = NULL;
1862 tmp3 = NULL;
1863 }
1864 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1865 tcg_temp_free_i32(tmp3);
1866 tcg_temp_free_i32(tmp2);
1867 tcg_temp_free_i32(tmp);
1868 gen_op_iwmmxt_movq_wRn_M0(wrd);
1869 gen_op_iwmmxt_set_mup();
1870 break;
1871 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1872 rd = (insn >> 12) & 0xf;
1873 wrd = (insn >> 16) & 0xf;
1874 if (rd == 15 || ((insn >> 22) & 3) == 3)
1875 return 1;
1876 gen_op_iwmmxt_movq_M0_wRn(wrd);
1877 tmp = tcg_temp_new_i32();
1878 switch ((insn >> 22) & 3) {
1879 case 0:
1880 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1881 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1882 if (insn & 8) {
1883 tcg_gen_ext8s_i32(tmp, tmp);
1884 } else {
1885 tcg_gen_andi_i32(tmp, tmp, 0xff);
1886 }
1887 break;
1888 case 1:
1889 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1890 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1891 if (insn & 8) {
1892 tcg_gen_ext16s_i32(tmp, tmp);
1893 } else {
1894 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1895 }
1896 break;
1897 case 2:
1898 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1899 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1900 break;
1901 }
1902 store_reg(s, rd, tmp);
1903 break;
1904 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1905 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1906 return 1;
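/* Extract the selected element's flags from wCASF and copy them into NZCV. */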
1907 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1908 switch ((insn >> 22) & 3) {
1909 case 0:
1910 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1911 break;
1912 case 1:
1913 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1914 break;
1915 case 2:
1916 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1917 break;
1918 }
1919 tcg_gen_shli_i32(tmp, tmp, 28);
1920 gen_set_nzcv(tmp);
1921 tcg_temp_free_i32(tmp);
1922 break;
1923 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1924 if (((insn >> 6) & 3) == 3)
1925 return 1;
1926 rd = (insn >> 12) & 0xf;
1927 wrd = (insn >> 16) & 0xf;
1928 tmp = load_reg(s, rd);
1929 switch ((insn >> 6) & 3) {
1930 case 0:
1931 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1932 break;
1933 case 1:
1934 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1935 break;
1936 case 2:
1937 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1938 break;
1939 }
1940 tcg_temp_free_i32(tmp);
1941 gen_op_iwmmxt_movq_wRn_M0(wrd);
1942 gen_op_iwmmxt_set_mup();
1943 break;
1944 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1945 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1946 return 1;
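/* AND together the per-element flag fields held in wCASF and set NZCV from the result. */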
1947 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1948 tmp2 = tcg_temp_new_i32();
1949 tcg_gen_mov_i32(tmp2, tmp);
1950 switch ((insn >> 22) & 3) {
1951 case 0:
1952 for (i = 0; i < 7; i ++) {
1953 tcg_gen_shli_i32(tmp2, tmp2, 4);
1954 tcg_gen_and_i32(tmp, tmp, tmp2);
1955 }
1956 break;
1957 case 1:
1958 for (i = 0; i < 3; i ++) {
1959 tcg_gen_shli_i32(tmp2, tmp2, 8);
1960 tcg_gen_and_i32(tmp, tmp, tmp2);
1961 }
1962 break;
1963 case 2:
1964 tcg_gen_shli_i32(tmp2, tmp2, 16);
1965 tcg_gen_and_i32(tmp, tmp, tmp2);
1966 break;
1967 }
1968 gen_set_nzcv(tmp);
1969 tcg_temp_free_i32(tmp2);
1970 tcg_temp_free_i32(tmp);
1971 break;
1972 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
1973 wrd = (insn >> 12) & 0xf;
1974 rd0 = (insn >> 16) & 0xf;
1975 gen_op_iwmmxt_movq_M0_wRn(rd0);
1976 switch ((insn >> 22) & 3) {
1977 case 0:
1978 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1979 break;
1980 case 1:
1981 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1982 break;
1983 case 2:
1984 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1985 break;
1986 case 3:
1987 return 1;
1988 }
1989 gen_op_iwmmxt_movq_wRn_M0(wrd);
1990 gen_op_iwmmxt_set_mup();
1991 break;
1992 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
1993 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1994 return 1;
1995 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1996 tmp2 = tcg_temp_new_i32();
1997 tcg_gen_mov_i32(tmp2, tmp);
1998 switch ((insn >> 22) & 3) {
1999 case 0:
2000 for (i = 0; i < 7; i ++) {
2001 tcg_gen_shli_i32(tmp2, tmp2, 4);
2002 tcg_gen_or_i32(tmp, tmp, tmp2);
2003 }
2004 break;
2005 case 1:
2006 for (i = 0; i < 3; i ++) {
2007 tcg_gen_shli_i32(tmp2, tmp2, 8);
2008 tcg_gen_or_i32(tmp, tmp, tmp2);
2009 }
2010 break;
2011 case 2:
2012 tcg_gen_shli_i32(tmp2, tmp2, 16);
2013 tcg_gen_or_i32(tmp, tmp, tmp2);
2014 break;
2015 }
2016 gen_set_nzcv(tmp);
2017 tcg_temp_free_i32(tmp2);
2018 tcg_temp_free_i32(tmp);
2019 break;
2020 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2021 rd = (insn >> 12) & 0xf;
2022 rd0 = (insn >> 16) & 0xf;
2023 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2024 return 1;
2025 gen_op_iwmmxt_movq_M0_wRn(rd0);
2026 tmp = tcg_temp_new_i32();
2027 switch ((insn >> 22) & 3) {
2028 case 0:
2029 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2030 break;
2031 case 1:
2032 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2033 break;
2034 case 2:
2035 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2036 break;
2037 }
2038 store_reg(s, rd, tmp);
2039 break;
2040 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2041 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2042 wrd = (insn >> 12) & 0xf;
2043 rd0 = (insn >> 16) & 0xf;
2044 rd1 = (insn >> 0) & 0xf;
2045 gen_op_iwmmxt_movq_M0_wRn(rd0);
2046 switch ((insn >> 22) & 3) {
2047 case 0:
2048 if (insn & (1 << 21))
2049 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2050 else
2051 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2052 break;
2053 case 1:
2054 if (insn & (1 << 21))
2055 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2056 else
2057 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2058 break;
2059 case 2:
2060 if (insn & (1 << 21))
2061 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2062 else
2063 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2064 break;
2065 case 3:
2066 return 1;
2067 }
2068 gen_op_iwmmxt_movq_wRn_M0(wrd);
2069 gen_op_iwmmxt_set_mup();
2070 gen_op_iwmmxt_set_cup();
2071 break;
2072 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2073 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2074 wrd = (insn >> 12) & 0xf;
2075 rd0 = (insn >> 16) & 0xf;
2076 gen_op_iwmmxt_movq_M0_wRn(rd0);
2077 switch ((insn >> 22) & 3) {
2078 case 0:
2079 if (insn & (1 << 21))
2080 gen_op_iwmmxt_unpacklsb_M0();
2081 else
2082 gen_op_iwmmxt_unpacklub_M0();
2083 break;
2084 case 1:
2085 if (insn & (1 << 21))
2086 gen_op_iwmmxt_unpacklsw_M0();
2087 else
2088 gen_op_iwmmxt_unpackluw_M0();
2089 break;
2090 case 2:
2091 if (insn & (1 << 21))
2092 gen_op_iwmmxt_unpacklsl_M0();
2093 else
2094 gen_op_iwmmxt_unpacklul_M0();
2095 break;
2096 case 3:
2097 return 1;
2098 }
2099 gen_op_iwmmxt_movq_wRn_M0(wrd);
2100 gen_op_iwmmxt_set_mup();
2101 gen_op_iwmmxt_set_cup();
2102 break;
2103 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2104 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2105 wrd = (insn >> 12) & 0xf;
2106 rd0 = (insn >> 16) & 0xf;
2107 gen_op_iwmmxt_movq_M0_wRn(rd0);
2108 switch ((insn >> 22) & 3) {
2109 case 0:
2110 if (insn & (1 << 21))
2111 gen_op_iwmmxt_unpackhsb_M0();
2112 else
2113 gen_op_iwmmxt_unpackhub_M0();
2114 break;
2115 case 1:
2116 if (insn & (1 << 21))
2117 gen_op_iwmmxt_unpackhsw_M0();
2118 else
2119 gen_op_iwmmxt_unpackhuw_M0();
2120 break;
2121 case 2:
2122 if (insn & (1 << 21))
2123 gen_op_iwmmxt_unpackhsl_M0();
2124 else
2125 gen_op_iwmmxt_unpackhul_M0();
2126 break;
2127 case 3:
2128 return 1;
2129 }
2130 gen_op_iwmmxt_movq_wRn_M0(wrd);
2131 gen_op_iwmmxt_set_mup();
2132 gen_op_iwmmxt_set_cup();
2133 break;
2134 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2135 case 0x214: case 0x614: case 0xa14: case 0xe14:
2136 if (((insn >> 22) & 3) == 0)
2137 return 1;
2138 wrd = (insn >> 12) & 0xf;
2139 rd0 = (insn >> 16) & 0xf;
2140 gen_op_iwmmxt_movq_M0_wRn(rd0);
2141 tmp = tcg_temp_new_i32();
2142 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2143 tcg_temp_free_i32(tmp);
2144 return 1;
2145 }
2146 switch ((insn >> 22) & 3) {
2147 case 1:
2148 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2149 break;
2150 case 2:
2151 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2152 break;
2153 case 3:
2154 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2155 break;
2156 }
2157 tcg_temp_free_i32(tmp);
2158 gen_op_iwmmxt_movq_wRn_M0(wrd);
2159 gen_op_iwmmxt_set_mup();
2160 gen_op_iwmmxt_set_cup();
2161 break;
2162 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2163 case 0x014: case 0x414: case 0x814: case 0xc14:
2164 if (((insn >> 22) & 3) == 0)
2165 return 1;
2166 wrd = (insn >> 12) & 0xf;
2167 rd0 = (insn >> 16) & 0xf;
2168 gen_op_iwmmxt_movq_M0_wRn(rd0);
2169 tmp = tcg_temp_new_i32();
2170 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2171 tcg_temp_free_i32(tmp);
2172 return 1;
2173 }
2174 switch ((insn >> 22) & 3) {
2175 case 1:
2176 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2177 break;
2178 case 2:
2179 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2180 break;
2181 case 3:
2182 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2183 break;
2184 }
2185 tcg_temp_free_i32(tmp);
2186 gen_op_iwmmxt_movq_wRn_M0(wrd);
2187 gen_op_iwmmxt_set_mup();
2188 gen_op_iwmmxt_set_cup();
2189 break;
2190 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2191 case 0x114: case 0x514: case 0x914: case 0xd14:
2192 if (((insn >> 22) & 3) == 0)
2193 return 1;
2194 wrd = (insn >> 12) & 0xf;
2195 rd0 = (insn >> 16) & 0xf;
2196 gen_op_iwmmxt_movq_M0_wRn(rd0);
2197 tmp = tcg_temp_new_i32();
2198 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2199 tcg_temp_free_i32(tmp);
2200 return 1;
2201 }
2202 switch ((insn >> 22) & 3) {
2203 case 1:
2204 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2205 break;
2206 case 2:
2207 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2208 break;
2209 case 3:
2210 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2211 break;
2212 }
2213 tcg_temp_free_i32(tmp);
2214 gen_op_iwmmxt_movq_wRn_M0(wrd);
2215 gen_op_iwmmxt_set_mup();
2216 gen_op_iwmmxt_set_cup();
2217 break;
2218 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2219 case 0x314: case 0x714: case 0xb14: case 0xf14:
2220 if (((insn >> 22) & 3) == 0)
2221 return 1;
2222 wrd = (insn >> 12) & 0xf;
2223 rd0 = (insn >> 16) & 0xf;
2224 gen_op_iwmmxt_movq_M0_wRn(rd0);
2225 tmp = tcg_temp_new_i32();
2226 switch ((insn >> 22) & 3) {
2227 case 1:
2228 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2229 tcg_temp_free_i32(tmp);
2230 return 1;
2231 }
2232 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2233 break;
2234 case 2:
2235 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2236 tcg_temp_free_i32(tmp);
2237 return 1;
2238 }
2239 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2240 break;
2241 case 3:
2242 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2243 tcg_temp_free_i32(tmp);
2244 return 1;
2245 }
2246 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2247 break;
2248 }
2249 tcg_temp_free_i32(tmp);
2250 gen_op_iwmmxt_movq_wRn_M0(wrd);
2251 gen_op_iwmmxt_set_mup();
2252 gen_op_iwmmxt_set_cup();
2253 break;
2254 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2255 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2256 wrd = (insn >> 12) & 0xf;
2257 rd0 = (insn >> 16) & 0xf;
2258 rd1 = (insn >> 0) & 0xf;
2259 gen_op_iwmmxt_movq_M0_wRn(rd0);
2260 switch ((insn >> 22) & 3) {
2261 case 0:
2262 if (insn & (1 << 21))
2263 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2264 else
2265 gen_op_iwmmxt_minub_M0_wRn(rd1);
2266 break;
2267 case 1:
2268 if (insn & (1 << 21))
2269 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2270 else
2271 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2272 break;
2273 case 2:
2274 if (insn & (1 << 21))
2275 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2276 else
2277 gen_op_iwmmxt_minul_M0_wRn(rd1);
2278 break;
2279 case 3:
2280 return 1;
2281 }
2282 gen_op_iwmmxt_movq_wRn_M0(wrd);
2283 gen_op_iwmmxt_set_mup();
2284 break;
2285 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2286 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2287 wrd = (insn >> 12) & 0xf;
2288 rd0 = (insn >> 16) & 0xf;
2289 rd1 = (insn >> 0) & 0xf;
2290 gen_op_iwmmxt_movq_M0_wRn(rd0);
2291 switch ((insn >> 22) & 3) {
2292 case 0:
2293 if (insn & (1 << 21))
2294 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2295 else
2296 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2297 break;
2298 case 1:
2299 if (insn & (1 << 21))
2300 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2301 else
2302 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2303 break;
2304 case 2:
2305 if (insn & (1 << 21))
2306 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2307 else
2308 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2309 break;
2310 case 3:
2311 return 1;
2312 }
2313 gen_op_iwmmxt_movq_wRn_M0(wrd);
2314 gen_op_iwmmxt_set_mup();
2315 break;
2316 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2317 case 0x402: case 0x502: case 0x602: case 0x702:
2318 wrd = (insn >> 12) & 0xf;
2319 rd0 = (insn >> 16) & 0xf;
2320 rd1 = (insn >> 0) & 0xf;
2321 gen_op_iwmmxt_movq_M0_wRn(rd0);
2322 tmp = tcg_const_i32((insn >> 20) & 3);
2323 iwmmxt_load_reg(cpu_V1, rd1);
2324 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2325 tcg_temp_free_i32(tmp);
2326 gen_op_iwmmxt_movq_wRn_M0(wrd);
2327 gen_op_iwmmxt_set_mup();
2328 break;
2329 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2330 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2331 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2332 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2333 wrd = (insn >> 12) & 0xf;
2334 rd0 = (insn >> 16) & 0xf;
2335 rd1 = (insn >> 0) & 0xf;
2336 gen_op_iwmmxt_movq_M0_wRn(rd0);
2337 switch ((insn >> 20) & 0xf) {
2338 case 0x0:
2339 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2340 break;
2341 case 0x1:
2342 gen_op_iwmmxt_subub_M0_wRn(rd1);
2343 break;
2344 case 0x3:
2345 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2346 break;
2347 case 0x4:
2348 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2349 break;
2350 case 0x5:
2351 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2352 break;
2353 case 0x7:
2354 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2355 break;
2356 case 0x8:
2357 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2358 break;
2359 case 0x9:
2360 gen_op_iwmmxt_subul_M0_wRn(rd1);
2361 break;
2362 case 0xb:
2363 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2364 break;
2365 default:
2366 return 1;
2367 }
2368 gen_op_iwmmxt_movq_wRn_M0(wrd);
2369 gen_op_iwmmxt_set_mup();
2370 gen_op_iwmmxt_set_cup();
2371 break;
2372 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2373 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2374 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2375 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2376 wrd = (insn >> 12) & 0xf;
2377 rd0 = (insn >> 16) & 0xf;
2378 gen_op_iwmmxt_movq_M0_wRn(rd0);
2379 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2380 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2381 tcg_temp_free_i32(tmp);
2382 gen_op_iwmmxt_movq_wRn_M0(wrd);
2383 gen_op_iwmmxt_set_mup();
2384 gen_op_iwmmxt_set_cup();
2385 break;
2386 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2387 case 0x418: case 0x518: case 0x618: case 0x718:
2388 case 0x818: case 0x918: case 0xa18: case 0xb18:
2389 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2390 wrd = (insn >> 12) & 0xf;
2391 rd0 = (insn >> 16) & 0xf;
2392 rd1 = (insn >> 0) & 0xf;
2393 gen_op_iwmmxt_movq_M0_wRn(rd0);
2394 switch ((insn >> 20) & 0xf) {
2395 case 0x0:
2396 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2397 break;
2398 case 0x1:
2399 gen_op_iwmmxt_addub_M0_wRn(rd1);
2400 break;
2401 case 0x3:
2402 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2403 break;
2404 case 0x4:
2405 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2406 break;
2407 case 0x5:
2408 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2409 break;
2410 case 0x7:
2411 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2412 break;
2413 case 0x8:
2414 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2415 break;
2416 case 0x9:
2417 gen_op_iwmmxt_addul_M0_wRn(rd1);
2418 break;
2419 case 0xb:
2420 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2421 break;
2422 default:
2423 return 1;
2424 }
2425 gen_op_iwmmxt_movq_wRn_M0(wrd);
2426 gen_op_iwmmxt_set_mup();
2427 gen_op_iwmmxt_set_cup();
2428 break;
2429 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2430 case 0x408: case 0x508: case 0x608: case 0x708:
2431 case 0x808: case 0x908: case 0xa08: case 0xb08:
2432 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2433 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2434 return 1;
2435 wrd = (insn >> 12) & 0xf;
2436 rd0 = (insn >> 16) & 0xf;
2437 rd1 = (insn >> 0) & 0xf;
2438 gen_op_iwmmxt_movq_M0_wRn(rd0);
2439 switch ((insn >> 22) & 3) {
2440 case 1:
2441 if (insn & (1 << 21))
2442 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2443 else
2444 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2445 break;
2446 case 2:
2447 if (insn & (1 << 21))
2448 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2449 else
2450 gen_op_iwmmxt_packul_M0_wRn(rd1);
2451 break;
2452 case 3:
2453 if (insn & (1 << 21))
2454 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2455 else
2456 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2457 break;
2458 }
2459 gen_op_iwmmxt_movq_wRn_M0(wrd);
2460 gen_op_iwmmxt_set_mup();
2461 gen_op_iwmmxt_set_cup();
2462 break;
2463 case 0x201: case 0x203: case 0x205: case 0x207:
2464 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2465 case 0x211: case 0x213: case 0x215: case 0x217:
2466 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2467 wrd = (insn >> 5) & 0xf;
2468 rd0 = (insn >> 12) & 0xf;
2469 rd1 = (insn >> 0) & 0xf;
2470 if (rd0 == 0xf || rd1 == 0xf)
2471 return 1;
2472 gen_op_iwmmxt_movq_M0_wRn(wrd);
2473 tmp = load_reg(s, rd0);
2474 tmp2 = load_reg(s, rd1);
2475 switch ((insn >> 16) & 0xf) {
2476 case 0x0: /* TMIA */
2477 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2478 break;
2479 case 0x8: /* TMIAPH */
2480 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2481 break;
2482 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2483 if (insn & (1 << 16))
2484 tcg_gen_shri_i32(tmp, tmp, 16);
2485 if (insn & (1 << 17))
2486 tcg_gen_shri_i32(tmp2, tmp2, 16);
2487 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2488 break;
2489 default:
2490 tcg_temp_free_i32(tmp2);
2491 tcg_temp_free_i32(tmp);
2492 return 1;
2493 }
2494 tcg_temp_free_i32(tmp2);
2495 tcg_temp_free_i32(tmp);
2496 gen_op_iwmmxt_movq_wRn_M0(wrd);
2497 gen_op_iwmmxt_set_mup();
2498 break;
2499 default:
2500 return 1;
2501 }
2502
2503 return 0;
2504 }
2505
2506 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2507 (i.e. an undefined instruction). */
2508 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2509 {
2510 int acc, rd0, rd1, rdhi, rdlo;
2511 TCGv_i32 tmp, tmp2;
2512
2513 if ((insn & 0x0ff00f10) == 0x0e200010) {
2514 /* Multiply with Internal Accumulate Format */
2515 rd0 = (insn >> 12) & 0xf;
2516 rd1 = insn & 0xf;
2517 acc = (insn >> 5) & 7;
2518
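/* Only accumulator acc0 is supported here; other accumulator numbers UNDEF. */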
2519 if (acc != 0)
2520 return 1;
2521
2522 tmp = load_reg(s, rd0);
2523 tmp2 = load_reg(s, rd1);
2524 switch ((insn >> 16) & 0xf) {
2525 case 0x0: /* MIA */
2526 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2527 break;
2528 case 0x8: /* MIAPH */
2529 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2530 break;
2531 case 0xc: /* MIABB */
2532 case 0xd: /* MIABT */
2533 case 0xe: /* MIATB */
2534 case 0xf: /* MIATT */
2535 if (insn & (1 << 16))
2536 tcg_gen_shri_i32(tmp, tmp, 16);
2537 if (insn & (1 << 17))
2538 tcg_gen_shri_i32(tmp2, tmp2, 16);
2539 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2540 break;
2541 default:
2542 return 1;
2543 }
2544 tcg_temp_free_i32(tmp2);
2545 tcg_temp_free_i32(tmp);
2546
2547 gen_op_iwmmxt_movq_wRn_M0(acc);
2548 return 0;
2549 }
2550
2551 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2552 /* Internal Accumulator Access Format */
2553 rdhi = (insn >> 16) & 0xf;
2554 rdlo = (insn >> 12) & 0xf;
2555 acc = insn & 7;
2556
2557 if (acc != 0)
2558 return 1;
2559
2560 if (insn & ARM_CP_RW_BIT) { /* MRA */
2561 iwmmxt_load_reg(cpu_V0, acc);
2562 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2563 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2564 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2565 } else { /* MAR */
2566 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2567 iwmmxt_store_reg(cpu_V0, acc);
2568 }
2569 return 0;
2570 }
2571
2572 return 1;
2573 }
2574
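/*
 * Decode a VFP/Neon double register number from a 4-bit field plus one
 * extra bit. The extra bit (selecting D16-D31) is only valid when the CPU
 * has 32 SIMD registers (aa32_simd_r32); otherwise the insn UNDEFs.
 */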
2575 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2576 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2577 if (dc_isar_feature(aa32_simd_r32, s)) { \
2578 reg = (((insn) >> (bigbit)) & 0x0f) \
2579 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2580 } else { \
2581 if (insn & (1 << (smallbit))) \
2582 return 1; \
2583 reg = ((insn) >> (bigbit)) & 0x0f; \
2584 }} while (0)
2585
2586 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2587 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2588 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
2589
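/*
 * Direct chaining with goto_tb is only safe when the destination lies on
 * the same guest page as the start of this TB or as the current insn;
 * user-mode emulation has no such restriction.
 */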
2590 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2591 {
2592 #ifndef CONFIG_USER_ONLY
2593 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2594 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2595 #else
2596 return true;
2597 #endif
2598 }
2599
2600 static void gen_goto_ptr(void)
2601 {
2602 tcg_gen_lookup_and_goto_ptr();
2603 }
2604
2605 /* This will end the TB but doesn't guarantee we'll return to
2606 * cpu_loop_exec. Any live exit_requests will be processed as we
2607 * enter the next TB.
2608 */
2609 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2610 {
2611 if (use_goto_tb(s, dest)) {
2612 tcg_gen_goto_tb(n);
2613 gen_set_pc_im(s, dest);
2614 tcg_gen_exit_tb(s->base.tb, n);
2615 } else {
2616 gen_set_pc_im(s, dest);
2617 gen_goto_ptr();
2618 }
2619 s->base.is_jmp = DISAS_NORETURN;
2620 }
2621
2622 static inline void gen_jmp (DisasContext *s, uint32_t dest)
2623 {
2624 if (unlikely(is_singlestepping(s))) {
2625 /* An indirect jump so that we still trigger the debug exception. */
2626 gen_set_pc_im(s, dest);
2627 s->base.is_jmp = DISAS_JUMP;
2628 } else {
2629 gen_goto_tb(s, 0, dest);
2630 }
2631 }
2632
2633 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2634 {
2635 if (x)
2636 tcg_gen_sari_i32(t0, t0, 16);
2637 else
2638 gen_sxth(t0);
2639 if (y)
2640 tcg_gen_sari_i32(t1, t1, 16);
2641 else
2642 gen_sxth(t1);
2643 tcg_gen_mul_i32(t0, t0, t1);
2644 }
2645
2646 /* Return the mask of PSR bits set by a MSR instruction. */
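/* 'flags' is the insn's 4-bit field mask: bit 0 selects PSR[7:0] (c),
 * bit 1 PSR[15:8] (x), bit 2 PSR[23:16] (s) and bit 3 PSR[31:24] (f).
 */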
2647 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2648 {
2649 uint32_t mask = 0;
2650
2651 if (flags & (1 << 0)) {
2652 mask |= 0xff;
2653 }
2654 if (flags & (1 << 1)) {
2655 mask |= 0xff00;
2656 }
2657 if (flags & (1 << 2)) {
2658 mask |= 0xff0000;
2659 }
2660 if (flags & (1 << 3)) {
2661 mask |= 0xff000000;
2662 }
2663
2664 /* Mask out undefined and reserved bits. */
2665 mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2666
2667 /* Mask out execution state. */
2668 if (!spsr) {
2669 mask &= ~CPSR_EXEC;
2670 }
2671
2672 /* Mask out privileged bits. */
2673 if (IS_USER(s)) {
2674 mask &= CPSR_USER;
2675 }
2676 return mask;
2677 }
2678
2679 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2680 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2681 {
2682 TCGv_i32 tmp;
2683 if (spsr) {
2684 /* ??? This is also undefined in system mode. */
2685 if (IS_USER(s))
2686 return 1;
2687
2688 tmp = load_cpu_field(spsr);
2689 tcg_gen_andi_i32(tmp, tmp, ~mask);
2690 tcg_gen_andi_i32(t0, t0, mask);
2691 tcg_gen_or_i32(tmp, tmp, t0);
2692 store_cpu_field(tmp, spsr);
2693 } else {
2694 gen_set_cpsr(t0, mask);
2695 }
2696 tcg_temp_free_i32(t0);
2697 gen_lookup_tb(s);
2698 return 0;
2699 }
2700
2701 /* Returns nonzero if access to the PSR is not permitted. */
2702 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2703 {
2704 TCGv_i32 tmp;
2705 tmp = tcg_temp_new_i32();
2706 tcg_gen_movi_i32(tmp, val);
2707 return gen_set_psr(s, mask, spsr, tmp);
2708 }
2709
2710 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2711 int *tgtmode, int *regno)
2712 {
2713 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2714 * the target mode and register number, and identify the various
2715 * unpredictable cases.
2716 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2717 * + executed in user mode
2718 * + using R15 as the src/dest register
2719 * + accessing an unimplemented register
2720 * + accessing a register that's inaccessible at the current PL/security state
2721 * + accessing a register that you could access with a different insn
2722 * We choose to UNDEF in all these cases.
2723 * Since we don't know which of the various AArch32 modes we are in
2724 * we have to defer some checks to runtime.
2725 * Accesses to Monitor mode registers from Secure EL1 (which implies
2726 * that EL3 is AArch64) must trap to EL3.
2727 *
2728 * If the access checks fail this function will emit code to take
2729 * an exception and return false. Otherwise it will return true,
2730 * and set *tgtmode and *regno appropriately.
2731 */
2732 int exc_target = default_exception_el(s);
2733
2734 /* These instructions are present only in ARMv8, or in ARMv7 with the
2735 * Virtualization Extensions.
2736 */
2737 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2738 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2739 goto undef;
2740 }
2741
2742 if (IS_USER(s) || rn == 15) {
2743 goto undef;
2744 }
2745
2746 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2747 * of registers into (r, sysm).
2748 */
2749 if (r) {
2750 /* SPSRs for other modes */
2751 switch (sysm) {
2752 case 0xe: /* SPSR_fiq */
2753 *tgtmode = ARM_CPU_MODE_FIQ;
2754 break;
2755 case 0x10: /* SPSR_irq */
2756 *tgtmode = ARM_CPU_MODE_IRQ;
2757 break;
2758 case 0x12: /* SPSR_svc */
2759 *tgtmode = ARM_CPU_MODE_SVC;
2760 break;
2761 case 0x14: /* SPSR_abt */
2762 *tgtmode = ARM_CPU_MODE_ABT;
2763 break;
2764 case 0x16: /* SPSR_und */
2765 *tgtmode = ARM_CPU_MODE_UND;
2766 break;
2767 case 0x1c: /* SPSR_mon */
2768 *tgtmode = ARM_CPU_MODE_MON;
2769 break;
2770 case 0x1e: /* SPSR_hyp */
2771 *tgtmode = ARM_CPU_MODE_HYP;
2772 break;
2773 default: /* unallocated */
2774 goto undef;
2775 }
2776 /* We arbitrarily assign SPSR a register number of 16. */
2777 *regno = 16;
2778 } else {
2779 /* general purpose registers for other modes */
2780 switch (sysm) {
2781 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2782 *tgtmode = ARM_CPU_MODE_USR;
2783 *regno = sysm + 8;
2784 break;
2785 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2786 *tgtmode = ARM_CPU_MODE_FIQ;
2787 *regno = sysm;
2788 break;
2789 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2790 *tgtmode = ARM_CPU_MODE_IRQ;
2791 *regno = sysm & 1 ? 13 : 14;
2792 break;
2793 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2794 *tgtmode = ARM_CPU_MODE_SVC;
2795 *regno = sysm & 1 ? 13 : 14;
2796 break;
2797 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2798 *tgtmode = ARM_CPU_MODE_ABT;
2799 *regno = sysm & 1 ? 13 : 14;
2800 break;
2801 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2802 *tgtmode = ARM_CPU_MODE_UND;
2803 *regno = sysm & 1 ? 13 : 14;
2804 break;
2805 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2806 *tgtmode = ARM_CPU_MODE_MON;
2807 *regno = sysm & 1 ? 13 : 14;
2808 break;
2809 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2810 *tgtmode = ARM_CPU_MODE_HYP;
2811 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2812 *regno = sysm & 1 ? 13 : 17;
2813 break;
2814 default: /* unallocated */
2815 goto undef;
2816 }
2817 }
2818
2819 /* Catch the 'accessing inaccessible register' cases we can detect
2820 * at translate time.
2821 */
2822 switch (*tgtmode) {
2823 case ARM_CPU_MODE_MON:
2824 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2825 goto undef;
2826 }
2827 if (s->current_el == 1) {
2828 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2829 * then accesses to Mon registers trap to EL3
2830 */
2831 exc_target = 3;
2832 goto undef;
2833 }
2834 break;
2835 case ARM_CPU_MODE_HYP:
2836 /*
2837 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2838 * (and so we can forbid accesses from EL2 or below). elr_hyp
2839 * can be accessed also from Hyp mode, so forbid accesses from
2840 * EL0 or EL1.
2841 */
2842 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2843 (s->current_el < 3 && *regno != 17)) {
2844 goto undef;
2845 }
2846 break;
2847 default:
2848 break;
2849 }
2850
2851 return true;
2852
2853 undef:
2854 /* If we get here then some access check did not pass */
2855 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2856 syn_uncategorized(), exc_target);
2857 return false;
2858 }
2859
2860 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2861 {
2862 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2863 int tgtmode = 0, regno = 0;
2864
2865 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2866 return;
2867 }
2868
2869 /* Sync state because msr_banked() can raise exceptions */
2870 gen_set_condexec(s);
2871 gen_set_pc_im(s, s->pc_curr);
2872 tcg_reg = load_reg(s, rn);
2873 tcg_tgtmode = tcg_const_i32(tgtmode);
2874 tcg_regno = tcg_const_i32(regno);
2875 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2876 tcg_temp_free_i32(tcg_tgtmode);
2877 tcg_temp_free_i32(tcg_regno);
2878 tcg_temp_free_i32(tcg_reg);
2879 s->base.is_jmp = DISAS_UPDATE;
2880 }
2881
2882 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2883 {
2884 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2885 int tgtmode = 0, regno = 0;
2886
2887 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2888 return;
2889 }
2890
2891 /* Sync state because mrs_banked() can raise exceptions */
2892 gen_set_condexec(s);
2893 gen_set_pc_im(s, s->pc_curr);
2894 tcg_reg = tcg_temp_new_i32();
2895 tcg_tgtmode = tcg_const_i32(tgtmode);
2896 tcg_regno = tcg_const_i32(regno);
2897 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2898 tcg_temp_free_i32(tcg_tgtmode);
2899 tcg_temp_free_i32(tcg_regno);
2900 store_reg(s, rn, tcg_reg);
2901 s->base.is_jmp = DISAS_UPDATE;
2902 }
2903
2904 /* Store value to PC as for an exception return (i.e. don't
2905 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2906 * will do the masking based on the new value of the Thumb bit.
2907 */
2908 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2909 {
2910 tcg_gen_mov_i32(cpu_R[15], pc);
2911 tcg_temp_free_i32(pc);
2912 }
2913
2914 /* Generate a v6 exception return. Marks both values as dead. */
2915 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2916 {
2917 store_pc_exc_ret(s, pc);
2918 /* The cpsr_write_eret helper will mask the low bits of PC
2919 * appropriately depending on the new Thumb bit, so it must
2920 * be called after storing the new PC.
2921 */
2922 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2923 gen_io_start();
2924 }
2925 gen_helper_cpsr_write_eret(cpu_env, cpsr);
2926 tcg_temp_free_i32(cpsr);
2927 /* Must exit loop to check un-masked IRQs */
2928 s->base.is_jmp = DISAS_EXIT;
2929 }
2930
2931 /* Generate an old-style exception return. Marks pc as dead. */
2932 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2933 {
2934 gen_rfe(s, pc, load_cpu_field(spsr));
2935 }
2936
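/* One 32-bit chunk of the element transpose used by Neon VTRN,
 * for 8-bit and 16-bit elements respectively.
 */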
2937 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
2938 {
2939 TCGv_i32 rd, tmp;
2940
2941 rd = tcg_temp_new_i32();
2942 tmp = tcg_temp_new_i32();
2943
2944 tcg_gen_shli_i32(rd, t0, 8);
2945 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
2946 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
2947 tcg_gen_or_i32(rd, rd, tmp);
2948
2949 tcg_gen_shri_i32(t1, t1, 8);
2950 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
2951 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
2952 tcg_gen_or_i32(t1, t1, tmp);
2953 tcg_gen_mov_i32(t0, rd);
2954
2955 tcg_temp_free_i32(tmp);
2956 tcg_temp_free_i32(rd);
2957 }
2958
2959 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
2960 {
2961 TCGv_i32 rd, tmp;
2962
2963 rd = tcg_temp_new_i32();
2964 tmp = tcg_temp_new_i32();
2965
2966 tcg_gen_shli_i32(rd, t0, 16);
2967 tcg_gen_andi_i32(tmp, t1, 0xffff);
2968 tcg_gen_or_i32(rd, rd, tmp);
2969 tcg_gen_shri_i32(t1, t1, 16);
2970 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
2971 tcg_gen_or_i32(t1, t1, tmp);
2972 tcg_gen_mov_i32(t0, rd);
2973
2974 tcg_temp_free_i32(tmp);
2975 tcg_temp_free_i32(rd);
2976 }
2977
2978 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
2979 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
2980 * table A7-13.
2981 */
2982 #define NEON_2RM_VREV64 0
2983 #define NEON_2RM_VREV32 1
2984 #define NEON_2RM_VREV16 2
2985 #define NEON_2RM_VPADDL 4
2986 #define NEON_2RM_VPADDL_U 5
2987 #define NEON_2RM_AESE 6 /* Includes AESD */
2988 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
2989 #define NEON_2RM_VCLS 8
2990 #define NEON_2RM_VCLZ 9
2991 #define NEON_2RM_VCNT 10
2992 #define NEON_2RM_VMVN 11
2993 #define NEON_2RM_VPADAL 12
2994 #define NEON_2RM_VPADAL_U 13
2995 #define NEON_2RM_VQABS 14
2996 #define NEON_2RM_VQNEG 15
2997 #define NEON_2RM_VCGT0 16
2998 #define NEON_2RM_VCGE0 17
2999 #define NEON_2RM_VCEQ0 18
3000 #define NEON_2RM_VCLE0 19
3001 #define NEON_2RM_VCLT0 20
3002 #define NEON_2RM_SHA1H 21
3003 #define NEON_2RM_VABS 22
3004 #define NEON_2RM_VNEG 23
3005 #define NEON_2RM_VCGT0_F 24
3006 #define NEON_2RM_VCGE0_F 25
3007 #define NEON_2RM_VCEQ0_F 26
3008 #define NEON_2RM_VCLE0_F 27
3009 #define NEON_2RM_VCLT0_F 28
3010 #define NEON_2RM_VABS_F 30
3011 #define NEON_2RM_VNEG_F 31
3012 #define NEON_2RM_VSWP 32
3013 #define NEON_2RM_VTRN 33
3014 #define NEON_2RM_VUZP 34
3015 #define NEON_2RM_VZIP 35
3016 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3017 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3018 #define NEON_2RM_VSHLL 38
3019 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3020 #define NEON_2RM_VRINTN 40
3021 #define NEON_2RM_VRINTX 41
3022 #define NEON_2RM_VRINTA 42
3023 #define NEON_2RM_VRINTZ 43
3024 #define NEON_2RM_VCVT_F16_F32 44
3025 #define NEON_2RM_VRINTM 45
3026 #define NEON_2RM_VCVT_F32_F16 46
3027 #define NEON_2RM_VRINTP 47
3028 #define NEON_2RM_VCVTAU 48
3029 #define NEON_2RM_VCVTAS 49
3030 #define NEON_2RM_VCVTNU 50
3031 #define NEON_2RM_VCVTNS 51
3032 #define NEON_2RM_VCVTPU 52
3033 #define NEON_2RM_VCVTPS 53
3034 #define NEON_2RM_VCVTMU 54
3035 #define NEON_2RM_VCVTMS 55
3036 #define NEON_2RM_VRECPE 56
3037 #define NEON_2RM_VRSQRTE 57
3038 #define NEON_2RM_VRECPE_F 58
3039 #define NEON_2RM_VRSQRTE_F 59
3040 #define NEON_2RM_VCVT_FS 60
3041 #define NEON_2RM_VCVT_FU 61
3042 #define NEON_2RM_VCVT_SF 62
3043 #define NEON_2RM_VCVT_UF 63
3044
3045 static bool neon_2rm_is_v8_op(int op)
3046 {
3047 /* Return true if this neon 2reg-misc op is ARMv8 and up */
3048 switch (op) {
3049 case NEON_2RM_VRINTN:
3050 case NEON_2RM_VRINTA:
3051 case NEON_2RM_VRINTM:
3052 case NEON_2RM_VRINTP:
3053 case NEON_2RM_VRINTZ:
3054 case NEON_2RM_VRINTX:
3055 case NEON_2RM_VCVTAU:
3056 case NEON_2RM_VCVTAS:
3057 case NEON_2RM_VCVTNU:
3058 case NEON_2RM_VCVTNS:
3059 case NEON_2RM_VCVTPU:
3060 case NEON_2RM_VCVTPS:
3061 case NEON_2RM_VCVTMU:
3062 case NEON_2RM_VCVTMS:
3063 return true;
3064 default:
3065 return false;
3066 }
3067 }
3068
3069 /* Each entry in this array has bit n set if the insn allows
3070 * size value n (otherwise it will UNDEF). Since unallocated
3071 * op values will have no bits set they always UNDEF.
3072 */
3073 static const uint8_t neon_2rm_sizes[] = {
3074 [NEON_2RM_VREV64] = 0x7,
3075 [NEON_2RM_VREV32] = 0x3,
3076 [NEON_2RM_VREV16] = 0x1,
3077 [NEON_2RM_VPADDL] = 0x7,
3078 [NEON_2RM_VPADDL_U] = 0x7,
3079 [NEON_2RM_AESE] = 0x1,
3080 [NEON_2RM_AESMC] = 0x1,
3081 [NEON_2RM_VCLS] = 0x7,
3082 [NEON_2RM_VCLZ] = 0x7,
3083 [NEON_2RM_VCNT] = 0x1,
3084 [NEON_2RM_VMVN] = 0x1,
3085 [NEON_2RM_VPADAL] = 0x7,
3086 [NEON_2RM_VPADAL_U] = 0x7,
3087 [NEON_2RM_VQABS] = 0x7,
3088 [NEON_2RM_VQNEG] = 0x7,
3089 [NEON_2RM_VCGT0] = 0x7,
3090 [NEON_2RM_VCGE0] = 0x7,
3091 [NEON_2RM_VCEQ0] = 0x7,
3092 [NEON_2RM_VCLE0] = 0x7,
3093 [NEON_2RM_VCLT0] = 0x7,
3094 [NEON_2RM_SHA1H] = 0x4,
3095 [NEON_2RM_VABS] = 0x7,
3096 [NEON_2RM_VNEG] = 0x7,
3097 [NEON_2RM_VCGT0_F] = 0x4,
3098 [NEON_2RM_VCGE0_F] = 0x4,
3099 [NEON_2RM_VCEQ0_F] = 0x4,
3100 [NEON_2RM_VCLE0_F] = 0x4,
3101 [NEON_2RM_VCLT0_F] = 0x4,
3102 [NEON_2RM_VABS_F] = 0x4,
3103 [NEON_2RM_VNEG_F] = 0x4,
3104 [NEON_2RM_VSWP] = 0x1,
3105 [NEON_2RM_VTRN] = 0x7,
3106 [NEON_2RM_VUZP] = 0x7,
3107 [NEON_2RM_VZIP] = 0x7,
3108 [NEON_2RM_VMOVN] = 0x7,
3109 [NEON_2RM_VQMOVN] = 0x7,
3110 [NEON_2RM_VSHLL] = 0x7,
3111 [NEON_2RM_SHA1SU1] = 0x4,
3112 [NEON_2RM_VRINTN] = 0x4,
3113 [NEON_2RM_VRINTX] = 0x4,
3114 [NEON_2RM_VRINTA] = 0x4,
3115 [NEON_2RM_VRINTZ] = 0x4,
3116 [NEON_2RM_VCVT_F16_F32] = 0x2,
3117 [NEON_2RM_VRINTM] = 0x4,
3118 [NEON_2RM_VCVT_F32_F16] = 0x2,
3119 [NEON_2RM_VRINTP] = 0x4,
3120 [NEON_2RM_VCVTAU] = 0x4,
3121 [NEON_2RM_VCVTAS] = 0x4,
3122 [NEON_2RM_VCVTNU] = 0x4,
3123 [NEON_2RM_VCVTNS] = 0x4,
3124 [NEON_2RM_VCVTPU] = 0x4,
3125 [NEON_2RM_VCVTPS] = 0x4,
3126 [NEON_2RM_VCVTMU] = 0x4,
3127 [NEON_2RM_VCVTMS] = 0x4,
3128 [NEON_2RM_VRECPE] = 0x4,
3129 [NEON_2RM_VRSQRTE] = 0x4,
3130 [NEON_2RM_VRECPE_F] = 0x4,
3131 [NEON_2RM_VRSQRTE_F] = 0x4,
3132 [NEON_2RM_VCVT_FS] = 0x4,
3133 [NEON_2RM_VCVT_FU] = 0x4,
3134 [NEON_2RM_VCVT_SF] = 0x4,
3135 [NEON_2RM_VCVT_UF] = 0x4,
3136 };
3137
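/* Expand a gvec three-operand op whose helper also needs a pointer to the
 * cumulative saturation (QC) flag; used by SQRDMLAH/SQRDMLSH below.
 */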
3138 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
3139 uint32_t opr_sz, uint32_t max_sz,
3140 gen_helper_gvec_3_ptr *fn)
3141 {
3142 TCGv_ptr qc_ptr = tcg_temp_new_ptr();
3143
3144 tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
3145 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
3146 opr_sz, max_sz, 0, fn);
3147 tcg_temp_free_ptr(qc_ptr);
3148 }
3149
3150 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3151 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3152 {
3153 static gen_helper_gvec_3_ptr * const fns[2] = {
3154 gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
3155 };
3156 tcg_debug_assert(vece >= 1 && vece <= 2);
3157 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3158 }
3159
3160 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3161 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3162 {
3163 static gen_helper_gvec_3_ptr * const fns[2] = {
3164 gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
3165 };
3166 tcg_debug_assert(vece >= 1 && vece <= 2);
3167 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3168 }
3169
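/* Compare-against-zero expanders (VCEQ0, VCGE0, VCGT0, VCLE0, VCLT0):
 * each result element is all ones if the comparison holds, else zero.
 */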
3170 #define GEN_CMP0(NAME, COND) \
3171 static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
3172 { \
3173 tcg_gen_setcondi_i32(COND, d, a, 0); \
3174 tcg_gen_neg_i32(d, d); \
3175 } \
3176 static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
3177 { \
3178 tcg_gen_setcondi_i64(COND, d, a, 0); \
3179 tcg_gen_neg_i64(d, d); \
3180 } \
3181 static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3182 { \
3183 TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
3184 tcg_gen_cmp_vec(COND, vece, d, a, zero); \
3185 tcg_temp_free_vec(zero); \
3186 } \
3187 void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
3188 uint32_t opr_sz, uint32_t max_sz) \
3189 { \
3190 const GVecGen2 op[4] = { \
3191 { .fno = gen_helper_gvec_##NAME##0_b, \
3192 .fniv = gen_##NAME##0_vec, \
3193 .opt_opc = vecop_list_cmp, \
3194 .vece = MO_8 }, \
3195 { .fno = gen_helper_gvec_##NAME##0_h, \
3196 .fniv = gen_##NAME##0_vec, \
3197 .opt_opc = vecop_list_cmp, \
3198 .vece = MO_16 }, \
3199 { .fni4 = gen_##NAME##0_i32, \
3200 .fniv = gen_##NAME##0_vec, \
3201 .opt_opc = vecop_list_cmp, \
3202 .vece = MO_32 }, \
3203 { .fni8 = gen_##NAME##0_i64, \
3204 .fniv = gen_##NAME##0_vec, \
3205 .opt_opc = vecop_list_cmp, \
3206 .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
3207 .vece = MO_64 }, \
3208 }; \
3209 tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
3210 }
3211
3212 static const TCGOpcode vecop_list_cmp[] = {
3213 INDEX_op_cmp_vec, 0
3214 };
3215
3216 GEN_CMP0(ceq, TCG_COND_EQ)
3217 GEN_CMP0(cle, TCG_COND_LE)
3218 GEN_CMP0(cge, TCG_COND_GE)
3219 GEN_CMP0(clt, TCG_COND_LT)
3220 GEN_CMP0(cgt, TCG_COND_GT)
3221
3222 #undef GEN_CMP0
3223
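/* Signed shift right and accumulate: d += a >> shift (arithmetic shift), per element. */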
3224 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3225 {
3226 tcg_gen_vec_sar8i_i64(a, a, shift);
3227 tcg_gen_vec_add8_i64(d, d, a);
3228 }
3229
3230 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3231 {
3232 tcg_gen_vec_sar16i_i64(a, a, shift);
3233 tcg_gen_vec_add16_i64(d, d, a);
3234 }
3235
3236 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3237 {
3238 tcg_gen_sari_i32(a, a, shift);
3239 tcg_gen_add_i32(d, d, a);
3240 }
3241
3242 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3243 {
3244 tcg_gen_sari_i64(a, a, shift);
3245 tcg_gen_add_i64(d, d, a);
3246 }
3247
3248 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3249 {
3250 tcg_gen_sari_vec(vece, a, a, sh);
3251 tcg_gen_add_vec(vece, d, d, a);
3252 }
3253
3254 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3255 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3256 {
3257 static const TCGOpcode vecop_list[] = {
3258 INDEX_op_sari_vec, INDEX_op_add_vec, 0
3259 };
3260 static const GVecGen2i ops[4] = {
3261 { .fni8 = gen_ssra8_i64,
3262 .fniv = gen_ssra_vec,
3263 .fno = gen_helper_gvec_ssra_b,
3264 .load_dest = true,
3265 .opt_opc = vecop_list,
3266 .vece = MO_8 },
3267 { .fni8 = gen_ssra16_i64,
3268 .fniv = gen_ssra_vec,
3269 .fno = gen_helper_gvec_ssra_h,
3270 .load_dest = true,
3271 .opt_opc = vecop_list,
3272 .vece = MO_16 },
3273 { .fni4 = gen_ssra32_i32,
3274 .fniv = gen_ssra_vec,
3275 .fno = gen_helper_gvec_ssra_s,
3276 .load_dest = true,
3277 .opt_opc = vecop_list,
3278 .vece = MO_32 },
3279 { .fni8 = gen_ssra64_i64,
3280 .fniv = gen_ssra_vec,
3281 .fno = gen_helper_gvec_ssra_d,
3282 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3283 .opt_opc = vecop_list,
3284 .load_dest = true,
3285 .vece = MO_64 },
3286 };
3287
3288 /* tszimm encoding produces immediates in the range [1..esize]. */
3289 tcg_debug_assert(shift > 0);
3290 tcg_debug_assert(shift <= (8 << vece));
3291
3292 /*
3293 * Shifts larger than the element size are architecturally valid.
3294 * A signed shift of that size results in all sign bits.
3295 */
3296 shift = MIN(shift, (8 << vece) - 1);
3297 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3298 }
3299
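/* Unsigned shift right and accumulate: d += a >> shift (logical shift), per element. */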
3300 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3301 {
3302 tcg_gen_vec_shr8i_i64(a, a, shift);
3303 tcg_gen_vec_add8_i64(d, d, a);
3304 }
3305
3306 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3307 {
3308 tcg_gen_vec_shr16i_i64(a, a, shift);
3309 tcg_gen_vec_add16_i64(d, d, a);
3310 }
3311
3312 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3313 {
3314 tcg_gen_shri_i32(a, a, shift);
3315 tcg_gen_add_i32(d, d, a);
3316 }
3317
3318 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3319 {
3320 tcg_gen_shri_i64(a, a, shift);
3321 tcg_gen_add_i64(d, d, a);
3322 }
3323
3324 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3325 {
3326 tcg_gen_shri_vec(vece, a, a, sh);
3327 tcg_gen_add_vec(vece, d, d, a);
3328 }
3329
3330 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3331 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3332 {
3333 static const TCGOpcode vecop_list[] = {
3334 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3335 };
3336 static const GVecGen2i ops[4] = {
3337 { .fni8 = gen_usra8_i64,
3338 .fniv = gen_usra_vec,
3339 .fno = gen_helper_gvec_usra_b,
3340 .load_dest = true,
3341 .opt_opc = vecop_list,
3342 .vece = MO_8, },
3343 { .fni8 = gen_usra16_i64,
3344 .fniv = gen_usra_vec,
3345 .fno = gen_helper_gvec_usra_h,
3346 .load_dest = true,
3347 .opt_opc = vecop_list,
3348 .vece = MO_16, },
3349 { .fni4 = gen_usra32_i32,
3350 .fniv = gen_usra_vec,
3351 .fno = gen_helper_gvec_usra_s,
3352 .load_dest = true,
3353 .opt_opc = vecop_list,
3354 .vece = MO_32, },
3355 { .fni8 = gen_usra64_i64,
3356 .fniv = gen_usra_vec,
3357 .fno = gen_helper_gvec_usra_d,
3358 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3359 .load_dest = true,
3360 .opt_opc = vecop_list,
3361 .vece = MO_64, },
3362 };
3363
3364 /* tszimm encoding produces immediates in the range [1..esize]. */
3365 tcg_debug_assert(shift > 0);
3366 tcg_debug_assert(shift <= (8 << vece));
3367
3368 /*
3369 * Shifts larger than the element size are architecturally valid.
3370 * An unsigned shift of that size results in all zeros, so accumulating it is a nop.
3371 */
3372 if (shift < (8 << vece)) {
3373 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3374 } else {
3375 /* Nop, but we do need to clear the tail. */
3376 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3377 }
3378 }
3379
3380 /*
3381 * Shift one less than the requested amount, and the low bit is
3382 * the rounding bit. For the 8 and 16-bit operations, because we
3383 * mask the low bit, we can perform a normal integer shift instead
3384 * of a vector shift.
3385 */
3386 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3387 {
3388 TCGv_i64 t = tcg_temp_new_i64();
3389
3390 tcg_gen_shri_i64(t, a, sh - 1);
3391 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3392 tcg_gen_vec_sar8i_i64(d, a, sh);
3393 tcg_gen_vec_add8_i64(d, d, t);
3394 tcg_temp_free_i64(t);
3395 }
3396
3397 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3398 {
3399 TCGv_i64 t = tcg_temp_new_i64();
3400
3401 tcg_gen_shri_i64(t, a, sh - 1);
3402 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3403 tcg_gen_vec_sar16i_i64(d, a, sh);
3404 tcg_gen_vec_add16_i64(d, d, t);
3405 tcg_temp_free_i64(t);
3406 }
3407
3408 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3409 {
3410 TCGv_i32 t = tcg_temp_new_i32();
3411
3412 tcg_gen_extract_i32(t, a, sh - 1, 1);
3413 tcg_gen_sari_i32(d, a, sh);
3414 tcg_gen_add_i32(d, d, t);
3415 tcg_temp_free_i32(t);
3416 }
3417
3418 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3419 {
3420 TCGv_i64 t = tcg_temp_new_i64();
3421
3422 tcg_gen_extract_i64(t, a, sh - 1, 1);
3423 tcg_gen_sari_i64(d, a, sh);
3424 tcg_gen_add_i64(d, d, t);
3425 tcg_temp_free_i64(t);
3426 }
3427
3428 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3429 {
3430 TCGv_vec t = tcg_temp_new_vec_matching(d);
3431 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3432
3433 tcg_gen_shri_vec(vece, t, a, sh - 1);
3434 tcg_gen_dupi_vec(vece, ones, 1);
3435 tcg_gen_and_vec(vece, t, t, ones);
3436 tcg_gen_sari_vec(vece, d, a, sh);
3437 tcg_gen_add_vec(vece, d, d, t);
3438
3439 tcg_temp_free_vec(t);
3440 tcg_temp_free_vec(ones);
3441 }
3442
3443 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3444 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3445 {
3446 static const TCGOpcode vecop_list[] = {
3447 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3448 };
3449 static const GVecGen2i ops[4] = {
3450 { .fni8 = gen_srshr8_i64,
3451 .fniv = gen_srshr_vec,
3452 .fno = gen_helper_gvec_srshr_b,
3453 .opt_opc = vecop_list,
3454 .vece = MO_8 },
3455 { .fni8 = gen_srshr16_i64,
3456 .fniv = gen_srshr_vec,
3457 .fno = gen_helper_gvec_srshr_h,
3458 .opt_opc = vecop_list,
3459 .vece = MO_16 },
3460 { .fni4 = gen_srshr32_i32,
3461 .fniv = gen_srshr_vec,
3462 .fno = gen_helper_gvec_srshr_s,
3463 .opt_opc = vecop_list,
3464 .vece = MO_32 },
3465 { .fni8 = gen_srshr64_i64,
3466 .fniv = gen_srshr_vec,
3467 .fno = gen_helper_gvec_srshr_d,
3468 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3469 .opt_opc = vecop_list,
3470 .vece = MO_64 },
3471 };
3472
3473 /* tszimm encoding produces immediates in the range [1..esize] */
3474 tcg_debug_assert(shift > 0);
3475 tcg_debug_assert(shift <= (8 << vece));
3476
3477 if (shift == (8 << vece)) {
3478 /*
3479 * Shifts larger than the element size are architecturally valid.
3480 * A signed shift of that size results in all sign bits. With rounding, this produces
3481 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3482 * I.e. always zero.
3483 */
3484 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3485 } else {
3486 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3487 }
3488 }
3489
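/* Signed rounding shift right and accumulate: d += srshr(a, sh), per element. */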
3490 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3491 {
3492 TCGv_i64 t = tcg_temp_new_i64();
3493
3494 gen_srshr8_i64(t, a, sh);
3495 tcg_gen_vec_add8_i64(d, d, t);
3496 tcg_temp_free_i64(t);
3497 }
3498
3499 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3500 {
3501 TCGv_i64 t = tcg_temp_new_i64();
3502
3503 gen_srshr16_i64(t, a, sh);
3504 tcg_gen_vec_add16_i64(d, d, t);
3505 tcg_temp_free_i64(t);
3506 }
3507
3508 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3509 {
3510 TCGv_i32 t = tcg_temp_new_i32();
3511
3512 gen_srshr32_i32(t, a, sh);
3513 tcg_gen_add_i32(d, d, t);
3514 tcg_temp_free_i32(t);
3515 }
3516
3517 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3518 {
3519 TCGv_i64 t = tcg_temp_new_i64();
3520
3521 gen_srshr64_i64(t, a, sh);
3522 tcg_gen_add_i64(d, d, t);
3523 tcg_temp_free_i64(t);
3524 }
3525
3526 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3527 {
3528 TCGv_vec t = tcg_temp_new_vec_matching(d);
3529
3530 gen_srshr_vec(vece, t, a, sh);
3531 tcg_gen_add_vec(vece, d, d, t);
3532 tcg_temp_free_vec(t);
3533 }
3534
3535 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3536 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3537 {
3538 static const TCGOpcode vecop_list[] = {
3539 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3540 };
3541 static const GVecGen2i ops[4] = {
3542 { .fni8 = gen_srsra8_i64,
3543 .fniv = gen_srsra_vec,
3544 .fno = gen_helper_gvec_srsra_b,
3545 .opt_opc = vecop_list,
3546 .load_dest = true,
3547 .vece = MO_8 },
3548 { .fni8 = gen_srsra16_i64,
3549 .fniv = gen_srsra_vec,
3550 .fno = gen_helper_gvec_srsra_h,
3551 .opt_opc = vecop_list,
3552 .load_dest = true,
3553 .vece = MO_16 },
3554 { .fni4 = gen_srsra32_i32,
3555 .fniv = gen_srsra_vec,
3556 .fno = gen_helper_gvec_srsra_s,
3557 .opt_opc = vecop_list,
3558 .load_dest = true,
3559 .vece = MO_32 },
3560 { .fni8 = gen_srsra64_i64,
3561 .fniv = gen_srsra_vec,
3562 .fno = gen_helper_gvec_srsra_d,
3563 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3564 .opt_opc = vecop_list,
3565 .load_dest = true,
3566 .vece = MO_64 },
3567 };
3568
3569 /* tszimm encoding produces immediates in the range [1..esize] */
3570 tcg_debug_assert(shift > 0);
3571 tcg_debug_assert(shift <= (8 << vece));
3572
3573 /*
3574 * Shifts larger than the element size are architecturally valid.
3575 * Signed results in all sign bits. With rounding, this produces
3576 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3577 * I.e. always zero. With accumulation, this leaves D unchanged.
3578 */
3579 if (shift == (8 << vece)) {
3580 /* Nop, but we do need to clear the tail. */
3581 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3582 } else {
3583 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3584 }
3585 }
3586
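/* Unsigned rounding shift right: as SRSHR above, but with logical shifts. */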
3587 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3588 {
3589 TCGv_i64 t = tcg_temp_new_i64();
3590
3591 tcg_gen_shri_i64(t, a, sh - 1);
3592 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3593 tcg_gen_vec_shr8i_i64(d, a, sh);
3594 tcg_gen_vec_add8_i64(d, d, t);
3595 tcg_temp_free_i64(t);
3596 }
3597
3598 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3599 {
3600 TCGv_i64 t = tcg_temp_new_i64();
3601
3602 tcg_gen_shri_i64(t, a, sh - 1);
3603 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3604 tcg_gen_vec_shr16i_i64(d, a, sh);
3605 tcg_gen_vec_add16_i64(d, d, t);
3606 tcg_temp_free_i64(t);
3607 }
3608
3609 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3610 {
3611 TCGv_i32 t = tcg_temp_new_i32();
3612
3613 tcg_gen_extract_i32(t, a, sh - 1, 1);
3614 tcg_gen_shri_i32(d, a, sh);
3615 tcg_gen_add_i32(d, d, t);
3616 tcg_temp_free_i32(t);
3617 }
3618
3619 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3620 {
3621 TCGv_i64 t = tcg_temp_new_i64();
3622
3623 tcg_gen_extract_i64(t, a, sh - 1, 1);
3624 tcg_gen_shri_i64(d, a, sh);
3625 tcg_gen_add_i64(d, d, t);
3626 tcg_temp_free_i64(t);
3627 }
3628
3629 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3630 {
3631 TCGv_vec t = tcg_temp_new_vec_matching(d);
3632 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3633
3634 tcg_gen_shri_vec(vece, t, a, shift - 1);
3635 tcg_gen_dupi_vec(vece, ones, 1);
3636 tcg_gen_and_vec(vece, t, t, ones);
3637 tcg_gen_shri_vec(vece, d, a, shift);
3638 tcg_gen_add_vec(vece, d, d, t);
3639
3640 tcg_temp_free_vec(t);
3641 tcg_temp_free_vec(ones);
3642 }
3643
3644 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3645 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3646 {
3647 static const TCGOpcode vecop_list[] = {
3648 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3649 };
3650 static const GVecGen2i ops[4] = {
3651 { .fni8 = gen_urshr8_i64,
3652 .fniv = gen_urshr_vec,
3653 .fno = gen_helper_gvec_urshr_b,
3654 .opt_opc = vecop_list,
3655 .vece = MO_8 },
3656 { .fni8 = gen_urshr16_i64,
3657 .fniv = gen_urshr_vec,
3658 .fno = gen_helper_gvec_urshr_h,
3659 .opt_opc = vecop_list,
3660 .vece = MO_16 },
3661 { .fni4 = gen_urshr32_i32,
3662 .fniv = gen_urshr_vec,
3663 .fno = gen_helper_gvec_urshr_s,
3664 .opt_opc = vecop_list,
3665 .vece = MO_32 },
3666 { .fni8 = gen_urshr64_i64,
3667 .fniv = gen_urshr_vec,
3668 .fno = gen_helper_gvec_urshr_d,
3669 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3670 .opt_opc = vecop_list,
3671 .vece = MO_64 },
3672 };
3673
3674 /* tszimm encoding produces immediates in the range [1..esize] */
3675 tcg_debug_assert(shift > 0);
3676 tcg_debug_assert(shift <= (8 << vece));
3677
3678 if (shift == (8 << vece)) {
3679 /*
3680 * Shifts larger than the element size are architecturally valid.
3681 * An unsigned shift of that size results in zero. With rounding, this produces a
3682 * copy of the most significant bit.
3683 */
3684 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3685 } else {
3686 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3687 }
3688 }
3689
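/* Unsigned rounding shift right and accumulate: d += urshr(a, sh). When the
 * shift equals the element size only the rounding bit (the old msb) remains,
 * which the helpers below special-case.
 */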
3690 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3691 {
3692 TCGv_i64 t = tcg_temp_new_i64();
3693
3694 if (sh == 8) {
3695 tcg_gen_vec_shr8i_i64(t, a, 7);
3696 } else {
3697 gen_urshr8_i64(t, a, sh);
3698 }
3699 tcg_gen_vec_add8_i64(d, d, t);
3700 tcg_temp_free_i64(t);
3701 }
3702
3703 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3704 {
3705 TCGv_i64 t = tcg_temp_new_i64();
3706
3707 if (sh == 16) {
3708 tcg_gen_vec_shr16i_i64(t, a, 15);
3709 } else {
3710 gen_urshr16_i64(t, a, sh);
3711 }
3712 tcg_gen_vec_add16_i64(d, d, t);
3713 tcg_temp_free_i64(t);
3714 }
3715
3716 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3717 {
3718 TCGv_i32 t = tcg_temp_new_i32();
3719
3720 if (sh == 32) {
3721 tcg_gen_shri_i32(t, a, 31);
3722 } else {
3723 gen_urshr32_i32(t, a, sh);
3724 }
3725 tcg_gen_add_i32(d, d, t);
3726 tcg_temp_free_i32(t);
3727 }
3728
3729 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3730 {
3731 TCGv_i64 t = tcg_temp_new_i64();
3732
3733 if (sh == 64) {
3734 tcg_gen_shri_i64(t, a, 63);
3735 } else {
3736 gen_urshr64_i64(t, a, sh);
3737 }
3738 tcg_gen_add_i64(d, d, t);
3739 tcg_temp_free_i64(t);
3740 }
3741
3742 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3743 {
3744 TCGv_vec t = tcg_temp_new_vec_matching(d);
3745
3746 if (sh == (8 << vece)) {
3747 tcg_gen_shri_vec(vece, t, a, sh - 1);
3748 } else {
3749 gen_urshr_vec(vece, t, a, sh);
3750 }
3751 tcg_gen_add_vec(vece, d, d, t);
3752 tcg_temp_free_vec(t);
3753 }
3754
3755 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3756 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3757 {
3758 static const TCGOpcode vecop_list[] = {
3759 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3760 };
3761 static const GVecGen2i ops[4] = {
3762 { .fni8 = gen_ursra8_i64,
3763 .fniv = gen_ursra_vec,
3764 .fno = gen_helper_gvec_ursra_b,
3765 .opt_opc = vecop_list,
3766 .load_dest = true,
3767 .vece = MO_8 },
3768 { .fni8 = gen_ursra16_i64,
3769 .fniv = gen_ursra_vec,
3770 .fno = gen_helper_gvec_ursra_h,
3771 .opt_opc = vecop_list,
3772 .load_dest = true,
3773 .vece = MO_16 },
3774 { .fni4 = gen_ursra32_i32,
3775 .fniv = gen_ursra_vec,
3776 .fno = gen_helper_gvec_ursra_s,
3777 .opt_opc = vecop_list,
3778 .load_dest = true,
3779 .vece = MO_32 },
3780 { .fni8 = gen_ursra64_i64,
3781 .fniv = gen_ursra_vec,
3782 .fno = gen_helper_gvec_ursra_d,
3783 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3784 .opt_opc = vecop_list,
3785 .load_dest = true,
3786 .vece = MO_64 },
3787 };
3788
3789 /* tszimm encoding produces immediates in the range [1..esize] */
3790 tcg_debug_assert(shift > 0);
3791 tcg_debug_assert(shift <= (8 << vece));
3792
3793 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3794 }
3795
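/* Shift right and insert: each destination element keeps its top 'shift'
 * bits and receives the shifted-down bits of the source element.
 */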
3796 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3797 {
3798 uint64_t mask = dup_const(MO_8, 0xff >> shift);
3799 TCGv_i64 t = tcg_temp_new_i64();
3800
3801 tcg_gen_shri_i64(t, a, shift);
3802 tcg_gen_andi_i64(t, t, mask);
3803 tcg_gen_andi_i64(d, d, ~mask);
3804 tcg_gen_or_i64(d, d, t);
3805 tcg_temp_free_i64(t);
3806 }
3807
3808 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3809 {
3810 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3811 TCGv_i64 t = tcg_temp_new_i64();
3812
3813 tcg_gen_shri_i64(t, a, shift);
3814 tcg_gen_andi_i64(t, t, mask);
3815 tcg_gen_andi_i64(d, d, ~mask);
3816 tcg_gen_or_i64(d, d, t);
3817 tcg_temp_free_i64(t);
3818 }
3819
3820 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3821 {
3822 tcg_gen_shri_i32(a, a, shift);
3823 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3824 }
3825
3826 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3827 {
3828 tcg_gen_shri_i64(a, a, shift);
3829 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3830 }
3831
3832 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3833 {
3834 TCGv_vec t = tcg_temp_new_vec_matching(d);
3835 TCGv_vec m = tcg_temp_new_vec_matching(d);
3836
3837 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3838 tcg_gen_shri_vec(vece, t, a, sh);
3839 tcg_gen_and_vec(vece, d, d, m);
3840 tcg_gen_or_vec(vece, d, d, t);
3841
3842 tcg_temp_free_vec(t);
3843 tcg_temp_free_vec(m);
3844 }
3845
3846 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3847 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3848 {
3849 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3850 const GVecGen2i ops[4] = {
3851 { .fni8 = gen_shr8_ins_i64,
3852 .fniv = gen_shr_ins_vec,
3853 .fno = gen_helper_gvec_sri_b,
3854 .load_dest = true,
3855 .opt_opc = vecop_list,
3856 .vece = MO_8 },
3857 { .fni8 = gen_shr16_ins_i64,
3858 .fniv = gen_shr_ins_vec,
3859 .fno = gen_helper_gvec_sri_h,
3860 .load_dest = true,
3861 .opt_opc = vecop_list,
3862 .vece = MO_16 },
3863 { .fni4 = gen_shr32_ins_i32,
3864 .fniv = gen_shr_ins_vec,
3865 .fno = gen_helper_gvec_sri_s,
3866 .load_dest = true,
3867 .opt_opc = vecop_list,
3868 .vece = MO_32 },
3869 { .fni8 = gen_shr64_ins_i64,
3870 .fniv = gen_shr_ins_vec,
3871 .fno = gen_helper_gvec_sri_d,
3872 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3873 .load_dest = true,
3874 .opt_opc = vecop_list,
3875 .vece = MO_64 },
3876 };
3877
3878 /* tszimm encoding produces immediates in the range [1..esize]. */
3879 tcg_debug_assert(shift > 0);
3880 tcg_debug_assert(shift <= (8 << vece));
3881
3882 /* Shift of esize leaves destination unchanged. */
3883 if (shift < (8 << vece)) {
3884 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3885 } else {
3886 /* Nop, but we do need to clear the tail. */
3887 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3888 }
3889 }
3890
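/*
 * Shift left and insert (SLI): the shifted input supplies the top
 * (esize - shift) bits of each element, while the low 'shift' bits of
 * the destination element are left untouched.
 */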
3891 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3892 {
3893 uint64_t mask = dup_const(MO_8, 0xff << shift);
3894 TCGv_i64 t = tcg_temp_new_i64();
3895
3896 tcg_gen_shli_i64(t, a, shift);
3897 tcg_gen_andi_i64(t, t, mask);
3898 tcg_gen_andi_i64(d, d, ~mask);
3899 tcg_gen_or_i64(d, d, t);
3900 tcg_temp_free_i64(t);
3901 }
3902
3903 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3904 {
3905 uint64_t mask = dup_const(MO_16, 0xffff << shift);
3906 TCGv_i64 t = tcg_temp_new_i64();
3907
3908 tcg_gen_shli_i64(t, a, shift);
3909 tcg_gen_andi_i64(t, t, mask);
3910 tcg_gen_andi_i64(d, d, ~mask);
3911 tcg_gen_or_i64(d, d, t);
3912 tcg_temp_free_i64(t);
3913 }
3914
3915 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3916 {
3917 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3918 }
3919
3920 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3921 {
3922 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3923 }
3924
3925 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3926 {
3927 TCGv_vec t = tcg_temp_new_vec_matching(d);
3928 TCGv_vec m = tcg_temp_new_vec_matching(d);
3929
3930 tcg_gen_shli_vec(vece, t, a, sh);
3931 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3932 tcg_gen_and_vec(vece, d, d, m);
3933 tcg_gen_or_vec(vece, d, d, t);
3934
3935 tcg_temp_free_vec(t);
3936 tcg_temp_free_vec(m);
3937 }
3938
3939 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3940 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3941 {
3942 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3943 const GVecGen2i ops[4] = {
3944 { .fni8 = gen_shl8_ins_i64,
3945 .fniv = gen_shl_ins_vec,
3946 .fno = gen_helper_gvec_sli_b,
3947 .load_dest = true,
3948 .opt_opc = vecop_list,
3949 .vece = MO_8 },
3950 { .fni8 = gen_shl16_ins_i64,
3951 .fniv = gen_shl_ins_vec,
3952 .fno = gen_helper_gvec_sli_h,
3953 .load_dest = true,
3954 .opt_opc = vecop_list,
3955 .vece = MO_16 },
3956 { .fni4 = gen_shl32_ins_i32,
3957 .fniv = gen_shl_ins_vec,
3958 .fno = gen_helper_gvec_sli_s,
3959 .load_dest = true,
3960 .opt_opc = vecop_list,
3961 .vece = MO_32 },
3962 { .fni8 = gen_shl64_ins_i64,
3963 .fniv = gen_shl_ins_vec,
3964 .fno = gen_helper_gvec_sli_d,
3965 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3966 .load_dest = true,
3967 .opt_opc = vecop_list,
3968 .vece = MO_64 },
3969 };
3970
3971 /* tszimm encoding produces immediates in the range [0..esize-1]. */
3972 tcg_debug_assert(shift >= 0);
3973 tcg_debug_assert(shift < (8 << vece));
3974
3975 if (shift == 0) {
3976 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3977 } else {
3978 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3979 }
3980 }
3981
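/* Multiply-accumulate and multiply-subtract: d += a * b and d -= a * b. */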
3982 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3983 {
3984 gen_helper_neon_mul_u8(a, a, b);
3985 gen_helper_neon_add_u8(d, d, a);
3986 }
3987
3988 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3989 {
3990 gen_helper_neon_mul_u8(a, a, b);
3991 gen_helper_neon_sub_u8(d, d, a);
3992 }
3993
3994 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3995 {
3996 gen_helper_neon_mul_u16(a, a, b);
3997 gen_helper_neon_add_u16(d, d, a);
3998 }
3999
4000 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4001 {
4002 gen_helper_neon_mul_u16(a, a, b);
4003 gen_helper_neon_sub_u16(d, d, a);
4004 }
4005
4006 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4007 {
4008 tcg_gen_mul_i32(a, a, b);
4009 tcg_gen_add_i32(d, d, a);
4010 }
4011
4012 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4013 {
4014 tcg_gen_mul_i32(a, a, b);
4015 tcg_gen_sub_i32(d, d, a);
4016 }
4017
4018 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4019 {
4020 tcg_gen_mul_i64(a, a, b);
4021 tcg_gen_add_i64(d, d, a);
4022 }
4023
4024 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4025 {
4026 tcg_gen_mul_i64(a, a, b);
4027 tcg_gen_sub_i64(d, d, a);
4028 }
4029
4030 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4031 {
4032 tcg_gen_mul_vec(vece, a, a, b);
4033 tcg_gen_add_vec(vece, d, d, a);
4034 }
4035
4036 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4037 {
4038 tcg_gen_mul_vec(vece, a, a, b);
4039 tcg_gen_sub_vec(vece, d, d, a);
4040 }
4041
4042 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4043 * these tables are shared with AArch64 which does support them.
4044 */
4045 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4046 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4047 {
4048 static const TCGOpcode vecop_list[] = {
4049 INDEX_op_mul_vec, INDEX_op_add_vec, 0
4050 };
4051 static const GVecGen3 ops[4] = {
4052 { .fni4 = gen_mla8_i32,
4053 .fniv = gen_mla_vec,
4054 .load_dest = true,
4055 .opt_opc = vecop_list,
4056 .vece = MO_8 },
4057 { .fni4 = gen_mla16_i32,
4058 .fniv = gen_mla_vec,
4059 .load_dest = true,
4060 .opt_opc = vecop_list,
4061 .vece = MO_16 },
4062 { .fni4 = gen_mla32_i32,
4063 .fniv = gen_mla_vec,
4064 .load_dest = true,
4065 .opt_opc = vecop_list,
4066 .vece = MO_32 },
4067 { .fni8 = gen_mla64_i64,
4068 .fniv = gen_mla_vec,
4069 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4070 .load_dest = true,
4071 .opt_opc = vecop_list,
4072 .vece = MO_64 },
4073 };
4074 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4075 }
4076
4077 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4078 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4079 {
4080 static const TCGOpcode vecop_list[] = {
4081 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4082 };
4083 static const GVecGen3 ops[4] = {
4084 { .fni4 = gen_mls8_i32,
4085 .fniv = gen_mls_vec,
4086 .load_dest = true,
4087 .opt_opc = vecop_list,
4088 .vece = MO_8 },
4089 { .fni4 = gen_mls16_i32,
4090 .fniv = gen_mls_vec,
4091 .load_dest = true,
4092 .opt_opc = vecop_list,
4093 .vece = MO_16 },
4094 { .fni4 = gen_mls32_i32,
4095 .fniv = gen_mls_vec,
4096 .load_dest = true,
4097 .opt_opc = vecop_list,
4098 .vece = MO_32 },
4099 { .fni8 = gen_mls64_i64,
4100 .fniv = gen_mls_vec,
4101 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4102 .load_dest = true,
4103 .opt_opc = vecop_list,
4104 .vece = MO_64 },
4105 };
4106 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4107 }
4108
4109 /* CMTST: test is "if ((X & Y) != 0)". */
4110 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4111 {
4112 tcg_gen_and_i32(d, a, b);
4113 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4114 tcg_gen_neg_i32(d, d);
4115 }
4116
4117 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4118 {
4119 tcg_gen_and_i64(d, a, b);
4120 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4121 tcg_gen_neg_i64(d, d);
4122 }
4123
4124 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4125 {
4126 tcg_gen_and_vec(vece, d, a, b);
4127 tcg_gen_dupi_vec(vece, a, 0);
4128 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4129 }
4130
4131 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4132 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4133 {
4134 static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
4135 static const GVecGen3 ops[4] = {
4136 { .fni4 = gen_helper_neon_tst_u8,
4137 .fniv = gen_cmtst_vec,
4138 .opt_opc = vecop_list,
4139 .vece = MO_8 },
4140 { .fni4 = gen_helper_neon_tst_u16,
4141 .fniv = gen_cmtst_vec,
4142 .opt_opc = vecop_list,
4143 .vece = MO_16 },
4144 { .fni4 = gen_cmtst_i32,
4145 .fniv = gen_cmtst_vec,
4146 .opt_opc = vecop_list,
4147 .vece = MO_32 },
4148 { .fni8 = gen_cmtst_i64,
4149 .fniv = gen_cmtst_vec,
4150 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4151 .opt_opc = vecop_list,
4152 .vece = MO_64 },
4153 };
4154 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4155 }
4156
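/*
 * Unsigned shift by a signed, per-element shift count (USHL/VSHL):
 * a positive count shifts left, a negative count shifts right, and
 * any count whose magnitude is >= esize yields zero.
 */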
4157 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4158 {
4159 TCGv_i32 lval = tcg_temp_new_i32();
4160 TCGv_i32 rval = tcg_temp_new_i32();
4161 TCGv_i32 lsh = tcg_temp_new_i32();
4162 TCGv_i32 rsh = tcg_temp_new_i32();
4163 TCGv_i32 zero = tcg_const_i32(0);
4164 TCGv_i32 max = tcg_const_i32(32);
4165
4166 /*
4167 * Rely on the TCG guarantee that out of range shifts produce
4168 * unspecified results, not undefined behaviour (i.e. no trap).
4169 * Discard out-of-range results after the fact.
4170 */
4171 tcg_gen_ext8s_i32(lsh, shift);
4172 tcg_gen_neg_i32(rsh, lsh);
4173 tcg_gen_shl_i32(lval, src, lsh);
4174 tcg_gen_shr_i32(rval, src, rsh);
4175 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4176 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4177
4178 tcg_temp_free_i32(lval);
4179 tcg_temp_free_i32(rval);
4180 tcg_temp_free_i32(lsh);
4181 tcg_temp_free_i32(rsh);
4182 tcg_temp_free_i32(zero);
4183 tcg_temp_free_i32(max);
4184 }
4185
4186 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4187 {
4188 TCGv_i64 lval = tcg_temp_new_i64();
4189 TCGv_i64 rval = tcg_temp_new_i64();
4190 TCGv_i64 lsh = tcg_temp_new_i64();
4191 TCGv_i64 rsh = tcg_temp_new_i64();
4192 TCGv_i64 zero = tcg_const_i64(0);
4193 TCGv_i64 max = tcg_const_i64(64);
4194
4195 /*
4196 * Rely on the TCG guarantee that out of range shifts produce
4197 * unspecified results, not undefined behaviour (i.e. no trap).
4198 * Discard out-of-range results after the fact.
4199 */
4200 tcg_gen_ext8s_i64(lsh, shift);
4201 tcg_gen_neg_i64(rsh, lsh);
4202 tcg_gen_shl_i64(lval, src, lsh);
4203 tcg_gen_shr_i64(rval, src, rsh);
4204 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4205 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4206
4207 tcg_temp_free_i64(lval);
4208 tcg_temp_free_i64(rval);
4209 tcg_temp_free_i64(lsh);
4210 tcg_temp_free_i64(rsh);
4211 tcg_temp_free_i64(zero);
4212 tcg_temp_free_i64(max);
4213 }
4214
4215 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4216 TCGv_vec src, TCGv_vec shift)
4217 {
4218 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4219 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4220 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4221 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4222 TCGv_vec msk, max;
4223
4224 tcg_gen_neg_vec(vece, rsh, shift);
4225 if (vece == MO_8) {
4226 tcg_gen_mov_vec(lsh, shift);
4227 } else {
4228 msk = tcg_temp_new_vec_matching(dst);
4229 tcg_gen_dupi_vec(vece, msk, 0xff);
4230 tcg_gen_and_vec(vece, lsh, shift, msk);
4231 tcg_gen_and_vec(vece, rsh, rsh, msk);
4232 tcg_temp_free_vec(msk);
4233 }
4234
4235 /*
4236 * Rely on the TCG guarantee that out of range shifts produce
4237 * unspecified results, not undefined behaviour (i.e. no trap).
4238 * Discard out-of-range results after the fact.
4239 */
4240 tcg_gen_shlv_vec(vece, lval, src, lsh);
4241 tcg_gen_shrv_vec(vece, rval, src, rsh);
4242
4243 max = tcg_temp_new_vec_matching(dst);
4244 tcg_gen_dupi_vec(vece, max, 8 << vece);
4245
4246 /*
4247 * The choice of LT (signed) and GEU (unsigned) is biased toward
4248 * the instructions of the x86_64 host. For MO_8, the whole byte
4249 * is significant so we must use an unsigned compare; otherwise we
4250 * have already masked to a byte and so a signed compare works.
4251 * Other tcg hosts have a full set of comparisons and do not care.
4252 */
4253 if (vece == MO_8) {
4254 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4255 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4256 tcg_gen_andc_vec(vece, lval, lval, lsh);
4257 tcg_gen_andc_vec(vece, rval, rval, rsh);
4258 } else {
4259 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4260 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4261 tcg_gen_and_vec(vece, lval, lval, lsh);
4262 tcg_gen_and_vec(vece, rval, rval, rsh);
4263 }
4264 tcg_gen_or_vec(vece, dst, lval, rval);
4265
4266 tcg_temp_free_vec(max);
4267 tcg_temp_free_vec(lval);
4268 tcg_temp_free_vec(rval);
4269 tcg_temp_free_vec(lsh);
4270 tcg_temp_free_vec(rsh);
4271 }
4272
4273 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4274 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4275 {
4276 static const TCGOpcode vecop_list[] = {
4277 INDEX_op_neg_vec, INDEX_op_shlv_vec,
4278 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4279 };
4280 static const GVecGen3 ops[4] = {
4281 { .fniv = gen_ushl_vec,
4282 .fno = gen_helper_gvec_ushl_b,
4283 .opt_opc = vecop_list,
4284 .vece = MO_8 },
4285 { .fniv = gen_ushl_vec,
4286 .fno = gen_helper_gvec_ushl_h,
4287 .opt_opc = vecop_list,
4288 .vece = MO_16 },
4289 { .fni4 = gen_ushl_i32,
4290 .fniv = gen_ushl_vec,
4291 .opt_opc = vecop_list,
4292 .vece = MO_32 },
4293 { .fni8 = gen_ushl_i64,
4294 .fniv = gen_ushl_vec,
4295 .opt_opc = vecop_list,
4296 .vece = MO_64 },
4297 };
4298 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4299 }
4300
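/*
 * Signed shift by a signed, per-element shift count (SSHL/VSHL):
 * a positive count shifts left, a negative count arithmetic-shifts
 * right; left shifts of esize or more give zero, while right shifts
 * are clamped to esize - 1 so they fill with the sign bit.
 */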
4301 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4302 {
4303 TCGv_i32 lval = tcg_temp_new_i32();
4304 TCGv_i32 rval = tcg_temp_new_i32();
4305 TCGv_i32 lsh = tcg_temp_new_i32();
4306 TCGv_i32 rsh = tcg_temp_new_i32();
4307 TCGv_i32 zero = tcg_const_i32(0);
4308 TCGv_i32 max = tcg_const_i32(31);
4309
4310 /*
4311 * Rely on the TCG guarantee that out of range shifts produce
4312 * unspecified results, not undefined behaviour (i.e. no trap).
4313 * Discard out-of-range results after the fact.
4314 */
4315 tcg_gen_ext8s_i32(lsh, shift);
4316 tcg_gen_neg_i32(rsh, lsh);
4317 tcg_gen_shl_i32(lval, src, lsh);
4318 tcg_gen_umin_i32(rsh, rsh, max);
4319 tcg_gen_sar_i32(rval, src, rsh);
4320 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4321 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4322
4323 tcg_temp_free_i32(lval);
4324 tcg_temp_free_i32(rval);
4325 tcg_temp_free_i32(lsh);
4326 tcg_temp_free_i32(rsh);
4327 tcg_temp_free_i32(zero);
4328 tcg_temp_free_i32(max);
4329 }
4330
4331 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4332 {
4333 TCGv_i64 lval = tcg_temp_new_i64();
4334 TCGv_i64 rval = tcg_temp_new_i64();
4335 TCGv_i64 lsh = tcg_temp_new_i64();
4336 TCGv_i64 rsh = tcg_temp_new_i64();
4337 TCGv_i64 zero = tcg_const_i64(0);
4338 TCGv_i64 max = tcg_const_i64(63);
4339
4340 /*
4341 * Rely on the TCG guarantee that out of range shifts produce
4342 * unspecified results, not undefined behaviour (i.e. no trap).
4343 * Discard out-of-range results after the fact.
4344 */
4345 tcg_gen_ext8s_i64(lsh, shift);
4346 tcg_gen_neg_i64(rsh, lsh);
4347 tcg_gen_shl_i64(lval, src, lsh);
4348 tcg_gen_umin_i64(rsh, rsh, max);
4349 tcg_gen_sar_i64(rval, src, rsh);
4350 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4351 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4352
4353 tcg_temp_free_i64(lval);
4354 tcg_temp_free_i64(rval);
4355 tcg_temp_free_i64(lsh);
4356 tcg_temp_free_i64(rsh);
4357 tcg_temp_free_i64(zero);
4358 tcg_temp_free_i64(max);
4359 }
4360
4361 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4362 TCGv_vec src, TCGv_vec shift)
4363 {
4364 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4365 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4366 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4367 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4368 TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4369
4370 /*
4371 * Rely on the TCG guarantee that out of range shifts produce
4372 * unspecified results, not undefined behaviour (i.e. no trap).
4373 * Discard out-of-range results after the fact.
4374 */
4375 tcg_gen_neg_vec(vece, rsh, shift);
4376 if (vece == MO_8) {
4377 tcg_gen_mov_vec(lsh, shift);
4378 } else {
4379 tcg_gen_dupi_vec(vece, tmp, 0xff);
4380 tcg_gen_and_vec(vece, lsh, shift, tmp);
4381 tcg_gen_and_vec(vece, rsh, rsh, tmp);
4382 }
4383
4384 /* Bound rsh so that an out-of-range right shift fills with the sign bit. */
4385 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4386 tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4387 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4388
4389 tcg_gen_shlv_vec(vece, lval, src, lsh);
4390 tcg_gen_sarv_vec(vece, rval, src, rsh);
4391
4392 /* Select in-bound left shift. */
4393 tcg_gen_andc_vec(vece, lval, lval, tmp);
4394
4395 /* Select between left and right shift. */
4396 if (vece == MO_8) {
4397 tcg_gen_dupi_vec(vece, tmp, 0);
4398 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4399 } else {
4400 tcg_gen_dupi_vec(vece, tmp, 0x80);
4401 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4402 }
4403
4404 tcg_temp_free_vec(lval);
4405 tcg_temp_free_vec(rval);
4406 tcg_temp_free_vec(lsh);
4407 tcg_temp_free_vec(rsh);
4408 tcg_temp_free_vec(tmp);
4409 }
4410
4411 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4412 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4413 {
4414 static const TCGOpcode vecop_list[] = {
4415 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4416 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4417 };
4418 static const GVecGen3 ops[4] = {
4419 { .fniv = gen_sshl_vec,
4420 .fno = gen_helper_gvec_sshl_b,
4421 .opt_opc = vecop_list,
4422 .vece = MO_8 },
4423 { .fniv = gen_sshl_vec,
4424 .fno = gen_helper_gvec_sshl_h,
4425 .opt_opc = vecop_list,
4426 .vece = MO_16 },
4427 { .fni4 = gen_sshl_i32,
4428 .fniv = gen_sshl_vec,
4429 .opt_opc = vecop_list,
4430 .vece = MO_32 },
4431 { .fni8 = gen_sshl_i64,
4432 .fniv = gen_sshl_vec,
4433 .opt_opc = vecop_list,
4434 .vece = MO_64 },
4435 };
4436 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4437 }
4438
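/*
 * For the saturating add/sub expansions below, compute both the
 * saturating and the wrapping result; any lane where the two differ
 * has saturated, and that difference mask is ORed into the QC flag.
 */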
4439 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4440 TCGv_vec a, TCGv_vec b)
4441 {
4442 TCGv_vec x = tcg_temp_new_vec_matching(t);
4443 tcg_gen_add_vec(vece, x, a, b);
4444 tcg_gen_usadd_vec(vece, t, a, b);
4445 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4446 tcg_gen_or_vec(vece, sat, sat, x);
4447 tcg_temp_free_vec(x);
4448 }
4449
4450 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4451 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4452 {
4453 static const TCGOpcode vecop_list[] = {
4454 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4455 };
4456 static const GVecGen4 ops[4] = {
4457 { .fniv = gen_uqadd_vec,
4458 .fno = gen_helper_gvec_uqadd_b,
4459 .write_aofs = true,
4460 .opt_opc = vecop_list,
4461 .vece = MO_8 },
4462 { .fniv = gen_uqadd_vec,
4463 .fno = gen_helper_gvec_uqadd_h,
4464 .write_aofs = true,
4465 .opt_opc = vecop_list,
4466 .vece = MO_16 },
4467 { .fniv = gen_uqadd_vec,
4468 .fno = gen_helper_gvec_uqadd_s,
4469 .write_aofs = true,
4470 .opt_opc = vecop_list,
4471 .vece = MO_32 },
4472 { .fniv = gen_uqadd_vec,
4473 .fno = gen_helper_gvec_uqadd_d,
4474 .write_aofs = true,
4475 .opt_opc = vecop_list,
4476 .vece = MO_64 },
4477 };
4478 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4479 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4480 }
4481
4482 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4483 TCGv_vec a, TCGv_vec b)
4484 {
4485 TCGv_vec x = tcg_temp_new_vec_matching(t);
4486 tcg_gen_add_vec(vece, x, a, b);
4487 tcg_gen_ssadd_vec(vece, t, a, b);
4488 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4489 tcg_gen_or_vec(vece, sat, sat, x);
4490 tcg_temp_free_vec(x);
4491 }
4492
4493 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4494 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4495 {
4496 static const TCGOpcode vecop_list[] = {
4497 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4498 };
4499 static const GVecGen4 ops[4] = {
4500 { .fniv = gen_sqadd_vec,
4501 .fno = gen_helper_gvec_sqadd_b,
4502 .opt_opc = vecop_list,
4503 .write_aofs = true,
4504 .vece = MO_8 },
4505 { .fniv = gen_sqadd_vec,
4506 .fno = gen_helper_gvec_sqadd_h,
4507 .opt_opc = vecop_list,
4508 .write_aofs = true,
4509 .vece = MO_16 },
4510 { .fniv = gen_sqadd_vec,
4511 .fno = gen_helper_gvec_sqadd_s,
4512 .opt_opc = vecop_list,
4513 .write_aofs = true,
4514 .vece = MO_32 },
4515 { .fniv = gen_sqadd_vec,
4516 .fno = gen_helper_gvec_sqadd_d,
4517 .opt_opc = vecop_list,
4518 .write_aofs = true,
4519 .vece = MO_64 },
4520 };
4521 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4522 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4523 }
4524
4525 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4526 TCGv_vec a, TCGv_vec b)
4527 {
4528 TCGv_vec x = tcg_temp_new_vec_matching(t);
4529 tcg_gen_sub_vec(vece, x, a, b);
4530 tcg_gen_ussub_vec(vece, t, a, b);
4531 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4532 tcg_gen_or_vec(vece, sat, sat, x);
4533 tcg_temp_free_vec(x);
4534 }
4535
4536 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4537 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4538 {
4539 static const TCGOpcode vecop_list[] = {
4540 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4541 };
4542 static const GVecGen4 ops[4] = {
4543 { .fniv = gen_uqsub_vec,
4544 .fno = gen_helper_gvec_uqsub_b,
4545 .opt_opc = vecop_list,
4546 .write_aofs = true,
4547 .vece = MO_8 },
4548 { .fniv = gen_uqsub_vec,
4549 .fno = gen_helper_gvec_uqsub_h,
4550 .opt_opc = vecop_list,
4551 .write_aofs = true,
4552 .vece = MO_16 },
4553 { .fniv = gen_uqsub_vec,
4554 .fno = gen_helper_gvec_uqsub_s,
4555 .opt_opc = vecop_list,
4556 .write_aofs = true,
4557 .vece = MO_32 },
4558 { .fniv = gen_uqsub_vec,
4559 .fno = gen_helper_gvec_uqsub_d,
4560 .opt_opc = vecop_list,
4561 .write_aofs = true,
4562 .vece = MO_64 },
4563 };
4564 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4565 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4566 }
4567
4568 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4569 TCGv_vec a, TCGv_vec b)
4570 {
4571 TCGv_vec x = tcg_temp_new_vec_matching(t);
4572 tcg_gen_sub_vec(vece, x, a, b);
4573 tcg_gen_sssub_vec(vece, t, a, b);
4574 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4575 tcg_gen_or_vec(vece, sat, sat, x);
4576 tcg_temp_free_vec(x);
4577 }
4578
4579 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4580 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4581 {
4582 static const TCGOpcode vecop_list[] = {
4583 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4584 };
4585 static const GVecGen4 ops[4] = {
4586 { .fniv = gen_sqsub_vec,
4587 .fno = gen_helper_gvec_sqsub_b,
4588 .opt_opc = vecop_list,
4589 .write_aofs = true,
4590 .vece = MO_8 },
4591 { .fniv = gen_sqsub_vec,
4592 .fno = gen_helper_gvec_sqsub_h,
4593 .opt_opc = vecop_list,
4594 .write_aofs = true,
4595 .vece = MO_16 },
4596 { .fniv = gen_sqsub_vec,
4597 .fno = gen_helper_gvec_sqsub_s,
4598 .opt_opc = vecop_list,
4599 .write_aofs = true,
4600 .vece = MO_32 },
4601 { .fniv = gen_sqsub_vec,
4602 .fno = gen_helper_gvec_sqsub_d,
4603 .opt_opc = vecop_list,
4604 .write_aofs = true,
4605 .vece = MO_64 },
4606 };
4607 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4608 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4609 }
4610
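/* Signed absolute difference: d = a < b ? b - a : a - b. */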
4611 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4612 {
4613 TCGv_i32 t = tcg_temp_new_i32();
4614
4615 tcg_gen_sub_i32(t, a, b);
4616 tcg_gen_sub_i32(d, b, a);
4617 tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4618 tcg_temp_free_i32(t);
4619 }
4620
4621 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4622 {
4623 TCGv_i64 t = tcg_temp_new_i64();
4624
4625 tcg_gen_sub_i64(t, a, b);
4626 tcg_gen_sub_i64(d, b, a);
4627 tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4628 tcg_temp_free_i64(t);
4629 }
4630
4631 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4632 {
4633 TCGv_vec t = tcg_temp_new_vec_matching(d);
4634
4635 tcg_gen_smin_vec(vece, t, a, b);
4636 tcg_gen_smax_vec(vece, d, a, b);
4637 tcg_gen_sub_vec(vece, d, d, t);
4638 tcg_temp_free_vec(t);
4639 }
4640
4641 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4642 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4643 {
4644 static const TCGOpcode vecop_list[] = {
4645 INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4646 };
4647 static const GVecGen3 ops[4] = {
4648 { .fniv = gen_sabd_vec,
4649 .fno = gen_helper_gvec_sabd_b,
4650 .opt_opc = vecop_list,
4651 .vece = MO_8 },
4652 { .fniv = gen_sabd_vec,
4653 .fno = gen_helper_gvec_sabd_h,
4654 .opt_opc = vecop_list,
4655 .vece = MO_16 },
4656 { .fni4 = gen_sabd_i32,
4657 .fniv = gen_sabd_vec,
4658 .fno = gen_helper_gvec_sabd_s,
4659 .opt_opc = vecop_list,
4660 .vece = MO_32 },
4661 { .fni8 = gen_sabd_i64,
4662 .fniv = gen_sabd_vec,
4663 .fno = gen_helper_gvec_sabd_d,
4664 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4665 .opt_opc = vecop_list,
4666 .vece = MO_64 },
4667 };
4668 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4669 }
4670
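/* Unsigned absolute difference: as above, but with an unsigned compare. */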
4671 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4672 {
4673 TCGv_i32 t = tcg_temp_new_i32();
4674
4675 tcg_gen_sub_i32(t, a, b);
4676 tcg_gen_sub_i32(d, b, a);
4677 tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4678 tcg_temp_free_i32(t);
4679 }
4680
4681 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4682 {
4683 TCGv_i64 t = tcg_temp_new_i64();
4684
4685 tcg_gen_sub_i64(t, a, b);
4686 tcg_gen_sub_i64(d, b, a);
4687 tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4688 tcg_temp_free_i64(t);
4689 }
4690
4691 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4692 {
4693 TCGv_vec t = tcg_temp_new_vec_matching(d);
4694
4695 tcg_gen_umin_vec(vece, t, a, b);
4696 tcg_gen_umax_vec(vece, d, a, b);
4697 tcg_gen_sub_vec(vece, d, d, t);
4698 tcg_temp_free_vec(t);
4699 }
4700
4701 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4702 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4703 {
4704 static const TCGOpcode vecop_list[] = {
4705 INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4706 };
4707 static const GVecGen3 ops[4] = {
4708 { .fniv = gen_uabd_vec,
4709 .fno = gen_helper_gvec_uabd_b,
4710 .opt_opc = vecop_list,
4711 .vece = MO_8 },
4712 { .fniv = gen_uabd_vec,
4713 .fno = gen_helper_gvec_uabd_h,
4714 .opt_opc = vecop_list,
4715 .vece = MO_16 },
4716 { .fni4 = gen_uabd_i32,
4717 .fniv = gen_uabd_vec,
4718 .fno = gen_helper_gvec_uabd_s,
4719 .opt_opc = vecop_list,
4720 .vece = MO_32 },
4721 { .fni8 = gen_uabd_i64,
4722 .fniv = gen_uabd_vec,
4723 .fno = gen_helper_gvec_uabd_d,
4724 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4725 .opt_opc = vecop_list,
4726 .vece = MO_64 },
4727 };
4728 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4729 }
4730
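/* Signed absolute difference and accumulate: d += |a - b|. */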
4731 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4732 {
4733 TCGv_i32 t = tcg_temp_new_i32();
4734 gen_sabd_i32(t, a, b);
4735 tcg_gen_add_i32(d, d, t);
4736 tcg_temp_free_i32(t);
4737 }
4738
4739 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4740 {
4741 TCGv_i64 t = tcg_temp_new_i64();
4742 gen_sabd_i64(t, a, b);
4743 tcg_gen_add_i64(d, d, t);
4744 tcg_temp_free_i64(t);
4745 }
4746
4747 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4748 {
4749 TCGv_vec t = tcg_temp_new_vec_matching(d);
4750 gen_sabd_vec(vece, t, a, b);
4751 tcg_gen_add_vec(vece, d, d, t);
4752 tcg_temp_free_vec(t);
4753 }
4754
4755 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4756 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4757 {
4758 static const TCGOpcode vecop_list[] = {
4759 INDEX_op_sub_vec, INDEX_op_add_vec,
4760 INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4761 };
4762 static const GVecGen3 ops[4] = {
4763 { .fniv = gen_saba_vec,
4764 .fno = gen_helper_gvec_saba_b,
4765 .opt_opc = vecop_list,
4766 .load_dest = true,
4767 .vece = MO_8 },
4768 { .fniv = gen_saba_vec,
4769 .fno = gen_helper_gvec_saba_h,
4770 .opt_opc = vecop_list,
4771 .load_dest = true,
4772 .vece = MO_16 },
4773 { .fni4 = gen_saba_i32,
4774 .fniv = gen_saba_vec,
4775 .fno = gen_helper_gvec_saba_s,
4776 .opt_opc = vecop_list,
4777 .load_dest = true,
4778 .vece = MO_32 },
4779 { .fni8 = gen_saba_i64,
4780 .fniv = gen_saba_vec,
4781 .fno = gen_helper_gvec_saba_d,
4782 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4783 .opt_opc = vecop_list,
4784 .load_dest = true,
4785 .vece = MO_64 },
4786 };
4787 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4788 }
4789
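/* Unsigned absolute difference and accumulate: d += |a - b|. */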
4790 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4791 {
4792 TCGv_i32 t = tcg_temp_new_i32();
4793 gen_uabd_i32(t, a, b);
4794 tcg_gen_add_i32(d, d, t);
4795 tcg_temp_free_i32(t);
4796 }
4797
4798 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4799 {
4800 TCGv_i64 t = tcg_temp_new_i64();
4801 gen_uabd_i64(t, a, b);
4802 tcg_gen_add_i64(d, d, t);
4803 tcg_temp_free_i64(t);
4804 }
4805
4806 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4807 {
4808 TCGv_vec t = tcg_temp_new_vec_matching(d);
4809 gen_uabd_vec(vece, t, a, b);
4810 tcg_gen_add_vec(vece, d, d, t);
4811 tcg_temp_free_vec(t);
4812 }
4813
4814 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4815 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4816 {
4817 static const TCGOpcode vecop_list[] = {
4818 INDEX_op_sub_vec, INDEX_op_add_vec,
4819 INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4820 };
4821 static const GVecGen3 ops[4] = {
4822 { .fniv = gen_uaba_vec,
4823 .fno = gen_helper_gvec_uaba_b,
4824 .opt_opc = vecop_list,
4825 .load_dest = true,
4826 .vece = MO_8 },
4827 { .fniv = gen_uaba_vec,
4828 .fno = gen_helper_gvec_uaba_h,
4829 .opt_opc = vecop_list,
4830 .load_dest = true,
4831 .vece = MO_16 },
4832 { .fni4 = gen_uaba_i32,
4833 .fniv = gen_uaba_vec,
4834 .fno = gen_helper_gvec_uaba_s,
4835 .opt_opc = vecop_list,
4836 .load_dest = true,
4837 .vece = MO_32 },
4838 { .fni8 = gen_uaba_i64,
4839 .fniv = gen_uaba_vec,
4840 .fno = gen_helper_gvec_uaba_d,
4841 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4842 .opt_opc = vecop_list,
4843 .load_dest = true,
4844 .vece = MO_64 },
4845 };
4846 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4847 }
4848
4849 /* Translate a NEON data processing instruction. Return nonzero if the
4850 * instruction is invalid.
4851 * We process data in a mixture of 32-bit and 64-bit chunks.
4852 * Mostly we use 32-bit chunks so we can use normal scalar instructions. */
4853
4854 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
4855 {
4856 int op;
4857 int q;
4858 int rd, rm;
4859 int size;
4860 int pass;
4861 int u;
4862 TCGv_i32 tmp, tmp2;
4863
4864 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
4865 return 1;
4866 }
4867
4868 /* FIXME: this access check should not take precedence over UNDEF
4869 * for invalid encodings; we will generate incorrect syndrome information
4870 * for attempts to execute invalid vfp/neon encodings with FP disabled.
4871 */
4872 if (s->fp_excp_el) {
4873 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
4874 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
4875 return 0;
4876 }
4877
4878 if (!s->vfp_enabled)
4879 return 1;
4880 q = (insn & (1 << 6)) != 0;
4881 u = (insn >> 24) & 1;
4882 VFP_DREG_D(rd, insn);
4883 VFP_DREG_M(rm, insn);
4884 size = (insn >> 20) & 3;
4885
4886 if ((insn & (1 << 23)) == 0) {
4887 /* Three register same length: handled by decodetree */
4888 return 1;
4889 } else if (insn & (1 << 4)) {
4890 /* Two registers and shift or reg and imm: handled by decodetree */
4891 return 1;
4892 } else { /* (insn & 0x00800010) == 0x00800000 */
4893 if (size != 3) {
4894 /*
4895 * Three registers of different lengths, or two registers and
4896 * a scalar: handled by decodetree
4897 */
4898 return 1;
4899 } else { /* size == 3 */
4900 if (!u) {
4901 /* Extract: handled by decodetree */
4902 return 1;
4903 } else if ((insn & (1 << 11)) == 0) {
4904 /* Two register misc. */
4905 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
4906 size = (insn >> 18) & 3;
4907 /* UNDEF for unknown op values and bad op-size combinations */
4908 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
4909 return 1;
4910 }
4911 if (neon_2rm_is_v8_op(op) &&
4912 !arm_dc_feature(s, ARM_FEATURE_V8)) {
4913 return 1;
4914 }
4915 if (q && ((rm | rd) & 1)) {
4916 return 1;
4917 }
4918 switch (op) {
4919 case NEON_2RM_VREV64:
4920 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
4921 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
4922 case NEON_2RM_VUZP:
4923 case NEON_2RM_VZIP:
4924 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
4925 case NEON_2RM_VSHLL:
4926 case NEON_2RM_VCVT_F16_F32:
4927 case NEON_2RM_VCVT_F32_F16:
4928 case NEON_2RM_VMVN:
4929 case NEON_2RM_VNEG:
4930 case NEON_2RM_VABS:
4931 case NEON_2RM_VCEQ0:
4932 case NEON_2RM_VCGT0:
4933 case NEON_2RM_VCLE0:
4934 case NEON_2RM_VCGE0:
4935 case NEON_2RM_VCLT0:
4936 case NEON_2RM_AESE: case NEON_2RM_AESMC:
4937 case NEON_2RM_SHA1H:
4938 case NEON_2RM_SHA1SU1:
4939 /* handled by decodetree */
4940 return 1;
4941 case NEON_2RM_VTRN:
4942 if (size == 2) {
4943 int n;
4944 for (n = 0; n < (q ? 4 : 2); n += 2) {
4945 tmp = neon_load_reg(rm, n);
4946 tmp2 = neon_load_reg(rd, n + 1);
4947 neon_store_reg(rm, n, tmp2);
4948 neon_store_reg(rd, n + 1, tmp);
4949 }
4950 } else {
4951 goto elementwise;
4952 }
4953 break;
4954
4955 default:
4956 elementwise:
4957 for (pass = 0; pass < (q ? 4 : 2); pass++) {
4958 tmp = neon_load_reg(rm, pass);
4959 switch (op) {
4960 case NEON_2RM_VREV32:
4961 switch (size) {
4962 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
4963 case 1: gen_swap_half(tmp); break;
4964 default: abort();
4965 }
4966 break;
4967 case NEON_2RM_VREV16:
4968 gen_rev16(tmp, tmp);
4969 break;
4970 case NEON_2RM_VCLS:
4971 switch (size) {
4972 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
4973 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
4974 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
4975 default: abort();
4976 }
4977 break;
4978 case NEON_2RM_VCLZ:
4979 switch (size) {
4980 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
4981 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
4982 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
4983 default: abort();
4984 }
4985 break;
4986 case NEON_2RM_VCNT:
4987 gen_helper_neon_cnt_u8(tmp, tmp);
4988 break;
4989 case NEON_2RM_VQABS:
4990 switch (size) {
4991 case 0:
4992 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
4993 break;
4994 case 1:
4995 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
4996 break;
4997 case 2:
4998 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
4999 break;
5000 default: abort();
5001 }
5002 break;
5003 case NEON_2RM_VQNEG:
5004 switch (size) {
5005 case 0:
5006 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
5007 break;
5008 case 1:
5009 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
5010 break;
5011 case 2:
5012 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
5013 break;
5014 default: abort();
5015 }
5016 break;
5017 case NEON_2RM_VCGT0_F:
5018 {
5019 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5020 tmp2 = tcg_const_i32(0);
5021 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5022 tcg_temp_free_i32(tmp2);
5023 tcg_temp_free_ptr(fpstatus);
5024 break;
5025 }
5026 case NEON_2RM_VCGE0_F:
5027 {
5028 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5029 tmp2 = tcg_const_i32(0);
5030 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5031 tcg_temp_free_i32(tmp2);
5032 tcg_temp_free_ptr(fpstatus);
5033 break;
5034 }
5035 case NEON_2RM_VCEQ0_F:
5036 {
5037 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5038 tmp2 = tcg_const_i32(0);
5039 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5040 tcg_temp_free_i32(tmp2);
5041 tcg_temp_free_ptr(fpstatus);
5042 break;
5043 }
5044 case NEON_2RM_VCLE0_F:
5045 {
5046 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5047 tmp2 = tcg_const_i32(0);
5048 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
5049 tcg_temp_free_i32(tmp2);
5050 tcg_temp_free_ptr(fpstatus);
5051 break;
5052 }
5053 case NEON_2RM_VCLT0_F:
5054 {
5055 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5056 tmp2 = tcg_const_i32(0);
5057 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
5058 tcg_temp_free_i32(tmp2);
5059 tcg_temp_free_ptr(fpstatus);
5060 break;
5061 }
5062 case NEON_2RM_VABS_F:
5063 gen_helper_vfp_abss(tmp, tmp);
5064 break;
5065 case NEON_2RM_VNEG_F:
5066 gen_helper_vfp_negs(tmp, tmp);
5067 break;
5068 case NEON_2RM_VSWP:
5069 tmp2 = neon_load_reg(rd, pass);
5070 neon_store_reg(rm, pass, tmp2);
5071 break;
5072 case NEON_2RM_VTRN:
5073 tmp2 = neon_load_reg(rd, pass);
5074 switch (size) {
5075 case 0: gen_neon_trn_u8(tmp, tmp2); break;
5076 case 1: gen_neon_trn_u16(tmp, tmp2); break;
5077 default: abort();
5078 }
5079 neon_store_reg(rm, pass, tmp2);
5080 break;
5081 case NEON_2RM_VRINTN:
5082 case NEON_2RM_VRINTA:
5083 case NEON_2RM_VRINTM:
5084 case NEON_2RM_VRINTP:
5085 case NEON_2RM_VRINTZ:
5086 {
5087 TCGv_i32 tcg_rmode;
5088 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5089 int rmode;
5090
5091 if (op == NEON_2RM_VRINTZ) {
5092 rmode = FPROUNDING_ZERO;
5093 } else {
5094 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
5095 }
5096
5097 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5098 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
5099 cpu_env);
5100 gen_helper_rints(tmp, tmp, fpstatus);
5101 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
5102 cpu_env);
5103 tcg_temp_free_ptr(fpstatus);
5104 tcg_temp_free_i32(tcg_rmode);
5105 break;
5106 }
5107 case NEON_2RM_VRINTX:
5108 {
5109 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5110 gen_helper_rints_exact(tmp, tmp, fpstatus);
5111 tcg_temp_free_ptr(fpstatus);
5112 break;
5113 }
5114 case NEON_2RM_VCVTAU:
5115 case NEON_2RM_VCVTAS:
5116 case NEON_2RM_VCVTNU:
5117 case NEON_2RM_VCVTNS:
5118 case NEON_2RM_VCVTPU:
5119 case NEON_2RM_VCVTPS:
5120 case NEON_2RM_VCVTMU:
5121 case NEON_2RM_VCVTMS:
5122 {
5123 bool is_signed = !extract32(insn, 7, 1);
5124 TCGv_ptr fpst = get_fpstatus_ptr(1);
5125 TCGv_i32 tcg_rmode, tcg_shift;
5126 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
5127
5128 tcg_shift = tcg_const_i32(0);
5129 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5130 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
5131 cpu_env);
5132
5133 if (is_signed) {
5134 gen_helper_vfp_tosls(tmp, tmp,
5135 tcg_shift, fpst);
5136 } else {
5137 gen_helper_vfp_touls(tmp, tmp,
5138 tcg_shift, fpst);
5139 }
5140
5141 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
5142 cpu_env);
5143 tcg_temp_free_i32(tcg_rmode);
5144 tcg_temp_free_i32(tcg_shift);
5145 tcg_temp_free_ptr(fpst);
5146 break;
5147 }
5148 case NEON_2RM_VRECPE:
5149 gen_helper_recpe_u32(tmp, tmp);
5150 break;
5151 case NEON_2RM_VRSQRTE:
5152 gen_helper_rsqrte_u32(tmp, tmp);
5153 break;
5154 case NEON_2RM_VRECPE_F:
5155 {
5156 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5157 gen_helper_recpe_f32(tmp, tmp, fpstatus);
5158 tcg_temp_free_ptr(fpstatus);
5159 break;
5160 }
5161 case NEON_2RM_VRSQRTE_F:
5162 {
5163 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5164 gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
5165 tcg_temp_free_ptr(fpstatus);
5166 break;
5167 }
5168 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
5169 {
5170 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5171 gen_helper_vfp_sitos(tmp, tmp, fpstatus);
5172 tcg_temp_free_ptr(fpstatus);
5173 break;
5174 }
5175 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
5176 {
5177 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5178 gen_helper_vfp_uitos(tmp, tmp, fpstatus);
5179 tcg_temp_free_ptr(fpstatus);
5180 break;
5181 }
5182 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
5183 {
5184 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5185 gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
5186 tcg_temp_free_ptr(fpstatus);
5187 break;
5188 }
5189 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
5190 {
5191 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5192 gen_helper_vfp_touizs(tmp, tmp, fpstatus);
5193 tcg_temp_free_ptr(fpstatus);
5194 break;
5195 }
5196 default:
5197 /* Reserved op values were caught by the
5198 * neon_2rm_sizes[] check earlier.
5199 */
5200 abort();
5201 }
5202 neon_store_reg(rd, pass, tmp);
5203 }
5204 break;
5205 }
5206 } else {
5207 /* VTBL, VTBX, VDUP: handled by decodetree */
5208 return 1;
5209 }
5210 }
5211 }
5212 return 0;
5213 }
5214
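/*
 * Translate a coprocessor instruction (XScale/iwMMXt space or a
 * generic system register access). Return nonzero if the instruction
 * is not handled here.
 */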
5215 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
5216 {
5217 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
5218 const ARMCPRegInfo *ri;
5219
5220 cpnum = (insn >> 8) & 0xf;
5221
5222 /* First check for coprocessor space used for XScale/iwMMXt insns */
5223 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
5224 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
5225 return 1;
5226 }
5227 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
5228 return disas_iwmmxt_insn(s, insn);
5229 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
5230 return disas_dsp_insn(s, insn);
5231 }
5232 return 1;
5233 }
5234
5235 /* Otherwise treat as a generic register access */
5236 is64 = (insn & (1 << 25)) == 0;
5237 if (!is64 && ((insn & (1 << 4)) == 0)) {
5238 /* cdp */
5239 return 1;
5240 }
5241
5242 crm = insn & 0xf;
5243 if (is64) {
5244 crn = 0;
5245 opc1 = (insn >> 4) & 0xf;
5246 opc2 = 0;
5247 rt2 = (insn >> 16) & 0xf;
5248 } else {
5249 crn = (insn >> 16) & 0xf;
5250 opc1 = (insn >> 21) & 7;
5251 opc2 = (insn >> 5) & 7;
5252 rt2 = 0;
5253 }
5254 isread = (insn >> 20) & 1;
5255 rt = (insn >> 12) & 0xf;
5256
5257 ri = get_arm_cp_reginfo(s->cp_regs,
5258 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
5259 if (ri) {
5260 bool need_exit_tb;
5261
5262 /* Check access permissions */
5263 if (!cp_access_ok(s->current_el, ri, isread)) {
5264 return 1;
5265 }
5266
5267 if (s->hstr_active || ri->accessfn ||
5268 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
5269 /* Emit code to perform further access permissions checks at
5270 * runtime; this may result in an exception.
5271 * Note that on XScale all cp0..cp13 registers do an access check
5272 * call in order to handle c15_cpar.
5273 */
5274 TCGv_ptr tmpptr;
5275 TCGv_i32 tcg_syn, tcg_isread;
5276 uint32_t syndrome;
5277
5278 /* Note that since we are an implementation which takes an
5279 * exception on a trapped conditional instruction only if the
5280 * instruction passes its condition code check, we can take
5281 * advantage of the clause in the ARM ARM that allows us to set
5282 * the COND field in the instruction to 0xE in all cases.
5283 * We could fish the actual condition out of the insn (ARM)
5284 * or the condexec bits (Thumb) but it isn't necessary.
5285 */
5286 switch (cpnum) {
5287 case 14:
5288 if (is64) {
5289 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
5290 isread, false);
5291 } else {
5292 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
5293 rt, isread, false);
5294 }
5295 break;
5296 case 15:
5297 if (is64) {
5298 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
5299 isread, false);
5300 } else {
5301 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
5302 rt, isread, false);
5303 }
5304 break;
5305 default:
5306 /* ARMv8 defines that only coprocessors 14 and 15 exist,
5307 * so this can only happen if this is an ARMv7 or earlier CPU,
5308 * in which case the syndrome information won't actually be
5309 * guest visible.
5310 */
5311 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
5312 syndrome = syn_uncategorized();
5313 break;
5314 }
5315
5316 gen_set_condexec(s);
5317 gen_set_pc_im(s, s->pc_curr);
5318 tmpptr = tcg_const_ptr(ri);
5319 tcg_syn = tcg_const_i32(syndrome);
5320 tcg_isread = tcg_const_i32(isread);
5321 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
5322 tcg_isread);
5323 tcg_temp_free_ptr(tmpptr);
5324 tcg_temp_free_i32(tcg_syn);
5325 tcg_temp_free_i32(tcg_isread);
5326 } else if (ri->type & ARM_CP_RAISES_EXC) {
5327 /*
5328 * The readfn or writefn might raise an exception;
5329 * synchronize the CPU state in case it does.
5330 */
5331 gen_set_condexec(s);
5332 gen_set_pc_im(s, s->pc_curr);
5333 }
5334
5335 /* Handle special cases first */
5336 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
5337 case ARM_CP_NOP:
5338 return 0;
5339 case ARM_CP_WFI:
5340 if (isread) {
5341 return 1;
5342 }
5343 gen_set_pc_im(s, s->base.pc_next);
5344 s->base.is_jmp = DISAS_WFI;
5345 return 0;
5346 default:
5347 break;
5348 }
5349
5350 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
5351 gen_io_start();
5352 }
5353
5354 if (isread) {
5355 /* Read */
5356 if (is64) {
5357 TCGv_i64 tmp64;
5358 TCGv_i32 tmp;
5359 if (ri->type & ARM_CP_CONST) {
5360 tmp64 = tcg_const_i64(ri->resetvalue);
5361 } else if (ri->readfn) {
5362 TCGv_ptr tmpptr;
5363 tmp64 = tcg_temp_new_i64();
5364 tmpptr = tcg_const_ptr(ri);
5365 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
5366 tcg_temp_free_ptr(tmpptr);
5367 } else {
5368 tmp64 = tcg_temp_new_i64();
5369 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
5370 }
5371 tmp = tcg_temp_new_i32();
5372 tcg_gen_extrl_i64_i32(tmp, tmp64);
5373 store_reg(s, rt, tmp);
5374 tmp = tcg_temp_new_i32();
5375 tcg_gen_extrh_i64_i32(tmp, tmp64);
5376 tcg_temp_free_i64(tmp64);
5377 store_reg(s, rt2, tmp);
5378 } else {
5379 TCGv_i32 tmp;
5380 if (ri->type & ARM_CP_CONST) {
5381 tmp = tcg_const_i32(ri->resetvalue);
5382 } else if (ri->readfn) {
5383 TCGv_ptr tmpptr;
5384 tmp = tcg_temp_new_i32();
5385 tmpptr = tcg_const_ptr(ri);
5386 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
5387 tcg_temp_free_ptr(tmpptr);
5388 } else {
5389 tmp = load_cpu_offset(ri->fieldoffset);
5390 }
5391 if (rt == 15) {
5392 /* A destination register of r15 for a 32-bit load sets
5393 * the condition codes from the high 4 bits of the value.
5394 */
5395 gen_set_nzcv(tmp);
5396 tcg_temp_free_i32(tmp);
5397 } else {
5398 store_reg(s, rt, tmp);
5399 }
5400 }
5401 } else {
5402 /* Write */
5403 if (ri->type & ARM_CP_CONST) {
5404 /* If not forbidden by access permissions, treat as WI */
5405 return 0;
5406 }
5407
5408 if (is64) {
5409 TCGv_i32 tmplo, tmphi;
5410 TCGv_i64 tmp64 = tcg_temp_new_i64();
5411 tmplo = load_reg(s, rt);
5412 tmphi = load_reg(s, rt2);
5413 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
5414 tcg_temp_free_i32(tmplo);
5415 tcg_temp_free_i32(tmphi);
5416 if (ri->writefn) {
5417 TCGv_ptr tmpptr = tcg_const_ptr(ri);
5418 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
5419 tcg_temp_free_ptr(tmpptr);
5420 } else {
5421 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
5422 }
5423 tcg_temp_free_i64(tmp64);
5424 } else {
5425 if (ri->writefn) {
5426 TCGv_i32 tmp;
5427 TCGv_ptr tmpptr;
5428 tmp = load_reg(s, rt);
5429 tmpptr = tcg_const_ptr(ri);
5430 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
5431 tcg_temp_free_ptr(tmpptr);
5432 tcg_temp_free_i32(tmp);
5433 } else {
5434 TCGv_i32 tmp = load_reg(s, rt);
5435 store_cpu_offset(tmp, ri->fieldoffset);
5436 }
5437 }
5438 }
5439
5440 /* I/O operations must end the TB here (whether read or write) */
5441 need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
5442 (ri->type & ARM_CP_IO));
5443
5444 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
5445 /*
5446 * A write to any coprocessor register that ends a TB
5447 * must rebuild the hflags for the next TB.
5448 */
5449 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
5450 if (arm_dc_feature(s, ARM_FEATURE_M)) {
5451 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
5452 } else {
5453 if (ri->type & ARM_CP_NEWEL) {
5454 gen_helper_rebuild_hflags_a32_newel(cpu_env);
5455 } else {
5456 gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
5457 }
5458 }
5459 tcg_temp_free_i32(tcg_el);
5460 /*
5461 * We default to ending the TB on a coprocessor register write,
5462 * but allow this to be suppressed by the register definition
5463 * (usually only necessary to work around guest bugs).
5464 */
5465 need_exit_tb = true;
5466 }
5467 if (need_exit_tb) {
5468 gen_lookup_tb(s);
5469 }
5470
5471 return 0;
5472 }
5473
5474 /* Unknown register; this might be a guest error or a QEMU
5475 * unimplemented feature.
5476 */
5477 if (is64) {
5478 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
5479 "64 bit system register cp:%d opc1: %d crm:%d "
5480 "(%s)\n",
5481 isread ? "read" : "write", cpnum, opc1, crm,
5482 s->ns ? "non-secure" : "secure");
5483 } else {
5484 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
5485 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
5486 "(%s)\n",
5487 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
5488 s->ns ? "non-secure" : "secure");
5489 }
5490
5491 return 1;
5492 }
5493
5494
5495 /* Store a 64-bit value to a register pair. Clobbers val. */
5496 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
5497 {
5498 TCGv_i32 tmp;
5499 tmp = tcg_temp_new_i32();
5500 tcg_gen_extrl_i64_i32(tmp, val);
5501 store_reg(s, rlow, tmp);
5502 tmp = tcg_temp_new_i32();
5503 tcg_gen_extrh_i64_i32(tmp, val);
5504 store_reg(s, rhigh, tmp);
5505 }
5506
5507 /* Load and add a 64-bit value from a register pair. */
5508 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
5509 {
5510 TCGv_i64 tmp;
5511 TCGv_i32 tmpl;
5512 TCGv_i32 tmph;
5513
5514 /* Load 64-bit value rd:rn. */
5515 tmpl = load_reg(s, rlow);
5516 tmph = load_reg(s, rhigh);
5517 tmp = tcg_temp_new_i64();
5518 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
5519 tcg_temp_free_i32(tmpl);
5520 tcg_temp_free_i32(tmph);
5521 tcg_gen_add_i64(val, val, tmp);
5522 tcg_temp_free_i64(tmp);
5523 }
5524
5525 /* Set N and Z flags from hi|lo. */
5526 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
5527 {
5528 tcg_gen_mov_i32(cpu_NF, hi);
5529 tcg_gen_or_i32(cpu_ZF, lo, hi);
5530 }
5531
5532 /* Load/Store exclusive instructions are implemented by remembering
5533 * the value/address loaded, and seeing if these are the same
5534 * when the store is performed. This should be sufficient to implement
5535 * the architecturally mandated semantics, and avoids having to monitor
5536 * regular stores. The compare vs the remembered value is done during
5537 * the cmpxchg operation, but we must compare the addresses manually. */
5538 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
5539 TCGv_i32 addr, int size)
5540 {
5541 TCGv_i32 tmp = tcg_temp_new_i32();
5542 MemOp opc = size | MO_ALIGN | s->be_data;
5543
5544 s->is_ldex = true;
5545
5546 if (size == 3) {
5547 TCGv_i32 tmp2 = tcg_temp_new_i32();
5548 TCGv_i64 t64 = tcg_temp_new_i64();
5549
5550 /* For AArch32, architecturally the 32-bit word at the lowest
5551 * address is always Rt and the one at addr+4 is Rt2, even if
5552 * the CPU is big-endian. That means we don't want to do a
5553 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
5554 * for an architecturally 64-bit access, but instead do a
5555 * 64-bit access using MO_BE if appropriate and then split
5556 * the two halves.
5557 * This only makes a difference for BE32 user-mode, where
5558 * frob64() must not flip the two halves of the 64-bit data
5559 * but this code must treat BE32 user-mode like BE32 system.
5560 */
5561 TCGv taddr = gen_aa32_addr(s, addr, opc);
5562
5563 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
5564 tcg_temp_free(taddr);
5565 tcg_gen_mov_i64(cpu_exclusive_val, t64);
5566 if (s->be_data == MO_BE) {
5567 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
5568 } else {
5569 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
5570 }
5571 tcg_temp_free_i64(t64);
5572
5573 store_reg(s, rt2, tmp2);
5574 } else {
5575 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
5576 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
5577 }
5578
5579 store_reg(s, rt, tmp);
5580 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
5581 }
5582
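/*
 * Invalidate the exclusive monitor: the sentinel -1 can never match
 * the zero-extended 32-bit address recorded by gen_load_exclusive().
 */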
5583 static void gen_clrex(DisasContext *s)
5584 {
5585 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5586 }
5587
5588 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
5589 TCGv_i32 addr, int size)
5590 {
5591 TCGv_i32 t0, t1, t2;
5592 TCGv_i64 extaddr;
5593 TCGv taddr;
5594 TCGLabel *done_label;
5595 TCGLabel *fail_label;
5596 MemOp opc = size | MO_ALIGN | s->be_data;
5597
5598 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
5599 [addr] = {Rt};
5600 {Rd} = 0;
5601 } else {
5602 {Rd} = 1;
5603 } */
5604 fail_label = gen_new_label();
5605 done_label = gen_new_label();
5606 extaddr = tcg_temp_new_i64();
5607 tcg_gen_extu_i32_i64(extaddr, addr);
5608 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
5609 tcg_temp_free_i64(extaddr);
5610
5611 taddr = gen_aa32_addr(s, addr, opc);
5612 t0 = tcg_temp_new_i32();
5613 t1 = load_reg(s, rt);
5614 if (size == 3) {
5615 TCGv_i64 o64 = tcg_temp_new_i64();
5616 TCGv_i64 n64 = tcg_temp_new_i64();
5617
5618 t2 = load_reg(s, rt2);
5619 /* For AArch32, architecturally the 32-bit word at the lowest
5620 * address is always Rt and the one at addr+4 is Rt2, even if
5621 * the CPU is big-endian. Since we're going to treat this as a
5622 * single 64-bit BE store, we need to put the two halves in the
5623 * opposite order for BE to LE, so that they end up in the right
5624 * places.
5625 * We don't want gen_aa32_frob64() because that does the wrong
5626 * thing for BE32 usermode.
5627 */
5628 if (s->be_data == MO_BE) {
5629 tcg_gen_concat_i32_i64(n64, t2, t1);
5630 } else {
5631 tcg_gen_concat_i32_i64(n64, t1, t2);
5632 }
5633 tcg_temp_free_i32(t2);
5634
5635 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
5636 get_mem_index(s), opc);
5637 tcg_temp_free_i64(n64);
5638
5639 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
5640 tcg_gen_extrl_i64_i32(t0, o64);
5641
5642 tcg_temp_free_i64(o64);
5643 } else {
5644 t2 = tcg_temp_new_i32();
5645 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
5646 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
5647 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
5648 tcg_temp_free_i32(t2);
5649 }
5650 tcg_temp_free_i32(t1);
5651 tcg_temp_free(taddr);
5652 tcg_gen_mov_i32(cpu_R[rd], t0);
5653 tcg_temp_free_i32(t0);
5654 tcg_gen_br(done_label);
5655
5656 gen_set_label(fail_label);
5657 tcg_gen_movi_i32(cpu_R[rd], 1);
5658 gen_set_label(done_label);
5659 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5660 }
5661
5662 /* gen_srs:
5663 * @env: CPUARMState
5664 * @s: DisasContext
5665 * @mode: mode field from insn (which stack to store to)
5666 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
5667 * @writeback: true if writeback bit set
5668 *
5669 * Generate code for the SRS (Store Return State) insn.
5670 */
5671 static void gen_srs(DisasContext *s,
5672 uint32_t mode, uint32_t amode, bool writeback)
5673 {
5674 int32_t offset;
5675 TCGv_i32 addr, tmp;
5676 bool undef = false;
5677
5678 /* SRS is:
5679 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
5680 * and specified mode is monitor mode
5681 * - UNDEFINED in Hyp mode
5682 * - UNPREDICTABLE in User or System mode
5683 * - UNPREDICTABLE if the specified mode is:
5684 * -- not implemented
5685 * -- not a valid mode number
5686 * -- a mode that's at a higher exception level
5687 * -- Monitor, if we are Non-secure
5688 * For the UNPREDICTABLE cases we choose to UNDEF.
5689 */
5690 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5691 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
5692 return;
5693 }
5694
5695 if (s->current_el == 0 || s->current_el == 2) {
5696 undef = true;
5697 }
5698
5699 switch (mode) {
5700 case ARM_CPU_MODE_USR:
5701 case ARM_CPU_MODE_FIQ:
5702 case ARM_CPU_MODE_IRQ:
5703 case ARM_CPU_MODE_SVC:
5704 case ARM_CPU_MODE_ABT:
5705 case ARM_CPU_MODE_UND:
5706 case ARM_CPU_MODE_SYS:
5707 break;
5708 case ARM_CPU_MODE_HYP:
5709 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5710 undef = true;
5711 }
5712 break;
5713 case ARM_CPU_MODE_MON:
5714 /* No need to check specifically for "are we non-secure" because
5715 * we've already made EL0 UNDEF and handled the trap for S-EL1;
5716 * so if this isn't EL3 then we must be non-secure.
5717 */
5718 if (s->current_el != 3) {
5719 undef = true;
5720 }
5721 break;
5722 default:
5723 undef = true;
5724 }
5725
5726 if (undef) {
5727 unallocated_encoding(s);
5728 return;
5729 }
5730
5731 addr = tcg_temp_new_i32();
5732 tmp = tcg_const_i32(mode);
5733 /* get_r13_banked() will raise an exception if called from System mode */
5734 gen_set_condexec(s);
5735 gen_set_pc_im(s, s->pc_curr);
5736 gen_helper_get_r13_banked(addr, cpu_env, tmp);
5737 tcg_temp_free_i32(tmp);
5738 switch (amode) {
5739 case 0: /* DA */
5740 offset = -4;
5741 break;
5742 case 1: /* IA */
5743 offset = 0;
5744 break;
5745 case 2: /* DB */
5746 offset = -8;
5747 break;
5748 case 3: /* IB */
5749 offset = 4;
5750 break;
5751 default:
5752 abort();
5753 }
5754 tcg_gen_addi_i32(addr, addr, offset);
5755 tmp = load_reg(s, 14);
5756 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
5757 tcg_temp_free_i32(tmp);
5758 tmp = load_cpu_field(spsr);
5759 tcg_gen_addi_i32(addr, addr, 4);
5760 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
5761 tcg_temp_free_i32(tmp);
5762 if (writeback) {
5763 switch (amode) {
5764 case 0:
5765 offset = -8;
5766 break;
5767 case 1:
5768 offset = 4;
5769 break;
5770 case 2:
5771 offset = -4;
5772 break;
5773 case 3:
5774 offset = 0;
5775 break;
5776 default:
5777 abort();
5778 }
5779 tcg_gen_addi_i32(addr, addr, offset);
5780 tmp = tcg_const_i32(mode);
5781 gen_helper_set_r13_banked(cpu_env, tmp, addr);
5782 tcg_temp_free_i32(tmp);
5783 }
5784 tcg_temp_free_i32(addr);
5785 s->base.is_jmp = DISAS_UPDATE;
5786 }
5787
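/*
 * Worked example (an addition, not part of the upstream source): combining
 * the two amode switches above, relative to the starting banked SP the
 * generated stores and writeback are:
 *
 *   amode  LR stored at  SPSR stored at  final SP (if writeback)
 *   DA     SP - 4        SP              SP - 8
 *   IA     SP            SP + 4          SP + 8
 *   DB     SP - 8        SP - 4          SP - 8
 *   IB     SP + 4        SP + 8          SP + 8
 *
 * i.e. the writeback offsets are chosen so that the final SP steps over the
 * two stored words in the direction implied by the addressing mode.
 */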
5788 /* Generate a label used for skipping this instruction */
5789 static void arm_gen_condlabel(DisasContext *s)
5790 {
5791 if (!s->condjmp) {
5792 s->condlabel = gen_new_label();
5793 s->condjmp = 1;
5794 }
5795 }
5796
5797 /* Skip this instruction if the ARM condition is false */
5798 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5799 {
5800 arm_gen_condlabel(s);
5801 arm_gen_test_cc(cond ^ 1, s->condlabel);
5802 }
5803
5804
5805 /*
5806 * Constant expanders for the decoders.
5807 */
5808
5809 static int negate(DisasContext *s, int x)
5810 {
5811 return -x;
5812 }
5813
5814 static int plus_2(DisasContext *s, int x)
5815 {
5816 return x + 2;
5817 }
5818
5819 static int times_2(DisasContext *s, int x)
5820 {
5821 return x * 2;
5822 }
5823
5824 static int times_4(DisasContext *s, int x)
5825 {
5826 return x * 4;
5827 }
5828
5829 /* Return only the rotation part of T32ExpandImm. */
5830 static int t32_expandimm_rot(DisasContext *s, int x)
5831 {
5832 return x & 0xc00 ? extract32(x, 7, 5) : 0;
5833 }
5834
5835 /* Return the unrotated immediate from T32ExpandImm. */
5836 static int t32_expandimm_imm(DisasContext *s, int x)
5837 {
5838 int imm = extract32(x, 0, 8);
5839
5840 switch (extract32(x, 8, 4)) {
5841 case 0: /* XY */
5842 /* Nothing to do. */
5843 break;
5844 case 1: /* 00XY00XY */
5845 imm *= 0x00010001;
5846 break;
5847 case 2: /* XY00XY00 */
5848 imm *= 0x01000100;
5849 break;
5850 case 3: /* XYXYXYXY */
5851 imm *= 0x01010101;
5852 break;
5853 default:
5854 /* Rotated constant. */
5855 imm |= 0x80;
5856 break;
5857 }
5858 return imm;
5859 }
5860
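/*
 * Illustrative sketch (an addition, not part of the upstream source):
 * combining the two expanders above yields the full T32ExpandImm value.
 * The helper name below is hypothetical and exists only for illustration.
 */
static inline uint32_t t32_expand_imm_example(uint32_t imm12)
{
    uint32_t imm = t32_expandimm_imm(NULL, imm12);  /* unrotated value */
    int rot = t32_expandimm_rot(NULL, imm12);       /* rotation amount */

    return ror32(imm, rot);
}
/*
 * For example, imm12 = 0x4ab has rot = 9 and unrotated value 0xab
 * (0x80 | 0x2b), so the expanded immediate is ror32(0xab, 9) = 0x55800000.
 */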
5861 static int t32_branch24(DisasContext *s, int x)
5862 {
5863 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
5864 x ^= !(x < 0) * (3 << 21);
5865 /* Append the final zero. */
5866 return x << 1;
5867 }
5868
5869 static int t16_setflags(DisasContext *s)
5870 {
5871 return s->condexec_mask == 0;
5872 }
5873
5874 static int t16_push_list(DisasContext *s, int x)
5875 {
5876 return (x & 0xff) | (x & 0x100) << (14 - 8);
5877 }
5878
5879 static int t16_pop_list(DisasContext *s, int x)
5880 {
5881 return (x & 0xff) | (x & 0x100) << (15 - 8);
5882 }
5883
5884 /*
5885 * Include the generated decoders.
5886 */
5887
5888 #include "decode-a32.inc.c"
5889 #include "decode-a32-uncond.inc.c"
5890 #include "decode-t32.inc.c"
5891 #include "decode-t16.inc.c"
5892
5893 /* Helpers to swap operands for reverse-subtract. */
5894 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5895 {
5896 tcg_gen_sub_i32(dst, b, a);
5897 }
5898
5899 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5900 {
5901 gen_sub_CC(dst, b, a);
5902 }
5903
5904 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5905 {
5906 gen_sub_carry(dest, b, a);
5907 }
5908
5909 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5910 {
5911 gen_sbc_CC(dest, b, a);
5912 }
5913
5914 /*
5915 * Helpers for the data processing routines.
5916 *
5917 * After the computation store the results back.
5918 * This may be suppressed altogether (STREG_NONE), require a runtime
5919 * check against the stack limits (STREG_SP_CHECK), or generate an
5920 * exception return. Oh, or store into a register.
5921 *
5922 * Always return true, indicating success for a trans_* function.
5923 */
5924 typedef enum {
5925 STREG_NONE,
5926 STREG_NORMAL,
5927 STREG_SP_CHECK,
5928 STREG_EXC_RET,
5929 } StoreRegKind;
5930
5931 static bool store_reg_kind(DisasContext *s, int rd,
5932 TCGv_i32 val, StoreRegKind kind)
5933 {
5934 switch (kind) {
5935 case STREG_NONE:
5936 tcg_temp_free_i32(val);
5937 return true;
5938 case STREG_NORMAL:
5939 /* See ALUWritePC: Interworking only from a32 mode. */
5940 if (s->thumb) {
5941 store_reg(s, rd, val);
5942 } else {
5943 store_reg_bx(s, rd, val);
5944 }
5945 return true;
5946 case STREG_SP_CHECK:
5947 store_sp_checked(s, val);
5948 return true;
5949 case STREG_EXC_RET:
5950 gen_exception_return(s, val);
5951 return true;
5952 }
5953 g_assert_not_reached();
5954 }
5955
5956 /*
5957 * Data Processing (register)
5958 *
5959 * Operate, with set flags, one register source,
5960 * one immediate shifted register source, and a destination.
5961 */
5962 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5963 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5964 int logic_cc, StoreRegKind kind)
5965 {
5966 TCGv_i32 tmp1, tmp2;
5967
5968 tmp2 = load_reg(s, a->rm);
5969 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5970 tmp1 = load_reg(s, a->rn);
5971
5972 gen(tmp1, tmp1, tmp2);
5973 tcg_temp_free_i32(tmp2);
5974
5975 if (logic_cc) {
5976 gen_logic_CC(tmp1);
5977 }
5978 return store_reg_kind(s, a->rd, tmp1, kind);
5979 }
5980
5981 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5982 void (*gen)(TCGv_i32, TCGv_i32),
5983 int logic_cc, StoreRegKind kind)
5984 {
5985 TCGv_i32 tmp;
5986
5987 tmp = load_reg(s, a->rm);
5988 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5989
5990 gen(tmp, tmp);
5991 if (logic_cc) {
5992 gen_logic_CC(tmp);
5993 }
5994 return store_reg_kind(s, a->rd, tmp, kind);
5995 }
5996
5997 /*
5998 * Data-processing (register-shifted register)
5999 *
6000 * Operate, with set flags, one register source,
6001 * one register shifted register source, and a destination.
6002 */
6003 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
6004 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
6005 int logic_cc, StoreRegKind kind)
6006 {
6007 TCGv_i32 tmp1, tmp2;
6008
6009 tmp1 = load_reg(s, a->rs);
6010 tmp2 = load_reg(s, a->rm);
6011 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
6012 tmp1 = load_reg(s, a->rn);
6013
6014 gen(tmp1, tmp1, tmp2);
6015 tcg_temp_free_i32(tmp2);
6016
6017 if (logic_cc) {
6018 gen_logic_CC(tmp1);
6019 }
6020 return store_reg_kind(s, a->rd, tmp1, kind);
6021 }
6022
6023 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
6024 void (*gen)(TCGv_i32, TCGv_i32),
6025 int logic_cc, StoreRegKind kind)
6026 {
6027 TCGv_i32 tmp1, tmp2;
6028
6029 tmp1 = load_reg(s, a->rs);
6030 tmp2 = load_reg(s, a->rm);
6031 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
6032
6033 gen(tmp2, tmp2);
6034 if (logic_cc) {
6035 gen_logic_CC(tmp2);
6036 }
6037 return store_reg_kind(s, a->rd, tmp2, kind);
6038 }
6039
6040 /*
6041 * Data-processing (immediate)
6042 *
6043 * Operate, with set flags, one register source,
6044 * one rotated immediate, and a destination.
6045 *
6046 * Note that when logic_cc && a->rot, CF is set to the carry-out of the
6047 * rotation, i.e. the msb of the rotated immediate; this is why the
6048 * immediate must be passed in here in unrotated form.
6049 */
6050 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
6051 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
6052 int logic_cc, StoreRegKind kind)
6053 {
6054 TCGv_i32 tmp1, tmp2;
6055 uint32_t imm;
6056
6057 imm = ror32(a->imm, a->rot);
6058 if (logic_cc && a->rot) {
6059 tcg_gen_movi_i32(cpu_CF, imm >> 31);
6060 }
6061 tmp2 = tcg_const_i32(imm);
6062 tmp1 = load_reg(s, a->rn);
6063
6064 gen(tmp1, tmp1, tmp2);
6065 tcg_temp_free_i32(tmp2);
6066
6067 if (logic_cc) {
6068 gen_logic_CC(tmp1);
6069 }
6070 return store_reg_kind(s, a->rd, tmp1, kind);
6071 }
6072
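/*
 * Worked example (an addition, not part of the upstream source): with
 * a->imm = 2 and a->rot = 2 the rotated immediate is ror32(2, 2) =
 * 0x80000000, and because a->rot != 0 the generated code sets CF from
 * bit 31 of that value, i.e. CF = 1, the carry-out of the rotation.
 * This is the behaviour that requires the unrotated immediate here.
 */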
6073 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
6074 void (*gen)(TCGv_i32, TCGv_i32),
6075 int logic_cc, StoreRegKind kind)
6076 {
6077 TCGv_i32 tmp;
6078 uint32_t imm;
6079
6080 imm = ror32(a->imm, a->rot);
6081 if (logic_cc && a->rot) {
6082 tcg_gen_movi_i32(cpu_CF, imm >> 31);
6083 }
6084 tmp = tcg_const_i32(imm);
6085
6086 gen(tmp, tmp);
6087 if (logic_cc) {
6088 gen_logic_CC(tmp);
6089 }
6090 return store_reg_kind(s, a->rd, tmp, kind);
6091 }
6092
6093 #define DO_ANY3(NAME, OP, L, K) \
6094 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
6095 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
6096 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
6097 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
6098 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
6099 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
6100
6101 #define DO_ANY2(NAME, OP, L, K) \
6102 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
6103 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
6104 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
6105 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
6106 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
6107 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
6108
6109 #define DO_CMP2(NAME, OP, L) \
6110 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
6111 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
6112 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
6113 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
6114 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
6115 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
6116
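/*
 * Illustrative expansion (an addition, not part of the upstream source):
 * because OP, L and K are substituted textually inside functions that have
 * 's' and 'a' in scope, they may refer to both. For instance,
 * DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL) expands roughly to:
 *
 *     static bool trans_AND_rrri(DisasContext *s, arg_s_rrr_shi *a)
 *     {
 *         StoreRegKind k = STREG_NORMAL;
 *         return op_s_rrr_shi(s, a, tcg_gen_and_i32, a->s, k);
 *     }
 *
 * plus the analogous trans_AND_rrrr and trans_AND_rri functions.
 */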
6117 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
6118 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
6119 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
6120 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
6121
6122 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
6123 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
6124 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
6125 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
6126
6127 DO_CMP2(TST, tcg_gen_and_i32, true)
6128 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
6129 DO_CMP2(CMN, gen_add_CC, false)
6130 DO_CMP2(CMP, gen_sub_CC, false)
6131
6132 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
6133 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
6134
6135 /*
6136 * Note that for the computation of StoreRegKind (the K argument) we may
6137 * return out of the middle of the functions expanded by DO_ANY3, and that
6138 * we modify a->s via that argument before the OP expression is evaluated.
6139 */
6140 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
6141 ({
6142 StoreRegKind ret = STREG_NORMAL;
6143 if (a->rd == 15 && a->s) {
6144 /*
6145 * See ALUExceptionReturn:
6146 * In User mode, UNPREDICTABLE; we choose UNDEF.
6147 * In Hyp mode, UNDEFINED.
6148 */
6149 if (IS_USER(s) || s->current_el == 2) {
6150 unallocated_encoding(s);
6151 return true;
6152 }
6153 /* There is no writeback of nzcv to PSTATE. */
6154 a->s = 0;
6155 ret = STREG_EXC_RET;
6156 } else if (a->rd == 13 && a->rn == 13) {
6157 ret = STREG_SP_CHECK;
6158 }
6159 ret;
6160 }))
6161
6162 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
6163 ({
6164 StoreRegKind ret = STREG_NORMAL;
6165 if (a->rd == 15 && a->s) {
6166 /*
6167 * See ALUExceptionReturn:
6168 * In User mode, UNPREDICTABLE; we choose UNDEF.
6169 * In Hyp mode, UNDEFINED.
6170 */
6171 if (IS_USER(s) || s->current_el == 2) {
6172 unallocated_encoding(s);
6173 return true;
6174 }
6175 /* There is no writeback of nzcv to PSTATE. */
6176 a->s = 0;
6177 ret = STREG_EXC_RET;
6178 } else if (a->rd == 13) {
6179 ret = STREG_SP_CHECK;
6180 }
6181 ret;
6182 }))
6183
6184 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
6185
6186 /*
6187 * ORN is only available with T32, so there is no register-shifted-register
6188 * form of the insn. Using the DO_ANY3 macro would create an unused function.
6189 */
6190 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
6191 {
6192 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
6193 }
6194
6195 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
6196 {
6197 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
6198 }
6199
6200 #undef DO_ANY3
6201 #undef DO_ANY2
6202 #undef DO_CMP2
6203
6204 static bool trans_ADR(DisasContext *s, arg_ri *a)
6205 {
6206 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
6207 return true;
6208 }
6209
6210 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
6211 {
6212 TCGv_i32 tmp;
6213
6214 if (!ENABLE_ARCH_6T2) {
6215 return false;
6216 }
6217
6218 tmp = tcg_const_i32(a->imm);
6219 store_reg(s, a->rd, tmp);
6220 return true;
6221 }
6222
6223 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
6224 {
6225 TCGv_i32 tmp;
6226
6227 if (!ENABLE_ARCH_6T2) {
6228 return false;
6229 }
6230
6231 tmp = load_reg(s, a->rd);
6232 tcg_gen_ext16u_i32(tmp, tmp);
6233 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
6234 store_reg(s, a->rd, tmp);
6235 return true;
6236 }
6237
6238 /*
6239 * Multiply and multiply accumulate
6240 */
6241
6242 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
6243 {
6244 TCGv_i32 t1, t2;
6245
6246 t1 = load_reg(s, a->rn);
6247 t2 = load_reg(s, a->rm);
6248 tcg_gen_mul_i32(t1, t1, t2);
6249 tcg_temp_free_i32(t2);
6250 if (add) {
6251 t2 = load_reg(s, a->ra);
6252 tcg_gen_add_i32(t1, t1, t2);
6253 tcg_temp_free_i32(t2);
6254 }
6255 if (a->s) {
6256 gen_logic_CC(t1);
6257 }
6258 store_reg(s, a->rd, t1);
6259 return true;
6260 }
6261
6262 static bool trans_MUL(DisasContext *s, arg_MUL *a)
6263 {
6264 return op_mla(s, a, false);
6265 }
6266
6267 static bool trans_MLA(DisasContext *s, arg_MLA *a)
6268 {
6269 return op_mla(s, a, true);
6270 }
6271
6272 static bool trans_MLS(DisasContext *s, arg_MLS *a)
6273 {
6274 TCGv_i32 t1, t2;
6275
6276 if (!ENABLE_ARCH_6T2) {
6277 return false;
6278 }
6279 t1 = load_reg(s, a->rn);
6280 t2 = load_reg(s, a->rm);
6281 tcg_gen_mul_i32(t1, t1, t2);
6282 tcg_temp_free_i32(t2);
6283 t2 = load_reg(s, a->ra);
6284 tcg_gen_sub_i32(t1, t2, t1);
6285 tcg_temp_free_i32(t2);
6286 store_reg(s, a->rd, t1);
6287 return true;
6288 }
6289
6290 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
6291 {
6292 TCGv_i32 t0, t1, t2, t3;
6293
6294 t0 = load_reg(s, a->rm);
6295 t1 = load_reg(s, a->rn);
6296 if (uns) {
6297 tcg_gen_mulu2_i32(t0, t1, t0, t1);
6298 } else {
6299 tcg_gen_muls2_i32(t0, t1, t0, t1);
6300 }
6301 if (add) {
6302 t2 = load_reg(s, a->ra);
6303 t3 = load_reg(s, a->rd);
6304 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
6305 tcg_temp_free_i32(t2);
6306 tcg_temp_free_i32(t3);
6307 }
6308 if (a->s) {
6309 gen_logicq_cc(t0, t1);
6310 }
6311 store_reg(s, a->ra, t0);
6312 store_reg(s, a->rd, t1);
6313 return true;
6314 }
6315
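/*
 * Illustrative note (an addition, not part of the upstream source): the
 * mulu2/muls2 + add2 sequence above computes, in 64-bit terms,
 *
 *     rd:ra = rm * rn + (add ? rd:ra : 0)
 *
 * with the product zero- or sign-extended according to 'uns'; t0 holds the
 * low half (written back to ra) and t1 the high half (written back to rd).
 */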
6316 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
6317 {
6318 return op_mlal(s, a, true, false);
6319 }
6320
6321 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
6322 {
6323 return op_mlal(s, a, false, false);
6324 }
6325
6326 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
6327 {
6328 return op_mlal(s, a, true, true);
6329 }
6330
6331 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
6332 {
6333 return op_mlal(s, a, false, true);
6334 }
6335
6336 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
6337 {
6338 TCGv_i32 t0, t1, t2, zero;
6339
6340 if (s->thumb
6341 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6342 : !ENABLE_ARCH_6) {
6343 return false;
6344 }
6345
6346 t0 = load_reg(s, a->rm);
6347 t1 = load_reg(s, a->rn);
6348 tcg_gen_mulu2_i32(t0, t1, t0, t1);
6349 zero = tcg_const_i32(0);
6350 t2 = load_reg(s, a->ra);
6351 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
6352 tcg_temp_free_i32(t2);
6353 t2 = load_reg(s, a->rd);
6354 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
6355 tcg_temp_free_i32(t2);
6356 tcg_temp_free_i32(zero);
6357 store_reg(s, a->ra, t0);
6358 store_reg(s, a->rd, t1);
6359 return true;
6360 }
6361
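/*
 * Note (an addition, not part of the upstream source): UMAAL computes
 * rd:ra = rn * rm + ra + rd, and the two additions above cannot overflow
 * 64 bits, since the maximum value is
 * (2^32 - 1)^2 + 2 * (2^32 - 1) = 2^64 - 1.
 */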
6362 /*
6363 * Saturating addition and subtraction
6364 */
6365
6366 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
6367 {
6368 TCGv_i32 t0, t1;
6369
6370 if (s->thumb
6371 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6372 : !ENABLE_ARCH_5TE) {
6373 return false;
6374 }
6375
6376 t0 = load_reg(s, a->rm);
6377 t1 = load_reg(s, a->rn);
6378 if (doub) {
6379 gen_helper_add_saturate(t1, cpu_env, t1, t1);
6380 }
6381 if (add) {
6382 gen_helper_add_saturate(t0, cpu_env, t0, t1);
6383 } else {
6384 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
6385 }
6386 tcg_temp_free_i32(t1);
6387 store_reg(s, a->rd, t0);
6388 return true;
6389 }
6390
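/*
 * Worked example (an addition, not part of the upstream source): for QDADD
 * with rn = 0x60000000 and rm = 0x10000000, the doubling step saturates
 * 2 * rn to 0x7fffffff (setting QF), and the final addition
 * 0x10000000 + 0x7fffffff saturates again to 0x7fffffff.
 */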
6391 #define DO_QADDSUB(NAME, ADD, DOUB) \
6392 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
6393 { \
6394 return op_qaddsub(s, a, ADD, DOUB); \
6395 }
6396
6397 DO_QADDSUB(QADD, true, false)
6398 DO_QADDSUB(QSUB, false, false)
6399 DO_QADDSUB(QDADD, true, true)
6400 DO_QADDSUB(QDSUB, false, true)
6401
6402 #undef DO_QADDSUB
6403
6404 /*
6405 * Halfword multiply and multiply accumulate
6406 */
6407
6408 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
6409 int add_long, bool nt, bool mt)
6410 {
6411 TCGv_i32 t0, t1, tl, th;
6412
6413 if (s->thumb
6414 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6415 : !ENABLE_ARCH_5TE) {
6416 return false;
6417 }
6418
6419 t0 = load_reg(s, a->rn);
6420 t1 = load_reg(s, a->rm);
6421 gen_mulxy(t0, t1, nt, mt);
6422 tcg_temp_free_i32(t1);
6423
6424 switch (add_long) {
6425 case 0:
6426 store_reg(s, a->rd, t0);
6427 break;
6428 case 1:
6429 t1 = load_reg(s, a->ra);
6430 gen_helper_add_setq(t0, cpu_env, t0, t1);
6431 tcg_temp_free_i32(t1);
6432 store_reg(s, a->rd, t0);
6433 break;
6434 case 2:
6435 tl = load_reg(s, a->ra);
6436 th = load_reg(s, a->rd);
6437 /* Sign-extend the 32-bit product to 64 bits. */
6438 t1 = tcg_temp_new_i32();
6439 tcg_gen_sari_i32(t1, t0, 31);
6440 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
6441 tcg_temp_free_i32(t0);
6442 tcg_temp_free_i32(t1);
6443 store_reg(s, a->ra, tl);
6444 store_reg(s, a->rd, th);
6445 break;
6446 default:
6447 g_assert_not_reached();
6448 }
6449 return true;
6450 }
6451
6452 #define DO_SMLAX(NAME, add, nt, mt) \
6453 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
6454 { \
6455 return op_smlaxxx(s, a, add, nt, mt); \
6456 }
6457
6458 DO_SMLAX(SMULBB, 0, 0, 0)
6459 DO_SMLAX(SMULBT, 0, 0, 1)
6460 DO_SMLAX(SMULTB, 0, 1, 0)
6461 DO_SMLAX(SMULTT, 0, 1, 1)
6462
6463 DO_SMLAX(SMLABB, 1, 0, 0)
6464 DO_SMLAX(SMLABT, 1, 0, 1)
6465 DO_SMLAX(SMLATB, 1, 1, 0)
6466 DO_SMLAX(SMLATT, 1, 1, 1)
6467
6468 DO_SMLAX(SMLALBB, 2, 0, 0)
6469 DO_SMLAX(SMLALBT, 2, 0, 1)
6470 DO_SMLAX(SMLALTB, 2, 1, 0)
6471 DO_SMLAX(SMLALTT, 2, 1, 1)
6472
6473 #undef DO_SMLAX
6474
6475 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6476 {
6477 TCGv_i32 t0, t1;
6478
6479 if (!ENABLE_ARCH_5TE) {
6480 return false;
6481 }
6482
6483 t0 = load_reg(s, a->rn);
6484 t1 = load_reg(s, a->rm);
6485 /*
6486 * Since the nominal result is product<47:16>, shift the 16-bit
6487 * input up by 16 bits, so that the result is at product<63:32>.
6488 */
6489 if (mt) {
6490 tcg_gen_andi_i32(t1, t1, 0xffff0000);
6491 } else {
6492 tcg_gen_shli_i32(t1, t1, 16);
6493 }
6494 tcg_gen_muls2_i32(t0, t1, t0, t1);
6495 tcg_temp_free_i32(t0);
6496 if (add) {
6497 t0 = load_reg(s, a->ra);
6498 gen_helper_add_setq(t1, cpu_env, t1, t0);
6499 tcg_temp_free_i32(t0);
6500 }
6501 store_reg(s, a->rd, t1);
6502 return true;
6503 }
6504
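/*
 * Note (an addition, not part of the upstream source): the shift-by-16
 * trick above relies on the identity
 *
 *     (rn * (x << 16)) >> 32  ==  (rn * x) >> 16
 *
 * for a signed 16-bit x, so the high half of the 32x32->64 multiplication
 * is exactly product<47:16> of the nominal result.
 */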
6505 #define DO_SMLAWX(NAME, add, mt) \
6506 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
6507 { \
6508 return op_smlawx(s, a, add, mt); \
6509 }
6510
6511 DO_SMLAWX(SMULWB, 0, 0)
6512 DO_SMLAWX(SMULWT, 0, 1)
6513 DO_SMLAWX(SMLAWB, 1, 0)
6514 DO_SMLAWX(SMLAWT, 1, 1)
6515
6516 #undef DO_SMLAWX
6517
6518 /*
6519 * MSR (immediate) and hints
6520 */
6521
6522 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6523 {
6524 /*
6525 * When running single-threaded TCG code, use the helper to ensure that
6526 * the next round-robin scheduled vCPU gets a crack. When running in
6527 * MTTCG we don't generate jumps to the helper as it won't affect the
6528 * scheduling of other vCPUs.
6529 */
6530 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6531 gen_set_pc_im(s, s->base.pc_next);
6532 s->base.is_jmp = DISAS_YIELD;
6533 }
6534 return true;
6535 }
6536
6537 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6538 {
6539 /*
6540 * When running single-threaded TCG code, use the helper to ensure that
6541 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
6542 * just skip this instruction. Currently the SEV/SEVL instructions,
6543 * which are *one* of many ways to wake the CPU from WFE, are not
6544 * implemented so we can't sleep like WFI does.
6545 */
6546 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6547 gen_set_pc_im(s, s->base.pc_next);
6548 s->base.is_jmp = DISAS_WFE;
6549 }
6550 return true;
6551 }
6552
6553 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6554 {
6555 /* For WFI, halt the vCPU until an IRQ. */
6556 gen_set_pc_im(s, s->base.pc_next);
6557 s->base.is_jmp = DISAS_WFI;
6558 return true;
6559 }
6560
6561 static bool trans_NOP(DisasContext *s, arg_NOP *a)
6562 {
6563 return true;
6564 }
6565
6566 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6567 {
6568 uint32_t val = ror32(a->imm, a->rot * 2);
6569 uint32_t mask = msr_mask(s, a->mask, a->r);
6570
6571 if (gen_set_psr_im(s, mask, a->r, val)) {
6572 unallocated_encoding(s);
6573 }
6574 return true;
6575 }
6576
6577 /*
6578 * Cyclic Redundancy Check
6579 */
6580
6581 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6582 {
6583 TCGv_i32 t1, t2, t3;
6584
6585 if (!dc_isar_feature(aa32_crc32, s)) {
6586 return false;
6587 }
6588
6589 t1 = load_reg(s, a->rn);
6590 t2 = load_reg(s, a->rm);
6591 switch (sz) {
6592 case MO_8:
6593 gen_uxtb(t2);
6594 break;
6595 case MO_16:
6596 gen_uxth(t2);
6597 break;
6598 case MO_32:
6599 break;
6600 default:
6601 g_assert_not_reached();
6602 }
6603 t3 = tcg_const_i32(1 << sz);
6604 if (c) {
6605 gen_helper_crc32c(t1, t1, t2, t3);
6606 } else {
6607 gen_helper_crc32(t1, t1, t2, t3);
6608 }
6609 tcg_temp_free_i32(t2);
6610 tcg_temp_free_i32(t3);
6611 store_reg(s, a->rd, t1);
6612 return true;
6613 }
6614
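/*
 * Note (an addition, not part of the upstream source): since MO_8, MO_16
 * and MO_32 are 0, 1 and 2, the constant 1 << sz passed as the third helper
 * argument is the operand width in bytes (1, 2 or 4); e.g. CRC32H generates
 * a call equivalent to crc32(acc, uxth(rm), 2).
 */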
6615 #define DO_CRC32(NAME, c, sz) \
6616 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
6617 { return op_crc32(s, a, c, sz); }
6618
6619 DO_CRC32(CRC32B, false, MO_8)
6620 DO_CRC32(CRC32H, false, MO_16)
6621 DO_CRC32(CRC32W, false, MO_32)
6622 DO_CRC32(CRC32CB, true, MO_8)
6623 DO_CRC32(CRC32CH, true, MO_16)
6624 DO_CRC32(CRC32CW, true, MO_32)
6625
6626 #undef DO_CRC32
6627
6628 /*
6629 * Miscellaneous instructions
6630 */
6631
6632 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6633 {
6634 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6635 return false;
6636 }
6637 gen_mrs_banked(s, a->r, a->sysm, a->rd);
6638 return true;
6639 }
6640
6641 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6642 {
6643 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6644 return false;
6645 }
6646 gen_msr_banked(s, a->r, a->sysm, a->rn);
6647 return true;
6648 }
6649
6650 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6651 {
6652 TCGv_i32 tmp;
6653
6654 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6655 return false;
6656 }
6657 if (a->r) {
6658 if (IS_USER(s)) {
6659 unallocated_encoding(s);
6660 return true;
6661 }
6662 tmp = load_cpu_field(spsr);
6663 } else {
6664 tmp = tcg_temp_new_i32();
6665 gen_helper_cpsr_read(tmp, cpu_env);
6666 }
6667 store_reg(s, a->rd, tmp);
6668 return true;
6669 }
6670
6671 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6672 {
6673 TCGv_i32 tmp;
6674 uint32_t mask = msr_mask(s, a->mask, a->r);
6675
6676 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6677 return false;
6678 }
6679 tmp = load_reg(s, a->rn);
6680 if (gen_set_psr(s, mask, a->r, tmp)) {
6681 unallocated_encoding(s);
6682 }
6683 return true;
6684 }
6685
6686 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6687 {
6688 TCGv_i32 tmp;
6689
6690 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6691 return false;
6692 }
6693 tmp = tcg_const_i32(a->sysm);
6694 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
6695 store_reg(s, a->rd, tmp);
6696 return true;
6697 }
6698
6699 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6700 {
6701 TCGv_i32 addr, reg;
6702
6703 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6704 return false;
6705 }
6706 addr = tcg_const_i32((a->mask << 10) | a->sysm);
6707 reg = load_reg(s, a->rn);
6708 gen_helper_v7m_msr(cpu_env, addr, reg);
6709 tcg_temp_free_i32(addr);
6710 tcg_temp_free_i32(reg);
6711 /* If we wrote to CONTROL, the EL might have changed */
6712 gen_helper_rebuild_hflags_m32_newel(cpu_env);
6713 gen_lookup_tb(s);
6714 return true;
6715 }
6716
6717 static bool trans_BX(DisasContext *s, arg_BX *a)
6718 {
6719 if (!ENABLE_ARCH_4T) {
6720 return false;
6721 }
6722 gen_bx_excret(s, load_reg(s, a->rm));
6723 return true;
6724 }
6725
6726 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6727 {
6728 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6729 return false;
6730 }
6731 /* Trivial implementation equivalent to bx. */
6732 gen_bx(s, load_reg(s, a->rm));
6733 return true;
6734 }
6735
6736 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6737 {
6738 TCGv_i32 tmp;
6739
6740 if (!ENABLE_ARCH_5) {
6741 return false;
6742 }
6743 tmp = load_reg(s, a->rm);
6744 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
6745 gen_bx(s, tmp);
6746 return true;
6747 }
6748
6749 /*
6750 * BXNS/BLXNS: only exist for v8M with the security extensions,
6751 * and always UNDEF if NonSecure. We don't implement these in
6752 * the user-only mode either (in theory you can use them from
6753 * Secure User mode but they are too tied in to system emulation).
6754 */
6755 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6756 {
6757 if (!s->v8m_secure || IS_USER_ONLY) {
6758 unallocated_encoding(s);
6759 } else {
6760 gen_bxns(s, a->rm);
6761 }
6762 return true;
6763 }
6764
6765 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6766 {
6767 if (!s->v8m_secure || IS_USER_ONLY) {
6768 unallocated_encoding(s);
6769 } else {
6770 gen_blxns(s, a->rm);
6771 }
6772 return true;
6773 }
6774
6775 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6776 {
6777 TCGv_i32 tmp;
6778
6779 if (!ENABLE_ARCH_5) {
6780 return false;
6781 }
6782 tmp = load_reg(s, a->rm);
6783 tcg_gen_clzi_i32(tmp, tmp, 32);
6784 store_reg(s, a->rd, tmp);
6785 return true;
6786 }
6787
6788 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6789 {
6790 TCGv_i32 tmp;
6791
6792 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6793 return false;
6794 }
6795 if (IS_USER(s)) {
6796 unallocated_encoding(s);
6797 return true;
6798 }
6799 if (s->current_el == 2) {
6800 /* ERET from Hyp uses ELR_Hyp, not LR */
6801 tmp = load_cpu_field(elr_el[2]);
6802 } else {
6803 tmp = load_reg(s, 14);
6804 }
6805 gen_exception_return(s, tmp);
6806 return true;
6807 }
6808
6809 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6810 {
6811 gen_hlt(s, a->imm);
6812 return true;
6813 }
6814
6815 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6816 {
6817 if (!ENABLE_ARCH_5) {
6818 return false;
6819 }
6820 if (arm_dc_feature(s, ARM_FEATURE_M) &&
6821 semihosting_enabled() &&
6822 #ifndef CONFIG_USER_ONLY
6823 !IS_USER(s) &&
6824 #endif
6825 (a->imm == 0xab)) {
6826 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
6827 } else {
6828 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6829 }
6830 return true;
6831 }
6832
6833 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6834 {
6835 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6836 return false;
6837 }
6838 if (IS_USER(s)) {
6839 unallocated_encoding(s);
6840 } else {
6841 gen_hvc(s, a->imm);
6842 }
6843 return true;
6844 }
6845
6846 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6847 {
6848 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6849 return false;
6850 }
6851 if (IS_USER(s)) {
6852 unallocated_encoding(s);
6853 } else {
6854 gen_smc(s);
6855 }
6856 return true;
6857 }
6858
6859 static bool trans_SG(DisasContext *s, arg_SG *a)
6860 {
6861 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6862 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6863 return false;
6864 }
6865 /*
6866 * SG (v8M only)
6867 * The bulk of the behaviour for this instruction is implemented
6868 * in v7m_handle_execute_nsc(), which deals with the insn when
6869 * it is executed by a CPU in non-secure state from memory
6870 * which is Secure & NonSecure-Callable.
6871 * Here we only need to handle the remaining cases:
6872 * * in NS memory (including the "security extension not
6873 * implemented" case) : NOP
6874 * * in S memory but CPU already secure (clear IT bits)
6875 * We know that the attribute for the memory this insn is
6876 * in must match the current CPU state, because otherwise
6877 * get_phys_addr_pmsav8 would have generated an exception.
6878 */
6879 if (s->v8m_secure) {
6880 /* Like the IT insn, we don't need to generate any code */
6881 s->condexec_cond = 0;
6882 s->condexec_mask = 0;
6883 }
6884 return true;
6885 }
6886
6887 static bool trans_TT(DisasContext *s, arg_TT *a)
6888 {
6889 TCGv_i32 addr, tmp;
6890
6891 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6892 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6893 return false;
6894 }
6895 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6896 /* We UNDEF for these UNPREDICTABLE cases */
6897 unallocated_encoding(s);
6898 return true;
6899 }
6900 if (a->A && !s->v8m_secure) {
6901 /* This case is UNDEFINED. */
6902 unallocated_encoding(s);
6903 return true;
6904 }
6905
6906 addr = load_reg(s, a->rn);
6907 tmp = tcg_const_i32((a->A << 1) | a->T);
6908 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
6909 tcg_temp_free_i32(addr);
6910 store_reg(s, a->rd, tmp);
6911 return true;
6912 }
6913
6914 /*
6915 * Load/store register index
6916 */
6917
6918 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6919 {
6920 ISSInfo ret;
6921
6922 /* ISS not valid if writeback */
6923 if (p && !w) {
6924 ret = rd;
6925 if (s->base.pc_next - s->pc_curr == 2) {
6926 ret |= ISSIs16Bit;
6927 }
6928 } else {
6929 ret = ISSInvalid;
6930 }
6931 return ret;
6932 }
6933
6934 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6935 {
6936 TCGv_i32 addr = load_reg(s, a->rn);
6937
6938 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6939 gen_helper_v8m_stackcheck(cpu_env, addr);
6940 }
6941
6942 if (a->p) {
6943 TCGv_i32 ofs = load_reg(s, a->rm);
6944 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6945 if (a->u) {
6946 tcg_gen_add_i32(addr, addr, ofs);
6947 } else {
6948 tcg_gen_sub_i32(addr, addr, ofs);
6949 }
6950 tcg_temp_free_i32(ofs);
6951 }
6952 return addr;
6953 }
6954
6955 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6956 TCGv_i32 addr, int address_offset)
6957 {
6958 if (!a->p) {
6959 TCGv_i32 ofs = load_reg(s, a->rm);
6960 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6961 if (a->u) {
6962 tcg_gen_add_i32(addr, addr, ofs);
6963 } else {
6964 tcg_gen_sub_i32(addr, addr, ofs);
6965 }
6966 tcg_temp_free_i32(ofs);
6967 } else if (!a->w) {
6968 tcg_temp_free_i32(addr);
6969 return;
6970 }
6971 tcg_gen_addi_i32(addr, addr, address_offset);
6972 store_reg(s, a->rn, addr);
6973 }
6974
6975 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6976 MemOp mop, int mem_idx)
6977 {
6978 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6979 TCGv_i32 addr, tmp;
6980
6981 addr = op_addr_rr_pre(s, a);
6982
6983 tmp = tcg_temp_new_i32();
6984 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6985 disas_set_da_iss(s, mop, issinfo);
6986
6987 /*
6988 * Perform base writeback before the loaded value to
6989 * ensure correct behavior with overlapping index registers.
6990 */
6991 op_addr_rr_post(s, a, addr, 0);
6992 store_reg_from_load(s, a->rt, tmp);
6993 return true;
6994 }
6995
6996 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6997 MemOp mop, int mem_idx)
6998 {
6999 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
7000 TCGv_i32 addr, tmp;
7001
7002 addr = op_addr_rr_pre(s, a);
7003
7004 tmp = load_reg(s, a->rt);
7005 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
7006 disas_set_da_iss(s, mop, issinfo);
7007 tcg_temp_free_i32(tmp);
7008
7009 op_addr_rr_post(s, a, addr, 0);
7010 return true;
7011 }
7012
7013 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
7014 {
7015 int mem_idx = get_mem_index(s);
7016 TCGv_i32 addr, tmp;
7017
7018 if (!ENABLE_ARCH_5TE) {
7019 return false;
7020 }
7021 if (a->rt & 1) {
7022 unallocated_encoding(s);
7023 return true;
7024 }
7025 addr = op_addr_rr_pre(s, a);
7026
7027 tmp = tcg_temp_new_i32();
7028 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7029 store_reg(s, a->rt, tmp);
7030
7031 tcg_gen_addi_i32(addr, addr, 4);
7032
7033 tmp = tcg_temp_new_i32();
7034 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7035 store_reg(s, a->rt + 1, tmp);
7036
7037 /* LDRD w/ base writeback is undefined if the registers overlap. */
7038 op_addr_rr_post(s, a, addr, -4);
7039 return true;
7040 }
7041
7042 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
7043 {
7044 int mem_idx = get_mem_index(s);
7045 TCGv_i32 addr, tmp;
7046
7047 if (!ENABLE_ARCH_5TE) {
7048 return false;
7049 }
7050 if (a->rt & 1) {
7051 unallocated_encoding(s);
7052 return true;
7053 }
7054 addr = op_addr_rr_pre(s, a);
7055
7056 tmp = load_reg(s, a->rt);
7057 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7058 tcg_temp_free_i32(tmp);
7059
7060 tcg_gen_addi_i32(addr, addr, 4);
7061
7062 tmp = load_reg(s, a->rt + 1);
7063 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7064 tcg_temp_free_i32(tmp);
7065
7066 op_addr_rr_post(s, a, addr, -4);
7067 return true;
7068 }
7069
7070 /*
7071 * Load/store immediate index
7072 */
7073
7074 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
7075 {
7076 int ofs = a->imm;
7077
7078 if (!a->u) {
7079 ofs = -ofs;
7080 }
7081
7082 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7083 /*
7084 * Stackcheck. Here we know 'addr' is the current SP;
7085 * U is set if we're moving SP up, else down. It is
7086 * UNKNOWN whether the limit check triggers when SP starts
7087 * below the limit and ends up above it; we chose to do so.
7088 */
7089 if (!a->u) {
7090 TCGv_i32 newsp = tcg_temp_new_i32();
7091 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
7092 gen_helper_v8m_stackcheck(cpu_env, newsp);
7093 tcg_temp_free_i32(newsp);
7094 } else {
7095 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
7096 }
7097 }
7098
7099 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
7100 }
7101
7102 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
7103 TCGv_i32 addr, int address_offset)
7104 {
7105 if (!a->p) {
7106 if (a->u) {
7107 address_offset += a->imm;
7108 } else {
7109 address_offset -= a->imm;
7110 }
7111 } else if (!a->w) {
7112 tcg_temp_free_i32(addr);
7113 return;
7114 }
7115 tcg_gen_addi_i32(addr, addr, address_offset);
7116 store_reg(s, a->rn, addr);
7117 }
7118
7119 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
7120 MemOp mop, int mem_idx)
7121 {
7122 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
7123 TCGv_i32 addr, tmp;
7124
7125 addr = op_addr_ri_pre(s, a);
7126
7127 tmp = tcg_temp_new_i32();
7128 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
7129 disas_set_da_iss(s, mop, issinfo);
7130
7131 /*
7132 * Perform base writeback before the loaded value to
7133 * ensure correct behavior with overlapping index registers.
7134 */
7135 op_addr_ri_post(s, a, addr, 0);
7136 store_reg_from_load(s, a->rt, tmp);
7137 return true;
7138 }
7139
7140 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
7141 MemOp mop, int mem_idx)
7142 {
7143 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
7144 TCGv_i32 addr, tmp;
7145
7146 addr = op_addr_ri_pre(s, a);
7147
7148 tmp = load_reg(s, a->rt);
7149 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
7150 disas_set_da_iss(s, mop, issinfo);
7151 tcg_temp_free_i32(tmp);
7152
7153 op_addr_ri_post(s, a, addr, 0);
7154 return true;
7155 }
7156
7157 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
7158 {
7159 int mem_idx = get_mem_index(s);
7160 TCGv_i32 addr, tmp;
7161
7162 addr = op_addr_ri_pre(s, a);
7163
7164 tmp = tcg_temp_new_i32();
7165 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7166 store_reg(s, a->rt, tmp);
7167
7168 tcg_gen_addi_i32(addr, addr, 4);
7169
7170 tmp = tcg_temp_new_i32();
7171 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7172 store_reg(s, rt2, tmp);
7173
7174 /* LDRD w/ base writeback is undefined if the registers overlap. */
7175 op_addr_ri_post(s, a, addr, -4);
7176 return true;
7177 }
7178
7179 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
7180 {
7181 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
7182 return false;
7183 }
7184 return op_ldrd_ri(s, a, a->rt + 1);
7185 }
7186
7187 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
7188 {
7189 arg_ldst_ri b = {
7190 .u = a->u, .w = a->w, .p = a->p,
7191 .rn = a->rn, .rt = a->rt, .imm = a->imm
7192 };
7193 return op_ldrd_ri(s, &b, a->rt2);
7194 }
7195
7196 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
7197 {
7198 int mem_idx = get_mem_index(s);
7199 TCGv_i32 addr, tmp;
7200
7201 addr = op_addr_ri_pre(s, a);
7202
7203 tmp = load_reg(s, a->rt);
7204 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7205 tcg_temp_free_i32(tmp);
7206
7207 tcg_gen_addi_i32(addr, addr, 4);
7208
7209 tmp = load_reg(s, rt2);
7210 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7211 tcg_temp_free_i32(tmp);
7212
7213 op_addr_ri_post(s, a, addr, -4);
7214 return true;
7215 }
7216
7217 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
7218 {
7219 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
7220 return false;
7221 }
7222 return op_strd_ri(s, a, a->rt + 1);
7223 }
7224
7225 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
7226 {
7227 arg_ldst_ri b = {
7228 .u = a->u, .w = a->w, .p = a->p,
7229 .rn = a->rn, .rt = a->rt, .imm = a->imm
7230 };
7231 return op_strd_ri(s, &b, a->rt2);
7232 }
7233
7234 #define DO_LDST(NAME, WHICH, MEMOP) \
7235 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
7236 { \
7237 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
7238 } \
7239 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
7240 { \
7241 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
7242 } \
7243 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
7244 { \
7245 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
7246 } \
7247 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
7248 { \
7249 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
7250 }
7251
7252 DO_LDST(LDR, load, MO_UL)
7253 DO_LDST(LDRB, load, MO_UB)
7254 DO_LDST(LDRH, load, MO_UW)
7255 DO_LDST(LDRSB, load, MO_SB)
7256 DO_LDST(LDRSH, load, MO_SW)
7257
7258 DO_LDST(STR, store, MO_UL)
7259 DO_LDST(STRB, store, MO_UB)
7260 DO_LDST(STRH, store, MO_UW)
7261
7262 #undef DO_LDST
7263
7264 /*
7265 * Synchronization primitives
7266 */
7267
7268 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
7269 {
7270 TCGv_i32 addr, tmp;
7271 TCGv taddr;
7272
7273 opc |= s->be_data;
7274 addr = load_reg(s, a->rn);
7275 taddr = gen_aa32_addr(s, addr, opc);
7276 tcg_temp_free_i32(addr);
7277
7278 tmp = load_reg(s, a->rt2);
7279 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
7280 tcg_temp_free(taddr);
7281
7282 store_reg(s, a->rt, tmp);
7283 return true;
7284 }
7285
7286 static bool trans_SWP(DisasContext *s, arg_SWP *a)
7287 {
7288 return op_swp(s, a, MO_UL | MO_ALIGN);
7289 }
7290
7291 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
7292 {
7293 return op_swp(s, a, MO_UB);
7294 }
7295
7296 /*
7297 * Load/Store Exclusive and Load-Acquire/Store-Release
7298 */
7299
7300 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
7301 {
7302 TCGv_i32 addr;
7303 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7304 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7305
7306 /* We UNDEF for these UNPREDICTABLE cases. */
7307 if (a->rd == 15 || a->rn == 15 || a->rt == 15
7308 || a->rd == a->rn || a->rd == a->rt
7309 || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
7310 || (mop == MO_64
7311 && (a->rt2 == 15
7312 || a->rd == a->rt2
7313 || (!v8a && s->thumb && a->rt2 == 13)))) {
7314 unallocated_encoding(s);
7315 return true;
7316 }
7317
7318 if (rel) {
7319 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7320 }
7321
7322 addr = tcg_temp_local_new_i32();
7323 load_reg_var(s, addr, a->rn);
7324 tcg_gen_addi_i32(addr, addr, a->imm);
7325
7326 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
7327 tcg_temp_free_i32(addr);
7328 return true;
7329 }
7330
7331 static bool trans_STREX(DisasContext *s, arg_STREX *a)
7332 {
7333 if (!ENABLE_ARCH_6) {
7334 return false;
7335 }
7336 return op_strex(s, a, MO_32, false);
7337 }
7338
7339 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
7340 {
7341 if (!ENABLE_ARCH_6K) {
7342 return false;
7343 }
7344 /* We UNDEF for these UNPREDICTABLE cases. */
7345 if (a->rt & 1) {
7346 unallocated_encoding(s);
7347 return true;
7348 }
7349 a->rt2 = a->rt + 1;
7350 return op_strex(s, a, MO_64, false);
7351 }
7352
7353 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
7354 {
7355 return op_strex(s, a, MO_64, false);
7356 }
7357
7358 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
7359 {
7360 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7361 return false;
7362 }
7363 return op_strex(s, a, MO_8, false);
7364 }
7365
7366 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
7367 {
7368 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7369 return false;
7370 }
7371 return op_strex(s, a, MO_16, false);
7372 }
7373
7374 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
7375 {
7376 if (!ENABLE_ARCH_8) {
7377 return false;
7378 }
7379 return op_strex(s, a, MO_32, true);
7380 }
7381
7382 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
7383 {
7384 if (!ENABLE_ARCH_8) {
7385 return false;
7386 }
7387 /* We UNDEF for these UNPREDICTABLE cases. */
7388 if (a->rt & 1) {
7389 unallocated_encoding(s);
7390 return true;
7391 }
7392 a->rt2 = a->rt + 1;
7393 return op_strex(s, a, MO_64, true);
7394 }
7395
7396 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
7397 {
7398 if (!ENABLE_ARCH_8) {
7399 return false;
7400 }
7401 return op_strex(s, a, MO_64, true);
7402 }
7403
7404 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
7405 {
7406 if (!ENABLE_ARCH_8) {
7407 return false;
7408 }
7409 return op_strex(s, a, MO_8, true);
7410 }
7411
7412 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
7413 {
7414 if (!ENABLE_ARCH_8) {
7415 return false;
7416 }
7417 return op_strex(s, a, MO_16, true);
7418 }
7419
7420 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7421 {
7422 TCGv_i32 addr, tmp;
7423
7424 if (!ENABLE_ARCH_8) {
7425 return false;
7426 }
7427 /* We UNDEF for these UNPREDICTABLE cases. */
7428 if (a->rn == 15 || a->rt == 15) {
7429 unallocated_encoding(s);
7430 return true;
7431 }
7432
7433 addr = load_reg(s, a->rn);
7434 tmp = load_reg(s, a->rt);
7435 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7436 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
7437 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7438
7439 tcg_temp_free_i32(tmp);
7440 tcg_temp_free_i32(addr);
7441 return true;
7442 }
7443
7444 static bool trans_STL(DisasContext *s, arg_STL *a)
7445 {
7446 return op_stl(s, a, MO_UL);
7447 }
7448
7449 static bool trans_STLB(DisasContext *s, arg_STL *a)
7450 {
7451 return op_stl(s, a, MO_UB);
7452 }
7453
7454 static bool trans_STLH(DisasContext *s, arg_STL *a)
7455 {
7456 return op_stl(s, a, MO_UW);
7457 }
7458
7459 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7460 {
7461 TCGv_i32 addr;
7462 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7463 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7464
7465 /* We UNDEF for these UNPREDICTABLE cases. */
7466 if (a->rn == 15 || a->rt == 15
7467 || (!v8a && s->thumb && a->rt == 13)
7468 || (mop == MO_64
7469 && (a->rt2 == 15 || a->rt == a->rt2
7470 || (!v8a && s->thumb && a->rt2 == 13)))) {
7471 unallocated_encoding(s);
7472 return true;
7473 }
7474
7475 addr = tcg_temp_local_new_i32();
7476 load_reg_var(s, addr, a->rn);
7477 tcg_gen_addi_i32(addr, addr, a->imm);
7478
7479 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7480 tcg_temp_free_i32(addr);
7481
7482 if (acq) {
7483 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7484 }
7485 return true;
7486 }
7487
7488 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7489 {
7490 if (!ENABLE_ARCH_6) {
7491 return false;
7492 }
7493 return op_ldrex(s, a, MO_32, false);
7494 }
7495
7496 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7497 {
7498 if (!ENABLE_ARCH_6K) {
7499 return false;
7500 }
7501 /* We UNDEF for these UNPREDICTABLE cases. */
7502 if (a->rt & 1) {
7503 unallocated_encoding(s);
7504 return true;
7505 }
7506 a->rt2 = a->rt + 1;
7507 return op_ldrex(s, a, MO_64, false);
7508 }
7509
7510 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7511 {
7512 return op_ldrex(s, a, MO_64, false);
7513 }
7514
7515 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7516 {
7517 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7518 return false;
7519 }
7520 return op_ldrex(s, a, MO_8, false);
7521 }
7522
7523 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7524 {
7525 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7526 return false;
7527 }
7528 return op_ldrex(s, a, MO_16, false);
7529 }
7530
7531 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7532 {
7533 if (!ENABLE_ARCH_8) {
7534 return false;
7535 }
7536 return op_ldrex(s, a, MO_32, true);
7537 }
7538
7539 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7540 {
7541 if (!ENABLE_ARCH_8) {
7542 return false;
7543 }
7544 /* We UNDEF for these UNPREDICTABLE cases. */
7545 if (a->rt & 1) {
7546 unallocated_encoding(s);
7547 return true;
7548 }
7549 a->rt2 = a->rt + 1;
7550 return op_ldrex(s, a, MO_64, true);
7551 }
7552
7553 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7554 {
7555 if (!ENABLE_ARCH_8) {
7556 return false;
7557 }
7558 return op_ldrex(s, a, MO_64, true);
7559 }
7560
7561 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7562 {
7563 if (!ENABLE_ARCH_8) {
7564 return false;
7565 }
7566 return op_ldrex(s, a, MO_8, true);
7567 }
7568
7569 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7570 {
7571 if (!ENABLE_ARCH_8) {
7572 return false;
7573 }
7574 return op_ldrex(s, a, MO_16, true);
7575 }
7576
7577 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7578 {
7579 TCGv_i32 addr, tmp;
7580
7581 if (!ENABLE_ARCH_8) {
7582 return false;
7583 }
7584 /* We UNDEF for these UNPREDICTABLE cases. */
7585 if (a->rn == 15 || a->rt == 15) {
7586 unallocated_encoding(s);
7587 return true;
7588 }
7589
7590 addr = load_reg(s, a->rn);
7591 tmp = tcg_temp_new_i32();
7592 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
7593 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7594 tcg_temp_free_i32(addr);
7595
7596 store_reg(s, a->rt, tmp);
7597 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7598 return true;
7599 }
7600
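/*
 * Note (an addition, not part of the upstream source): for acquire semantics
 * the barrier is emitted after the load (here and in op_ldrex), while for
 * release semantics it is emitted before the store (op_stl and op_strex),
 * matching the one-way ordering that LDA* and STL* each provide.
 */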
7601 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7602 {
7603 return op_lda(s, a, MO_UL);
7604 }
7605
7606 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7607 {
7608 return op_lda(s, a, MO_UB);
7609 }
7610
7611 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7612 {
7613 return op_lda(s, a, MO_UW);
7614 }
7615
7616 /*
7617 * Media instructions
7618 */
7619
7620 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7621 {
7622 TCGv_i32 t1, t2;
7623
7624 if (!ENABLE_ARCH_6) {
7625 return false;
7626 }
7627
7628 t1 = load_reg(s, a->rn);
7629 t2 = load_reg(s, a->rm);
7630 gen_helper_usad8(t1, t1, t2);
7631 tcg_temp_free_i32(t2);
7632 if (a->ra != 15) {
7633 t2 = load_reg(s, a->ra);
7634 tcg_gen_add_i32(t1, t1, t2);
7635 tcg_temp_free_i32(t2);
7636 }
7637 store_reg(s, a->rd, t1);
7638 return true;
7639 }
7640
7641 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7642 {
7643 TCGv_i32 tmp;
7644 int width = a->widthm1 + 1;
7645 int shift = a->lsb;
7646
7647 if (!ENABLE_ARCH_6T2) {
7648 return false;
7649 }
7650 if (shift + width > 32) {
7651 /* UNPREDICTABLE; we choose to UNDEF */
7652 unallocated_encoding(s);
7653 return true;
7654 }
7655
7656 tmp = load_reg(s, a->rn);
7657 if (u) {
7658 tcg_gen_extract_i32(tmp, tmp, shift, width);
7659 } else {
7660 tcg_gen_sextract_i32(tmp, tmp, shift, width);
7661 }
7662 store_reg(s, a->rd, tmp);
7663 return true;
7664 }
7665
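/*
 * Worked example (an addition, not part of the upstream source): with
 * rn = 0x00000f80, lsb = 4 and widthm1 = 7 the extracted field is bits
 * [11:4] = 0xf8, so UBFX yields 0x000000f8 while SBFX sign-extends the
 * top bit of the field and yields 0xfffffff8.
 */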
7666 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7667 {
7668 return op_bfx(s, a, false);
7669 }
7670
7671 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7672 {
7673 return op_bfx(s, a, true);
7674 }
7675
7676 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7677 {
7678 TCGv_i32 tmp;
7679 int msb = a->msb, lsb = a->lsb;
7680 int width;
7681
7682 if (!ENABLE_ARCH_6T2) {
7683 return false;
7684 }
7685 if (msb < lsb) {
7686 /* UNPREDICTABLE; we choose to UNDEF */
7687 unallocated_encoding(s);
7688 return true;
7689 }
7690
7691 width = msb + 1 - lsb;
7692 if (a->rn == 15) {
7693 /* BFC */
7694 tmp = tcg_const_i32(0);
7695 } else {
7696 /* BFI */
7697 tmp = load_reg(s, a->rn);
7698 }
7699 if (width != 32) {
7700 TCGv_i32 tmp2 = load_reg(s, a->rd);
7701 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
7702 tcg_temp_free_i32(tmp2);
7703 }
7704 store_reg(s, a->rd, tmp);
7705 return true;
7706 }
7707
7708 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7709 {
7710 unallocated_encoding(s);
7711 return true;
7712 }
7713
7714 /*
7715 * Parallel addition and subtraction
7716 */
7717
7718 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7719 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7720 {
7721 TCGv_i32 t0, t1;
7722
7723 if (s->thumb
7724 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7725 : !ENABLE_ARCH_6) {
7726 return false;
7727 }
7728
7729 t0 = load_reg(s, a->rn);
7730 t1 = load_reg(s, a->rm);
7731
7732 gen(t0, t0, t1);
7733
7734 tcg_temp_free_i32(t1);
7735 store_reg(s, a->rd, t0);
7736 return true;
7737 }
7738
7739 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7740 void (*gen)(TCGv_i32, TCGv_i32,
7741 TCGv_i32, TCGv_ptr))
7742 {
7743 TCGv_i32 t0, t1;
7744 TCGv_ptr ge;
7745
7746 if (s->thumb
7747 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7748 : !ENABLE_ARCH_6) {
7749 return false;
7750 }
7751
7752 t0 = load_reg(s, a->rn);
7753 t1 = load_reg(s, a->rm);
7754
7755 ge = tcg_temp_new_ptr();
7756 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7757 gen(t0, t0, t1, ge);
7758
7759 tcg_temp_free_ptr(ge);
7760 tcg_temp_free_i32(t1);
7761 store_reg(s, a->rd, t0);
7762 return true;
7763 }
7764
7765 #define DO_PAR_ADDSUB(NAME, helper) \
7766 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
7767 { \
7768 return op_par_addsub(s, a, helper); \
7769 }
7770
7771 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7772 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
7773 { \
7774 return op_par_addsub_ge(s, a, helper); \
7775 }
7776
7777 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7778 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7779 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7780 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7781 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7782 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7783
7784 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7785 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7786 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7787 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7788 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7789 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7790
7791 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7792 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7793 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7794 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7795 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7796 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7797
7798 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7799 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7800 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7801 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7802 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7803 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7804
7805 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7806 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7807 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7808 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7809 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7810 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7811
7812 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7813 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7814 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7815 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7816 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7817 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7818
7819 #undef DO_PAR_ADDSUB
7820 #undef DO_PAR_ADDSUB_GE
7821
7822 /*
7823 * Packing, unpacking, saturation, and reversal
7824 */
7825
7826 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7827 {
7828 TCGv_i32 tn, tm;
7829 int shift = a->imm;
7830
7831 if (s->thumb
7832 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7833 : !ENABLE_ARCH_6) {
7834 return false;
7835 }
7836
7837 tn = load_reg(s, a->rn);
7838 tm = load_reg(s, a->rm);
7839 if (a->tb) {
7840 /* PKHTB */
7841 if (shift == 0) {
7842 shift = 31;
7843 }
7844 tcg_gen_sari_i32(tm, tm, shift);
7845 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7846 } else {
7847 /* PKHBT */
7848 tcg_gen_shli_i32(tm, tm, shift);
7849 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7850 }
7851 tcg_temp_free_i32(tm);
7852 store_reg(s, a->rd, tn);
7853 return true;
7854 }
7855
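/*
 * Worked example (an addition, not part of the upstream source): with
 * rn = 0x11112222 and rm = 0x33334444,
 *
 *   PKHBT rd, rn, rm, LSL #16  gives 0x44442222
 *       (bottom half from rn, top half from rm << 16)
 *   PKHTB rd, rn, rm, ASR #16  gives 0x11113333
 *       (top half from rn, bottom half from rm >> 16)
 */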
7856 static bool op_sat(DisasContext *s, arg_sat *a,
7857 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7858 {
7859 TCGv_i32 tmp, satimm;
7860 int shift = a->imm;
7861
7862 if (!ENABLE_ARCH_6) {
7863 return false;
7864 }
7865
7866 tmp = load_reg(s, a->rn);
7867 if (a->sh) {
7868 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7869 } else {
7870 tcg_gen_shli_i32(tmp, tmp, shift);
7871 }
7872
7873 satimm = tcg_const_i32(a->satimm);
7874 gen(tmp, cpu_env, tmp, satimm);
7875 tcg_temp_free_i32(satimm);
7876
7877 store_reg(s, a->rd, tmp);
7878 return true;
7879 }
7880
7881 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7882 {
7883 return op_sat(s, a, gen_helper_ssat);
7884 }
7885
7886 static bool trans_USAT(DisasContext *s, arg_sat *a)
7887 {
7888 return op_sat(s, a, gen_helper_usat);
7889 }
7890
7891 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7892 {
7893 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7894 return false;
7895 }
7896 return op_sat(s, a, gen_helper_ssat16);
7897 }
7898
7899 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7900 {
7901 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7902 return false;
7903 }
7904 return op_sat(s, a, gen_helper_usat16);
7905 }
7906
7907 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7908 void (*gen_extract)(TCGv_i32, TCGv_i32),
7909 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7910 {
7911 TCGv_i32 tmp;
7912
7913 if (!ENABLE_ARCH_6) {
7914 return false;
7915 }
7916
7917 tmp = load_reg(s, a->rm);
7918 /*
7919 * TODO: In many cases we could do a shift instead of a rotate.
7920 * Combined with a simple extend, that becomes an extract.
7921 */
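    /* The 2-bit rot field encodes a rotation of 0, 8, 16 or 24 bits. */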
7922 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7923 gen_extract(tmp, tmp);
7924
7925 if (a->rn != 15) {
7926 TCGv_i32 tmp2 = load_reg(s, a->rn);
7927 gen_add(tmp, tmp, tmp2);
7928 tcg_temp_free_i32(tmp2);
7929 }
7930 store_reg(s, a->rd, tmp);
7931 return true;
7932 }
7933
7934 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7935 {
7936 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7937 }
7938
7939 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7940 {
7941 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7942 }
7943
7944 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7945 {
7946 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7947 return false;
7948 }
7949 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7950 }
7951
7952 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7953 {
7954 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7955 }
7956
7957 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7958 {
7959 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7960 }
7961
7962 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7963 {
7964 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7965 return false;
7966 }
7967 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7968 }
7969
7970 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7971 {
7972 TCGv_i32 t1, t2, t3;
7973
7974 if (s->thumb
7975 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7976 : !ENABLE_ARCH_6) {
7977 return false;
7978 }
7979
7980 t1 = load_reg(s, a->rn);
7981 t2 = load_reg(s, a->rm);
7982 t3 = tcg_temp_new_i32();
7983 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
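    /*
     * Each result byte is taken from Rn or Rm according to the
     * corresponding GE flag bit.
     */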
7984 gen_helper_sel_flags(t1, t3, t1, t2);
7985 tcg_temp_free_i32(t3);
7986 tcg_temp_free_i32(t2);
7987 store_reg(s, a->rd, t1);
7988 return true;
7989 }
7990
7991 static bool op_rr(DisasContext *s, arg_rr *a,
7992 void (*gen)(TCGv_i32, TCGv_i32))
7993 {
7994 TCGv_i32 tmp;
7995
7996 tmp = load_reg(s, a->rm);
7997 gen(tmp, tmp);
7998 store_reg(s, a->rd, tmp);
7999 return true;
8000 }
8001
8002 static bool trans_REV(DisasContext *s, arg_rr *a)
8003 {
8004 if (!ENABLE_ARCH_6) {
8005 return false;
8006 }
8007 return op_rr(s, a, tcg_gen_bswap32_i32);
8008 }
8009
8010 static bool trans_REV16(DisasContext *s, arg_rr *a)
8011 {
8012 if (!ENABLE_ARCH_6) {
8013 return false;
8014 }
8015 return op_rr(s, a, gen_rev16);
8016 }
8017
8018 static bool trans_REVSH(DisasContext *s, arg_rr *a)
8019 {
8020 if (!ENABLE_ARCH_6) {
8021 return false;
8022 }
8023 return op_rr(s, a, gen_revsh);
8024 }
8025
8026 static bool trans_RBIT(DisasContext *s, arg_rr *a)
8027 {
8028 if (!ENABLE_ARCH_6T2) {
8029 return false;
8030 }
8031 return op_rr(s, a, gen_helper_rbit);
8032 }
8033
8034 /*
8035 * Signed multiply, signed and unsigned divide
8036 */
8037
8038 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
8039 {
8040 TCGv_i32 t1, t2;
8041
8042 if (!ENABLE_ARCH_6) {
8043 return false;
8044 }
8045
8046 t1 = load_reg(s, a->rn);
8047 t2 = load_reg(s, a->rm);
8048 if (m_swap) {
8049 gen_swap_half(t2);
8050 }
8051 gen_smul_dual(t1, t2);
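    /*
     * t1 now holds the product of the low halfwords and t2 the product
     * of the high halfwords.
     */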
8052
8053 if (sub) {
8054 /* This subtraction cannot overflow. */
8055 tcg_gen_sub_i32(t1, t1, t2);
8056 } else {
8057 /*
8058 * This addition cannot overflow 32 bits; however it may
8059 * overflow considered as a signed operation, in which case
8060 * we must set the Q flag.
8061 */
8062 gen_helper_add_setq(t1, cpu_env, t1, t2);
8063 }
8064 tcg_temp_free_i32(t2);
8065
8066 if (a->ra != 15) {
8067 t2 = load_reg(s, a->ra);
8068 gen_helper_add_setq(t1, cpu_env, t1, t2);
8069 tcg_temp_free_i32(t2);
8070 }
8071 store_reg(s, a->rd, t1);
8072 return true;
8073 }
8074
8075 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
8076 {
8077 return op_smlad(s, a, false, false);
8078 }
8079
8080 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
8081 {
8082 return op_smlad(s, a, true, false);
8083 }
8084
8085 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
8086 {
8087 return op_smlad(s, a, false, true);
8088 }
8089
8090 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
8091 {
8092 return op_smlad(s, a, true, true);
8093 }
8094
8095 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
8096 {
8097 TCGv_i32 t1, t2;
8098 TCGv_i64 l1, l2;
8099
8100 if (!ENABLE_ARCH_6) {
8101 return false;
8102 }
8103
8104 t1 = load_reg(s, a->rn);
8105 t2 = load_reg(s, a->rm);
8106 if (m_swap) {
8107 gen_swap_half(t2);
8108 }
8109 gen_smul_dual(t1, t2);
8110
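    /*
     * Widen both 16x16 products to 64 bits so they can be combined and
     * then accumulated into the 64-bit value held in Ra:Rd.
     */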
8111 l1 = tcg_temp_new_i64();
8112 l2 = tcg_temp_new_i64();
8113 tcg_gen_ext_i32_i64(l1, t1);
8114 tcg_gen_ext_i32_i64(l2, t2);
8115 tcg_temp_free_i32(t1);
8116 tcg_temp_free_i32(t2);
8117
8118 if (sub) {
8119 tcg_gen_sub_i64(l1, l1, l2);
8120 } else {
8121 tcg_gen_add_i64(l1, l1, l2);
8122 }
8123 tcg_temp_free_i64(l2);
8124
8125 gen_addq(s, l1, a->ra, a->rd);
8126 gen_storeq_reg(s, a->ra, a->rd, l1);
8127 tcg_temp_free_i64(l1);
8128 return true;
8129 }
8130
8131 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
8132 {
8133 return op_smlald(s, a, false, false);
8134 }
8135
8136 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
8137 {
8138 return op_smlald(s, a, true, false);
8139 }
8140
8141 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
8142 {
8143 return op_smlald(s, a, false, true);
8144 }
8145
8146 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
8147 {
8148 return op_smlald(s, a, true, true);
8149 }
8150
8151 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
8152 {
8153 TCGv_i32 t1, t2;
8154
8155 if (s->thumb
8156 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8157 : !ENABLE_ARCH_6) {
8158 return false;
8159 }
8160
8161 t1 = load_reg(s, a->rn);
8162 t2 = load_reg(s, a->rm);
8163 tcg_gen_muls2_i32(t2, t1, t1, t2);
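    /* Full 64-bit signed product: low half in t2, high half in t1. */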
8164
8165 if (a->ra != 15) {
8166 TCGv_i32 t3 = load_reg(s, a->ra);
8167 if (sub) {
8168 /*
8169 * For SMMLS, we need a full 64-bit subtract: it accounts for the
8170 * borrow caused by a non-zero product lowpart, and yields the
8171 * correct result lowpart for rounding.
8172 */
8173 TCGv_i32 zero = tcg_const_i32(0);
8174 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
8175 tcg_temp_free_i32(zero);
8176 } else {
8177 tcg_gen_add_i32(t1, t1, t3);
8178 }
8179 tcg_temp_free_i32(t3);
8180 }
8181 if (round) {
8182 /*
8183 * Adding 0x80000000 to the 64-bit quantity means that we have
8184 * a carry into the high word when the low word has the msb set.
8185 */
8186 tcg_gen_shri_i32(t2, t2, 31);
8187 tcg_gen_add_i32(t1, t1, t2);
8188 }
8189 tcg_temp_free_i32(t2);
8190 store_reg(s, a->rd, t1);
8191 return true;
8192 }
8193
8194 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
8195 {
8196 return op_smmla(s, a, false, false);
8197 }
8198
8199 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
8200 {
8201 return op_smmla(s, a, true, false);
8202 }
8203
8204 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
8205 {
8206 return op_smmla(s, a, false, true);
8207 }
8208
8209 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
8210 {
8211 return op_smmla(s, a, true, true);
8212 }
8213
8214 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
8215 {
8216 TCGv_i32 t1, t2;
8217
8218 if (s->thumb
8219 ? !dc_isar_feature(aa32_thumb_div, s)
8220 : !dc_isar_feature(aa32_arm_div, s)) {
8221 return false;
8222 }
8223
8224 t1 = load_reg(s, a->rn);
8225 t2 = load_reg(s, a->rm);
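    /*
     * The udiv/sdiv helpers follow the ARM rules for the corner cases:
     * division by zero yields 0, and INT_MIN / -1 yields INT_MIN.
     */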
8226 if (u) {
8227 gen_helper_udiv(t1, t1, t2);
8228 } else {
8229 gen_helper_sdiv(t1, t1, t2);
8230 }
8231 tcg_temp_free_i32(t2);
8232 store_reg(s, a->rd, t1);
8233 return true;
8234 }
8235
8236 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
8237 {
8238 return op_div(s, a, false);
8239 }
8240
8241 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
8242 {
8243 return op_div(s, a, true);
8244 }
8245
8246 /*
8247 * Block data transfer
8248 */
8249
8250 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
8251 {
8252 TCGv_i32 addr = load_reg(s, a->rn);
8253
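    /*
     * Adjust the base so that 'addr' is the lowest address of the
     * transfer; the load/store loops below then always step upwards
     * by 4 bytes.
     */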
8254 if (a->b) {
8255 if (a->i) {
8256 /* pre increment */
8257 tcg_gen_addi_i32(addr, addr, 4);
8258 } else {
8259 /* pre decrement */
8260 tcg_gen_addi_i32(addr, addr, -(n * 4));
8261 }
8262 } else if (!a->i && n != 1) {
8263 /* post decrement */
8264 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8265 }
8266
8267 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8268 /*
8269 * If the writeback is incrementing SP rather than
8270 * decrementing it, and the initial SP is below the
8271 * stack limit but the final written-back SP would
8272 * be above, then we must not perform any memory
8273 * accesses, but it is IMPDEF whether we generate
8274 * an exception. We choose to do so in this case.
8275 * At this point 'addr' is the lowest address, so it is
8276 * either the original SP (if incrementing) or our
8277 * final SP (if decrementing), and that is what we check.
8278 */
8279 gen_helper_v8m_stackcheck(cpu_env, addr);
8280 }
8281
8282 return addr;
8283 }
8284
8285 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
8286 TCGv_i32 addr, int n)
8287 {
8288 if (a->w) {
8289 /* write back */
8290 if (!a->b) {
8291 if (a->i) {
8292 /* post increment */
8293 tcg_gen_addi_i32(addr, addr, 4);
8294 } else {
8295 /* post decrement */
8296 tcg_gen_addi_i32(addr, addr, -(n * 4));
8297 }
8298 } else if (!a->i && n != 1) {
8299 /* pre decrement */
8300 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8301 }
8302 store_reg(s, a->rn, addr);
8303 } else {
8304 tcg_temp_free_i32(addr);
8305 }
8306 }
8307
8308 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
8309 {
8310 int i, j, n, list, mem_idx;
8311 bool user = a->u;
8312 TCGv_i32 addr, tmp, tmp2;
8313
8314 if (user) {
8315 /* STM (user) */
8316 if (IS_USER(s)) {
8317 /* Only usable in supervisor mode. */
8318 unallocated_encoding(s);
8319 return true;
8320 }
8321 }
8322
8323 list = a->list;
8324 n = ctpop16(list);
8325 if (n < min_n || a->rn == 15) {
8326 unallocated_encoding(s);
8327 return true;
8328 }
8329
8330 addr = op_addr_block_pre(s, a, n);
8331 mem_idx = get_mem_index(s);
8332
8333 for (i = j = 0; i < 16; i++) {
8334 if (!(list & (1 << i))) {
8335 continue;
8336 }
8337
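        /*
         * For STM (user), registers other than the PC are read from
         * the User mode bank regardless of the current mode.
         */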
8338 if (user && i != 15) {
8339 tmp = tcg_temp_new_i32();
8340 tmp2 = tcg_const_i32(i);
8341 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
8342 tcg_temp_free_i32(tmp2);
8343 } else {
8344 tmp = load_reg(s, i);
8345 }
8346 gen_aa32_st32(s, tmp, addr, mem_idx);
8347 tcg_temp_free_i32(tmp);
8348
8349 /* No need to add after the last transfer. */
8350 if (++j != n) {
8351 tcg_gen_addi_i32(addr, addr, 4);
8352 }
8353 }
8354
8355 op_addr_block_post(s, a, addr, n);
8356 return true;
8357 }
8358
8359 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
8360 {
8361 /* BitCount(list) < 1 is UNPREDICTABLE */
8362 return op_stm(s, a, 1);
8363 }
8364
8365 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
8366 {
8367 /* Writeback register in register list is UNPREDICTABLE for T32. */
8368 if (a->w && (a->list & (1 << a->rn))) {
8369 unallocated_encoding(s);
8370 return true;
8371 }
8372 /* BitCount(list) < 2 is UNPREDICTABLE */
8373 return op_stm(s, a, 2);
8374 }
8375
8376 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
8377 {
8378 int i, j, n, list, mem_idx;
8379 bool loaded_base;
8380 bool user = a->u;
8381 bool exc_return = false;
8382 TCGv_i32 addr, tmp, tmp2, loaded_var;
8383
8384 if (user) {
8385 /* LDM (user), LDM (exception return) */
8386 if (IS_USER(s)) {
8387 /* Only usable in supervisor mode. */
8388 unallocated_encoding(s);
8389 return true;
8390 }
8391 if (extract32(a->list, 15, 1)) {
8392 exc_return = true;
8393 user = false;
8394 } else {
8395 /* LDM (user) does not allow writeback. */
8396 if (a->w) {
8397 unallocated_encoding(s);
8398 return true;
8399 }
8400 }
8401 }
8402
8403 list = a->list;
8404 n = ctpop16(list);
8405 if (n < min_n || a->rn == 15) {
8406 unallocated_encoding(s);
8407 return true;
8408 }
8409
8410 addr = op_addr_block_pre(s, a, n);
8411 mem_idx = get_mem_index(s);
8412 loaded_base = false;
8413 loaded_var = NULL;
8414
8415 for (i = j = 0; i < 16; i++) {
8416 if (!(list & (1 << i))) {
8417 continue;
8418 }
8419
8420 tmp = tcg_temp_new_i32();
8421 gen_aa32_ld32u(s, tmp, addr, mem_idx);
8422 if (user) {
8423 tmp2 = tcg_const_i32(i);
8424 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
8425 tcg_temp_free_i32(tmp2);
8426 tcg_temp_free_i32(tmp);
8427 } else if (i == a->rn) {
8428 loaded_var = tmp;
8429 loaded_base = true;
8430 } else if (i == 15 && exc_return) {
8431 store_pc_exc_ret(s, tmp);
8432 } else {
8433 store_reg_from_load(s, i, tmp);
8434 }
8435
8436 /* No need to add after the last transfer. */
8437 if (++j != n) {
8438 tcg_gen_addi_i32(addr, addr, 4);
8439 }
8440 }
8441
8442 op_addr_block_post(s, a, addr, n);
8443
8444 if (loaded_base) {
8445 /* Note that we reject base == pc above. */
8446 store_reg(s, a->rn, loaded_var);
8447 }
8448
8449 if (exc_return) {
8450 /* Restore CPSR from SPSR. */
8451 tmp = load_cpu_field(spsr);
8452 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8453 gen_io_start();
8454 }
8455 gen_helper_cpsr_write_eret(cpu_env, tmp);
8456 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8457 gen_io_end();
8458 }
8459 tcg_temp_free_i32(tmp);
8460 /* Must exit loop to check un-masked IRQs */
8461 s->base.is_jmp = DISAS_EXIT;
8462 }
8463 return true;
8464 }
8465
8466 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8467 {
8468 /*
8469 * Writeback register in register list is UNPREDICTABLE
8470 * for ArchVersion() >= 7. Prior to v7, A32 would write
8471 * an UNKNOWN value to the base register.
8472 */
8473 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8474 unallocated_encoding(s);
8475 return true;
8476 }
8477 /* BitCount(list) < 1 is UNPREDICTABLE */
8478 return do_ldm(s, a, 1);
8479 }
8480
8481 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8482 {
8483 /* Writeback register in register list is UNPREDICTABLE for T32. */
8484 if (a->w && (a->list & (1 << a->rn))) {
8485 unallocated_encoding(s);
8486 return true;
8487 }
8488 /* BitCount(list) < 2 is UNPREDICTABLE */
8489 return do_ldm(s, a, 2);
8490 }
8491
8492 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8493 {
8494 /* Writeback is conditional on the base register not being loaded. */
8495 a->w = !(a->list & (1 << a->rn));
8496 /* BitCount(list) < 1 is UNPREDICTABLE */
8497 return do_ldm(s, a, 1);
8498 }
8499
8500 /*
8501 * Branch, branch with link
8502 */
8503
8504 static bool trans_B(DisasContext *s, arg_i *a)
8505 {
8506 gen_jmp(s, read_pc(s) + a->imm);
8507 return true;
8508 }
8509
8510 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8511 {
8512 /* This has cond from encoding, required to be outside IT block. */
8513 if (a->cond >= 0xe) {
8514 return false;
8515 }
8516 if (s->condexec_mask) {
8517 unallocated_encoding(s);
8518 return true;
8519 }
8520 arm_skip_unless(s, a->cond);
8521 gen_jmp(s, read_pc(s) + a->imm);
8522 return true;
8523 }
8524
8525 static bool trans_BL(DisasContext *s, arg_i *a)
8526 {
8527 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8528 gen_jmp(s, read_pc(s) + a->imm);
8529 return true;
8530 }
8531
8532 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8533 {
8534 TCGv_i32 tmp;
8535
8536 /* For A32, ARCH(5) is checked near the start of the uncond block. */
8537 if (s->thumb && (a->imm & 2)) {
8538 return false;
8539 }
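    /*
     * BLX <imm> always changes the instruction set, so invert the Thumb
     * flag and compute the target from the word-aligned PC.
     */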
8540 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8541 tmp = tcg_const_i32(!s->thumb);
8542 store_cpu_field(tmp, thumb);
8543 gen_jmp(s, (read_pc(s) & ~3) + a->imm);
8544 return true;
8545 }
8546
8547 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8548 {
8549 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8550 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
8551 return true;
8552 }
8553
8554 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8555 {
8556 TCGv_i32 tmp = tcg_temp_new_i32();
8557
8558 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8559 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8560 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
8561 gen_bx(s, tmp);
8562 return true;
8563 }
8564
8565 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8566 {
8567 TCGv_i32 tmp;
8568
8569 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8570 if (!ENABLE_ARCH_5) {
8571 return false;
8572 }
8573 tmp = tcg_temp_new_i32();
8574 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8575 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8576 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
8577 gen_bx(s, tmp);
8578 return true;
8579 }
8580
8581 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8582 {
8583 TCGv_i32 addr, tmp;
8584
8585 tmp = load_reg(s, a->rm);
8586 if (half) {
8587 tcg_gen_add_i32(tmp, tmp, tmp);
8588 }
8589 addr = load_reg(s, a->rn);
8590 tcg_gen_add_i32(addr, addr, tmp);
8591
8592 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
8593 half ? MO_UW | s->be_data : MO_UB);
8594 tcg_temp_free_i32(addr);
8595
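    /*
     * Table entries are forward branch offsets in halfwords: double the
     * loaded value and add it to the PC.
     */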
8596 tcg_gen_add_i32(tmp, tmp, tmp);
8597 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
8598 store_reg(s, 15, tmp);
8599 return true;
8600 }
8601
8602 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8603 {
8604 return op_tbranch(s, a, false);
8605 }
8606
8607 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8608 {
8609 return op_tbranch(s, a, true);
8610 }
8611
8612 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8613 {
8614 TCGv_i32 tmp = load_reg(s, a->rn);
8615
8616 arm_gen_condlabel(s);
8617 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8618 tmp, 0, s->condlabel);
8619 tcg_temp_free_i32(tmp);
8620 gen_jmp(s, read_pc(s) + a->imm);
8621 return true;
8622 }
8623
8624 /*
8625 * Supervisor call - both T32 and A32 come here, so we need to check
8626 * which mode we are in when checking for semihosting.
8627 */
8628
8629 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8630 {
8631 const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8632
8633 if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
8634 #ifndef CONFIG_USER_ONLY
8635 !IS_USER(s) &&
8636 #endif
8637 (a->imm == semihost_imm)) {
8638 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8639 } else {
8640 gen_set_pc_im(s, s->base.pc_next);
8641 s->svc_imm = a->imm;
8642 s->base.is_jmp = DISAS_SWI;
8643 }
8644 return true;
8645 }
8646
8647 /*
8648 * Unconditional system instructions
8649 */
8650
8651 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8652 {
8653 static const int8_t pre_offset[4] = {
8654 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8655 };
8656 static const int8_t post_offset[4] = {
8657 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8658 };
8659 TCGv_i32 addr, t1, t2;
8660
8661 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8662 return false;
8663 }
8664 if (IS_USER(s)) {
8665 unallocated_encoding(s);
8666 return true;
8667 }
8668
8669 addr = load_reg(s, a->rn);
8670 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8671
8672 /* Load PC into tmp and CPSR into tmp2. */
8673 t1 = tcg_temp_new_i32();
8674 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
8675 tcg_gen_addi_i32(addr, addr, 4);
8676 t2 = tcg_temp_new_i32();
8677 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
8678
8679 if (a->w) {
8680 /* Base writeback. */
8681 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8682 store_reg(s, a->rn, addr);
8683 } else {
8684 tcg_temp_free_i32(addr);
8685 }
8686 gen_rfe(s, t1, t2);
8687 return true;
8688 }
8689
8690 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8691 {
8692 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8693 return false;
8694 }
8695 gen_srs(s, a->mode, a->pu, a->w);
8696 return true;
8697 }
8698
8699 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8700 {
8701 uint32_t mask, val;
8702
8703 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8704 return false;
8705 }
8706 if (IS_USER(s)) {
8707 /* Implemented as NOP in user mode. */
8708 return true;
8709 }
8710 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8711
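    /*
     * imod bit 1 requests a change of the selected A/I/F bits; bit 0
     * distinguishes CPSID (set the bits, masking the exceptions) from
     * CPSIE (clear them).
     */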
8712 mask = val = 0;
8713 if (a->imod & 2) {
8714 if (a->A) {
8715 mask |= CPSR_A;
8716 }
8717 if (a->I) {
8718 mask |= CPSR_I;
8719 }
8720 if (a->F) {
8721 mask |= CPSR_F;
8722 }
8723 if (a->imod & 1) {
8724 val |= mask;
8725 }
8726 }
8727 if (a->M) {
8728 mask |= CPSR_M;
8729 val |= a->mode;
8730 }
8731 if (mask) {
8732 gen_set_psr_im(s, mask, 0, val);
8733 }
8734 return true;
8735 }
8736
8737 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8738 {
8739 TCGv_i32 tmp, addr, el;
8740
8741 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8742 return false;
8743 }
8744 if (IS_USER(s)) {
8745 /* Implemented as NOP in user mode. */
8746 return true;
8747 }
8748
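    /*
     * 16 and 19 below are the v7-M SYSm numbers for PRIMASK and
     * FAULTMASK, as consumed by the v7m_msr helper.
     */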
8749 tmp = tcg_const_i32(a->im);
8750 /* FAULTMASK */
8751 if (a->F) {
8752 addr = tcg_const_i32(19);
8753 gen_helper_v7m_msr(cpu_env, addr, tmp);
8754 tcg_temp_free_i32(addr);
8755 }
8756 /* PRIMASK */
8757 if (a->I) {
8758 addr = tcg_const_i32(16);
8759 gen_helper_v7m_msr(cpu_env, addr, tmp);
8760 tcg_temp_free_i32(addr);
8761 }
8762 el = tcg_const_i32(s->current_el);
8763 gen_helper_rebuild_hflags_m32(cpu_env, el);
8764 tcg_temp_free_i32(el);
8765 tcg_temp_free_i32(tmp);
8766 gen_lookup_tb(s);
8767 return true;
8768 }
8769
8770 /*
8771 * Clear-Exclusive, Barriers
8772 */
8773
8774 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8775 {
8776 if (s->thumb
8777 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8778 : !ENABLE_ARCH_6K) {
8779 return false;
8780 }
8781 gen_clrex(s);
8782 return true;
8783 }
8784
8785 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8786 {
8787 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8788 return false;
8789 }
8790 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8791 return true;
8792 }
8793
8794 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8795 {
8796 return trans_DSB(s, NULL);
8797 }
8798
8799 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8800 {
8801 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8802 return false;
8803 }
8804 /*
8805 * We need to break the TB after this insn to execute
8806 * self-modifying code correctly and also to take
8807 * any pending interrupts immediately.
8808 */
8809 gen_goto_tb(s, 0, s->base.pc_next);
8810 return true;
8811 }
8812
8813 static bool trans_SB(DisasContext *s, arg_SB *a)
8814 {
8815 if (!dc_isar_feature(aa32_sb, s)) {
8816 return false;
8817 }
8818 /*
8819 * TODO: There is no speculation barrier opcode
8820 * for TCG; MB and end the TB instead.
8821 */
8822 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8823 gen_goto_tb(s, 0, s->base.pc_next);
8824 return true;
8825 }
8826
8827 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8828 {
8829 if (!ENABLE_ARCH_6) {
8830 return false;
8831 }
8832 if (a->E != (s->be_data == MO_BE)) {
8833 gen_helper_setend(cpu_env);
8834 s->base.is_jmp = DISAS_UPDATE;
8835 }
8836 return true;
8837 }
8838
8839 /*
8840 * Preload instructions
8841 * All are nops, contingent on the appropriate arch level.
8842 */
8843
8844 static bool trans_PLD(DisasContext *s, arg_PLD *a)
8845 {
8846 return ENABLE_ARCH_5TE;
8847 }
8848
8849 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8850 {
8851 return arm_dc_feature(s, ARM_FEATURE_V7MP);
8852 }
8853
8854 static bool trans_PLI(DisasContext *s, arg_PLD *a)
8855 {
8856 return ENABLE_ARCH_7;
8857 }
8858
8859 /*
8860 * If-then
8861 */
8862
8863 static bool trans_IT(DisasContext *s, arg_IT *a)
8864 {
8865 int cond_mask = a->cond_mask;
8866
8867 /*
8868 * No actual code generated for this insn, just setup state.
8869 *
8870 * Combinations of firstcond and mask which set up a 0b1111
8871 * condition are UNPREDICTABLE; we take the CONSTRAINED
8872 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8873 * i.e. both meaning "execute always".
8874 */
8875 s->condexec_cond = (cond_mask >> 4) & 0xe;
8876 s->condexec_mask = cond_mask & 0x1f;
8877 return true;
8878 }
8879
8880 /*
8881 * Legacy decoder.
8882 */
8883
8884 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8885 {
8886 unsigned int cond = insn >> 28;
8887
8888 /* M variants do not implement ARM mode; this must raise the INVSTATE
8889 * UsageFault exception.
8890 */
8891 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8892 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
8893 default_exception_el(s));
8894 return;
8895 }
8896
8897 if (cond == 0xf) {
8898 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8899 * choose to UNDEF. In ARMv5 and above the space is used
8900 * for miscellaneous unconditional instructions.
8901 */
8902 ARCH(5);
8903
8904 /* Unconditional instructions. */
8905 /* TODO: Perhaps merge these into one decodetree output file. */
8906 if (disas_a32_uncond(s, insn) ||
8907 disas_vfp_uncond(s, insn) ||
8908 disas_neon_dp(s, insn) ||
8909 disas_neon_ls(s, insn) ||
8910 disas_neon_shared(s, insn)) {
8911 return;
8912 }
8913 /* fall back to legacy decoder */
8914
8915 if (((insn >> 25) & 7) == 1) {
8916 /* NEON Data processing. */
8917 if (disas_neon_data_insn(s, insn)) {
8918 goto illegal_op;
8919 }
8920 return;
8921 }
8922 if ((insn & 0x0e000f00) == 0x0c000100) {
8923 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8924 /* iWMMXt register transfer. */
8925 if (extract32(s->c15_cpar, 1, 1)) {
8926 if (!disas_iwmmxt_insn(s, insn)) {
8927 return;
8928 }
8929 }
8930 }
8931 }
8932 goto illegal_op;
8933 }
8934 if (cond != 0xe) {
8935 /* If the condition is not "always", generate a conditional jump
8936 to the next instruction. */
8937 arm_skip_unless(s, cond);
8938 }
8939
8940 /* TODO: Perhaps merge these into one decodetree output file. */
8941 if (disas_a32(s, insn) ||
8942 disas_vfp(s, insn)) {
8943 return;
8944 }
8945 /* fall back to legacy decoder */
8946
8947 switch ((insn >> 24) & 0xf) {
8948 case 0xc:
8949 case 0xd:
8950 case 0xe:
8951 if (((insn >> 8) & 0xe) == 10) {
8952 /* VFP, but failed disas_vfp. */
8953 goto illegal_op;
8954 }
8955 if (disas_coproc_insn(s, insn)) {
8956 /* Coprocessor. */
8957 goto illegal_op;
8958 }
8959 break;
8960 default:
8961 illegal_op:
8962 unallocated_encoding(s);
8963 break;
8964 }
8965 }
8966
8967 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8968 {
8969 /*
8970 * Return true if this is a 16 bit instruction. We must be precise
8971 * about this (matching the decode).
8972 */
8973 if ((insn >> 11) < 0x1d) {
8974 /* Definitely a 16-bit instruction */
8975 return true;
8976 }
8977
8978 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8979 * first half of a 32-bit Thumb insn. Thumb-1 cores might
8980 * end up actually treating this as two 16-bit insns, though,
8981 * if it's half of a bl/blx pair that might span a page boundary.
8982 */
8983 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8984 arm_dc_feature(s, ARM_FEATURE_M)) {
8985 /* Thumb2 cores (including all M profile ones) always treat
8986 * 32-bit insns as 32-bit.
8987 */
8988 return false;
8989 }
8990
8991 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8992 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8993 * is not on the next page; we merge this into a 32-bit
8994 * insn.
8995 */
8996 return false;
8997 }
8998 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8999 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
9000 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
9001 * -- handle as single 16 bit insn
9002 */
9003 return true;
9004 }
9005
9006 /* Translate a 32-bit thumb instruction. */
9007 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
9008 {
9009 /*
9010 * ARMv6-M supports a limited subset of Thumb2 instructions.
9011 * Other Thumb1 architectures allow only 32-bit
9012 * combined BL/BLX prefix and suffix.
9013 */
9014 if (arm_dc_feature(s, ARM_FEATURE_M) &&
9015 !arm_dc_feature(s, ARM_FEATURE_V7)) {
9016 int i;
9017 bool found = false;
9018 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9019 0xf3b08040 /* dsb */,
9020 0xf3b08050 /* dmb */,
9021 0xf3b08060 /* isb */,
9022 0xf3e08000 /* mrs */,
9023 0xf000d000 /* bl */};
9024 static const uint32_t armv6m_mask[] = {0xffe0d000,
9025 0xfff0d0f0,
9026 0xfff0d0f0,
9027 0xfff0d0f0,
9028 0xffe0d000,
9029 0xf800d000};
9030
9031 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9032 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9033 found = true;
9034 break;
9035 }
9036 }
9037 if (!found) {
9038 goto illegal_op;
9039 }
9040 } else if ((insn & 0xf800e800) != 0xf000e800) {
9041 ARCH(6T2);
9042 }
9043
9044 if ((insn & 0xef000000) == 0xef000000) {
9045 /*
9046 * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9047 * transform into
9048 * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9049 */
9050 uint32_t a32_insn = (insn & 0xe2ffffff) |
9051 ((insn & (1 << 28)) >> 4) | (1 << 28);
9052
9053 if (disas_neon_dp(s, a32_insn)) {
9054 return;
9055 }
9056 }
9057
9058 if ((insn & 0xff100000) == 0xf9000000) {
9059 /*
9060 * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9061 * transform into
9062 * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9063 */
9064 uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9065
9066 if (disas_neon_ls(s, a32_insn)) {
9067 return;
9068 }
9069 }
9070
9071 /*
9072 * TODO: Perhaps merge these into one decodetree output file.
9073 * Note disas_vfp is written for a32 with cond field in the
9074 * top nibble. The t32 encoding requires 0xe in the top nibble.
9075 */
9076 if (disas_t32(s, insn) ||
9077 disas_vfp_uncond(s, insn) ||
9078 disas_neon_shared(s, insn) ||
9079 ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9080 return;
9081 }
9082 /* fall back to legacy decoder */
9083
9084 switch ((insn >> 25) & 0xf) {
9085 case 0: case 1: case 2: case 3:
9086 /* 16-bit instructions. Should never happen. */
9087 abort();
9088 case 6: case 7: case 14: case 15:
9089 /* Coprocessor. */
9090 if (arm_dc_feature(s, ARM_FEATURE_M)) {
9091 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
9092 if (extract32(insn, 24, 2) == 3) {
9093 goto illegal_op; /* op0 = 0b11 : unallocated */
9094 }
9095
9096 if (((insn >> 8) & 0xe) == 10 &&
9097 dc_isar_feature(aa32_fpsp_v2, s)) {
9098 /* FP, and the CPU supports it */
9099 goto illegal_op;
9100 } else {
9101 /* All other insns: NOCP */
9102 gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
9103 syn_uncategorized(),
9104 default_exception_el(s));
9105 }
9106 break;
9107 }
9108 if (((insn >> 24) & 3) == 3) {
9109 /* Translate into the equivalent ARM encoding. */
9110 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
9111 if (disas_neon_data_insn(s, insn)) {
9112 goto illegal_op;
9113 }
9114 } else if (((insn >> 8) & 0xe) == 10) {
9115 /* VFP, but failed disas_vfp. */
9116 goto illegal_op;
9117 } else {
9118 if (insn & (1 << 28))
9119 goto illegal_op;
9120 if (disas_coproc_insn(s, insn)) {
9121 goto illegal_op;
9122 }
9123 }
9124 break;
9125 case 12:
9126 goto illegal_op;
9127 default:
9128 illegal_op:
9129 unallocated_encoding(s);
9130 }
9131 }
9132
9133 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9134 {
9135 if (!disas_t16(s, insn)) {
9136 unallocated_encoding(s);
9137 }
9138 }
9139
9140 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9141 {
9142 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9143 * (False positives are OK, false negatives are not.)
9144 * We know this is a Thumb insn, and our caller ensures we are
9145 * only called if dc->base.pc_next is less than 4 bytes from the page
9146 * boundary, so we cross the page if the first 16 bits indicate
9147 * that this is a 32 bit insn.
9148 */
9149 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
9150
9151 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9152 }
9153
9154 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9155 {
9156 DisasContext *dc = container_of(dcbase, DisasContext, base);
9157 CPUARMState *env = cs->env_ptr;
9158 ARMCPU *cpu = env_archcpu(env);
9159 uint32_t tb_flags = dc->base.tb->flags;
9160 uint32_t condexec, core_mmu_idx;
9161
9162 dc->isar = &cpu->isar;
9163 dc->condjmp = 0;
9164
9165 dc->aarch64 = 0;
9166 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
9167 * there is no secure EL1, so we route exceptions to EL3.
9168 */
9169 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
9170 !arm_el_is_aa64(env, 3);
9171 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
9172 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
9173 condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
9174 dc->condexec_mask = (condexec & 0xf) << 1;
9175 dc->condexec_cond = condexec >> 4;
9176
9177 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
9178 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9179 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9180 #if !defined(CONFIG_USER_ONLY)
9181 dc->user = (dc->current_el == 0);
9182 #endif
9183 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
9184
9185 if (arm_feature(env, ARM_FEATURE_M)) {
9186 dc->vfp_enabled = 1;
9187 dc->be_data = MO_TE;
9188 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
9189 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
9190 regime_is_secure(env, dc->mmu_idx);
9191 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
9192 dc->v8m_fpccr_s_wrong =
9193 FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
9194 dc->v7m_new_fp_ctxt_needed =
9195 FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
9196 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
9197 } else {
9198 dc->be_data =
9199 FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
9200 dc->debug_target_el =
9201 FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
9202 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
9203 dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
9204 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
9205 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
9206 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9207 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
9208 } else {
9209 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
9210 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
9211 }
9212 }
9213 dc->cp_regs = cpu->cp_regs;
9214 dc->features = env->features;
9215
9216 /* Single step state. The code-generation logic here is:
9217 * SS_ACTIVE == 0:
9218 * generate code with no special handling for single-stepping (except
9219 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9220 * this happens anyway because those changes are all system register or
9221 * PSTATE writes).
9222 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9223 * emit code for one insn
9224 * emit code to clear PSTATE.SS
9225 * emit code to generate software step exception for completed step
9226 * end TB (as usual for having generated an exception)
9227 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9228 * emit code to generate a software step exception
9229 * end the TB
9230 */
9231 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
9232 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
9233 dc->is_ldex = false;
9234
9235 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9236
9237 /* If architectural single step active, limit to 1. */
9238 if (is_singlestepping(dc)) {
9239 dc->base.max_insns = 1;
9240 }
9241
9242 /* ARM is a fixed-length ISA. Bound the number of insns to execute
9243 to those left on the page. */
9244 if (!dc->thumb) {
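        /* -(pc | TARGET_PAGE_MASK) is the number of bytes left on this page. */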
9245 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9246 dc->base.max_insns = MIN(dc->base.max_insns, bound);
9247 }
9248
9249 cpu_V0 = tcg_temp_new_i64();
9250 cpu_V1 = tcg_temp_new_i64();
9251 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
9252 cpu_M0 = tcg_temp_new_i64();
9253 }
9254
9255 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9256 {
9257 DisasContext *dc = container_of(dcbase, DisasContext, base);
9258
9259 /* A note on handling of the condexec (IT) bits:
9260 *
9261 * We want to avoid the overhead of having to write the updated condexec
9262 * bits back to the CPUARMState for every instruction in an IT block. So:
9263 * (1) if the condexec bits are not already zero then we write
9264 * zero back into the CPUARMState now. This avoids complications trying
9265 * to do it at the end of the block. (For example if we don't do this
9266 * it's hard to identify whether we can safely skip writing condexec
9267 * at the end of the TB, which we definitely want to do for the case
9268 * where a TB doesn't do anything with the IT state at all.)
9269 * (2) if we are going to leave the TB then we call gen_set_condexec()
9270 * which will write the correct value into CPUARMState if zero is wrong.
9271 * This is done both for leaving the TB at the end, and for leaving
9272 * it because of an exception we know will happen, which is done in
9273 * gen_exception_insn(). The latter is necessary because we need to
9274 * leave the TB with the PC/IT state just prior to execution of the
9275 * instruction which caused the exception.
9276 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9277 * then the CPUARMState will be wrong and we need to reset it.
9278 * This is handled in the same way as restoration of the
9279 * PC in these situations; we save the value of the condexec bits
9280 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9281 * then uses this to restore them after an exception.
9282 *
9283 * Note that there are no instructions which can read the condexec
9284 * bits, and none which can write non-static values to them, so
9285 * we don't need to care about whether CPUARMState is correct in the
9286 * middle of a TB.
9287 */
9288
9289 /* Reset the conditional execution bits immediately. This avoids
9290 complications trying to do it at the end of the block. */
9291 if (dc->condexec_mask || dc->condexec_cond) {
9292 TCGv_i32 tmp = tcg_temp_new_i32();
9293 tcg_gen_movi_i32(tmp, 0);
9294 store_cpu_field(tmp, condexec_bits);
9295 }
9296 }
9297
9298 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9299 {
9300 DisasContext *dc = container_of(dcbase, DisasContext, base);
9301
9302 tcg_gen_insn_start(dc->base.pc_next,
9303 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
9304 0);
9305 dc->insn_start = tcg_last_op();
9306 }
9307
9308 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
9309 const CPUBreakpoint *bp)
9310 {
9311 DisasContext *dc = container_of(dcbase, DisasContext, base);
9312
9313 if (bp->flags & BP_CPU) {
9314 gen_set_condexec(dc);
9315 gen_set_pc_im(dc, dc->base.pc_next);
9316 gen_helper_check_breakpoints(cpu_env);
9317 /* End the TB early; it's likely not going to be executed */
9318 dc->base.is_jmp = DISAS_TOO_MANY;
9319 } else {
9320 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
9321 /* The address covered by the breakpoint must be
9322 included in [tb->pc, tb->pc + tb->size) in order
9323 for it to be properly cleared -- thus we
9324 increment the PC here so that the logic setting
9325 tb->size below does the right thing. */
9326 /* TODO: Advance PC by correct instruction length to
9327 * avoid disassembler error messages */
9328 dc->base.pc_next += 2;
9329 dc->base.is_jmp = DISAS_NORETURN;
9330 }
9331
9332 return true;
9333 }
9334
9335 static bool arm_pre_translate_insn(DisasContext *dc)
9336 {
9337 #ifdef CONFIG_USER_ONLY
9338 /* Intercept jump to the magic kernel page. */
9339 if (dc->base.pc_next >= 0xffff0000) {
9340 /* We always get here via a jump, so we know we are not in a
9341 conditional execution block. */
9342 gen_exception_internal(EXCP_KERNEL_TRAP);
9343 dc->base.is_jmp = DISAS_NORETURN;
9344 return true;
9345 }
9346 #endif
9347
9348 if (dc->ss_active && !dc->pstate_ss) {
9349 /* Singlestep state is Active-pending.
9350 * If we're in this state at the start of a TB then either
9351 * a) we just took an exception to an EL which is being debugged
9352 * and this is the first insn in the exception handler
9353 * b) debug exceptions were masked and we just unmasked them
9354 * without changing EL (eg by clearing PSTATE.D)
9355 * In either case we're going to take a swstep exception in the
9356 * "did not step an insn" case, and so the syndrome ISV and EX
9357 * bits should be zero.
9358 */
9359 assert(dc->base.num_insns == 1);
9360 gen_swstep_exception(dc, 0, 0);
9361 dc->base.is_jmp = DISAS_NORETURN;
9362 return true;
9363 }
9364
9365 return false;
9366 }
9367
9368 static void arm_post_translate_insn(DisasContext *dc)
9369 {
9370 if (dc->condjmp && !dc->base.is_jmp) {
9371 gen_set_label(dc->condlabel);
9372 dc->condjmp = 0;
9373 }
9374 translator_loop_temp_check(&dc->base);
9375 }
9376
9377 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9378 {
9379 DisasContext *dc = container_of(dcbase, DisasContext, base);
9380 CPUARMState *env = cpu->env_ptr;
9381 unsigned int insn;
9382
9383 if (arm_pre_translate_insn(dc)) {
9384 return;
9385 }
9386
9387 dc->pc_curr = dc->base.pc_next;
9388 insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
9389 dc->insn = insn;
9390 dc->base.pc_next += 4;
9391 disas_arm_insn(dc, insn);
9392
9393 arm_post_translate_insn(dc);
9394
9395 /* ARM is a fixed-length ISA. We performed the cross-page check
9396 in init_disas_context by adjusting max_insns. */
9397 }
9398
9399 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9400 {
9401 /* Return true if this Thumb insn is always unconditional,
9402 * even inside an IT block. This is true of only a very few
9403 * instructions: BKPT, HLT, and SG.
9404 *
9405 * A larger class of instructions are UNPREDICTABLE if used
9406 * inside an IT block; we do not need to detect those here, because
9407 * what we do by default (perform the cc check and update the IT
9408 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9409 * choice for those situations.
9410 *
9411 * insn is either a 16-bit or a 32-bit instruction; the two are
9412 * distinguishable because for the 16-bit case the top 16 bits
9413 * are zeroes, and that isn't a valid 32-bit encoding.
9414 */
9415 if ((insn & 0xffffff00) == 0xbe00) {
9416 /* BKPT */
9417 return true;
9418 }
9419
9420 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9421 !arm_dc_feature(s, ARM_FEATURE_M)) {
9422 /* HLT: v8A only. This is unconditional even when it is going to
9423 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9424 * For v7 cores this was a plain old undefined encoding and so
9425 * honours its cc check. (We might be using the encoding as
9426 * a semihosting trap, but we don't change the cc check behaviour
9427 * on that account, because a debugger connected to a real v7A
9428 * core and emulating semihosting traps by catching the UNDEF
9429 * exception would also only see cases where the cc check passed.
9430 * No guest code should be trying to do a HLT semihosting trap
9431 * in an IT block anyway.
9432 */
9433 return true;
9434 }
9435
9436 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9437 arm_dc_feature(s, ARM_FEATURE_M)) {
9438 /* SG: v8M only */
9439 return true;
9440 }
9441
9442 return false;
9443 }
9444
9445 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9446 {
9447 DisasContext *dc = container_of(dcbase, DisasContext, base);
9448 CPUARMState *env = cpu->env_ptr;
9449 uint32_t insn;
9450 bool is_16bit;
9451
9452 if (arm_pre_translate_insn(dc)) {
9453 return;
9454 }
9455
9456 dc->pc_curr = dc->base.pc_next;
9457 insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
9458 is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9459 dc->base.pc_next += 2;
9460 if (!is_16bit) {
9461 uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
9462
9463 insn = insn << 16 | insn2;
9464 dc->base.pc_next += 2;
9465 }
9466 dc->insn = insn;
9467
9468 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9469 uint32_t cond = dc->condexec_cond;
9470
9471 /*
9472 * Conditionally skip the insn. Note that both 0xe and 0xf mean
9473 * "always"; 0xf is not "never".
9474 */
9475 if (cond < 0x0e) {
9476 arm_skip_unless(dc, cond);
9477 }
9478 }
9479
9480 if (is_16bit) {
9481 disas_thumb_insn(dc, insn);
9482 } else {
9483 disas_thumb2_insn(dc, insn);
9484 }
9485
9486 /* Advance the Thumb condexec condition. */
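    /*
     * The low bit of the condition comes from the top bit of the 5-bit
     * mask, which is then shifted up; once the mask is zero the IT
     * block is finished.
     */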
9487 if (dc->condexec_mask) {
9488 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9489 ((dc->condexec_mask >> 4) & 1));
9490 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9491 if (dc->condexec_mask == 0) {
9492 dc->condexec_cond = 0;
9493 }
9494 }
9495
9496 arm_post_translate_insn(dc);
9497
9498 /* Thumb is a variable-length ISA. Stop translation when the next insn
9499 * will touch a new page. This ensures that prefetch aborts occur at
9500 * the right place.
9501 *
9502 * We want to stop the TB if the next insn starts in a new page,
9503 * or if it spans between this page and the next. This means that
9504 * if we're looking at the last halfword in the page we need to
9505 * see if it's a 16-bit Thumb insn (which will fit in this TB)
9506 * or a 32-bit Thumb insn (which won't).
9507 * This is to avoid generating a silly TB with a single 16-bit insn
9508 * in it at the end of this page (which would execute correctly
9509 * but isn't very efficient).
9510 */
9511 if (dc->base.is_jmp == DISAS_NEXT
9512 && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9513 || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9514 && insn_crosses_page(env, dc)))) {
9515 dc->base.is_jmp = DISAS_TOO_MANY;
9516 }
9517 }
9518
9519 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9520 {
9521 DisasContext *dc = container_of(dcbase, DisasContext, base);
9522
9523 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
9524 /* FIXME: This can theoretically happen with self-modifying code. */
9525 cpu_abort(cpu, "IO on conditional branch instruction");
9526 }
9527
9528 /* At this stage dc->condjmp will only be set when the skipped
9529 instruction was a conditional branch or trap, and the PC has
9530 already been written. */
9531 gen_set_condexec(dc);
9532 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9533 /* Exception return branches need some special case code at the
9534 * end of the TB, which is complex enough that it has to
9535 * handle the single-step vs not and the condition-failed
9536 * insn codepath itself.
9537 */
9538 gen_bx_excret_final_code(dc);
9539 } else if (unlikely(is_singlestepping(dc))) {
9540 /* Unconditional and "condition passed" instruction codepath. */
9541 switch (dc->base.is_jmp) {
9542 case DISAS_SWI:
9543 gen_ss_advance(dc);
9544 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
9545 default_exception_el(dc));
9546 break;
9547 case DISAS_HVC:
9548 gen_ss_advance(dc);
9549 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9550 break;
9551 case DISAS_SMC:
9552 gen_ss_advance(dc);
9553 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
9554 break;
9555 case DISAS_NEXT:
9556 case DISAS_TOO_MANY:
9557 case DISAS_UPDATE:
9558 gen_set_pc_im(dc, dc->base.pc_next);
9559 /* fall through */
9560 default:
9561 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9562 gen_singlestep_exception(dc);
9563 break;
9564 case DISAS_NORETURN:
9565 break;
9566 }
9567 } else {
9568 /* While branches must always occur at the end of an IT block,
9569 there are a few other things that can cause us to terminate
9570 the TB in the middle of an IT block:
9571 - Exception generating instructions (bkpt, swi, undefined).
9572 - Page boundaries.
9573 - Hardware watchpoints.
9574 Hardware breakpoints have already been handled and skip this code.
9575 */
9576 switch(dc->base.is_jmp) {
9577 case DISAS_NEXT:
9578 case DISAS_TOO_MANY:
9579 gen_goto_tb(dc, 1, dc->base.pc_next);
9580 break;
9581 case DISAS_JUMP:
9582 gen_goto_ptr();
9583 break;
9584 case DISAS_UPDATE:
9585 gen_set_pc_im(dc, dc->base.pc_next);
9586 /* fall through */
9587 default:
9588 /* indicate that the hash table must be used to find the next TB */
9589 tcg_gen_exit_tb(NULL, 0);
9590 break;
9591 case DISAS_NORETURN:
9592 /* nothing more to generate */
9593 break;
9594 case DISAS_WFI:
9595 {
9596 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
9597 !(dc->insn & (1U << 31))) ? 2 : 4);
9598
9599 gen_helper_wfi(cpu_env, tmp);
9600 tcg_temp_free_i32(tmp);
9601 /* The helper doesn't necessarily throw an exception, but we
9602 * must go back to the main loop to check for interrupts anyway.
9603 */
9604 tcg_gen_exit_tb(NULL, 0);
9605 break;
9606 }
9607 case DISAS_WFE:
9608 gen_helper_wfe(cpu_env);
9609 break;
9610 case DISAS_YIELD:
9611 gen_helper_yield(cpu_env);
9612 break;
9613 case DISAS_SWI:
9614 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
9615 default_exception_el(dc));
9616 break;
9617 case DISAS_HVC:
9618 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9619 break;
9620 case DISAS_SMC:
9621 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
9622 break;
9623 }
9624 }
9625
9626 if (dc->condjmp) {
9627 /* "Condition failed" instruction codepath for the branch/trap insn */
9628 gen_set_label(dc->condlabel);
9629 gen_set_condexec(dc);
9630 if (unlikely(is_singlestepping(dc))) {
9631 gen_set_pc_im(dc, dc->base.pc_next);
9632 gen_singlestep_exception(dc);
9633 } else {
9634 gen_goto_tb(dc, 1, dc->base.pc_next);
9635 }
9636 }
9637 }
9638
9639 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
9640 {
9641 DisasContext *dc = container_of(dcbase, DisasContext, base);
9642
9643 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
9644 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
9645 }
9646
9647 static const TranslatorOps arm_translator_ops = {
9648 .init_disas_context = arm_tr_init_disas_context,
9649 .tb_start = arm_tr_tb_start,
9650 .insn_start = arm_tr_insn_start,
9651 .breakpoint_check = arm_tr_breakpoint_check,
9652 .translate_insn = arm_tr_translate_insn,
9653 .tb_stop = arm_tr_tb_stop,
9654 .disas_log = arm_tr_disas_log,
9655 };
9656
9657 static const TranslatorOps thumb_translator_ops = {
9658 .init_disas_context = arm_tr_init_disas_context,
9659 .tb_start = arm_tr_tb_start,
9660 .insn_start = arm_tr_insn_start,
9661 .breakpoint_check = arm_tr_breakpoint_check,
9662 .translate_insn = thumb_tr_translate_insn,
9663 .tb_stop = arm_tr_tb_stop,
9664 .disas_log = arm_tr_disas_log,
9665 };
9666
9667 /* generate intermediate code for basic block 'tb'. */
9668 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
9669 {
9670 DisasContext dc = { };
9671 const TranslatorOps *ops = &arm_translator_ops;
9672
9673 if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
9674 ops = &thumb_translator_ops;
9675 }
9676 #ifdef TARGET_AARCH64
9677 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
9678 ops = &aarch64_translator_ops;
9679 }
9680 #endif
9681
9682 translator_loop(ops, &dc.base, cpu, tb, max_insns);
9683 }
9684
9685 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
9686 target_ulong *data)
9687 {
9688 if (is_a64(env)) {
9689 env->pc = data[0];
9690 env->condexec_bits = 0;
9691 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
9692 } else {
9693 env->regs[15] = data[0];
9694 env->condexec_bits = data[1];
9695 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
9696 }
9697 }