target/arm/translate.c (mirror_qemu.git, at commit "target/arm: Remove ARCH macro")
1 /*
2 * ARM translation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "qemu/osdep.h"
22
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "hw/semihosting/semihost.h"
33
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
36
37 #include "trace-tcg.h"
38 #include "exec/log.h"
39
40
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
51
52 #include "translate.h"
53
54 #if defined(CONFIG_USER_ONLY)
55 #define IS_USER(s) 1
56 #else
57 #define IS_USER(s) (s->user)
58 #endif
59
60 /* We reuse the same 64-bit temporaries for efficiency. */
61 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
62 static TCGv_i32 cpu_R[16];
63 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
64 TCGv_i64 cpu_exclusive_addr;
65 TCGv_i64 cpu_exclusive_val;
66
67 #include "exec/gen-icount.h"
68
69 static const char * const regnames[] =
70 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
71 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
72
73 /* Function prototypes for gen_ functions calling Neon helpers. */
74 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
75 TCGv_i32, TCGv_i32);
76 /* Function prototypes for gen_ functions for fix point conversions */
77 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
78
79 /* initialize TCG globals. */
80 void arm_translate_init(void)
81 {
82 int i;
83
84 for (i = 0; i < 16; i++) {
85 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
86 offsetof(CPUARMState, regs[i]),
87 regnames[i]);
88 }
89 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
90 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
91 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
92 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
93
94 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
95 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
96 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
97 offsetof(CPUARMState, exclusive_val), "exclusive_val");
98
99 a64_translate_init();
100 }
101
102 /* Flags for the disas_set_da_iss info argument:
103 * lower bits hold the Rt register number, higher bits are flags.
104 */
105 typedef enum ISSInfo {
106 ISSNone = 0,
107 ISSRegMask = 0x1f,
108 ISSInvalid = (1 << 5),
109 ISSIsAcqRel = (1 << 6),
110 ISSIsWrite = (1 << 7),
111 ISSIs16Bit = (1 << 8),
112 } ISSInfo;
113
114 /* Save the syndrome information for a Data Abort */
115 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
116 {
117 uint32_t syn;
118 int sas = memop & MO_SIZE;
119 bool sse = memop & MO_SIGN;
120 bool is_acqrel = issinfo & ISSIsAcqRel;
121 bool is_write = issinfo & ISSIsWrite;
122 bool is_16bit = issinfo & ISSIs16Bit;
123 int srt = issinfo & ISSRegMask;
124
125 if (issinfo & ISSInvalid) {
126 /* Some callsites want to conditionally provide ISS info,
127 * eg "only if this was not a writeback"
128 */
129 return;
130 }
131
132 if (srt == 15) {
133 /* For AArch32, insns where the src/dest is R15 never generate
134 * ISS information. Catching that here saves checking at all
135 * the call sites.
136 */
137 return;
138 }
139
140 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
141 0, 0, 0, is_write, 0, is_16bit);
142 disas_set_insn_syndrome(s, syn);
143 }
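/*
 * In disas_set_da_iss(), sas is log2 of the access size, sse says whether
 * the load sign-extends, and srt is the transfer register; together they
 * form the ISS field reported in the syndrome if the access later takes
 * a data abort.
 */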
144
145 static inline int get_a32_user_mem_index(DisasContext *s)
146 {
147 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
148 * insns:
149 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
150 * otherwise, access as if at PL0.
151 */
152 switch (s->mmu_idx) {
153 case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
154 case ARMMMUIdx_E10_0:
155 case ARMMMUIdx_E10_1:
156 case ARMMMUIdx_E10_1_PAN:
157 return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
158 case ARMMMUIdx_SE3:
159 case ARMMMUIdx_SE10_0:
160 case ARMMMUIdx_SE10_1:
161 case ARMMMUIdx_SE10_1_PAN:
162 return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
163 case ARMMMUIdx_MUser:
164 case ARMMMUIdx_MPriv:
165 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
166 case ARMMMUIdx_MUserNegPri:
167 case ARMMMUIdx_MPrivNegPri:
168 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
169 case ARMMMUIdx_MSUser:
170 case ARMMMUIdx_MSPriv:
171 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
172 case ARMMMUIdx_MSUserNegPri:
173 case ARMMMUIdx_MSPrivNegPri:
174 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
175 default:
176 g_assert_not_reached();
177 }
178 }
179
180 static inline TCGv_i32 load_cpu_offset(int offset)
181 {
182 TCGv_i32 tmp = tcg_temp_new_i32();
183 tcg_gen_ld_i32(tmp, cpu_env, offset);
184 return tmp;
185 }
186
187 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
188
189 static inline void store_cpu_offset(TCGv_i32 var, int offset)
190 {
191 tcg_gen_st_i32(var, cpu_env, offset);
192 tcg_temp_free_i32(var);
193 }
194
195 #define store_cpu_field(var, name) \
196 store_cpu_offset(var, offsetof(CPUARMState, name))
197
198 /* The architectural value of PC. */
199 static uint32_t read_pc(DisasContext *s)
200 {
201 return s->pc_curr + (s->thumb ? 4 : 8);
202 }
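/*
 * Example: an ARM-state insn at 0x00008000 observes PC == 0x00008008 via
 * read_pc(), while a Thumb-state insn at the same address observes
 * 0x00008004, matching the architectural "current insn address plus 8
 * (ARM) or plus 4 (Thumb)" rule.
 */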
203
204 /* Set a variable to the value of a CPU register. */
205 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
206 {
207 if (reg == 15) {
208 tcg_gen_movi_i32(var, read_pc(s));
209 } else {
210 tcg_gen_mov_i32(var, cpu_R[reg]);
211 }
212 }
213
214 /* Create a new temporary and set it to the value of a CPU register. */
215 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
216 {
217 TCGv_i32 tmp = tcg_temp_new_i32();
218 load_reg_var(s, tmp, reg);
219 return tmp;
220 }
221
222 /*
223 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
224 * This is used for load/store for which use of PC implies (literal),
225 * or ADD that implies ADR.
226 */
227 static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
228 {
229 TCGv_i32 tmp = tcg_temp_new_i32();
230
231 if (reg == 15) {
232 tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
233 } else {
234 tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
235 }
236 return tmp;
237 }
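/*
 * Example: a Thumb LDR (literal) at 0x00001002 with an immediate offset of
 * 8 sees read_pc() == 0x00001006; add_reg_for_lit() aligns that down to
 * 0x00001004 and produces the final address 0x0000100c.
 */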
238
239 /* Set a CPU register. The source must be a temporary and will be
240 marked as dead. */
241 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
242 {
243 if (reg == 15) {
244 /* In Thumb mode, we must ignore bit 0.
245 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
246 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
247 * We choose to ignore [1:0] in ARM mode for all architecture versions.
248 */
249 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
250 s->base.is_jmp = DISAS_JUMP;
251 }
252 tcg_gen_mov_i32(cpu_R[reg], var);
253 tcg_temp_free_i32(var);
254 }
255
256 /*
257 * Variant of store_reg which applies v8M stack-limit checks before updating
258 * SP. If the check fails this will result in an exception being taken.
259 * We disable the stack checks for CONFIG_USER_ONLY because we have
260 * no idea what the stack limits should be in that case.
261 * If stack checking is not being done this just acts like store_reg().
262 */
263 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
264 {
265 #ifndef CONFIG_USER_ONLY
266 if (s->v8m_stackcheck) {
267 gen_helper_v8m_stackcheck(cpu_env, var);
268 }
269 #endif
270 store_reg(s, 13, var);
271 }
272
273 /* Value extensions. */
274 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
275 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
276 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
277 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
278
279 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
280 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
281
282
283 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
284 {
285 TCGv_i32 tmp_mask = tcg_const_i32(mask);
286 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
287 tcg_temp_free_i32(tmp_mask);
288 }
289 /* Set NZCV flags from the high 4 bits of var. */
290 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
291
292 static void gen_exception_internal(int excp)
293 {
294 TCGv_i32 tcg_excp = tcg_const_i32(excp);
295
296 assert(excp_is_internal(excp));
297 gen_helper_exception_internal(cpu_env, tcg_excp);
298 tcg_temp_free_i32(tcg_excp);
299 }
300
301 static void gen_step_complete_exception(DisasContext *s)
302 {
303  * We have just completed a step of an insn. Move from Active-not-pending
304 * to Active-pending, and then also take the swstep exception.
305 * This corresponds to making the (IMPDEF) choice to prioritize
306 * swstep exceptions over asynchronous exceptions taken to an exception
307 * level where debug is disabled. This choice has the advantage that
308 * we do not need to maintain internal state corresponding to the
309 * ISV/EX syndrome bits between completion of the step and generation
310 * of the exception, and our syndrome information is always correct.
311 */
312 gen_ss_advance(s);
313 gen_swstep_exception(s, 1, s->is_ldex);
314 s->base.is_jmp = DISAS_NORETURN;
315 }
316
317 static void gen_singlestep_exception(DisasContext *s)
318 {
319 /* Generate the right kind of exception for singlestep, which is
320 * either the architectural singlestep or EXCP_DEBUG for QEMU's
321 * gdb singlestepping.
322 */
323 if (s->ss_active) {
324 gen_step_complete_exception(s);
325 } else {
326 gen_exception_internal(EXCP_DEBUG);
327 }
328 }
329
330 static inline bool is_singlestepping(DisasContext *s)
331 {
332 /* Return true if we are singlestepping either because of
333 * architectural singlestep or QEMU gdbstub singlestep. This does
334 * not include the command line '-singlestep' mode which is rather
335 * misnamed as it only means "one instruction per TB" and doesn't
336 * affect the code we generate.
337 */
338 return s->base.singlestep_enabled || s->ss_active;
339 }
340
341 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
342 {
343 TCGv_i32 tmp1 = tcg_temp_new_i32();
344 TCGv_i32 tmp2 = tcg_temp_new_i32();
345 tcg_gen_ext16s_i32(tmp1, a);
346 tcg_gen_ext16s_i32(tmp2, b);
347 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
348 tcg_temp_free_i32(tmp2);
349 tcg_gen_sari_i32(a, a, 16);
350 tcg_gen_sari_i32(b, b, 16);
351 tcg_gen_mul_i32(b, b, a);
352 tcg_gen_mov_i32(a, tmp1);
353 tcg_temp_free_i32(tmp1);
354 }
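/*
 * On return from gen_smul_dual(), a holds the signed product of the two
 * low halfwords and b holds the signed product of the two high halfwords;
 * callers add or subtract the pair to build SMUAD/SMUSD-style operations.
 */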
355
356 /* Byteswap each halfword. */
357 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
358 {
359 TCGv_i32 tmp = tcg_temp_new_i32();
360 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
361 tcg_gen_shri_i32(tmp, var, 8);
362 tcg_gen_and_i32(tmp, tmp, mask);
363 tcg_gen_and_i32(var, var, mask);
364 tcg_gen_shli_i32(var, var, 8);
365 tcg_gen_or_i32(dest, var, tmp);
366 tcg_temp_free_i32(mask);
367 tcg_temp_free_i32(tmp);
368 }
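/* Example: gen_rev16() turns 0xAABBCCDD into 0xBBAADDCC. */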
369
370 /* Byteswap low halfword and sign extend. */
371 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
372 {
373 tcg_gen_ext16u_i32(var, var);
374 tcg_gen_bswap16_i32(var, var);
375 tcg_gen_ext16s_i32(dest, var);
376 }
377
378 /* Swap low and high halfwords. */
379 static void gen_swap_half(TCGv_i32 dest, TCGv_i32 var)
380 {
381 tcg_gen_rotri_i32(dest, var, 16);
382 }
383
384 /* Dual 16-bit add. Result placed in t0 and t1 is marked as dead.
385 tmp = (t0 ^ t1) & 0x8000;
386 t0 &= ~0x8000;
387 t1 &= ~0x8000;
388 t0 = (t0 + t1) ^ tmp;
389 */
390
391 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
392 {
393 TCGv_i32 tmp = tcg_temp_new_i32();
394 tcg_gen_xor_i32(tmp, t0, t1);
395 tcg_gen_andi_i32(tmp, tmp, 0x8000);
396 tcg_gen_andi_i32(t0, t0, ~0x8000);
397 tcg_gen_andi_i32(t1, t1, ~0x8000);
398 tcg_gen_add_i32(t0, t0, t1);
399 tcg_gen_xor_i32(dest, t0, tmp);
400 tcg_temp_free_i32(tmp);
401 }
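/*
 * Clearing bit 15 of both operands guarantees that the low-halfword sum
 * cannot carry into the high halfword; the final XOR with
 * (t0 ^ t1) & 0x8000 then restores the correct value of bit 15, whose
 * carry-out is deliberately discarded because each halfword add wraps.
 */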
402
403 /* Set N and Z flags from var. */
404 static inline void gen_logic_CC(TCGv_i32 var)
405 {
406 tcg_gen_mov_i32(cpu_NF, var);
407 tcg_gen_mov_i32(cpu_ZF, var);
408 }
409
410 /* dest = T0 + T1 + CF. */
411 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
412 {
413 tcg_gen_add_i32(dest, t0, t1);
414 tcg_gen_add_i32(dest, dest, cpu_CF);
415 }
416
417 /* dest = T0 - T1 + CF - 1. */
418 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
419 {
420 tcg_gen_sub_i32(dest, t0, t1);
421 tcg_gen_add_i32(dest, dest, cpu_CF);
422 tcg_gen_subi_i32(dest, dest, 1);
423 }
424
425 /* dest = T0 + T1. Compute C, N, V and Z flags */
426 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
427 {
428 TCGv_i32 tmp = tcg_temp_new_i32();
429 tcg_gen_movi_i32(tmp, 0);
430 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
431 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
432 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
433 tcg_gen_xor_i32(tmp, t0, t1);
434 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
435 tcg_temp_free_i32(tmp);
436 tcg_gen_mov_i32(dest, cpu_NF);
437 }
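/*
 * The signed overflow computation above (and in gen_adc_CC below) is
 * VF = (result ^ t0) & ~(t0 ^ t1): bit 31 is set exactly when the operands
 * have the same sign but the result's sign differs. Only bit 31 of cpu_VF
 * is architecturally meaningful.
 */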
438
439 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
440 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
441 {
442 TCGv_i32 tmp = tcg_temp_new_i32();
443 if (TCG_TARGET_HAS_add2_i32) {
444 tcg_gen_movi_i32(tmp, 0);
445 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
446 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
447 } else {
448 TCGv_i64 q0 = tcg_temp_new_i64();
449 TCGv_i64 q1 = tcg_temp_new_i64();
450 tcg_gen_extu_i32_i64(q0, t0);
451 tcg_gen_extu_i32_i64(q1, t1);
452 tcg_gen_add_i64(q0, q0, q1);
453 tcg_gen_extu_i32_i64(q1, cpu_CF);
454 tcg_gen_add_i64(q0, q0, q1);
455 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
456 tcg_temp_free_i64(q0);
457 tcg_temp_free_i64(q1);
458 }
459 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
460 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
461 tcg_gen_xor_i32(tmp, t0, t1);
462 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
463 tcg_temp_free_i32(tmp);
464 tcg_gen_mov_i32(dest, cpu_NF);
465 }
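/*
 * Both paths in gen_adc_CC() compute the full 33-bit sum t0 + t1 + CF so
 * that cpu_CF receives the carry-out: either with two add2 operations, or
 * by widening to 64 bits and splitting the result into NF (low) and CF
 * (high).
 */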
466
467 /* dest = T0 - T1. Compute C, N, V and Z flags */
468 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
469 {
470 TCGv_i32 tmp;
471 tcg_gen_sub_i32(cpu_NF, t0, t1);
472 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
473 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
474 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
475 tmp = tcg_temp_new_i32();
476 tcg_gen_xor_i32(tmp, t0, t1);
477 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
478 tcg_temp_free_i32(tmp);
479 tcg_gen_mov_i32(dest, cpu_NF);
480 }
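/*
 * Subtraction uses ARM's "C = NOT borrow" convention, hence
 * CF = (t0 >= t1) unsigned. The overflow term here is
 * VF = (result ^ t0) & (t0 ^ t1): set in bit 31 when the operands have
 * different signs and the result's sign differs from t0's.
 */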
481
482 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
483 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
484 {
485 TCGv_i32 tmp = tcg_temp_new_i32();
486 tcg_gen_not_i32(tmp, t1);
487 gen_adc_CC(dest, t0, tmp);
488 tcg_temp_free_i32(tmp);
489 }
490
491 #define GEN_SHIFT(name) \
492 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
493 { \
494 TCGv_i32 tmp1, tmp2, tmp3; \
495 tmp1 = tcg_temp_new_i32(); \
496 tcg_gen_andi_i32(tmp1, t1, 0xff); \
497 tmp2 = tcg_const_i32(0); \
498 tmp3 = tcg_const_i32(0x1f); \
499 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
500 tcg_temp_free_i32(tmp3); \
501 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
502 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
503 tcg_temp_free_i32(tmp2); \
504 tcg_temp_free_i32(tmp1); \
505 }
506 GEN_SHIFT(shl)
507 GEN_SHIFT(shr)
508 #undef GEN_SHIFT
509
510 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
511 {
512 TCGv_i32 tmp1, tmp2;
513 tmp1 = tcg_temp_new_i32();
514 tcg_gen_andi_i32(tmp1, t1, 0xff);
515 tmp2 = tcg_const_i32(0x1f);
516 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
517 tcg_temp_free_i32(tmp2);
518 tcg_gen_sar_i32(dest, t0, tmp1);
519 tcg_temp_free_i32(tmp1);
520 }
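/*
 * Register-specified shifts use only the bottom byte of the shift amount.
 * For LSL/LSR an amount of 32..255 yields 0, which GEN_SHIFT implements by
 * selecting 0 whenever the amount exceeds 31; for ASR an amount of 32 or
 * more fills the result with the sign bit, so gen_sar() clamps the amount
 * to 31 instead. The flag-setting variants go through the *_cc helpers.
 */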
521
522 static void shifter_out_im(TCGv_i32 var, int shift)
523 {
524 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
525 }
526
527 /* Shift by immediate. Includes special handling for shift == 0. */
528 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
529 int shift, int flags)
530 {
531 switch (shiftop) {
532 case 0: /* LSL */
533 if (shift != 0) {
534 if (flags)
535 shifter_out_im(var, 32 - shift);
536 tcg_gen_shli_i32(var, var, shift);
537 }
538 break;
539 case 1: /* LSR */
540 if (shift == 0) {
541 if (flags) {
542 tcg_gen_shri_i32(cpu_CF, var, 31);
543 }
544 tcg_gen_movi_i32(var, 0);
545 } else {
546 if (flags)
547 shifter_out_im(var, shift - 1);
548 tcg_gen_shri_i32(var, var, shift);
549 }
550 break;
551 case 2: /* ASR */
552 if (shift == 0)
553 shift = 32;
554 if (flags)
555 shifter_out_im(var, shift - 1);
556 if (shift == 32)
557 shift = 31;
558 tcg_gen_sari_i32(var, var, shift);
559 break;
560 case 3: /* ROR/RRX */
561 if (shift != 0) {
562 if (flags)
563 shifter_out_im(var, shift - 1);
564 tcg_gen_rotri_i32(var, var, shift); break;
565 } else {
566 TCGv_i32 tmp = tcg_temp_new_i32();
567 tcg_gen_shli_i32(tmp, cpu_CF, 31);
568 if (flags)
569 shifter_out_im(var, 0);
570 tcg_gen_shri_i32(var, var, 1);
571 tcg_gen_or_i32(var, var, tmp);
572 tcg_temp_free_i32(tmp);
573 }
574 }
575 }
576
577 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
578 TCGv_i32 shift, int flags)
579 {
580 if (flags) {
581 switch (shiftop) {
582 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
583 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
584 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
585 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
586 }
587 } else {
588 switch (shiftop) {
589 case 0:
590 gen_shl(var, var, shift);
591 break;
592 case 1:
593 gen_shr(var, var, shift);
594 break;
595 case 2:
596 gen_sar(var, var, shift);
597 break;
598 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
599 tcg_gen_rotr_i32(var, var, shift); break;
600 }
601 }
602 tcg_temp_free_i32(shift);
603 }
604
605 /*
606 * Generate a conditional based on ARM condition code cc.
607  * This is common between ARM and AArch64 targets.
608 */
609 void arm_test_cc(DisasCompare *cmp, int cc)
610 {
611 TCGv_i32 value;
612 TCGCond cond;
613 bool global = true;
614
615 switch (cc) {
616 case 0: /* eq: Z */
617 case 1: /* ne: !Z */
618 cond = TCG_COND_EQ;
619 value = cpu_ZF;
620 break;
621
622 case 2: /* cs: C */
623 case 3: /* cc: !C */
624 cond = TCG_COND_NE;
625 value = cpu_CF;
626 break;
627
628 case 4: /* mi: N */
629 case 5: /* pl: !N */
630 cond = TCG_COND_LT;
631 value = cpu_NF;
632 break;
633
634 case 6: /* vs: V */
635 case 7: /* vc: !V */
636 cond = TCG_COND_LT;
637 value = cpu_VF;
638 break;
639
640 case 8: /* hi: C && !Z */
641 case 9: /* ls: !C || Z -> !(C && !Z) */
642 cond = TCG_COND_NE;
643 value = tcg_temp_new_i32();
644 global = false;
645 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
646 ZF is non-zero for !Z; so AND the two subexpressions. */
647 tcg_gen_neg_i32(value, cpu_CF);
648 tcg_gen_and_i32(value, value, cpu_ZF);
649 break;
650
651 case 10: /* ge: N == V -> N ^ V == 0 */
652 case 11: /* lt: N != V -> N ^ V != 0 */
653 /* Since we're only interested in the sign bit, == 0 is >= 0. */
654 cond = TCG_COND_GE;
655 value = tcg_temp_new_i32();
656 global = false;
657 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
658 break;
659
660 case 12: /* gt: !Z && N == V */
661 case 13: /* le: Z || N != V */
662 cond = TCG_COND_NE;
663 value = tcg_temp_new_i32();
664 global = false;
665 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
666 * the sign bit then AND with ZF to yield the result. */
667 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
668 tcg_gen_sari_i32(value, value, 31);
669 tcg_gen_andc_i32(value, cpu_ZF, value);
670 break;
671
672 case 14: /* always */
673 case 15: /* always */
674 /* Use the ALWAYS condition, which will fold early.
675 * It doesn't matter what we use for the value. */
676 cond = TCG_COND_ALWAYS;
677 value = cpu_ZF;
678 goto no_invert;
679
680 default:
681 fprintf(stderr, "Bad condition code 0x%x\n", cc);
682 abort();
683 }
684
685 if (cc & 1) {
686 cond = tcg_invert_cond(cond);
687 }
688
689 no_invert:
690 cmp->cond = cond;
691 cmp->value = value;
692 cmp->value_global = global;
693 }
694
695 void arm_free_cc(DisasCompare *cmp)
696 {
697 if (!cmp->value_global) {
698 tcg_temp_free_i32(cmp->value);
699 }
700 }
701
702 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
703 {
704 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
705 }
706
707 void arm_gen_test_cc(int cc, TCGLabel *label)
708 {
709 DisasCompare cmp;
710 arm_test_cc(&cmp, cc);
711 arm_jump_cc(&cmp, label);
712 arm_free_cc(&cmp);
713 }
714
715 static inline void gen_set_condexec(DisasContext *s)
716 {
717 if (s->condexec_mask) {
718 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
719 TCGv_i32 tmp = tcg_temp_new_i32();
720 tcg_gen_movi_i32(tmp, val);
721 store_cpu_field(tmp, condexec_bits);
722 }
723 }
724
725 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
726 {
727 tcg_gen_movi_i32(cpu_R[15], val);
728 }
729
730 /* Set PC and Thumb state from var. var is marked as dead. */
731 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
732 {
733 s->base.is_jmp = DISAS_JUMP;
734 tcg_gen_andi_i32(cpu_R[15], var, ~1);
735 tcg_gen_andi_i32(var, var, 1);
736 store_cpu_field(var, thumb);
737 }
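/*
 * Example: gen_bx() with a target of 0x00008001 branches to 0x00008000 and
 * switches to Thumb state, since bit 0 of the target selects the
 * instruction set and is cleared from the PC.
 */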
738
739 /*
740 * Set PC and Thumb state from var. var is marked as dead.
741 * For M-profile CPUs, include logic to detect exception-return
742 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
743 * and BX reg, and no others, and happens only for code in Handler mode.
744 * The Security Extension also requires us to check for the FNC_RETURN
745 * which signals a function return from non-secure state; this can happen
746 * in both Handler and Thread mode.
747 * To avoid having to do multiple comparisons in inline generated code,
748 * we make the check we do here loose, so it will match for EXC_RETURN
749 * in Thread mode. For system emulation do_v7m_exception_exit() checks
750 * for these spurious cases and returns without doing anything (giving
751 * the same behaviour as for a branch to a non-magic address).
752 *
753 * In linux-user mode it is unclear what the right behaviour for an
754 * attempted FNC_RETURN should be, because in real hardware this will go
755 * directly to Secure code (ie not the Linux kernel) which will then treat
756 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
757 * attempt behave the way it would on a CPU without the security extension,
758 * which is to say "like a normal branch". That means we can simply treat
759 * all branches as normal with no magic address behaviour.
760 */
761 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
762 {
763 /* Generate the same code here as for a simple bx, but flag via
764 * s->base.is_jmp that we need to do the rest of the work later.
765 */
766 gen_bx(s, var);
767 #ifndef CONFIG_USER_ONLY
768 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
769 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
770 s->base.is_jmp = DISAS_BX_EXCRET;
771 }
772 #endif
773 }
774
775 static inline void gen_bx_excret_final_code(DisasContext *s)
776 {
777 /* Generate the code to finish possible exception return and end the TB */
778 TCGLabel *excret_label = gen_new_label();
779 uint32_t min_magic;
780
781 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
782 /* Covers FNC_RETURN and EXC_RETURN magic */
783 min_magic = FNC_RETURN_MIN_MAGIC;
784 } else {
785 /* EXC_RETURN magic only */
786 min_magic = EXC_RETURN_MIN_MAGIC;
787 }
788
789 /* Is the new PC value in the magic range indicating exception return? */
790 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
791 /* No: end the TB as we would for a DISAS_JMP */
792 if (is_singlestepping(s)) {
793 gen_singlestep_exception(s);
794 } else {
795 tcg_gen_exit_tb(NULL, 0);
796 }
797 gen_set_label(excret_label);
798 /* Yes: this is an exception return.
799 * At this point in runtime env->regs[15] and env->thumb will hold
800 * the exception-return magic number, which do_v7m_exception_exit()
801 * will read. Nothing else will be able to see those values because
802 * the cpu-exec main loop guarantees that we will always go straight
803 * from raising the exception to the exception-handling code.
804 *
805 * gen_ss_advance(s) does nothing on M profile currently but
806 * calling it is conceptually the right thing as we have executed
807 * this instruction (compare SWI, HVC, SMC handling).
808 */
809 gen_ss_advance(s);
810 gen_exception_internal(EXCP_EXCEPTION_EXIT);
811 }
812
813 static inline void gen_bxns(DisasContext *s, int rm)
814 {
815 TCGv_i32 var = load_reg(s, rm);
816
817 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
818 * we need to sync state before calling it, but:
819 * - we don't need to do gen_set_pc_im() because the bxns helper will
820 * always set the PC itself
821 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
822 * unless it's outside an IT block or the last insn in an IT block,
823 * so we know that condexec == 0 (already set at the top of the TB)
824 * is correct in the non-UNPREDICTABLE cases, and we can choose
825 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
826 */
827 gen_helper_v7m_bxns(cpu_env, var);
828 tcg_temp_free_i32(var);
829 s->base.is_jmp = DISAS_EXIT;
830 }
831
832 static inline void gen_blxns(DisasContext *s, int rm)
833 {
834 TCGv_i32 var = load_reg(s, rm);
835
836 /* We don't need to sync condexec state, for the same reason as bxns.
837 * We do however need to set the PC, because the blxns helper reads it.
838 * The blxns helper may throw an exception.
839 */
840 gen_set_pc_im(s, s->base.pc_next);
841 gen_helper_v7m_blxns(cpu_env, var);
842 tcg_temp_free_i32(var);
843 s->base.is_jmp = DISAS_EXIT;
844 }
845
846 /* Variant of store_reg which uses branch&exchange logic when storing
847 to r15 in ARM architecture v7 and above. The source must be a temporary
848 and will be marked as dead. */
849 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
850 {
851 if (reg == 15 && ENABLE_ARCH_7) {
852 gen_bx(s, var);
853 } else {
854 store_reg(s, reg, var);
855 }
856 }
857
858 /* Variant of store_reg which uses branch&exchange logic when storing
859 * to r15 in ARM architecture v5T and above. This is used for storing
860 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
861 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
862 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
863 {
864 if (reg == 15 && ENABLE_ARCH_5) {
865 gen_bx_excret(s, var);
866 } else {
867 store_reg(s, reg, var);
868 }
869 }
870
871 #ifdef CONFIG_USER_ONLY
872 #define IS_USER_ONLY 1
873 #else
874 #define IS_USER_ONLY 0
875 #endif
876
877 /* Abstractions of "generate code to do a guest load/store for
878 * AArch32", where a vaddr is always 32 bits (and is zero
879 * extended if we're a 64 bit core) and data is also
880 * 32 bits unless specifically doing a 64 bit access.
881 * These functions work like tcg_gen_qemu_{ld,st}* except
882 * that the address argument is TCGv_i32 rather than TCGv.
883 */
884
885 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
886 {
887 TCGv addr = tcg_temp_new();
888 tcg_gen_extu_i32_tl(addr, a32);
889
890 /* Not needed for user-mode BE32, where we use MO_BE instead. */
891 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
892 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
893 }
894 return addr;
895 }
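/*
 * For the legacy BE32 (SCTLR.B) case, gen_aa32_addr() XORs the address of
 * a byte access with 3 and of a halfword access with 2, so sub-word
 * accesses see the data as if the containing word had been byte-reversed;
 * word and larger accesses are left unmodified here.
 */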
896
897 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
898 int index, MemOp opc)
899 {
900 TCGv addr;
901
902 if (arm_dc_feature(s, ARM_FEATURE_M) &&
903 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
904 opc |= MO_ALIGN;
905 }
906
907 addr = gen_aa32_addr(s, a32, opc);
908 tcg_gen_qemu_ld_i32(val, addr, index, opc);
909 tcg_temp_free(addr);
910 }
911
912 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
913 int index, MemOp opc)
914 {
915 TCGv addr;
916
917 if (arm_dc_feature(s, ARM_FEATURE_M) &&
918 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
919 opc |= MO_ALIGN;
920 }
921
922 addr = gen_aa32_addr(s, a32, opc);
923 tcg_gen_qemu_st_i32(val, addr, index, opc);
924 tcg_temp_free(addr);
925 }
926
927 #define DO_GEN_LD(SUFF, OPC) \
928 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
929 TCGv_i32 a32, int index) \
930 { \
931 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
932 }
933
934 #define DO_GEN_ST(SUFF, OPC) \
935 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
936 TCGv_i32 a32, int index) \
937 { \
938 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
939 }
940
941 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
942 {
943 /* Not needed for user-mode BE32, where we use MO_BE instead. */
944 if (!IS_USER_ONLY && s->sctlr_b) {
945 tcg_gen_rotri_i64(val, val, 32);
946 }
947 }
948
949 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
950 int index, MemOp opc)
951 {
952 TCGv addr = gen_aa32_addr(s, a32, opc);
953 tcg_gen_qemu_ld_i64(val, addr, index, opc);
954 gen_aa32_frob64(s, val);
955 tcg_temp_free(addr);
956 }
957
958 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
959 TCGv_i32 a32, int index)
960 {
961 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
962 }
963
964 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
965 int index, MemOp opc)
966 {
967 TCGv addr = gen_aa32_addr(s, a32, opc);
968
969 /* Not needed for user-mode BE32, where we use MO_BE instead. */
970 if (!IS_USER_ONLY && s->sctlr_b) {
971 TCGv_i64 tmp = tcg_temp_new_i64();
972 tcg_gen_rotri_i64(tmp, val, 32);
973 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
974 tcg_temp_free_i64(tmp);
975 } else {
976 tcg_gen_qemu_st_i64(val, addr, index, opc);
977 }
978 tcg_temp_free(addr);
979 }
980
981 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
982 TCGv_i32 a32, int index)
983 {
984 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
985 }
986
987 DO_GEN_LD(8u, MO_UB)
988 DO_GEN_LD(16u, MO_UW)
989 DO_GEN_LD(32u, MO_UL)
990 DO_GEN_ST(8, MO_UB)
991 DO_GEN_ST(16, MO_UW)
992 DO_GEN_ST(32, MO_UL)
993
994 static inline void gen_hvc(DisasContext *s, int imm16)
995 {
996 /* The pre HVC helper handles cases when HVC gets trapped
997 * as an undefined insn by runtime configuration (ie before
998 * the insn really executes).
999 */
1000 gen_set_pc_im(s, s->pc_curr);
1001 gen_helper_pre_hvc(cpu_env);
1002 /* Otherwise we will treat this as a real exception which
1003 * happens after execution of the insn. (The distinction matters
1004 * for the PC value reported to the exception handler and also
1005 * for single stepping.)
1006 */
1007 s->svc_imm = imm16;
1008 gen_set_pc_im(s, s->base.pc_next);
1009 s->base.is_jmp = DISAS_HVC;
1010 }
1011
1012 static inline void gen_smc(DisasContext *s)
1013 {
1014 /* As with HVC, we may take an exception either before or after
1015 * the insn executes.
1016 */
1017 TCGv_i32 tmp;
1018
1019 gen_set_pc_im(s, s->pc_curr);
1020 tmp = tcg_const_i32(syn_aa32_smc());
1021 gen_helper_pre_smc(cpu_env, tmp);
1022 tcg_temp_free_i32(tmp);
1023 gen_set_pc_im(s, s->base.pc_next);
1024 s->base.is_jmp = DISAS_SMC;
1025 }
1026
1027 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1028 {
1029 gen_set_condexec(s);
1030 gen_set_pc_im(s, pc);
1031 gen_exception_internal(excp);
1032 s->base.is_jmp = DISAS_NORETURN;
1033 }
1034
1035 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1036 int syn, uint32_t target_el)
1037 {
1038 gen_set_condexec(s);
1039 gen_set_pc_im(s, pc);
1040 gen_exception(excp, syn, target_el);
1041 s->base.is_jmp = DISAS_NORETURN;
1042 }
1043
1044 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1045 {
1046 TCGv_i32 tcg_syn;
1047
1048 gen_set_condexec(s);
1049 gen_set_pc_im(s, s->pc_curr);
1050 tcg_syn = tcg_const_i32(syn);
1051 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1052 tcg_temp_free_i32(tcg_syn);
1053 s->base.is_jmp = DISAS_NORETURN;
1054 }
1055
1056 static void unallocated_encoding(DisasContext *s)
1057 {
1058 /* Unallocated and reserved encodings are uncategorized */
1059 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1060 default_exception_el(s));
1061 }
1062
1063 /* Force a TB lookup after an instruction that changes the CPU state. */
1064 static inline void gen_lookup_tb(DisasContext *s)
1065 {
1066 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1067 s->base.is_jmp = DISAS_EXIT;
1068 }
1069
1070 static inline void gen_hlt(DisasContext *s, int imm)
1071 {
1072 /* HLT. This has two purposes.
1073 * Architecturally, it is an external halting debug instruction.
1074  * Since QEMU doesn't implement external debug, we treat this as
1075  * the behaviour required when halting debug is disabled: it will UNDEF.
1076 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1077 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1078 * must trigger semihosting even for ARMv7 and earlier, where
1079 * HLT was an undefined encoding.
1080 * In system mode, we don't allow userspace access to
1081 * semihosting, to provide some semblance of security
1082 * (and for consistency with our 32-bit semihosting).
1083 */
1084 if (semihosting_enabled() &&
1085 #ifndef CONFIG_USER_ONLY
1086 s->current_el != 0 &&
1087 #endif
1088 (imm == (s->thumb ? 0x3c : 0xf000))) {
1089 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1090 return;
1091 }
1092
1093 unallocated_encoding(s);
1094 }
1095
1096 static TCGv_ptr get_fpstatus_ptr(int neon)
1097 {
1098 TCGv_ptr statusptr = tcg_temp_new_ptr();
1099 int offset;
1100 if (neon) {
1101 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1102 } else {
1103 offset = offsetof(CPUARMState, vfp.fp_status);
1104 }
1105 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1106 return statusptr;
1107 }
1108
1109 static inline long vfp_reg_offset(bool dp, unsigned reg)
1110 {
1111 if (dp) {
1112 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1113 } else {
1114 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1115 if (reg & 1) {
1116 ofs += offsetof(CPU_DoubleU, l.upper);
1117 } else {
1118 ofs += offsetof(CPU_DoubleU, l.lower);
1119 }
1120 return ofs;
1121 }
1122 }
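/*
 * vfp_reg_offset() maps the AArch32 register file onto the AArch64/SVE
 * zregs array: D<n> is zregs[n >> 1].d[n & 1], and S<n> is one 32-bit half
 * of the containing 64-bit element (odd n selects the more significant
 * half), with CPU_DoubleU taking care of host endianness.
 */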
1123
1124 /* Return the offset of a 32-bit piece of a NEON register.
1125 zero is the least significant end of the register. */
1126 static inline long
1127 neon_reg_offset (int reg, int n)
1128 {
1129 int sreg;
1130 sreg = reg * 2 + n;
1131 return vfp_reg_offset(0, sreg);
1132 }
1133
1134 static TCGv_i32 neon_load_reg(int reg, int pass)
1135 {
1136 TCGv_i32 tmp = tcg_temp_new_i32();
1137 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1138 return tmp;
1139 }
1140
1141 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1142 {
1143 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1144 tcg_temp_free_i32(var);
1145 }
1146
1147 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1148 {
1149 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1150 }
1151
1152 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1153 {
1154 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1155 }
1156
1157 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1158 {
1159 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1160 }
1161
1162 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1163 {
1164 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1165 }
1166
1167 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1168 {
1169 TCGv_ptr ret = tcg_temp_new_ptr();
1170 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1171 return ret;
1172 }
1173
1174 #define ARM_CP_RW_BIT (1 << 20)
1175
1176 /* Include the VFP and Neon decoders */
1177 #include "decode-m-nocp.c.inc"
1178 #include "translate-vfp.c.inc"
1179 #include "translate-neon.c.inc"
1180
1181 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1182 {
1183 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1184 }
1185
1186 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1187 {
1188 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1189 }
1190
1191 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1192 {
1193 TCGv_i32 var = tcg_temp_new_i32();
1194 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1195 return var;
1196 }
1197
1198 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1199 {
1200 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1201 tcg_temp_free_i32(var);
1202 }
1203
1204 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1205 {
1206 iwmmxt_store_reg(cpu_M0, rn);
1207 }
1208
1209 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1210 {
1211 iwmmxt_load_reg(cpu_M0, rn);
1212 }
1213
1214 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1215 {
1216 iwmmxt_load_reg(cpu_V1, rn);
1217 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1218 }
1219
1220 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1221 {
1222 iwmmxt_load_reg(cpu_V1, rn);
1223 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1224 }
1225
1226 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1227 {
1228 iwmmxt_load_reg(cpu_V1, rn);
1229 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1230 }
1231
1232 #define IWMMXT_OP(name) \
1233 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1234 { \
1235 iwmmxt_load_reg(cpu_V1, rn); \
1236 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1237 }
1238
1239 #define IWMMXT_OP_ENV(name) \
1240 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1241 { \
1242 iwmmxt_load_reg(cpu_V1, rn); \
1243 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1244 }
1245
1246 #define IWMMXT_OP_ENV_SIZE(name) \
1247 IWMMXT_OP_ENV(name##b) \
1248 IWMMXT_OP_ENV(name##w) \
1249 IWMMXT_OP_ENV(name##l)
1250
1251 #define IWMMXT_OP_ENV1(name) \
1252 static inline void gen_op_iwmmxt_##name##_M0(void) \
1253 { \
1254 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1255 }
1256
1257 IWMMXT_OP(maddsq)
1258 IWMMXT_OP(madduq)
1259 IWMMXT_OP(sadb)
1260 IWMMXT_OP(sadw)
1261 IWMMXT_OP(mulslw)
1262 IWMMXT_OP(mulshw)
1263 IWMMXT_OP(mululw)
1264 IWMMXT_OP(muluhw)
1265 IWMMXT_OP(macsw)
1266 IWMMXT_OP(macuw)
1267
1268 IWMMXT_OP_ENV_SIZE(unpackl)
1269 IWMMXT_OP_ENV_SIZE(unpackh)
1270
1271 IWMMXT_OP_ENV1(unpacklub)
1272 IWMMXT_OP_ENV1(unpackluw)
1273 IWMMXT_OP_ENV1(unpacklul)
1274 IWMMXT_OP_ENV1(unpackhub)
1275 IWMMXT_OP_ENV1(unpackhuw)
1276 IWMMXT_OP_ENV1(unpackhul)
1277 IWMMXT_OP_ENV1(unpacklsb)
1278 IWMMXT_OP_ENV1(unpacklsw)
1279 IWMMXT_OP_ENV1(unpacklsl)
1280 IWMMXT_OP_ENV1(unpackhsb)
1281 IWMMXT_OP_ENV1(unpackhsw)
1282 IWMMXT_OP_ENV1(unpackhsl)
1283
1284 IWMMXT_OP_ENV_SIZE(cmpeq)
1285 IWMMXT_OP_ENV_SIZE(cmpgtu)
1286 IWMMXT_OP_ENV_SIZE(cmpgts)
1287
1288 IWMMXT_OP_ENV_SIZE(mins)
1289 IWMMXT_OP_ENV_SIZE(minu)
1290 IWMMXT_OP_ENV_SIZE(maxs)
1291 IWMMXT_OP_ENV_SIZE(maxu)
1292
1293 IWMMXT_OP_ENV_SIZE(subn)
1294 IWMMXT_OP_ENV_SIZE(addn)
1295 IWMMXT_OP_ENV_SIZE(subu)
1296 IWMMXT_OP_ENV_SIZE(addu)
1297 IWMMXT_OP_ENV_SIZE(subs)
1298 IWMMXT_OP_ENV_SIZE(adds)
1299
1300 IWMMXT_OP_ENV(avgb0)
1301 IWMMXT_OP_ENV(avgb1)
1302 IWMMXT_OP_ENV(avgw0)
1303 IWMMXT_OP_ENV(avgw1)
1304
1305 IWMMXT_OP_ENV(packuw)
1306 IWMMXT_OP_ENV(packul)
1307 IWMMXT_OP_ENV(packuq)
1308 IWMMXT_OP_ENV(packsw)
1309 IWMMXT_OP_ENV(packsl)
1310 IWMMXT_OP_ENV(packsq)
1311
1312 static void gen_op_iwmmxt_set_mup(void)
1313 {
1314 TCGv_i32 tmp;
1315 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1316 tcg_gen_ori_i32(tmp, tmp, 2);
1317 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1318 }
1319
1320 static void gen_op_iwmmxt_set_cup(void)
1321 {
1322 TCGv_i32 tmp;
1323 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1324 tcg_gen_ori_i32(tmp, tmp, 1);
1325 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1326 }
1327
1328 static void gen_op_iwmmxt_setpsr_nz(void)
1329 {
1330 TCGv_i32 tmp = tcg_temp_new_i32();
1331 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1332 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1333 }
1334
1335 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1336 {
1337 iwmmxt_load_reg(cpu_V1, rn);
1338 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1339 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1340 }
1341
1342 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1343 TCGv_i32 dest)
1344 {
1345 int rd;
1346 uint32_t offset;
1347 TCGv_i32 tmp;
1348
1349 rd = (insn >> 16) & 0xf;
1350 tmp = load_reg(s, rd);
1351
1352 offset = (insn & 0xff) << ((insn >> 7) & 2);
1353 if (insn & (1 << 24)) {
1354 /* Pre indexed */
1355 if (insn & (1 << 23))
1356 tcg_gen_addi_i32(tmp, tmp, offset);
1357 else
1358 tcg_gen_addi_i32(tmp, tmp, -offset);
1359 tcg_gen_mov_i32(dest, tmp);
1360 if (insn & (1 << 21))
1361 store_reg(s, rd, tmp);
1362 else
1363 tcg_temp_free_i32(tmp);
1364 } else if (insn & (1 << 21)) {
1365 /* Post indexed */
1366 tcg_gen_mov_i32(dest, tmp);
1367 if (insn & (1 << 23))
1368 tcg_gen_addi_i32(tmp, tmp, offset);
1369 else
1370 tcg_gen_addi_i32(tmp, tmp, -offset);
1371 store_reg(s, rd, tmp);
1372 } else if (!(insn & (1 << 23)))
1373 return 1;
1374 return 0;
1375 }
1376
1377 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1378 {
1379 int rd = (insn >> 0) & 0xf;
1380 TCGv_i32 tmp;
1381
1382 if (insn & (1 << 8)) {
1383 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1384 return 1;
1385 } else {
1386 tmp = iwmmxt_load_creg(rd);
1387 }
1388 } else {
1389 tmp = tcg_temp_new_i32();
1390 iwmmxt_load_reg(cpu_V0, rd);
1391 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1392 }
1393 tcg_gen_andi_i32(tmp, tmp, mask);
1394 tcg_gen_mov_i32(dest, tmp);
1395 tcg_temp_free_i32(tmp);
1396 return 0;
1397 }
1398
1399 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1400 (ie. an undefined instruction). */
1401 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1402 {
1403 int rd, wrd;
1404 int rdhi, rdlo, rd0, rd1, i;
1405 TCGv_i32 addr;
1406 TCGv_i32 tmp, tmp2, tmp3;
1407
1408 if ((insn & 0x0e000e00) == 0x0c000000) {
1409 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1410 wrd = insn & 0xf;
1411 rdlo = (insn >> 12) & 0xf;
1412 rdhi = (insn >> 16) & 0xf;
1413 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1414 iwmmxt_load_reg(cpu_V0, wrd);
1415 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1416 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1417 } else { /* TMCRR */
1418 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1419 iwmmxt_store_reg(cpu_V0, wrd);
1420 gen_op_iwmmxt_set_mup();
1421 }
1422 return 0;
1423 }
1424
1425 wrd = (insn >> 12) & 0xf;
1426 addr = tcg_temp_new_i32();
1427 if (gen_iwmmxt_address(s, insn, addr)) {
1428 tcg_temp_free_i32(addr);
1429 return 1;
1430 }
1431 if (insn & ARM_CP_RW_BIT) {
1432 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1433 tmp = tcg_temp_new_i32();
1434 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1435 iwmmxt_store_creg(wrd, tmp);
1436 } else {
1437 i = 1;
1438 if (insn & (1 << 8)) {
1439 if (insn & (1 << 22)) { /* WLDRD */
1440 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1441 i = 0;
1442 } else { /* WLDRW wRd */
1443 tmp = tcg_temp_new_i32();
1444 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1445 }
1446 } else {
1447 tmp = tcg_temp_new_i32();
1448 if (insn & (1 << 22)) { /* WLDRH */
1449 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1450 } else { /* WLDRB */
1451 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1452 }
1453 }
1454 if (i) {
1455 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1456 tcg_temp_free_i32(tmp);
1457 }
1458 gen_op_iwmmxt_movq_wRn_M0(wrd);
1459 }
1460 } else {
1461 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1462 tmp = iwmmxt_load_creg(wrd);
1463 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1464 } else {
1465 gen_op_iwmmxt_movq_M0_wRn(wrd);
1466 tmp = tcg_temp_new_i32();
1467 if (insn & (1 << 8)) {
1468 if (insn & (1 << 22)) { /* WSTRD */
1469 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1470 } else { /* WSTRW wRd */
1471 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1472 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1473 }
1474 } else {
1475 if (insn & (1 << 22)) { /* WSTRH */
1476 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1477 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1478 } else { /* WSTRB */
1479 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1480 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1481 }
1482 }
1483 }
1484 tcg_temp_free_i32(tmp);
1485 }
1486 tcg_temp_free_i32(addr);
1487 return 0;
1488 }
1489
1490 if ((insn & 0x0f000000) != 0x0e000000)
1491 return 1;
1492
1493 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1494 case 0x000: /* WOR */
1495 wrd = (insn >> 12) & 0xf;
1496 rd0 = (insn >> 0) & 0xf;
1497 rd1 = (insn >> 16) & 0xf;
1498 gen_op_iwmmxt_movq_M0_wRn(rd0);
1499 gen_op_iwmmxt_orq_M0_wRn(rd1);
1500 gen_op_iwmmxt_setpsr_nz();
1501 gen_op_iwmmxt_movq_wRn_M0(wrd);
1502 gen_op_iwmmxt_set_mup();
1503 gen_op_iwmmxt_set_cup();
1504 break;
1505 case 0x011: /* TMCR */
1506 if (insn & 0xf)
1507 return 1;
1508 rd = (insn >> 12) & 0xf;
1509 wrd = (insn >> 16) & 0xf;
1510 switch (wrd) {
1511 case ARM_IWMMXT_wCID:
1512 case ARM_IWMMXT_wCASF:
1513 break;
1514 case ARM_IWMMXT_wCon:
1515 gen_op_iwmmxt_set_cup();
1516 /* Fall through. */
1517 case ARM_IWMMXT_wCSSF:
1518 tmp = iwmmxt_load_creg(wrd);
1519 tmp2 = load_reg(s, rd);
1520 tcg_gen_andc_i32(tmp, tmp, tmp2);
1521 tcg_temp_free_i32(tmp2);
1522 iwmmxt_store_creg(wrd, tmp);
1523 break;
1524 case ARM_IWMMXT_wCGR0:
1525 case ARM_IWMMXT_wCGR1:
1526 case ARM_IWMMXT_wCGR2:
1527 case ARM_IWMMXT_wCGR3:
1528 gen_op_iwmmxt_set_cup();
1529 tmp = load_reg(s, rd);
1530 iwmmxt_store_creg(wrd, tmp);
1531 break;
1532 default:
1533 return 1;
1534 }
1535 break;
1536 case 0x100: /* WXOR */
1537 wrd = (insn >> 12) & 0xf;
1538 rd0 = (insn >> 0) & 0xf;
1539 rd1 = (insn >> 16) & 0xf;
1540 gen_op_iwmmxt_movq_M0_wRn(rd0);
1541 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1542 gen_op_iwmmxt_setpsr_nz();
1543 gen_op_iwmmxt_movq_wRn_M0(wrd);
1544 gen_op_iwmmxt_set_mup();
1545 gen_op_iwmmxt_set_cup();
1546 break;
1547 case 0x111: /* TMRC */
1548 if (insn & 0xf)
1549 return 1;
1550 rd = (insn >> 12) & 0xf;
1551 wrd = (insn >> 16) & 0xf;
1552 tmp = iwmmxt_load_creg(wrd);
1553 store_reg(s, rd, tmp);
1554 break;
1555 case 0x300: /* WANDN */
1556 wrd = (insn >> 12) & 0xf;
1557 rd0 = (insn >> 0) & 0xf;
1558 rd1 = (insn >> 16) & 0xf;
1559 gen_op_iwmmxt_movq_M0_wRn(rd0);
1560 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1561 gen_op_iwmmxt_andq_M0_wRn(rd1);
1562 gen_op_iwmmxt_setpsr_nz();
1563 gen_op_iwmmxt_movq_wRn_M0(wrd);
1564 gen_op_iwmmxt_set_mup();
1565 gen_op_iwmmxt_set_cup();
1566 break;
1567 case 0x200: /* WAND */
1568 wrd = (insn >> 12) & 0xf;
1569 rd0 = (insn >> 0) & 0xf;
1570 rd1 = (insn >> 16) & 0xf;
1571 gen_op_iwmmxt_movq_M0_wRn(rd0);
1572 gen_op_iwmmxt_andq_M0_wRn(rd1);
1573 gen_op_iwmmxt_setpsr_nz();
1574 gen_op_iwmmxt_movq_wRn_M0(wrd);
1575 gen_op_iwmmxt_set_mup();
1576 gen_op_iwmmxt_set_cup();
1577 break;
1578 case 0x810: case 0xa10: /* WMADD */
1579 wrd = (insn >> 12) & 0xf;
1580 rd0 = (insn >> 0) & 0xf;
1581 rd1 = (insn >> 16) & 0xf;
1582 gen_op_iwmmxt_movq_M0_wRn(rd0);
1583 if (insn & (1 << 21))
1584 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1585 else
1586 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1587 gen_op_iwmmxt_movq_wRn_M0(wrd);
1588 gen_op_iwmmxt_set_mup();
1589 break;
1590 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1591 wrd = (insn >> 12) & 0xf;
1592 rd0 = (insn >> 16) & 0xf;
1593 rd1 = (insn >> 0) & 0xf;
1594 gen_op_iwmmxt_movq_M0_wRn(rd0);
1595 switch ((insn >> 22) & 3) {
1596 case 0:
1597 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1598 break;
1599 case 1:
1600 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1601 break;
1602 case 2:
1603 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1604 break;
1605 case 3:
1606 return 1;
1607 }
1608 gen_op_iwmmxt_movq_wRn_M0(wrd);
1609 gen_op_iwmmxt_set_mup();
1610 gen_op_iwmmxt_set_cup();
1611 break;
1612 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1613 wrd = (insn >> 12) & 0xf;
1614 rd0 = (insn >> 16) & 0xf;
1615 rd1 = (insn >> 0) & 0xf;
1616 gen_op_iwmmxt_movq_M0_wRn(rd0);
1617 switch ((insn >> 22) & 3) {
1618 case 0:
1619 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1620 break;
1621 case 1:
1622 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1623 break;
1624 case 2:
1625 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1626 break;
1627 case 3:
1628 return 1;
1629 }
1630 gen_op_iwmmxt_movq_wRn_M0(wrd);
1631 gen_op_iwmmxt_set_mup();
1632 gen_op_iwmmxt_set_cup();
1633 break;
1634 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1635 wrd = (insn >> 12) & 0xf;
1636 rd0 = (insn >> 16) & 0xf;
1637 rd1 = (insn >> 0) & 0xf;
1638 gen_op_iwmmxt_movq_M0_wRn(rd0);
1639 if (insn & (1 << 22))
1640 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1641 else
1642 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1643 if (!(insn & (1 << 20)))
1644 gen_op_iwmmxt_addl_M0_wRn(wrd);
1645 gen_op_iwmmxt_movq_wRn_M0(wrd);
1646 gen_op_iwmmxt_set_mup();
1647 break;
1648 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1649 wrd = (insn >> 12) & 0xf;
1650 rd0 = (insn >> 16) & 0xf;
1651 rd1 = (insn >> 0) & 0xf;
1652 gen_op_iwmmxt_movq_M0_wRn(rd0);
1653 if (insn & (1 << 21)) {
1654 if (insn & (1 << 20))
1655 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1656 else
1657 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1658 } else {
1659 if (insn & (1 << 20))
1660 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1661 else
1662 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1663 }
1664 gen_op_iwmmxt_movq_wRn_M0(wrd);
1665 gen_op_iwmmxt_set_mup();
1666 break;
1667 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1668 wrd = (insn >> 12) & 0xf;
1669 rd0 = (insn >> 16) & 0xf;
1670 rd1 = (insn >> 0) & 0xf;
1671 gen_op_iwmmxt_movq_M0_wRn(rd0);
1672 if (insn & (1 << 21))
1673 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1674 else
1675 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1676 if (!(insn & (1 << 20))) {
1677 iwmmxt_load_reg(cpu_V1, wrd);
1678 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1679 }
1680 gen_op_iwmmxt_movq_wRn_M0(wrd);
1681 gen_op_iwmmxt_set_mup();
1682 break;
1683 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1684 wrd = (insn >> 12) & 0xf;
1685 rd0 = (insn >> 16) & 0xf;
1686 rd1 = (insn >> 0) & 0xf;
1687 gen_op_iwmmxt_movq_M0_wRn(rd0);
1688 switch ((insn >> 22) & 3) {
1689 case 0:
1690 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1691 break;
1692 case 1:
1693 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1694 break;
1695 case 2:
1696 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1697 break;
1698 case 3:
1699 return 1;
1700 }
1701 gen_op_iwmmxt_movq_wRn_M0(wrd);
1702 gen_op_iwmmxt_set_mup();
1703 gen_op_iwmmxt_set_cup();
1704 break;
1705 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1706 wrd = (insn >> 12) & 0xf;
1707 rd0 = (insn >> 16) & 0xf;
1708 rd1 = (insn >> 0) & 0xf;
1709 gen_op_iwmmxt_movq_M0_wRn(rd0);
1710 if (insn & (1 << 22)) {
1711 if (insn & (1 << 20))
1712 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1713 else
1714 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1715 } else {
1716 if (insn & (1 << 20))
1717 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1718 else
1719 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1720 }
1721 gen_op_iwmmxt_movq_wRn_M0(wrd);
1722 gen_op_iwmmxt_set_mup();
1723 gen_op_iwmmxt_set_cup();
1724 break;
1725 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1726 wrd = (insn >> 12) & 0xf;
1727 rd0 = (insn >> 16) & 0xf;
1728 rd1 = (insn >> 0) & 0xf;
1729 gen_op_iwmmxt_movq_M0_wRn(rd0);
1730 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1731 tcg_gen_andi_i32(tmp, tmp, 7);
1732 iwmmxt_load_reg(cpu_V1, rd1);
1733 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1734 tcg_temp_free_i32(tmp);
1735 gen_op_iwmmxt_movq_wRn_M0(wrd);
1736 gen_op_iwmmxt_set_mup();
1737 break;
1738 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1739 if (((insn >> 6) & 3) == 3)
1740 return 1;
1741 rd = (insn >> 12) & 0xf;
1742 wrd = (insn >> 16) & 0xf;
1743 tmp = load_reg(s, rd);
1744 gen_op_iwmmxt_movq_M0_wRn(wrd);
1745 switch ((insn >> 6) & 3) {
1746 case 0:
1747 tmp2 = tcg_const_i32(0xff);
1748 tmp3 = tcg_const_i32((insn & 7) << 3);
1749 break;
1750 case 1:
1751 tmp2 = tcg_const_i32(0xffff);
1752 tmp3 = tcg_const_i32((insn & 3) << 4);
1753 break;
1754 case 2:
1755 tmp2 = tcg_const_i32(0xffffffff);
1756 tmp3 = tcg_const_i32((insn & 1) << 5);
1757 break;
1758 default:
1759 tmp2 = NULL;
1760 tmp3 = NULL;
1761 }
1762 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1763 tcg_temp_free_i32(tmp3);
1764 tcg_temp_free_i32(tmp2);
1765 tcg_temp_free_i32(tmp);
1766 gen_op_iwmmxt_movq_wRn_M0(wrd);
1767 gen_op_iwmmxt_set_mup();
1768 break;
1769 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1770 rd = (insn >> 12) & 0xf;
1771 wrd = (insn >> 16) & 0xf;
1772 if (rd == 15 || ((insn >> 22) & 3) == 3)
1773 return 1;
1774 gen_op_iwmmxt_movq_M0_wRn(wrd);
1775 tmp = tcg_temp_new_i32();
1776 switch ((insn >> 22) & 3) {
1777 case 0:
1778 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1779 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1780 if (insn & 8) {
1781 tcg_gen_ext8s_i32(tmp, tmp);
1782 } else {
1783 tcg_gen_andi_i32(tmp, tmp, 0xff);
1784 }
1785 break;
1786 case 1:
1787 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1788 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1789 if (insn & 8) {
1790 tcg_gen_ext16s_i32(tmp, tmp);
1791 } else {
1792 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1793 }
1794 break;
1795 case 2:
1796 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1797 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1798 break;
1799 }
1800 store_reg(s, rd, tmp);
1801 break;
1802 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1803 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1804 return 1;
1805 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1806 switch ((insn >> 22) & 3) {
1807 case 0:
1808 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1809 break;
1810 case 1:
1811 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1812 break;
1813 case 2:
1814 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1815 break;
1816 }
1817 tcg_gen_shli_i32(tmp, tmp, 28);
1818 gen_set_nzcv(tmp);
1819 tcg_temp_free_i32(tmp);
1820 break;
1821 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1822 if (((insn >> 6) & 3) == 3)
1823 return 1;
1824 rd = (insn >> 12) & 0xf;
1825 wrd = (insn >> 16) & 0xf;
1826 tmp = load_reg(s, rd);
1827 switch ((insn >> 6) & 3) {
1828 case 0:
1829 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1830 break;
1831 case 1:
1832 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1833 break;
1834 case 2:
1835 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1836 break;
1837 }
1838 tcg_temp_free_i32(tmp);
1839 gen_op_iwmmxt_movq_wRn_M0(wrd);
1840 gen_op_iwmmxt_set_mup();
1841 break;
1842 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1843 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1844 return 1;
1845 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1846 tmp2 = tcg_temp_new_i32();
1847 tcg_gen_mov_i32(tmp2, tmp);
1848 switch ((insn >> 22) & 3) {
1849 case 0:
1850 for (i = 0; i < 7; i ++) {
1851 tcg_gen_shli_i32(tmp2, tmp2, 4);
1852 tcg_gen_and_i32(tmp, tmp, tmp2);
1853 }
1854 break;
1855 case 1:
1856 for (i = 0; i < 3; i ++) {
1857 tcg_gen_shli_i32(tmp2, tmp2, 8);
1858 tcg_gen_and_i32(tmp, tmp, tmp2);
1859 }
1860 break;
1861 case 2:
1862 tcg_gen_shli_i32(tmp2, tmp2, 16);
1863 tcg_gen_and_i32(tmp, tmp, tmp2);
1864 break;
1865 }
1866 gen_set_nzcv(tmp);
1867 tcg_temp_free_i32(tmp2);
1868 tcg_temp_free_i32(tmp);
1869 break;
1870 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
1871 wrd = (insn >> 12) & 0xf;
1872 rd0 = (insn >> 16) & 0xf;
1873 gen_op_iwmmxt_movq_M0_wRn(rd0);
1874 switch ((insn >> 22) & 3) {
1875 case 0:
1876 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1877 break;
1878 case 1:
1879 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1880 break;
1881 case 2:
1882 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1883 break;
1884 case 3:
1885 return 1;
1886 }
1887 gen_op_iwmmxt_movq_wRn_M0(wrd);
1888 gen_op_iwmmxt_set_mup();
1889 break;
1890 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
1891 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1892 return 1;
1893 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1894 tmp2 = tcg_temp_new_i32();
1895 tcg_gen_mov_i32(tmp2, tmp);
1896 switch ((insn >> 22) & 3) {
1897 case 0:
1898 for (i = 0; i < 7; i++) {
1899 tcg_gen_shli_i32(tmp2, tmp2, 4);
1900 tcg_gen_or_i32(tmp, tmp, tmp2);
1901 }
1902 break;
1903 case 1:
1904 for (i = 0; i < 3; i++) {
1905 tcg_gen_shli_i32(tmp2, tmp2, 8);
1906 tcg_gen_or_i32(tmp, tmp, tmp2);
1907 }
1908 break;
1909 case 2:
1910 tcg_gen_shli_i32(tmp2, tmp2, 16);
1911 tcg_gen_or_i32(tmp, tmp, tmp2);
1912 break;
1913 }
1914 gen_set_nzcv(tmp);
1915 tcg_temp_free_i32(tmp2);
1916 tcg_temp_free_i32(tmp);
1917 break;
1918 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
1919 rd = (insn >> 12) & 0xf;
1920 rd0 = (insn >> 16) & 0xf;
1921 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1922 return 1;
1923 gen_op_iwmmxt_movq_M0_wRn(rd0);
1924 tmp = tcg_temp_new_i32();
1925 switch ((insn >> 22) & 3) {
1926 case 0:
1927 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1928 break;
1929 case 1:
1930 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
1931 break;
1932 case 2:
1933 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
1934 break;
1935 }
1936 store_reg(s, rd, tmp);
1937 break;
1938 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
1939 case 0x906: case 0xb06: case 0xd06: case 0xf06:
1940 wrd = (insn >> 12) & 0xf;
1941 rd0 = (insn >> 16) & 0xf;
1942 rd1 = (insn >> 0) & 0xf;
1943 gen_op_iwmmxt_movq_M0_wRn(rd0);
1944 switch ((insn >> 22) & 3) {
1945 case 0:
1946 if (insn & (1 << 21))
1947 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
1948 else
1949 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
1950 break;
1951 case 1:
1952 if (insn & (1 << 21))
1953 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
1954 else
1955 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
1956 break;
1957 case 2:
1958 if (insn & (1 << 21))
1959 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
1960 else
1961 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
1962 break;
1963 case 3:
1964 return 1;
1965 }
1966 gen_op_iwmmxt_movq_wRn_M0(wrd);
1967 gen_op_iwmmxt_set_mup();
1968 gen_op_iwmmxt_set_cup();
1969 break;
1970 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
1971 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
1972 wrd = (insn >> 12) & 0xf;
1973 rd0 = (insn >> 16) & 0xf;
1974 gen_op_iwmmxt_movq_M0_wRn(rd0);
1975 switch ((insn >> 22) & 3) {
1976 case 0:
1977 if (insn & (1 << 21))
1978 gen_op_iwmmxt_unpacklsb_M0();
1979 else
1980 gen_op_iwmmxt_unpacklub_M0();
1981 break;
1982 case 1:
1983 if (insn & (1 << 21))
1984 gen_op_iwmmxt_unpacklsw_M0();
1985 else
1986 gen_op_iwmmxt_unpackluw_M0();
1987 break;
1988 case 2:
1989 if (insn & (1 << 21))
1990 gen_op_iwmmxt_unpacklsl_M0();
1991 else
1992 gen_op_iwmmxt_unpacklul_M0();
1993 break;
1994 case 3:
1995 return 1;
1996 }
1997 gen_op_iwmmxt_movq_wRn_M0(wrd);
1998 gen_op_iwmmxt_set_mup();
1999 gen_op_iwmmxt_set_cup();
2000 break;
2001 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2002 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2003 wrd = (insn >> 12) & 0xf;
2004 rd0 = (insn >> 16) & 0xf;
2005 gen_op_iwmmxt_movq_M0_wRn(rd0);
2006 switch ((insn >> 22) & 3) {
2007 case 0:
2008 if (insn & (1 << 21))
2009 gen_op_iwmmxt_unpackhsb_M0();
2010 else
2011 gen_op_iwmmxt_unpackhub_M0();
2012 break;
2013 case 1:
2014 if (insn & (1 << 21))
2015 gen_op_iwmmxt_unpackhsw_M0();
2016 else
2017 gen_op_iwmmxt_unpackhuw_M0();
2018 break;
2019 case 2:
2020 if (insn & (1 << 21))
2021 gen_op_iwmmxt_unpackhsl_M0();
2022 else
2023 gen_op_iwmmxt_unpackhul_M0();
2024 break;
2025 case 3:
2026 return 1;
2027 }
2028 gen_op_iwmmxt_movq_wRn_M0(wrd);
2029 gen_op_iwmmxt_set_mup();
2030 gen_op_iwmmxt_set_cup();
2031 break;
2032 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2033 case 0x214: case 0x614: case 0xa14: case 0xe14:
2034 if (((insn >> 22) & 3) == 0)
2035 return 1;
2036 wrd = (insn >> 12) & 0xf;
2037 rd0 = (insn >> 16) & 0xf;
2038 gen_op_iwmmxt_movq_M0_wRn(rd0);
2039 tmp = tcg_temp_new_i32();
2040 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2041 tcg_temp_free_i32(tmp);
2042 return 1;
2043 }
2044 switch ((insn >> 22) & 3) {
2045 case 1:
2046 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2047 break;
2048 case 2:
2049 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2050 break;
2051 case 3:
2052 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2053 break;
2054 }
2055 tcg_temp_free_i32(tmp);
2056 gen_op_iwmmxt_movq_wRn_M0(wrd);
2057 gen_op_iwmmxt_set_mup();
2058 gen_op_iwmmxt_set_cup();
2059 break;
2060 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2061 case 0x014: case 0x414: case 0x814: case 0xc14:
2062 if (((insn >> 22) & 3) == 0)
2063 return 1;
2064 wrd = (insn >> 12) & 0xf;
2065 rd0 = (insn >> 16) & 0xf;
2066 gen_op_iwmmxt_movq_M0_wRn(rd0);
2067 tmp = tcg_temp_new_i32();
2068 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2069 tcg_temp_free_i32(tmp);
2070 return 1;
2071 }
2072 switch ((insn >> 22) & 3) {
2073 case 1:
2074 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2075 break;
2076 case 2:
2077 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2078 break;
2079 case 3:
2080 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2081 break;
2082 }
2083 tcg_temp_free_i32(tmp);
2084 gen_op_iwmmxt_movq_wRn_M0(wrd);
2085 gen_op_iwmmxt_set_mup();
2086 gen_op_iwmmxt_set_cup();
2087 break;
2088 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2089 case 0x114: case 0x514: case 0x914: case 0xd14:
2090 if (((insn >> 22) & 3) == 0)
2091 return 1;
2092 wrd = (insn >> 12) & 0xf;
2093 rd0 = (insn >> 16) & 0xf;
2094 gen_op_iwmmxt_movq_M0_wRn(rd0);
2095 tmp = tcg_temp_new_i32();
2096 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2097 tcg_temp_free_i32(tmp);
2098 return 1;
2099 }
2100 switch ((insn >> 22) & 3) {
2101 case 1:
2102 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2103 break;
2104 case 2:
2105 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2106 break;
2107 case 3:
2108 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2109 break;
2110 }
2111 tcg_temp_free_i32(tmp);
2112 gen_op_iwmmxt_movq_wRn_M0(wrd);
2113 gen_op_iwmmxt_set_mup();
2114 gen_op_iwmmxt_set_cup();
2115 break;
2116 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2117 case 0x314: case 0x714: case 0xb14: case 0xf14:
2118 if (((insn >> 22) & 3) == 0)
2119 return 1;
2120 wrd = (insn >> 12) & 0xf;
2121 rd0 = (insn >> 16) & 0xf;
2122 gen_op_iwmmxt_movq_M0_wRn(rd0);
2123 tmp = tcg_temp_new_i32();
2124 switch ((insn >> 22) & 3) {
2125 case 1:
2126 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2127 tcg_temp_free_i32(tmp);
2128 return 1;
2129 }
2130 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2131 break;
2132 case 2:
2133 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2134 tcg_temp_free_i32(tmp);
2135 return 1;
2136 }
2137 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2138 break;
2139 case 3:
2140 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2141 tcg_temp_free_i32(tmp);
2142 return 1;
2143 }
2144 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2145 break;
2146 }
2147 tcg_temp_free_i32(tmp);
2148 gen_op_iwmmxt_movq_wRn_M0(wrd);
2149 gen_op_iwmmxt_set_mup();
2150 gen_op_iwmmxt_set_cup();
2151 break;
2152 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2153 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2154 wrd = (insn >> 12) & 0xf;
2155 rd0 = (insn >> 16) & 0xf;
2156 rd1 = (insn >> 0) & 0xf;
2157 gen_op_iwmmxt_movq_M0_wRn(rd0);
2158 switch ((insn >> 22) & 3) {
2159 case 0:
2160 if (insn & (1 << 21))
2161 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2162 else
2163 gen_op_iwmmxt_minub_M0_wRn(rd1);
2164 break;
2165 case 1:
2166 if (insn & (1 << 21))
2167 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2168 else
2169 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2170 break;
2171 case 2:
2172 if (insn & (1 << 21))
2173 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2174 else
2175 gen_op_iwmmxt_minul_M0_wRn(rd1);
2176 break;
2177 case 3:
2178 return 1;
2179 }
2180 gen_op_iwmmxt_movq_wRn_M0(wrd);
2181 gen_op_iwmmxt_set_mup();
2182 break;
2183 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2184 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2185 wrd = (insn >> 12) & 0xf;
2186 rd0 = (insn >> 16) & 0xf;
2187 rd1 = (insn >> 0) & 0xf;
2188 gen_op_iwmmxt_movq_M0_wRn(rd0);
2189 switch ((insn >> 22) & 3) {
2190 case 0:
2191 if (insn & (1 << 21))
2192 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2193 else
2194 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2195 break;
2196 case 1:
2197 if (insn & (1 << 21))
2198 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2199 else
2200 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2201 break;
2202 case 2:
2203 if (insn & (1 << 21))
2204 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2205 else
2206 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2207 break;
2208 case 3:
2209 return 1;
2210 }
2211 gen_op_iwmmxt_movq_wRn_M0(wrd);
2212 gen_op_iwmmxt_set_mup();
2213 break;
2214 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2215 case 0x402: case 0x502: case 0x602: case 0x702:
2216 wrd = (insn >> 12) & 0xf;
2217 rd0 = (insn >> 16) & 0xf;
2218 rd1 = (insn >> 0) & 0xf;
2219 gen_op_iwmmxt_movq_M0_wRn(rd0);
2220 tmp = tcg_const_i32((insn >> 20) & 3);
2221 iwmmxt_load_reg(cpu_V1, rd1);
2222 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2223 tcg_temp_free_i32(tmp);
2224 gen_op_iwmmxt_movq_wRn_M0(wrd);
2225 gen_op_iwmmxt_set_mup();
2226 break;
2227 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2228 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2229 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2230 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2231 wrd = (insn >> 12) & 0xf;
2232 rd0 = (insn >> 16) & 0xf;
2233 rd1 = (insn >> 0) & 0xf;
2234 gen_op_iwmmxt_movq_M0_wRn(rd0);
2235 switch ((insn >> 20) & 0xf) {
2236 case 0x0:
2237 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2238 break;
2239 case 0x1:
2240 gen_op_iwmmxt_subub_M0_wRn(rd1);
2241 break;
2242 case 0x3:
2243 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2244 break;
2245 case 0x4:
2246 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2247 break;
2248 case 0x5:
2249 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2250 break;
2251 case 0x7:
2252 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2253 break;
2254 case 0x8:
2255 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2256 break;
2257 case 0x9:
2258 gen_op_iwmmxt_subul_M0_wRn(rd1);
2259 break;
2260 case 0xb:
2261 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2262 break;
2263 default:
2264 return 1;
2265 }
2266 gen_op_iwmmxt_movq_wRn_M0(wrd);
2267 gen_op_iwmmxt_set_mup();
2268 gen_op_iwmmxt_set_cup();
2269 break;
2270 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2271 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2272 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2273 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2274 wrd = (insn >> 12) & 0xf;
2275 rd0 = (insn >> 16) & 0xf;
2276 gen_op_iwmmxt_movq_M0_wRn(rd0);
2277 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2278 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2279 tcg_temp_free_i32(tmp);
2280 gen_op_iwmmxt_movq_wRn_M0(wrd);
2281 gen_op_iwmmxt_set_mup();
2282 gen_op_iwmmxt_set_cup();
2283 break;
2284 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2285 case 0x418: case 0x518: case 0x618: case 0x718:
2286 case 0x818: case 0x918: case 0xa18: case 0xb18:
2287 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2288 wrd = (insn >> 12) & 0xf;
2289 rd0 = (insn >> 16) & 0xf;
2290 rd1 = (insn >> 0) & 0xf;
2291 gen_op_iwmmxt_movq_M0_wRn(rd0);
2292 switch ((insn >> 20) & 0xf) {
2293 case 0x0:
2294 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2295 break;
2296 case 0x1:
2297 gen_op_iwmmxt_addub_M0_wRn(rd1);
2298 break;
2299 case 0x3:
2300 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2301 break;
2302 case 0x4:
2303 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2304 break;
2305 case 0x5:
2306 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2307 break;
2308 case 0x7:
2309 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2310 break;
2311 case 0x8:
2312 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2313 break;
2314 case 0x9:
2315 gen_op_iwmmxt_addul_M0_wRn(rd1);
2316 break;
2317 case 0xb:
2318 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2319 break;
2320 default:
2321 return 1;
2322 }
2323 gen_op_iwmmxt_movq_wRn_M0(wrd);
2324 gen_op_iwmmxt_set_mup();
2325 gen_op_iwmmxt_set_cup();
2326 break;
2327 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2328 case 0x408: case 0x508: case 0x608: case 0x708:
2329 case 0x808: case 0x908: case 0xa08: case 0xb08:
2330 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2331 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2332 return 1;
2333 wrd = (insn >> 12) & 0xf;
2334 rd0 = (insn >> 16) & 0xf;
2335 rd1 = (insn >> 0) & 0xf;
2336 gen_op_iwmmxt_movq_M0_wRn(rd0);
2337 switch ((insn >> 22) & 3) {
2338 case 1:
2339 if (insn & (1 << 21))
2340 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2341 else
2342 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2343 break;
2344 case 2:
2345 if (insn & (1 << 21))
2346 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2347 else
2348 gen_op_iwmmxt_packul_M0_wRn(rd1);
2349 break;
2350 case 3:
2351 if (insn & (1 << 21))
2352 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2353 else
2354 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2355 break;
2356 }
2357 gen_op_iwmmxt_movq_wRn_M0(wrd);
2358 gen_op_iwmmxt_set_mup();
2359 gen_op_iwmmxt_set_cup();
2360 break;
2361 case 0x201: case 0x203: case 0x205: case 0x207:
2362 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2363 case 0x211: case 0x213: case 0x215: case 0x217:
2364 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2365 wrd = (insn >> 5) & 0xf;
2366 rd0 = (insn >> 12) & 0xf;
2367 rd1 = (insn >> 0) & 0xf;
2368 if (rd0 == 0xf || rd1 == 0xf)
2369 return 1;
2370 gen_op_iwmmxt_movq_M0_wRn(wrd);
2371 tmp = load_reg(s, rd0);
2372 tmp2 = load_reg(s, rd1);
2373 switch ((insn >> 16) & 0xf) {
2374 case 0x0: /* TMIA */
2375 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2376 break;
2377 case 0x8: /* TMIAPH */
2378 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2379 break;
2380 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2381 if (insn & (1 << 16))
2382 tcg_gen_shri_i32(tmp, tmp, 16);
2383 if (insn & (1 << 17))
2384 tcg_gen_shri_i32(tmp2, tmp2, 16);
2385 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2386 break;
2387 default:
2388 tcg_temp_free_i32(tmp2);
2389 tcg_temp_free_i32(tmp);
2390 return 1;
2391 }
2392 tcg_temp_free_i32(tmp2);
2393 tcg_temp_free_i32(tmp);
2394 gen_op_iwmmxt_movq_wRn_M0(wrd);
2395 gen_op_iwmmxt_set_mup();
2396 break;
2397 default:
2398 return 1;
2399 }
2400
2401 return 0;
2402 }
2403
2404 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2405 (i.e. an undefined instruction). */
2406 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2407 {
2408 int acc, rd0, rd1, rdhi, rdlo;
2409 TCGv_i32 tmp, tmp2;
2410
2411 if ((insn & 0x0ff00f10) == 0x0e200010) {
2412 /* Multiply with Internal Accumulate Format */
2413 rd0 = (insn >> 12) & 0xf;
2414 rd1 = insn & 0xf;
2415 acc = (insn >> 5) & 7;
2416
2417 if (acc != 0)
2418 return 1;
2419
2420 tmp = load_reg(s, rd0);
2421 tmp2 = load_reg(s, rd1);
2422 switch ((insn >> 16) & 0xf) {
2423 case 0x0: /* MIA */
2424 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2425 break;
2426 case 0x8: /* MIAPH */
2427 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2428 break;
2429 case 0xc: /* MIABB */
2430 case 0xd: /* MIABT */
2431 case 0xe: /* MIATB */
2432 case 0xf: /* MIATT */
2433 if (insn & (1 << 16))
2434 tcg_gen_shri_i32(tmp, tmp, 16);
2435 if (insn & (1 << 17))
2436 tcg_gen_shri_i32(tmp2, tmp2, 16);
2437 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2438 break;
2439 default:
2440 return 1;
2441 }
2442 tcg_temp_free_i32(tmp2);
2443 tcg_temp_free_i32(tmp);
2444
2445 gen_op_iwmmxt_movq_wRn_M0(acc);
2446 return 0;
2447 }
2448
2449 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2450 /* Internal Accumulator Access Format */
2451 rdhi = (insn >> 16) & 0xf;
2452 rdlo = (insn >> 12) & 0xf;
2453 acc = insn & 7;
2454
2455 if (acc != 0)
2456 return 1;
2457
2458 if (insn & ARM_CP_RW_BIT) { /* MRA */
2459 iwmmxt_load_reg(cpu_V0, acc);
2460 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2461 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2462 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2463 } else { /* MAR */
2464 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2465 iwmmxt_store_reg(cpu_V0, acc);
2466 }
2467 return 0;
2468 }
2469
2470 return 1;
2471 }
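/*
 * Illustrative note: the XScale accumulator acc0 is 40 bits wide, so for
 * MRA the code above returns acc0[31:0] in RdLo and acc0[39:32] in RdHi
 * (hence the mask of (1 << (40 - 32)) - 1 == 0xff), while MAR simply packs
 * RdLo/RdHi back into the 64-bit internal register.
 */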
2472
2473 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2474 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2475 if (dc_isar_feature(aa32_simd_r32, s)) { \
2476 reg = (((insn) >> (bigbit)) & 0x0f) \
2477 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2478 } else { \
2479 if (insn & (1 << (smallbit))) \
2480 return 1; \
2481 reg = ((insn) >> (bigbit)) & 0x0f; \
2482 }} while (0)
2483
2484 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2485 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2486 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
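/*
 * Worked example (illustrative): a double/quad register number is the
 * concatenation D:Vd, where Vd is a 4-bit field and D a single bit. With
 * Vd = 0b0101 in bits [15:12] and D = 1 in bit [22], VFP_DREG_D computes
 * ((insn >> 12) & 0x0f) | ((insn >> 18) & 0x10) = 0x05 | 0x10 = 21, i.e.
 * d21. Without aa32_simd_r32 a set D bit makes the insn UNDEF instead.
 */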
2487
2488 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2489 {
2490 #ifndef CONFIG_USER_ONLY
2491 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2492 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2493 #else
2494 return true;
2495 #endif
2496 }
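/*
 * Illustrative note: direct TB chaining is only allowed when the branch
 * target lies on the same guest page as the start of the TB or as the
 * branching insn itself (roughly, so page-granular TB invalidation can
 * keep the links consistent). E.g. with a 4K TARGET_PAGE_MASK, a TB at
 * 0x10000 may chain directly to 0x10ffc but not to 0x11000 unless the
 * branching insn already sits on the 0x11000 page.
 */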
2497
2498 static void gen_goto_ptr(void)
2499 {
2500 tcg_gen_lookup_and_goto_ptr();
2501 }
2502
2503 /* This will end the TB but doesn't guarantee we'll return to
2504 * cpu_loop_exec. Any live exit_requests will be processed as we
2505 * enter the next TB.
2506 */
2507 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2508 {
2509 if (use_goto_tb(s, dest)) {
2510 tcg_gen_goto_tb(n);
2511 gen_set_pc_im(s, dest);
2512 tcg_gen_exit_tb(s->base.tb, n);
2513 } else {
2514 gen_set_pc_im(s, dest);
2515 gen_goto_ptr();
2516 }
2517 s->base.is_jmp = DISAS_NORETURN;
2518 }
2519
2520 static inline void gen_jmp(DisasContext *s, uint32_t dest)
2521 {
2522 if (unlikely(is_singlestepping(s))) {
2523 /* An indirect jump so that we still trigger the debug exception. */
2524 gen_set_pc_im(s, dest);
2525 s->base.is_jmp = DISAS_JUMP;
2526 } else {
2527 gen_goto_tb(s, 0, dest);
2528 }
2529 }
2530
2531 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2532 {
2533 if (x)
2534 tcg_gen_sari_i32(t0, t0, 16);
2535 else
2536 gen_sxth(t0);
2537 if (y)
2538 tcg_gen_sari_i32(t1, t1, 16);
2539 else
2540 gen_sxth(t1);
2541 tcg_gen_mul_i32(t0, t0, t1);
2542 }
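/*
 * Example (illustrative): gen_mulxy implements the signed 16x16->32
 * multiply used by the SMULxy/SMLAxy family; x and y select the top or
 * bottom halfword of each operand. With x = 1, y = 0 it computes
 * (int16_t)(t0 >> 16) * (int16_t)t1, so t0 = 0xffff0000 (top half -1)
 * and t1 = 0x00000003 give -3.
 */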
2543
2544 /* Return the mask of PSR bits set by a MSR instruction. */
2545 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2546 {
2547 uint32_t mask = 0;
2548
2549 if (flags & (1 << 0)) {
2550 mask |= 0xff;
2551 }
2552 if (flags & (1 << 1)) {
2553 mask |= 0xff00;
2554 }
2555 if (flags & (1 << 2)) {
2556 mask |= 0xff0000;
2557 }
2558 if (flags & (1 << 3)) {
2559 mask |= 0xff000000;
2560 }
2561
2562 /* Mask out undefined and reserved bits. */
2563 mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2564
2565 /* Mask out execution state. */
2566 if (!spsr) {
2567 mask &= ~CPSR_EXEC;
2568 }
2569
2570 /* Mask out privileged bits. */
2571 if (IS_USER(s)) {
2572 mask &= CPSR_USER;
2573 }
2574 return mask;
2575 }
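/*
 * Example (illustrative): for "MSR CPSR_fc, r0" the field mask has bits
 * 0 (c) and 3 (f) set, so msr_mask() starts from 0xff0000ff; the
 * execution-state bits are then removed for CPSR writes and, in user
 * mode, only the user-writable bits (NZCV, Q, GE) survive.
 */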
2576
2577 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2578 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2579 {
2580 TCGv_i32 tmp;
2581 if (spsr) {
2582 /* ??? This is also undefined in system mode. */
2583 if (IS_USER(s))
2584 return 1;
2585
2586 tmp = load_cpu_field(spsr);
2587 tcg_gen_andi_i32(tmp, tmp, ~mask);
2588 tcg_gen_andi_i32(t0, t0, mask);
2589 tcg_gen_or_i32(tmp, tmp, t0);
2590 store_cpu_field(tmp, spsr);
2591 } else {
2592 gen_set_cpsr(t0, mask);
2593 }
2594 tcg_temp_free_i32(t0);
2595 gen_lookup_tb(s);
2596 return 0;
2597 }
2598
2599 /* Returns nonzero if access to the PSR is not permitted. */
2600 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2601 {
2602 TCGv_i32 tmp;
2603 tmp = tcg_temp_new_i32();
2604 tcg_gen_movi_i32(tmp, val);
2605 return gen_set_psr(s, mask, spsr, tmp);
2606 }
2607
2608 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2609 int *tgtmode, int *regno)
2610 {
2611 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2612 * the target mode and register number, and identify the various
2613 * unpredictable cases.
2614 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2615 * + executed in user mode
2616 * + using R15 as the src/dest register
2617 * + accessing an unimplemented register
2618 * + accessing a register that's inaccessible at current PL/security state*
2619 * + accessing a register that you could access with a different insn
2620 * We choose to UNDEF in all these cases.
2621 * Since we don't know which of the various AArch32 modes we are in
2622 * we have to defer some checks to runtime.
2623 * Accesses to Monitor mode registers from Secure EL1 (which implies
2624 * that EL3 is AArch64) must trap to EL3.
2625 *
2626 * If the access checks fail this function will emit code to take
2627 * an exception and return false. Otherwise it will return true,
2628 * and set *tgtmode and *regno appropriately.
2629 */
2630 int exc_target = default_exception_el(s);
2631
2632 /* These instructions are present only in ARMv8, or in ARMv7 with the
2633 * Virtualization Extensions.
2634 */
2635 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2636 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2637 goto undef;
2638 }
2639
2640 if (IS_USER(s) || rn == 15) {
2641 goto undef;
2642 }
2643
2644 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2645 * of registers into (r, sysm).
2646 */
2647 if (r) {
2648 /* SPSRs for other modes */
2649 switch (sysm) {
2650 case 0xe: /* SPSR_fiq */
2651 *tgtmode = ARM_CPU_MODE_FIQ;
2652 break;
2653 case 0x10: /* SPSR_irq */
2654 *tgtmode = ARM_CPU_MODE_IRQ;
2655 break;
2656 case 0x12: /* SPSR_svc */
2657 *tgtmode = ARM_CPU_MODE_SVC;
2658 break;
2659 case 0x14: /* SPSR_abt */
2660 *tgtmode = ARM_CPU_MODE_ABT;
2661 break;
2662 case 0x16: /* SPSR_und */
2663 *tgtmode = ARM_CPU_MODE_UND;
2664 break;
2665 case 0x1c: /* SPSR_mon */
2666 *tgtmode = ARM_CPU_MODE_MON;
2667 break;
2668 case 0x1e: /* SPSR_hyp */
2669 *tgtmode = ARM_CPU_MODE_HYP;
2670 break;
2671 default: /* unallocated */
2672 goto undef;
2673 }
2674 /* We arbitrarily assign SPSR a register number of 16. */
2675 *regno = 16;
2676 } else {
2677 /* general purpose registers for other modes */
2678 switch (sysm) {
2679 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2680 *tgtmode = ARM_CPU_MODE_USR;
2681 *regno = sysm + 8;
2682 break;
2683 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2684 *tgtmode = ARM_CPU_MODE_FIQ;
2685 *regno = sysm;
2686 break;
2687 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2688 *tgtmode = ARM_CPU_MODE_IRQ;
2689 *regno = sysm & 1 ? 13 : 14;
2690 break;
2691 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2692 *tgtmode = ARM_CPU_MODE_SVC;
2693 *regno = sysm & 1 ? 13 : 14;
2694 break;
2695 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2696 *tgtmode = ARM_CPU_MODE_ABT;
2697 *regno = sysm & 1 ? 13 : 14;
2698 break;
2699 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2700 *tgtmode = ARM_CPU_MODE_UND;
2701 *regno = sysm & 1 ? 13 : 14;
2702 break;
2703 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2704 *tgtmode = ARM_CPU_MODE_MON;
2705 *regno = sysm & 1 ? 13 : 14;
2706 break;
2707 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2708 *tgtmode = ARM_CPU_MODE_HYP;
2709 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2710 *regno = sysm & 1 ? 13 : 17;
2711 break;
2712 default: /* unallocated */
2713 goto undef;
2714 }
2715 }
2716
2717 /* Catch the 'accessing inaccessible register' cases we can detect
2718 * at translate time.
2719 */
2720 switch (*tgtmode) {
2721 case ARM_CPU_MODE_MON:
2722 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2723 goto undef;
2724 }
2725 if (s->current_el == 1) {
2726 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2727 * then accesses to Mon registers trap to EL3
2728 */
2729 exc_target = 3;
2730 goto undef;
2731 }
2732 break;
2733 case ARM_CPU_MODE_HYP:
2734 /*
2735 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2736 * (and so we can forbid accesses from EL2 or below). elr_hyp
2737 * can be accessed also from Hyp mode, so forbid accesses from
2738 * EL0 or EL1.
2739 */
2740 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2741 (s->current_el < 3 && *regno != 17)) {
2742 goto undef;
2743 }
2744 break;
2745 default:
2746 break;
2747 }
2748
2749 return true;
2750
2751 undef:
2752 /* If we get here then some access check did not pass */
2753 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2754 syn_uncategorized(), exc_target);
2755 return false;
2756 }
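/*
 * Example (illustrative): "MRS r0, SPSR_fiq" is encoded with r = 1 and
 * sysm = 0xe, so the decode above yields tgtmode = ARM_CPU_MODE_FIQ and
 * regno = 16 (the arbitrary number chosen for SPSR). "MRS r0, r13_irq"
 * has r = 0, sysm = 0x11 and yields tgtmode = ARM_CPU_MODE_IRQ,
 * regno = 13.
 */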
2757
2758 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2759 {
2760 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2761 int tgtmode = 0, regno = 0;
2762
2763 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2764 return;
2765 }
2766
2767 /* Sync state because msr_banked() can raise exceptions */
2768 gen_set_condexec(s);
2769 gen_set_pc_im(s, s->pc_curr);
2770 tcg_reg = load_reg(s, rn);
2771 tcg_tgtmode = tcg_const_i32(tgtmode);
2772 tcg_regno = tcg_const_i32(regno);
2773 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2774 tcg_temp_free_i32(tcg_tgtmode);
2775 tcg_temp_free_i32(tcg_regno);
2776 tcg_temp_free_i32(tcg_reg);
2777 s->base.is_jmp = DISAS_UPDATE_EXIT;
2778 }
2779
2780 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2781 {
2782 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2783 int tgtmode = 0, regno = 0;
2784
2785 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2786 return;
2787 }
2788
2789 /* Sync state because mrs_banked() can raise exceptions */
2790 gen_set_condexec(s);
2791 gen_set_pc_im(s, s->pc_curr);
2792 tcg_reg = tcg_temp_new_i32();
2793 tcg_tgtmode = tcg_const_i32(tgtmode);
2794 tcg_regno = tcg_const_i32(regno);
2795 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2796 tcg_temp_free_i32(tcg_tgtmode);
2797 tcg_temp_free_i32(tcg_regno);
2798 store_reg(s, rn, tcg_reg);
2799 s->base.is_jmp = DISAS_UPDATE_EXIT;
2800 }
2801
2802 /* Store value to PC as for an exception return (i.e. don't
2803 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2804 * will do the masking based on the new value of the Thumb bit.
2805 */
2806 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2807 {
2808 tcg_gen_mov_i32(cpu_R[15], pc);
2809 tcg_temp_free_i32(pc);
2810 }
2811
2812 /* Generate a v6 exception return. Marks both values as dead. */
2813 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2814 {
2815 store_pc_exc_ret(s, pc);
2816 /* The cpsr_write_eret helper will mask the low bits of PC
2817 * appropriately depending on the new Thumb bit, so it must
2818 * be called after storing the new PC.
2819 */
2820 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2821 gen_io_start();
2822 }
2823 gen_helper_cpsr_write_eret(cpu_env, cpsr);
2824 tcg_temp_free_i32(cpsr);
2825 /* Must exit loop to check un-masked IRQs */
2826 s->base.is_jmp = DISAS_EXIT;
2827 }
2828
2829 /* Generate an old-style exception return. Marks pc as dead. */
2830 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2831 {
2832 gen_rfe(s, pc, load_cpu_field(spsr));
2833 }
2834
2835 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2836 uint32_t opr_sz, uint32_t max_sz,
2837 gen_helper_gvec_3_ptr *fn)
2838 {
2839 TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2840
2841 tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2842 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2843 opr_sz, max_sz, 0, fn);
2844 tcg_temp_free_ptr(qc_ptr);
2845 }
2846
2847 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2848 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2849 {
2850 static gen_helper_gvec_3_ptr * const fns[2] = {
2851 gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2852 };
2853 tcg_debug_assert(vece >= 1 && vece <= 2);
2854 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2855 }
2856
2857 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2858 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2859 {
2860 static gen_helper_gvec_3_ptr * const fns[2] = {
2861 gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2862 };
2863 tcg_debug_assert(vece >= 1 && vece <= 2);
2864 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2865 }
2866
2867 #define GEN_CMP0(NAME, COND) \
2868 static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
2869 { \
2870 tcg_gen_setcondi_i32(COND, d, a, 0); \
2871 tcg_gen_neg_i32(d, d); \
2872 } \
2873 static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
2874 { \
2875 tcg_gen_setcondi_i64(COND, d, a, 0); \
2876 tcg_gen_neg_i64(d, d); \
2877 } \
2878 static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
2879 { \
2880 TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
2881 tcg_gen_cmp_vec(COND, vece, d, a, zero); \
2882 tcg_temp_free_vec(zero); \
2883 } \
2884 void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
2885 uint32_t opr_sz, uint32_t max_sz) \
2886 { \
2887 const GVecGen2 op[4] = { \
2888 { .fno = gen_helper_gvec_##NAME##0_b, \
2889 .fniv = gen_##NAME##0_vec, \
2890 .opt_opc = vecop_list_cmp, \
2891 .vece = MO_8 }, \
2892 { .fno = gen_helper_gvec_##NAME##0_h, \
2893 .fniv = gen_##NAME##0_vec, \
2894 .opt_opc = vecop_list_cmp, \
2895 .vece = MO_16 }, \
2896 { .fni4 = gen_##NAME##0_i32, \
2897 .fniv = gen_##NAME##0_vec, \
2898 .opt_opc = vecop_list_cmp, \
2899 .vece = MO_32 }, \
2900 { .fni8 = gen_##NAME##0_i64, \
2901 .fniv = gen_##NAME##0_vec, \
2902 .opt_opc = vecop_list_cmp, \
2903 .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
2904 .vece = MO_64 }, \
2905 }; \
2906 tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
2907 }
2908
2909 static const TCGOpcode vecop_list_cmp[] = {
2910 INDEX_op_cmp_vec, 0
2911 };
2912
2913 GEN_CMP0(ceq, TCG_COND_EQ)
2914 GEN_CMP0(cle, TCG_COND_LE)
2915 GEN_CMP0(cge, TCG_COND_GE)
2916 GEN_CMP0(clt, TCG_COND_LT)
2917 GEN_CMP0(cgt, TCG_COND_GT)
2918
2919 #undef GEN_CMP0
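/*
 * Illustrative note: each GEN_CMP0 expansion produces the all-ones /
 * all-zeros element mask that the Neon/AdvSIMD compare-against-zero
 * insns require. E.g. gen_ceq0_i32 computes d = -(a == 0), so a = 0
 * gives 0xffffffff and a = 5 gives 0.
 */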
2920
2921 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2922 {
2923 tcg_gen_vec_sar8i_i64(a, a, shift);
2924 tcg_gen_vec_add8_i64(d, d, a);
2925 }
2926
2927 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2928 {
2929 tcg_gen_vec_sar16i_i64(a, a, shift);
2930 tcg_gen_vec_add16_i64(d, d, a);
2931 }
2932
2933 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
2934 {
2935 tcg_gen_sari_i32(a, a, shift);
2936 tcg_gen_add_i32(d, d, a);
2937 }
2938
2939 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2940 {
2941 tcg_gen_sari_i64(a, a, shift);
2942 tcg_gen_add_i64(d, d, a);
2943 }
2944
2945 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
2946 {
2947 tcg_gen_sari_vec(vece, a, a, sh);
2948 tcg_gen_add_vec(vece, d, d, a);
2949 }
2950
2951 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
2952 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
2953 {
2954 static const TCGOpcode vecop_list[] = {
2955 INDEX_op_sari_vec, INDEX_op_add_vec, 0
2956 };
2957 static const GVecGen2i ops[4] = {
2958 { .fni8 = gen_ssra8_i64,
2959 .fniv = gen_ssra_vec,
2960 .fno = gen_helper_gvec_ssra_b,
2961 .load_dest = true,
2962 .opt_opc = vecop_list,
2963 .vece = MO_8 },
2964 { .fni8 = gen_ssra16_i64,
2965 .fniv = gen_ssra_vec,
2966 .fno = gen_helper_gvec_ssra_h,
2967 .load_dest = true,
2968 .opt_opc = vecop_list,
2969 .vece = MO_16 },
2970 { .fni4 = gen_ssra32_i32,
2971 .fniv = gen_ssra_vec,
2972 .fno = gen_helper_gvec_ssra_s,
2973 .load_dest = true,
2974 .opt_opc = vecop_list,
2975 .vece = MO_32 },
2976 { .fni8 = gen_ssra64_i64,
2977 .fniv = gen_ssra_vec,
2978 .fno = gen_helper_gvec_ssra_d,
2979 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
2980 .opt_opc = vecop_list,
2981 .load_dest = true,
2982 .vece = MO_64 },
2983 };
2984
2985 /* tszimm encoding produces immediates in the range [1..esize]. */
2986 tcg_debug_assert(shift > 0);
2987 tcg_debug_assert(shift <= (8 << vece));
2988
2989 /*
2990 * Shifts larger than the element size are architecturally valid.
2991 * A signed shift of that size yields all sign bits, so clamp to esize - 1.
2992 */
2993 shift = MIN(shift, (8 << vece) - 1);
2994 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
2995 }
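/*
 * Example (illustrative): for bytes, "ssra #8" is clamped to an
 * arithmetic shift by 7, so an element of 0x80 contributes 0xff (-1)
 * to the accumulator and an element of 0x7f contributes 0.
 */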
2996
2997 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2998 {
2999 tcg_gen_vec_shr8i_i64(a, a, shift);
3000 tcg_gen_vec_add8_i64(d, d, a);
3001 }
3002
3003 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3004 {
3005 tcg_gen_vec_shr16i_i64(a, a, shift);
3006 tcg_gen_vec_add16_i64(d, d, a);
3007 }
3008
3009 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3010 {
3011 tcg_gen_shri_i32(a, a, shift);
3012 tcg_gen_add_i32(d, d, a);
3013 }
3014
3015 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3016 {
3017 tcg_gen_shri_i64(a, a, shift);
3018 tcg_gen_add_i64(d, d, a);
3019 }
3020
3021 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3022 {
3023 tcg_gen_shri_vec(vece, a, a, sh);
3024 tcg_gen_add_vec(vece, d, d, a);
3025 }
3026
3027 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3028 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3029 {
3030 static const TCGOpcode vecop_list[] = {
3031 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3032 };
3033 static const GVecGen2i ops[4] = {
3034 { .fni8 = gen_usra8_i64,
3035 .fniv = gen_usra_vec,
3036 .fno = gen_helper_gvec_usra_b,
3037 .load_dest = true,
3038 .opt_opc = vecop_list,
3039 .vece = MO_8, },
3040 { .fni8 = gen_usra16_i64,
3041 .fniv = gen_usra_vec,
3042 .fno = gen_helper_gvec_usra_h,
3043 .load_dest = true,
3044 .opt_opc = vecop_list,
3045 .vece = MO_16, },
3046 { .fni4 = gen_usra32_i32,
3047 .fniv = gen_usra_vec,
3048 .fno = gen_helper_gvec_usra_s,
3049 .load_dest = true,
3050 .opt_opc = vecop_list,
3051 .vece = MO_32, },
3052 { .fni8 = gen_usra64_i64,
3053 .fniv = gen_usra_vec,
3054 .fno = gen_helper_gvec_usra_d,
3055 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3056 .load_dest = true,
3057 .opt_opc = vecop_list,
3058 .vece = MO_64, },
3059 };
3060
3061 /* tszimm encoding produces immediates in the range [1..esize]. */
3062 tcg_debug_assert(shift > 0);
3063 tcg_debug_assert(shift <= (8 << vece));
3064
3065 /*
3066 * Shifts larger than the element size are architecturally valid.
3067 * An unsigned shift of that size yields all zeros, so the accumulation is a no-op.
3068 */
3069 if (shift < (8 << vece)) {
3070 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3071 } else {
3072 /* Nop, but we do need to clear the tail. */
3073 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3074 }
3075 }
3076
3077 /*
3078 * Shift one less than the requested amount, and the low bit is
3079 * the rounding bit. For the 8 and 16-bit operations, because we
3080 * mask the low bit, we can perform a normal integer shift instead
3081 * of a vector shift.
3082 */
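/*
 * Example (illustrative): "srshr #2" of the byte 7 computes the rounding
 * bit (7 >> 1) & 1 = 1 and the truncated shift 7 >> 2 = 1, giving 2,
 * i.e. 7/4 rounded to nearest. For -5 the same steps give
 * ((-5 >> 1) & 1) + (-5 >> 2) = 1 + (-2) = -1, i.e. -5/4 rounded.
 */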
3083 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3084 {
3085 TCGv_i64 t = tcg_temp_new_i64();
3086
3087 tcg_gen_shri_i64(t, a, sh - 1);
3088 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3089 tcg_gen_vec_sar8i_i64(d, a, sh);
3090 tcg_gen_vec_add8_i64(d, d, t);
3091 tcg_temp_free_i64(t);
3092 }
3093
3094 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3095 {
3096 TCGv_i64 t = tcg_temp_new_i64();
3097
3098 tcg_gen_shri_i64(t, a, sh - 1);
3099 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3100 tcg_gen_vec_sar16i_i64(d, a, sh);
3101 tcg_gen_vec_add16_i64(d, d, t);
3102 tcg_temp_free_i64(t);
3103 }
3104
3105 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3106 {
3107 TCGv_i32 t = tcg_temp_new_i32();
3108
3109 tcg_gen_extract_i32(t, a, sh - 1, 1);
3110 tcg_gen_sari_i32(d, a, sh);
3111 tcg_gen_add_i32(d, d, t);
3112 tcg_temp_free_i32(t);
3113 }
3114
3115 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3116 {
3117 TCGv_i64 t = tcg_temp_new_i64();
3118
3119 tcg_gen_extract_i64(t, a, sh - 1, 1);
3120 tcg_gen_sari_i64(d, a, sh);
3121 tcg_gen_add_i64(d, d, t);
3122 tcg_temp_free_i64(t);
3123 }
3124
3125 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3126 {
3127 TCGv_vec t = tcg_temp_new_vec_matching(d);
3128 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3129
3130 tcg_gen_shri_vec(vece, t, a, sh - 1);
3131 tcg_gen_dupi_vec(vece, ones, 1);
3132 tcg_gen_and_vec(vece, t, t, ones);
3133 tcg_gen_sari_vec(vece, d, a, sh);
3134 tcg_gen_add_vec(vece, d, d, t);
3135
3136 tcg_temp_free_vec(t);
3137 tcg_temp_free_vec(ones);
3138 }
3139
3140 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3141 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3142 {
3143 static const TCGOpcode vecop_list[] = {
3144 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3145 };
3146 static const GVecGen2i ops[4] = {
3147 { .fni8 = gen_srshr8_i64,
3148 .fniv = gen_srshr_vec,
3149 .fno = gen_helper_gvec_srshr_b,
3150 .opt_opc = vecop_list,
3151 .vece = MO_8 },
3152 { .fni8 = gen_srshr16_i64,
3153 .fniv = gen_srshr_vec,
3154 .fno = gen_helper_gvec_srshr_h,
3155 .opt_opc = vecop_list,
3156 .vece = MO_16 },
3157 { .fni4 = gen_srshr32_i32,
3158 .fniv = gen_srshr_vec,
3159 .fno = gen_helper_gvec_srshr_s,
3160 .opt_opc = vecop_list,
3161 .vece = MO_32 },
3162 { .fni8 = gen_srshr64_i64,
3163 .fniv = gen_srshr_vec,
3164 .fno = gen_helper_gvec_srshr_d,
3165 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3166 .opt_opc = vecop_list,
3167 .vece = MO_64 },
3168 };
3169
3170 /* tszimm encoding produces immediates in the range [1..esize] */
3171 tcg_debug_assert(shift > 0);
3172 tcg_debug_assert(shift <= (8 << vece));
3173
3174 if (shift == (8 << vece)) {
3175 /*
3176 * Shifts larger than the element size are architecturally valid.
3177 * Signed results in all sign bits. With rounding, this produces
3178 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3179 * I.e. always zero.
3180 */
3181 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3182 } else {
3183 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3184 }
3185 }
3186
3187 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3188 {
3189 TCGv_i64 t = tcg_temp_new_i64();
3190
3191 gen_srshr8_i64(t, a, sh);
3192 tcg_gen_vec_add8_i64(d, d, t);
3193 tcg_temp_free_i64(t);
3194 }
3195
3196 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3197 {
3198 TCGv_i64 t = tcg_temp_new_i64();
3199
3200 gen_srshr16_i64(t, a, sh);
3201 tcg_gen_vec_add16_i64(d, d, t);
3202 tcg_temp_free_i64(t);
3203 }
3204
3205 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3206 {
3207 TCGv_i32 t = tcg_temp_new_i32();
3208
3209 gen_srshr32_i32(t, a, sh);
3210 tcg_gen_add_i32(d, d, t);
3211 tcg_temp_free_i32(t);
3212 }
3213
3214 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3215 {
3216 TCGv_i64 t = tcg_temp_new_i64();
3217
3218 gen_srshr64_i64(t, a, sh);
3219 tcg_gen_add_i64(d, d, t);
3220 tcg_temp_free_i64(t);
3221 }
3222
3223 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3224 {
3225 TCGv_vec t = tcg_temp_new_vec_matching(d);
3226
3227 gen_srshr_vec(vece, t, a, sh);
3228 tcg_gen_add_vec(vece, d, d, t);
3229 tcg_temp_free_vec(t);
3230 }
3231
3232 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3233 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3234 {
3235 static const TCGOpcode vecop_list[] = {
3236 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3237 };
3238 static const GVecGen2i ops[4] = {
3239 { .fni8 = gen_srsra8_i64,
3240 .fniv = gen_srsra_vec,
3241 .fno = gen_helper_gvec_srsra_b,
3242 .opt_opc = vecop_list,
3243 .load_dest = true,
3244 .vece = MO_8 },
3245 { .fni8 = gen_srsra16_i64,
3246 .fniv = gen_srsra_vec,
3247 .fno = gen_helper_gvec_srsra_h,
3248 .opt_opc = vecop_list,
3249 .load_dest = true,
3250 .vece = MO_16 },
3251 { .fni4 = gen_srsra32_i32,
3252 .fniv = gen_srsra_vec,
3253 .fno = gen_helper_gvec_srsra_s,
3254 .opt_opc = vecop_list,
3255 .load_dest = true,
3256 .vece = MO_32 },
3257 { .fni8 = gen_srsra64_i64,
3258 .fniv = gen_srsra_vec,
3259 .fno = gen_helper_gvec_srsra_d,
3260 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3261 .opt_opc = vecop_list,
3262 .load_dest = true,
3263 .vece = MO_64 },
3264 };
3265
3266 /* tszimm encoding produces immediates in the range [1..esize] */
3267 tcg_debug_assert(shift > 0);
3268 tcg_debug_assert(shift <= (8 << vece));
3269
3270 /*
3271 * Shifts larger than the element size are architecturally valid.
3272 * Signed results in all sign bits. With rounding, this produces
3273 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3274 * I.e. always zero. With accumulation, this leaves D unchanged.
3275 */
3276 if (shift == (8 << vece)) {
3277 /* Nop, but we do need to clear the tail. */
3278 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3279 } else {
3280 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3281 }
3282 }
3283
3284 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3285 {
3286 TCGv_i64 t = tcg_temp_new_i64();
3287
3288 tcg_gen_shri_i64(t, a, sh - 1);
3289 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3290 tcg_gen_vec_shr8i_i64(d, a, sh);
3291 tcg_gen_vec_add8_i64(d, d, t);
3292 tcg_temp_free_i64(t);
3293 }
3294
3295 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3296 {
3297 TCGv_i64 t = tcg_temp_new_i64();
3298
3299 tcg_gen_shri_i64(t, a, sh - 1);
3300 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3301 tcg_gen_vec_shr16i_i64(d, a, sh);
3302 tcg_gen_vec_add16_i64(d, d, t);
3303 tcg_temp_free_i64(t);
3304 }
3305
3306 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3307 {
3308 TCGv_i32 t = tcg_temp_new_i32();
3309
3310 tcg_gen_extract_i32(t, a, sh - 1, 1);
3311 tcg_gen_shri_i32(d, a, sh);
3312 tcg_gen_add_i32(d, d, t);
3313 tcg_temp_free_i32(t);
3314 }
3315
3316 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3317 {
3318 TCGv_i64 t = tcg_temp_new_i64();
3319
3320 tcg_gen_extract_i64(t, a, sh - 1, 1);
3321 tcg_gen_shri_i64(d, a, sh);
3322 tcg_gen_add_i64(d, d, t);
3323 tcg_temp_free_i64(t);
3324 }
3325
3326 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3327 {
3328 TCGv_vec t = tcg_temp_new_vec_matching(d);
3329 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3330
3331 tcg_gen_shri_vec(vece, t, a, shift - 1);
3332 tcg_gen_dupi_vec(vece, ones, 1);
3333 tcg_gen_and_vec(vece, t, t, ones);
3334 tcg_gen_shri_vec(vece, d, a, shift);
3335 tcg_gen_add_vec(vece, d, d, t);
3336
3337 tcg_temp_free_vec(t);
3338 tcg_temp_free_vec(ones);
3339 }
3340
3341 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3342 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3343 {
3344 static const TCGOpcode vecop_list[] = {
3345 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3346 };
3347 static const GVecGen2i ops[4] = {
3348 { .fni8 = gen_urshr8_i64,
3349 .fniv = gen_urshr_vec,
3350 .fno = gen_helper_gvec_urshr_b,
3351 .opt_opc = vecop_list,
3352 .vece = MO_8 },
3353 { .fni8 = gen_urshr16_i64,
3354 .fniv = gen_urshr_vec,
3355 .fno = gen_helper_gvec_urshr_h,
3356 .opt_opc = vecop_list,
3357 .vece = MO_16 },
3358 { .fni4 = gen_urshr32_i32,
3359 .fniv = gen_urshr_vec,
3360 .fno = gen_helper_gvec_urshr_s,
3361 .opt_opc = vecop_list,
3362 .vece = MO_32 },
3363 { .fni8 = gen_urshr64_i64,
3364 .fniv = gen_urshr_vec,
3365 .fno = gen_helper_gvec_urshr_d,
3366 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3367 .opt_opc = vecop_list,
3368 .vece = MO_64 },
3369 };
3370
3371 /* tszimm encoding produces immediates in the range [1..esize] */
3372 tcg_debug_assert(shift > 0);
3373 tcg_debug_assert(shift <= (8 << vece));
3374
3375 if (shift == (8 << vece)) {
3376 /*
3377 * Shifts larger than the element size are architecturally valid.
3378 * Unsigned results in zero. With rounding, this produces a
3379 * copy of the most significant bit.
3380 */
3381 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3382 } else {
3383 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3384 }
3385 }
3386
3387 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3388 {
3389 TCGv_i64 t = tcg_temp_new_i64();
3390
3391 if (sh == 8) {
3392 tcg_gen_vec_shr8i_i64(t, a, 7);
3393 } else {
3394 gen_urshr8_i64(t, a, sh);
3395 }
3396 tcg_gen_vec_add8_i64(d, d, t);
3397 tcg_temp_free_i64(t);
3398 }
3399
3400 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3401 {
3402 TCGv_i64 t = tcg_temp_new_i64();
3403
3404 if (sh == 16) {
3405 tcg_gen_vec_shr16i_i64(t, a, 15);
3406 } else {
3407 gen_urshr16_i64(t, a, sh);
3408 }
3409 tcg_gen_vec_add16_i64(d, d, t);
3410 tcg_temp_free_i64(t);
3411 }
3412
3413 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3414 {
3415 TCGv_i32 t = tcg_temp_new_i32();
3416
3417 if (sh == 32) {
3418 tcg_gen_shri_i32(t, a, 31);
3419 } else {
3420 gen_urshr32_i32(t, a, sh);
3421 }
3422 tcg_gen_add_i32(d, d, t);
3423 tcg_temp_free_i32(t);
3424 }
3425
3426 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3427 {
3428 TCGv_i64 t = tcg_temp_new_i64();
3429
3430 if (sh == 64) {
3431 tcg_gen_shri_i64(t, a, 63);
3432 } else {
3433 gen_urshr64_i64(t, a, sh);
3434 }
3435 tcg_gen_add_i64(d, d, t);
3436 tcg_temp_free_i64(t);
3437 }
3438
3439 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3440 {
3441 TCGv_vec t = tcg_temp_new_vec_matching(d);
3442
3443 if (sh == (8 << vece)) {
3444 tcg_gen_shri_vec(vece, t, a, sh - 1);
3445 } else {
3446 gen_urshr_vec(vece, t, a, sh);
3447 }
3448 tcg_gen_add_vec(vece, d, d, t);
3449 tcg_temp_free_vec(t);
3450 }
3451
3452 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3453 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3454 {
3455 static const TCGOpcode vecop_list[] = {
3456 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3457 };
3458 static const GVecGen2i ops[4] = {
3459 { .fni8 = gen_ursra8_i64,
3460 .fniv = gen_ursra_vec,
3461 .fno = gen_helper_gvec_ursra_b,
3462 .opt_opc = vecop_list,
3463 .load_dest = true,
3464 .vece = MO_8 },
3465 { .fni8 = gen_ursra16_i64,
3466 .fniv = gen_ursra_vec,
3467 .fno = gen_helper_gvec_ursra_h,
3468 .opt_opc = vecop_list,
3469 .load_dest = true,
3470 .vece = MO_16 },
3471 { .fni4 = gen_ursra32_i32,
3472 .fniv = gen_ursra_vec,
3473 .fno = gen_helper_gvec_ursra_s,
3474 .opt_opc = vecop_list,
3475 .load_dest = true,
3476 .vece = MO_32 },
3477 { .fni8 = gen_ursra64_i64,
3478 .fniv = gen_ursra_vec,
3479 .fno = gen_helper_gvec_ursra_d,
3480 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3481 .opt_opc = vecop_list,
3482 .load_dest = true,
3483 .vece = MO_64 },
3484 };
3485
3486 /* tszimm encoding produces immediates in the range [1..esize] */
3487 tcg_debug_assert(shift > 0);
3488 tcg_debug_assert(shift <= (8 << vece));
3489
3490 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3491 }
3492
3493 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3494 {
3495 uint64_t mask = dup_const(MO_8, 0xff >> shift);
3496 TCGv_i64 t = tcg_temp_new_i64();
3497
3498 tcg_gen_shri_i64(t, a, shift);
3499 tcg_gen_andi_i64(t, t, mask);
3500 tcg_gen_andi_i64(d, d, ~mask);
3501 tcg_gen_or_i64(d, d, t);
3502 tcg_temp_free_i64(t);
3503 }
3504
3505 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3506 {
3507 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3508 TCGv_i64 t = tcg_temp_new_i64();
3509
3510 tcg_gen_shri_i64(t, a, shift);
3511 tcg_gen_andi_i64(t, t, mask);
3512 tcg_gen_andi_i64(d, d, ~mask);
3513 tcg_gen_or_i64(d, d, t);
3514 tcg_temp_free_i64(t);
3515 }
3516
3517 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3518 {
3519 tcg_gen_shri_i32(a, a, shift);
3520 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3521 }
3522
3523 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3524 {
3525 tcg_gen_shri_i64(a, a, shift);
3526 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3527 }
3528
3529 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3530 {
3531 TCGv_vec t = tcg_temp_new_vec_matching(d);
3532 TCGv_vec m = tcg_temp_new_vec_matching(d);
3533
3534 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3535 tcg_gen_shri_vec(vece, t, a, sh);
3536 tcg_gen_and_vec(vece, d, d, m);
3537 tcg_gen_or_vec(vece, d, d, t);
3538
3539 tcg_temp_free_vec(t);
3540 tcg_temp_free_vec(m);
3541 }
3542
3543 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3544 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3545 {
3546 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3547 const GVecGen2i ops[4] = {
3548 { .fni8 = gen_shr8_ins_i64,
3549 .fniv = gen_shr_ins_vec,
3550 .fno = gen_helper_gvec_sri_b,
3551 .load_dest = true,
3552 .opt_opc = vecop_list,
3553 .vece = MO_8 },
3554 { .fni8 = gen_shr16_ins_i64,
3555 .fniv = gen_shr_ins_vec,
3556 .fno = gen_helper_gvec_sri_h,
3557 .load_dest = true,
3558 .opt_opc = vecop_list,
3559 .vece = MO_16 },
3560 { .fni4 = gen_shr32_ins_i32,
3561 .fniv = gen_shr_ins_vec,
3562 .fno = gen_helper_gvec_sri_s,
3563 .load_dest = true,
3564 .opt_opc = vecop_list,
3565 .vece = MO_32 },
3566 { .fni8 = gen_shr64_ins_i64,
3567 .fniv = gen_shr_ins_vec,
3568 .fno = gen_helper_gvec_sri_d,
3569 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3570 .load_dest = true,
3571 .opt_opc = vecop_list,
3572 .vece = MO_64 },
3573 };
3574
3575 /* tszimm encoding produces immediates in the range [1..esize]. */
3576 tcg_debug_assert(shift > 0);
3577 tcg_debug_assert(shift <= (8 << vece));
3578
3579 /* Shift of esize leaves destination unchanged. */
3580 if (shift < (8 << vece)) {
3581 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3582 } else {
3583 /* Nop, but we do need to clear the tail. */
3584 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3585 }
3586 }
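/*
 * Example (illustrative): SRI shifts the source right and inserts it
 * under the destination's top bits. For bytes with shift = 4 the lane
 * mask is 0x0f, so d = (d & 0xf0) | (a >> 4): d = 0xab, a = 0x5c gives
 * 0xa5. A shift equal to the element size leaves d unchanged, as
 * handled above.
 */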
3587
3588 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3589 {
3590 uint64_t mask = dup_const(MO_8, 0xff << shift);
3591 TCGv_i64 t = tcg_temp_new_i64();
3592
3593 tcg_gen_shli_i64(t, a, shift);
3594 tcg_gen_andi_i64(t, t, mask);
3595 tcg_gen_andi_i64(d, d, ~mask);
3596 tcg_gen_or_i64(d, d, t);
3597 tcg_temp_free_i64(t);
3598 }
3599
3600 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3601 {
3602 uint64_t mask = dup_const(MO_16, 0xffff << shift);
3603 TCGv_i64 t = tcg_temp_new_i64();
3604
3605 tcg_gen_shli_i64(t, a, shift);
3606 tcg_gen_andi_i64(t, t, mask);
3607 tcg_gen_andi_i64(d, d, ~mask);
3608 tcg_gen_or_i64(d, d, t);
3609 tcg_temp_free_i64(t);
3610 }
3611
3612 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3613 {
3614 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3615 }
3616
3617 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3618 {
3619 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3620 }
3621
3622 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3623 {
3624 TCGv_vec t = tcg_temp_new_vec_matching(d);
3625 TCGv_vec m = tcg_temp_new_vec_matching(d);
3626
3627 tcg_gen_shli_vec(vece, t, a, sh);
3628 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3629 tcg_gen_and_vec(vece, d, d, m);
3630 tcg_gen_or_vec(vece, d, d, t);
3631
3632 tcg_temp_free_vec(t);
3633 tcg_temp_free_vec(m);
3634 }
3635
3636 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3637 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3638 {
3639 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3640 const GVecGen2i ops[4] = {
3641 { .fni8 = gen_shl8_ins_i64,
3642 .fniv = gen_shl_ins_vec,
3643 .fno = gen_helper_gvec_sli_b,
3644 .load_dest = true,
3645 .opt_opc = vecop_list,
3646 .vece = MO_8 },
3647 { .fni8 = gen_shl16_ins_i64,
3648 .fniv = gen_shl_ins_vec,
3649 .fno = gen_helper_gvec_sli_h,
3650 .load_dest = true,
3651 .opt_opc = vecop_list,
3652 .vece = MO_16 },
3653 { .fni4 = gen_shl32_ins_i32,
3654 .fniv = gen_shl_ins_vec,
3655 .fno = gen_helper_gvec_sli_s,
3656 .load_dest = true,
3657 .opt_opc = vecop_list,
3658 .vece = MO_32 },
3659 { .fni8 = gen_shl64_ins_i64,
3660 .fniv = gen_shl_ins_vec,
3661 .fno = gen_helper_gvec_sli_d,
3662 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3663 .load_dest = true,
3664 .opt_opc = vecop_list,
3665 .vece = MO_64 },
3666 };
3667
3668 /* tszimm encoding produces immediates in the range [0..esize-1]. */
3669 tcg_debug_assert(shift >= 0);
3670 tcg_debug_assert(shift < (8 << vece));
3671
3672 if (shift == 0) {
3673 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3674 } else {
3675 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3676 }
3677 }
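/*
 * Example (illustrative): SLI is the mirror image. For bytes with
 * shift = 4, d = (d & 0x0f) | (a << 4), so d = 0xab, a = 0x5c gives
 * 0xcb. shift = 0 degenerates to a plain move, as handled above.
 */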
3678
3679 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3680 {
3681 gen_helper_neon_mul_u8(a, a, b);
3682 gen_helper_neon_add_u8(d, d, a);
3683 }
3684
3685 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3686 {
3687 gen_helper_neon_mul_u8(a, a, b);
3688 gen_helper_neon_sub_u8(d, d, a);
3689 }
3690
3691 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3692 {
3693 gen_helper_neon_mul_u16(a, a, b);
3694 gen_helper_neon_add_u16(d, d, a);
3695 }
3696
3697 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3698 {
3699 gen_helper_neon_mul_u16(a, a, b);
3700 gen_helper_neon_sub_u16(d, d, a);
3701 }
3702
3703 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3704 {
3705 tcg_gen_mul_i32(a, a, b);
3706 tcg_gen_add_i32(d, d, a);
3707 }
3708
3709 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3710 {
3711 tcg_gen_mul_i32(a, a, b);
3712 tcg_gen_sub_i32(d, d, a);
3713 }
3714
3715 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3716 {
3717 tcg_gen_mul_i64(a, a, b);
3718 tcg_gen_add_i64(d, d, a);
3719 }
3720
3721 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3722 {
3723 tcg_gen_mul_i64(a, a, b);
3724 tcg_gen_sub_i64(d, d, a);
3725 }
3726
3727 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3728 {
3729 tcg_gen_mul_vec(vece, a, a, b);
3730 tcg_gen_add_vec(vece, d, d, a);
3731 }
3732
3733 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3734 {
3735 tcg_gen_mul_vec(vece, a, a, b);
3736 tcg_gen_sub_vec(vece, d, d, a);
3737 }
3738
3739 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3740 * these tables are shared with AArch64 which does support them.
3741 */
3742 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3743 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3744 {
3745 static const TCGOpcode vecop_list[] = {
3746 INDEX_op_mul_vec, INDEX_op_add_vec, 0
3747 };
3748 static const GVecGen3 ops[4] = {
3749 { .fni4 = gen_mla8_i32,
3750 .fniv = gen_mla_vec,
3751 .load_dest = true,
3752 .opt_opc = vecop_list,
3753 .vece = MO_8 },
3754 { .fni4 = gen_mla16_i32,
3755 .fniv = gen_mla_vec,
3756 .load_dest = true,
3757 .opt_opc = vecop_list,
3758 .vece = MO_16 },
3759 { .fni4 = gen_mla32_i32,
3760 .fniv = gen_mla_vec,
3761 .load_dest = true,
3762 .opt_opc = vecop_list,
3763 .vece = MO_32 },
3764 { .fni8 = gen_mla64_i64,
3765 .fniv = gen_mla_vec,
3766 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3767 .load_dest = true,
3768 .opt_opc = vecop_list,
3769 .vece = MO_64 },
3770 };
3771 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3772 }
3773
3774 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3775 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3776 {
3777 static const TCGOpcode vecop_list[] = {
3778 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3779 };
3780 static const GVecGen3 ops[4] = {
3781 { .fni4 = gen_mls8_i32,
3782 .fniv = gen_mls_vec,
3783 .load_dest = true,
3784 .opt_opc = vecop_list,
3785 .vece = MO_8 },
3786 { .fni4 = gen_mls16_i32,
3787 .fniv = gen_mls_vec,
3788 .load_dest = true,
3789 .opt_opc = vecop_list,
3790 .vece = MO_16 },
3791 { .fni4 = gen_mls32_i32,
3792 .fniv = gen_mls_vec,
3793 .load_dest = true,
3794 .opt_opc = vecop_list,
3795 .vece = MO_32 },
3796 { .fni8 = gen_mls64_i64,
3797 .fniv = gen_mls_vec,
3798 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3799 .load_dest = true,
3800 .opt_opc = vecop_list,
3801 .vece = MO_64 },
3802 };
3803 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3804 }
3805
3806 /* CMTST : test is "if ((X & Y) != 0)". */
3807 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3808 {
3809 tcg_gen_and_i32(d, a, b);
3810 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3811 tcg_gen_neg_i32(d, d);
3812 }
3813
3814 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3815 {
3816 tcg_gen_and_i64(d, a, b);
3817 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3818 tcg_gen_neg_i64(d, d);
3819 }
3820
3821 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3822 {
3823 tcg_gen_and_vec(vece, d, a, b);
3824 tcg_gen_dupi_vec(vece, a, 0);
3825 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3826 }
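/*
 * Editorial note (illustrative, not part of the original source):
 * CMTST produces, per element, all-ones when the operands share any set
 * bit and all-zeros otherwise.  The scalar expansions above obtain the
 * all-ones pattern from setcond (0/1) followed by negation, e.g. for
 * 32-bit elements:
 *   a = 0x0000000f, b = 0x000000f0  ->  a & b == 0  ->  d = 0x00000000
 *   a = 0x0000000f, b = 0x00000001  ->  a & b != 0  ->  d = 0xffffffff
 * The vector expansion instead compares (a & b) against a zero vector,
 * since cmp_vec already yields all-ones/all-zeros lanes.
 */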
3827
3828 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3829 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3830 {
3831 static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3832 static const GVecGen3 ops[4] = {
3833 { .fni4 = gen_helper_neon_tst_u8,
3834 .fniv = gen_cmtst_vec,
3835 .opt_opc = vecop_list,
3836 .vece = MO_8 },
3837 { .fni4 = gen_helper_neon_tst_u16,
3838 .fniv = gen_cmtst_vec,
3839 .opt_opc = vecop_list,
3840 .vece = MO_16 },
3841 { .fni4 = gen_cmtst_i32,
3842 .fniv = gen_cmtst_vec,
3843 .opt_opc = vecop_list,
3844 .vece = MO_32 },
3845 { .fni8 = gen_cmtst_i64,
3846 .fniv = gen_cmtst_vec,
3847 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3848 .opt_opc = vecop_list,
3849 .vece = MO_64 },
3850 };
3851 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3852 }
3853
3854 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3855 {
3856 TCGv_i32 lval = tcg_temp_new_i32();
3857 TCGv_i32 rval = tcg_temp_new_i32();
3858 TCGv_i32 lsh = tcg_temp_new_i32();
3859 TCGv_i32 rsh = tcg_temp_new_i32();
3860 TCGv_i32 zero = tcg_const_i32(0);
3861 TCGv_i32 max = tcg_const_i32(32);
3862
3863 /*
3864 * Rely on the TCG guarantee that out of range shifts produce
3865 * unspecified results, not undefined behaviour (i.e. no trap).
3866 * Discard out-of-range results after the fact.
3867 */
3868 tcg_gen_ext8s_i32(lsh, shift);
3869 tcg_gen_neg_i32(rsh, lsh);
3870 tcg_gen_shl_i32(lval, src, lsh);
3871 tcg_gen_shr_i32(rval, src, rsh);
3872 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3873 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3874
3875 tcg_temp_free_i32(lval);
3876 tcg_temp_free_i32(rval);
3877 tcg_temp_free_i32(lsh);
3878 tcg_temp_free_i32(rsh);
3879 tcg_temp_free_i32(zero);
3880 tcg_temp_free_i32(max);
3881 }
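/*
 * Editorial note (illustrative, not part of the original source):
 * the guest shift count for USHL/VSHL is the *signed* low byte of the
 * shift operand, so the expansion above computes both directions and
 * then discards whichever is out of range.  A rough value trace:
 *   shift =   3  ->  lsh = 3,   rsh = -3  ->  dst = src << 3
 *   shift =  -3  ->  lsh = -3,  rsh = 3   ->  dst = src >> 3 (logical)
 *   shift =  40  ->  lsh = 40 (>= 32)     ->  dst = 0
 *   shift = -40  ->  rsh = 40 (>= 32)     ->  dst = 0
 * The first movcond picks (lsh < 32 ? lval : 0); the second overrides
 * that with rval whenever rsh < 32 (for shift == 0 both select src).
 */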
3882
3883 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3884 {
3885 TCGv_i64 lval = tcg_temp_new_i64();
3886 TCGv_i64 rval = tcg_temp_new_i64();
3887 TCGv_i64 lsh = tcg_temp_new_i64();
3888 TCGv_i64 rsh = tcg_temp_new_i64();
3889 TCGv_i64 zero = tcg_const_i64(0);
3890 TCGv_i64 max = tcg_const_i64(64);
3891
3892 /*
3893 * Rely on the TCG guarantee that out of range shifts produce
3894 * unspecified results, not undefined behaviour (i.e. no trap).
3895 * Discard out-of-range results after the fact.
3896 */
3897 tcg_gen_ext8s_i64(lsh, shift);
3898 tcg_gen_neg_i64(rsh, lsh);
3899 tcg_gen_shl_i64(lval, src, lsh);
3900 tcg_gen_shr_i64(rval, src, rsh);
3901 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3902 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3903
3904 tcg_temp_free_i64(lval);
3905 tcg_temp_free_i64(rval);
3906 tcg_temp_free_i64(lsh);
3907 tcg_temp_free_i64(rsh);
3908 tcg_temp_free_i64(zero);
3909 tcg_temp_free_i64(max);
3910 }
3911
3912 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
3913 TCGv_vec src, TCGv_vec shift)
3914 {
3915 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
3916 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
3917 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
3918 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
3919 TCGv_vec msk, max;
3920
3921 tcg_gen_neg_vec(vece, rsh, shift);
3922 if (vece == MO_8) {
3923 tcg_gen_mov_vec(lsh, shift);
3924 } else {
3925 msk = tcg_temp_new_vec_matching(dst);
3926 tcg_gen_dupi_vec(vece, msk, 0xff);
3927 tcg_gen_and_vec(vece, lsh, shift, msk);
3928 tcg_gen_and_vec(vece, rsh, rsh, msk);
3929 tcg_temp_free_vec(msk);
3930 }
3931
3932 /*
3933 * Rely on the TCG guarantee that out of range shifts produce
3934 * unspecified results, not undefined behaviour (i.e. no trap).
3935 * Discard out-of-range results after the fact.
3936 */
3937 tcg_gen_shlv_vec(vece, lval, src, lsh);
3938 tcg_gen_shrv_vec(vece, rval, src, rsh);
3939
3940 max = tcg_temp_new_vec_matching(dst);
3941 tcg_gen_dupi_vec(vece, max, 8 << vece);
3942
3943 /*
3944 * The choice of LT (signed) and GEU (unsigned) is biased toward
3945 * the instructions of the x86_64 host. For MO_8, the whole byte
3946 * is significant, so we must use an unsigned compare; otherwise we
3947 * have already masked to a byte and so a signed compare works.
3948 * Other TCG hosts have a full set of comparisons and do not care.
3949 */
3950 if (vece == MO_8) {
3951 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
3952 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
3953 tcg_gen_andc_vec(vece, lval, lval, lsh);
3954 tcg_gen_andc_vec(vece, rval, rval, rsh);
3955 } else {
3956 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
3957 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
3958 tcg_gen_and_vec(vece, lval, lval, lsh);
3959 tcg_gen_and_vec(vece, rval, rval, rsh);
3960 }
3961 tcg_gen_or_vec(vece, dst, lval, rval);
3962
3963 tcg_temp_free_vec(max);
3964 tcg_temp_free_vec(lval);
3965 tcg_temp_free_vec(rval);
3966 tcg_temp_free_vec(lsh);
3967 tcg_temp_free_vec(rsh);
3968 }
3969
3970 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3971 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3972 {
3973 static const TCGOpcode vecop_list[] = {
3974 INDEX_op_neg_vec, INDEX_op_shlv_vec,
3975 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
3976 };
3977 static const GVecGen3 ops[4] = {
3978 { .fniv = gen_ushl_vec,
3979 .fno = gen_helper_gvec_ushl_b,
3980 .opt_opc = vecop_list,
3981 .vece = MO_8 },
3982 { .fniv = gen_ushl_vec,
3983 .fno = gen_helper_gvec_ushl_h,
3984 .opt_opc = vecop_list,
3985 .vece = MO_16 },
3986 { .fni4 = gen_ushl_i32,
3987 .fniv = gen_ushl_vec,
3988 .opt_opc = vecop_list,
3989 .vece = MO_32 },
3990 { .fni8 = gen_ushl_i64,
3991 .fniv = gen_ushl_vec,
3992 .opt_opc = vecop_list,
3993 .vece = MO_64 },
3994 };
3995 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3996 }
3997
3998 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3999 {
4000 TCGv_i32 lval = tcg_temp_new_i32();
4001 TCGv_i32 rval = tcg_temp_new_i32();
4002 TCGv_i32 lsh = tcg_temp_new_i32();
4003 TCGv_i32 rsh = tcg_temp_new_i32();
4004 TCGv_i32 zero = tcg_const_i32(0);
4005 TCGv_i32 max = tcg_const_i32(31);
4006
4007 /*
4008 * Rely on the TCG guarantee that out of range shifts produce
4009 * unspecified results, not undefined behaviour (i.e. no trap).
4010 * Discard out-of-range results after the fact.
4011 */
4012 tcg_gen_ext8s_i32(lsh, shift);
4013 tcg_gen_neg_i32(rsh, lsh);
4014 tcg_gen_shl_i32(lval, src, lsh);
4015 tcg_gen_umin_i32(rsh, rsh, max);
4016 tcg_gen_sar_i32(rval, src, rsh);
4017 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4018 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4019
4020 tcg_temp_free_i32(lval);
4021 tcg_temp_free_i32(rval);
4022 tcg_temp_free_i32(lsh);
4023 tcg_temp_free_i32(rsh);
4024 tcg_temp_free_i32(zero);
4025 tcg_temp_free_i32(max);
4026 }
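/*
 * Editorial note (illustrative, not part of the original source):
 * the signed variant differs from gen_ushl_i32 in how out-of-range
 * counts are handled: an over-large left shift still yields 0, but an
 * over-large right shift must yield the sign fill, so rsh is clamped
 * to 31 with umin before the arithmetic shift.  A rough value trace:
 *   src = 0xfffffff8 (-8), shift = -40
 *     -> lsh = -40 (< 0), rsh = min(40, 31) = 31
 *     -> dst = src >> 31 (arithmetic) = 0xffffffff
 *   src = 0x00000010, shift = 40
 *     -> lsh = 40 (> 31), so lval is replaced by 0 and dst = 0
 * The final movcond selects the right-shift result for negative counts
 * and the (possibly zeroed) left-shift result otherwise.
 */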
4027
4028 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4029 {
4030 TCGv_i64 lval = tcg_temp_new_i64();
4031 TCGv_i64 rval = tcg_temp_new_i64();
4032 TCGv_i64 lsh = tcg_temp_new_i64();
4033 TCGv_i64 rsh = tcg_temp_new_i64();
4034 TCGv_i64 zero = tcg_const_i64(0);
4035 TCGv_i64 max = tcg_const_i64(63);
4036
4037 /*
4038 * Rely on the TCG guarantee that out of range shifts produce
4039 * unspecified results, not undefined behaviour (i.e. no trap).
4040 * Discard out-of-range results after the fact.
4041 */
4042 tcg_gen_ext8s_i64(lsh, shift);
4043 tcg_gen_neg_i64(rsh, lsh);
4044 tcg_gen_shl_i64(lval, src, lsh);
4045 tcg_gen_umin_i64(rsh, rsh, max);
4046 tcg_gen_sar_i64(rval, src, rsh);
4047 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4048 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4049
4050 tcg_temp_free_i64(lval);
4051 tcg_temp_free_i64(rval);
4052 tcg_temp_free_i64(lsh);
4053 tcg_temp_free_i64(rsh);
4054 tcg_temp_free_i64(zero);
4055 tcg_temp_free_i64(max);
4056 }
4057
4058 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4059 TCGv_vec src, TCGv_vec shift)
4060 {
4061 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4062 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4063 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4064 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4065 TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4066
4067 /*
4068 * Rely on the TCG guarantee that out of range shifts produce
4069 * unspecified results, not undefined behaviour (i.e. no trap).
4070 * Discard out-of-range results after the fact.
4071 */
4072 tcg_gen_neg_vec(vece, rsh, shift);
4073 if (vece == MO_8) {
4074 tcg_gen_mov_vec(lsh, shift);
4075 } else {
4076 tcg_gen_dupi_vec(vece, tmp, 0xff);
4077 tcg_gen_and_vec(vece, lsh, shift, tmp);
4078 tcg_gen_and_vec(vece, rsh, rsh, tmp);
4079 }
4080
4081 /* Bound rsh so that an out-of-range right shift produces the all-sign-bits result. */
4082 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4083 tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4084 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4085
4086 tcg_gen_shlv_vec(vece, lval, src, lsh);
4087 tcg_gen_sarv_vec(vece, rval, src, rsh);
4088
4089 /* Select in-bound left shift. */
4090 tcg_gen_andc_vec(vece, lval, lval, tmp);
4091
4092 /* Select between left and right shift. */
4093 if (vece == MO_8) {
4094 tcg_gen_dupi_vec(vece, tmp, 0);
4095 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4096 } else {
4097 tcg_gen_dupi_vec(vece, tmp, 0x80);
4098 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4099 }
4100
4101 tcg_temp_free_vec(lval);
4102 tcg_temp_free_vec(rval);
4103 tcg_temp_free_vec(lsh);
4104 tcg_temp_free_vec(rsh);
4105 tcg_temp_free_vec(tmp);
4106 }
4107
4108 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4109 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4110 {
4111 static const TCGOpcode vecop_list[] = {
4112 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4113 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4114 };
4115 static const GVecGen3 ops[4] = {
4116 { .fniv = gen_sshl_vec,
4117 .fno = gen_helper_gvec_sshl_b,
4118 .opt_opc = vecop_list,
4119 .vece = MO_8 },
4120 { .fniv = gen_sshl_vec,
4121 .fno = gen_helper_gvec_sshl_h,
4122 .opt_opc = vecop_list,
4123 .vece = MO_16 },
4124 { .fni4 = gen_sshl_i32,
4125 .fniv = gen_sshl_vec,
4126 .opt_opc = vecop_list,
4127 .vece = MO_32 },
4128 { .fni8 = gen_sshl_i64,
4129 .fniv = gen_sshl_vec,
4130 .opt_opc = vecop_list,
4131 .vece = MO_64 },
4132 };
4133 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4134 }
4135
4136 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4137 TCGv_vec a, TCGv_vec b)
4138 {
4139 TCGv_vec x = tcg_temp_new_vec_matching(t);
4140 tcg_gen_add_vec(vece, x, a, b);
4141 tcg_gen_usadd_vec(vece, t, a, b);
4142 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4143 tcg_gen_or_vec(vece, sat, sat, x);
4144 tcg_temp_free_vec(x);
4145 }
4146
4147 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4148 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4149 {
4150 static const TCGOpcode vecop_list[] = {
4151 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4152 };
4153 static const GVecGen4 ops[4] = {
4154 { .fniv = gen_uqadd_vec,
4155 .fno = gen_helper_gvec_uqadd_b,
4156 .write_aofs = true,
4157 .opt_opc = vecop_list,
4158 .vece = MO_8 },
4159 { .fniv = gen_uqadd_vec,
4160 .fno = gen_helper_gvec_uqadd_h,
4161 .write_aofs = true,
4162 .opt_opc = vecop_list,
4163 .vece = MO_16 },
4164 { .fniv = gen_uqadd_vec,
4165 .fno = gen_helper_gvec_uqadd_s,
4166 .write_aofs = true,
4167 .opt_opc = vecop_list,
4168 .vece = MO_32 },
4169 { .fniv = gen_uqadd_vec,
4170 .fno = gen_helper_gvec_uqadd_d,
4171 .write_aofs = true,
4172 .opt_opc = vecop_list,
4173 .vece = MO_64 },
4174 };
4175 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4176 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4177 }
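/*
 * Editorial note (illustrative, not part of the original source):
 * this and the following three *_qc expanders track saturation by
 * computing both the wrapping result (x) and the saturating result (t)
 * and comparing them: lanes where they differ have saturated, so the
 * NE cmp_vec produces all-ones there, which is then ORed into the
 * accumulated flags.  The second gvec operand is
 * offsetof(CPUARMState, vfp.qc), and .write_aofs = true tells the
 * expander that this operand is written as well as read; any set bit
 * in vfp.qc is later folded into the sticky FPSCR.QC flag.  For
 * example, with MO_8 lanes a = 0xf0 and b = 0x20: wrapping add = 0x10,
 * saturating add = 0xff, the two differ, so that lane of vfp.qc
 * becomes 0xff.
 */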
4178
4179 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4180 TCGv_vec a, TCGv_vec b)
4181 {
4182 TCGv_vec x = tcg_temp_new_vec_matching(t);
4183 tcg_gen_add_vec(vece, x, a, b);
4184 tcg_gen_ssadd_vec(vece, t, a, b);
4185 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4186 tcg_gen_or_vec(vece, sat, sat, x);
4187 tcg_temp_free_vec(x);
4188 }
4189
4190 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4191 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4192 {
4193 static const TCGOpcode vecop_list[] = {
4194 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4195 };
4196 static const GVecGen4 ops[4] = {
4197 { .fniv = gen_sqadd_vec,
4198 .fno = gen_helper_gvec_sqadd_b,
4199 .opt_opc = vecop_list,
4200 .write_aofs = true,
4201 .vece = MO_8 },
4202 { .fniv = gen_sqadd_vec,
4203 .fno = gen_helper_gvec_sqadd_h,
4204 .opt_opc = vecop_list,
4205 .write_aofs = true,
4206 .vece = MO_16 },
4207 { .fniv = gen_sqadd_vec,
4208 .fno = gen_helper_gvec_sqadd_s,
4209 .opt_opc = vecop_list,
4210 .write_aofs = true,
4211 .vece = MO_32 },
4212 { .fniv = gen_sqadd_vec,
4213 .fno = gen_helper_gvec_sqadd_d,
4214 .opt_opc = vecop_list,
4215 .write_aofs = true,
4216 .vece = MO_64 },
4217 };
4218 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4219 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4220 }
4221
4222 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4223 TCGv_vec a, TCGv_vec b)
4224 {
4225 TCGv_vec x = tcg_temp_new_vec_matching(t);
4226 tcg_gen_sub_vec(vece, x, a, b);
4227 tcg_gen_ussub_vec(vece, t, a, b);
4228 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4229 tcg_gen_or_vec(vece, sat, sat, x);
4230 tcg_temp_free_vec(x);
4231 }
4232
4233 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4234 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4235 {
4236 static const TCGOpcode vecop_list[] = {
4237 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4238 };
4239 static const GVecGen4 ops[4] = {
4240 { .fniv = gen_uqsub_vec,
4241 .fno = gen_helper_gvec_uqsub_b,
4242 .opt_opc = vecop_list,
4243 .write_aofs = true,
4244 .vece = MO_8 },
4245 { .fniv = gen_uqsub_vec,
4246 .fno = gen_helper_gvec_uqsub_h,
4247 .opt_opc = vecop_list,
4248 .write_aofs = true,
4249 .vece = MO_16 },
4250 { .fniv = gen_uqsub_vec,
4251 .fno = gen_helper_gvec_uqsub_s,
4252 .opt_opc = vecop_list,
4253 .write_aofs = true,
4254 .vece = MO_32 },
4255 { .fniv = gen_uqsub_vec,
4256 .fno = gen_helper_gvec_uqsub_d,
4257 .opt_opc = vecop_list,
4258 .write_aofs = true,
4259 .vece = MO_64 },
4260 };
4261 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4262 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4263 }
4264
4265 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4266 TCGv_vec a, TCGv_vec b)
4267 {
4268 TCGv_vec x = tcg_temp_new_vec_matching(t);
4269 tcg_gen_sub_vec(vece, x, a, b);
4270 tcg_gen_sssub_vec(vece, t, a, b);
4271 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4272 tcg_gen_or_vec(vece, sat, sat, x);
4273 tcg_temp_free_vec(x);
4274 }
4275
4276 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4277 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4278 {
4279 static const TCGOpcode vecop_list[] = {
4280 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4281 };
4282 static const GVecGen4 ops[4] = {
4283 { .fniv = gen_sqsub_vec,
4284 .fno = gen_helper_gvec_sqsub_b,
4285 .opt_opc = vecop_list,
4286 .write_aofs = true,
4287 .vece = MO_8 },
4288 { .fniv = gen_sqsub_vec,
4289 .fno = gen_helper_gvec_sqsub_h,
4290 .opt_opc = vecop_list,
4291 .write_aofs = true,
4292 .vece = MO_16 },
4293 { .fniv = gen_sqsub_vec,
4294 .fno = gen_helper_gvec_sqsub_s,
4295 .opt_opc = vecop_list,
4296 .write_aofs = true,
4297 .vece = MO_32 },
4298 { .fniv = gen_sqsub_vec,
4299 .fno = gen_helper_gvec_sqsub_d,
4300 .opt_opc = vecop_list,
4301 .write_aofs = true,
4302 .vece = MO_64 },
4303 };
4304 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4305 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4306 }
4307
4308 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4309 {
4310 TCGv_i32 t = tcg_temp_new_i32();
4311
4312 tcg_gen_sub_i32(t, a, b);
4313 tcg_gen_sub_i32(d, b, a);
4314 tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4315 tcg_temp_free_i32(t);
4316 }
4317
4318 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4319 {
4320 TCGv_i64 t = tcg_temp_new_i64();
4321
4322 tcg_gen_sub_i64(t, a, b);
4323 tcg_gen_sub_i64(d, b, a);
4324 tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4325 tcg_temp_free_i64(t);
4326 }
4327
4328 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4329 {
4330 TCGv_vec t = tcg_temp_new_vec_matching(d);
4331
4332 tcg_gen_smin_vec(vece, t, a, b);
4333 tcg_gen_smax_vec(vece, d, a, b);
4334 tcg_gen_sub_vec(vece, d, d, t);
4335 tcg_temp_free_vec(t);
4336 }
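/*
 * Editorial note (illustrative, not part of the original source):
 * the absolute-difference expansions rely on the identity
 * |a - b| == max(a, b) - min(a, b) for the vector form, while the
 * scalar forms compute both a - b and b - a and use movcond to keep
 * the non-negative one; e.g. gen_sabd_i32 with a = 3, b = 10 selects
 * b - a = 7.
 */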
4337
4338 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4339 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4340 {
4341 static const TCGOpcode vecop_list[] = {
4342 INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4343 };
4344 static const GVecGen3 ops[4] = {
4345 { .fniv = gen_sabd_vec,
4346 .fno = gen_helper_gvec_sabd_b,
4347 .opt_opc = vecop_list,
4348 .vece = MO_8 },
4349 { .fniv = gen_sabd_vec,
4350 .fno = gen_helper_gvec_sabd_h,
4351 .opt_opc = vecop_list,
4352 .vece = MO_16 },
4353 { .fni4 = gen_sabd_i32,
4354 .fniv = gen_sabd_vec,
4355 .fno = gen_helper_gvec_sabd_s,
4356 .opt_opc = vecop_list,
4357 .vece = MO_32 },
4358 { .fni8 = gen_sabd_i64,
4359 .fniv = gen_sabd_vec,
4360 .fno = gen_helper_gvec_sabd_d,
4361 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4362 .opt_opc = vecop_list,
4363 .vece = MO_64 },
4364 };
4365 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4366 }
4367
4368 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4369 {
4370 TCGv_i32 t = tcg_temp_new_i32();
4371
4372 tcg_gen_sub_i32(t, a, b);
4373 tcg_gen_sub_i32(d, b, a);
4374 tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4375 tcg_temp_free_i32(t);
4376 }
4377
4378 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4379 {
4380 TCGv_i64 t = tcg_temp_new_i64();
4381
4382 tcg_gen_sub_i64(t, a, b);
4383 tcg_gen_sub_i64(d, b, a);
4384 tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4385 tcg_temp_free_i64(t);
4386 }
4387
4388 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4389 {
4390 TCGv_vec t = tcg_temp_new_vec_matching(d);
4391
4392 tcg_gen_umin_vec(vece, t, a, b);
4393 tcg_gen_umax_vec(vece, d, a, b);
4394 tcg_gen_sub_vec(vece, d, d, t);
4395 tcg_temp_free_vec(t);
4396 }
4397
4398 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4399 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4400 {
4401 static const TCGOpcode vecop_list[] = {
4402 INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4403 };
4404 static const GVecGen3 ops[4] = {
4405 { .fniv = gen_uabd_vec,
4406 .fno = gen_helper_gvec_uabd_b,
4407 .opt_opc = vecop_list,
4408 .vece = MO_8 },
4409 { .fniv = gen_uabd_vec,
4410 .fno = gen_helper_gvec_uabd_h,
4411 .opt_opc = vecop_list,
4412 .vece = MO_16 },
4413 { .fni4 = gen_uabd_i32,
4414 .fniv = gen_uabd_vec,
4415 .fno = gen_helper_gvec_uabd_s,
4416 .opt_opc = vecop_list,
4417 .vece = MO_32 },
4418 { .fni8 = gen_uabd_i64,
4419 .fniv = gen_uabd_vec,
4420 .fno = gen_helper_gvec_uabd_d,
4421 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4422 .opt_opc = vecop_list,
4423 .vece = MO_64 },
4424 };
4425 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4426 }
4427
4428 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4429 {
4430 TCGv_i32 t = tcg_temp_new_i32();
4431 gen_sabd_i32(t, a, b);
4432 tcg_gen_add_i32(d, d, t);
4433 tcg_temp_free_i32(t);
4434 }
4435
4436 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4437 {
4438 TCGv_i64 t = tcg_temp_new_i64();
4439 gen_sabd_i64(t, a, b);
4440 tcg_gen_add_i64(d, d, t);
4441 tcg_temp_free_i64(t);
4442 }
4443
4444 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4445 {
4446 TCGv_vec t = tcg_temp_new_vec_matching(d);
4447 gen_sabd_vec(vece, t, a, b);
4448 tcg_gen_add_vec(vece, d, d, t);
4449 tcg_temp_free_vec(t);
4450 }
4451
4452 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4453 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4454 {
4455 static const TCGOpcode vecop_list[] = {
4456 INDEX_op_sub_vec, INDEX_op_add_vec,
4457 INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4458 };
4459 static const GVecGen3 ops[4] = {
4460 { .fniv = gen_saba_vec,
4461 .fno = gen_helper_gvec_saba_b,
4462 .opt_opc = vecop_list,
4463 .load_dest = true,
4464 .vece = MO_8 },
4465 { .fniv = gen_saba_vec,
4466 .fno = gen_helper_gvec_saba_h,
4467 .opt_opc = vecop_list,
4468 .load_dest = true,
4469 .vece = MO_16 },
4470 { .fni4 = gen_saba_i32,
4471 .fniv = gen_saba_vec,
4472 .fno = gen_helper_gvec_saba_s,
4473 .opt_opc = vecop_list,
4474 .load_dest = true,
4475 .vece = MO_32 },
4476 { .fni8 = gen_saba_i64,
4477 .fniv = gen_saba_vec,
4478 .fno = gen_helper_gvec_saba_d,
4479 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4480 .opt_opc = vecop_list,
4481 .load_dest = true,
4482 .vece = MO_64 },
4483 };
4484 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4485 }
4486
4487 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4488 {
4489 TCGv_i32 t = tcg_temp_new_i32();
4490 gen_uabd_i32(t, a, b);
4491 tcg_gen_add_i32(d, d, t);
4492 tcg_temp_free_i32(t);
4493 }
4494
4495 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4496 {
4497 TCGv_i64 t = tcg_temp_new_i64();
4498 gen_uabd_i64(t, a, b);
4499 tcg_gen_add_i64(d, d, t);
4500 tcg_temp_free_i64(t);
4501 }
4502
4503 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4504 {
4505 TCGv_vec t = tcg_temp_new_vec_matching(d);
4506 gen_uabd_vec(vece, t, a, b);
4507 tcg_gen_add_vec(vece, d, d, t);
4508 tcg_temp_free_vec(t);
4509 }
4510
4511 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4512 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4513 {
4514 static const TCGOpcode vecop_list[] = {
4515 INDEX_op_sub_vec, INDEX_op_add_vec,
4516 INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4517 };
4518 static const GVecGen3 ops[4] = {
4519 { .fniv = gen_uaba_vec,
4520 .fno = gen_helper_gvec_uaba_b,
4521 .opt_opc = vecop_list,
4522 .load_dest = true,
4523 .vece = MO_8 },
4524 { .fniv = gen_uaba_vec,
4525 .fno = gen_helper_gvec_uaba_h,
4526 .opt_opc = vecop_list,
4527 .load_dest = true,
4528 .vece = MO_16 },
4529 { .fni4 = gen_uaba_i32,
4530 .fniv = gen_uaba_vec,
4531 .fno = gen_helper_gvec_uaba_s,
4532 .opt_opc = vecop_list,
4533 .load_dest = true,
4534 .vece = MO_32 },
4535 { .fni8 = gen_uaba_i64,
4536 .fniv = gen_uaba_vec,
4537 .fno = gen_helper_gvec_uaba_d,
4538 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4539 .opt_opc = vecop_list,
4540 .load_dest = true,
4541 .vece = MO_64 },
4542 };
4543 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4544 }
4545
4546 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4547 int opc1, int crn, int crm, int opc2,
4548 bool isread, int rt, int rt2)
4549 {
4550 const ARMCPRegInfo *ri;
4551
4552 ri = get_arm_cp_reginfo(s->cp_regs,
4553 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
4554 if (ri) {
4555 bool need_exit_tb;
4556
4557 /* Check access permissions */
4558 if (!cp_access_ok(s->current_el, ri, isread)) {
4559 unallocated_encoding(s);
4560 return;
4561 }
4562
4563 if (s->hstr_active || ri->accessfn ||
4564 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4565 /* Emit code to perform further access permission checks at
4566 * runtime; this may result in an exception.
4567 * Note that on XScale all cp0..cp13 registers do an access check
4568 * call in order to handle c15_cpar.
4569 */
4570 TCGv_ptr tmpptr;
4571 TCGv_i32 tcg_syn, tcg_isread;
4572 uint32_t syndrome;
4573
4574 /* Note that since we are an implementation which takes an
4575 * exception on a trapped conditional instruction only if the
4576 * instruction passes its condition code check, we can take
4577 * advantage of the clause in the ARM ARM that allows us to set
4578 * the COND field in the instruction to 0xE in all cases.
4579 * We could fish the actual condition out of the insn (ARM)
4580 * or the condexec bits (Thumb) but it isn't necessary.
4581 */
4582 switch (cpnum) {
4583 case 14:
4584 if (is64) {
4585 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4586 isread, false);
4587 } else {
4588 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4589 rt, isread, false);
4590 }
4591 break;
4592 case 15:
4593 if (is64) {
4594 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4595 isread, false);
4596 } else {
4597 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4598 rt, isread, false);
4599 }
4600 break;
4601 default:
4602 /* ARMv8 defines that only coprocessors 14 and 15 exist,
4603 * so this can only happen if this is an ARMv7 or earlier CPU,
4604 * in which case the syndrome information won't actually be
4605 * guest visible.
4606 */
4607 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4608 syndrome = syn_uncategorized();
4609 break;
4610 }
4611
4612 gen_set_condexec(s);
4613 gen_set_pc_im(s, s->pc_curr);
4614 tmpptr = tcg_const_ptr(ri);
4615 tcg_syn = tcg_const_i32(syndrome);
4616 tcg_isread = tcg_const_i32(isread);
4617 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
4618 tcg_isread);
4619 tcg_temp_free_ptr(tmpptr);
4620 tcg_temp_free_i32(tcg_syn);
4621 tcg_temp_free_i32(tcg_isread);
4622 } else if (ri->type & ARM_CP_RAISES_EXC) {
4623 /*
4624 * The readfn or writefn might raise an exception;
4625 * synchronize the CPU state in case it does.
4626 */
4627 gen_set_condexec(s);
4628 gen_set_pc_im(s, s->pc_curr);
4629 }
4630
4631 /* Handle special cases first */
4632 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
4633 case ARM_CP_NOP:
4634 return;
4635 case ARM_CP_WFI:
4636 if (isread) {
4637 unallocated_encoding(s);
4638 return;
4639 }
4640 gen_set_pc_im(s, s->base.pc_next);
4641 s->base.is_jmp = DISAS_WFI;
4642 return;
4643 default:
4644 break;
4645 }
4646
4647 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
4648 gen_io_start();
4649 }
4650
4651 if (isread) {
4652 /* Read */
4653 if (is64) {
4654 TCGv_i64 tmp64;
4655 TCGv_i32 tmp;
4656 if (ri->type & ARM_CP_CONST) {
4657 tmp64 = tcg_const_i64(ri->resetvalue);
4658 } else if (ri->readfn) {
4659 TCGv_ptr tmpptr;
4660 tmp64 = tcg_temp_new_i64();
4661 tmpptr = tcg_const_ptr(ri);
4662 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
4663 tcg_temp_free_ptr(tmpptr);
4664 } else {
4665 tmp64 = tcg_temp_new_i64();
4666 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4667 }
4668 tmp = tcg_temp_new_i32();
4669 tcg_gen_extrl_i64_i32(tmp, tmp64);
4670 store_reg(s, rt, tmp);
4671 tmp = tcg_temp_new_i32();
4672 tcg_gen_extrh_i64_i32(tmp, tmp64);
4673 tcg_temp_free_i64(tmp64);
4674 store_reg(s, rt2, tmp);
4675 } else {
4676 TCGv_i32 tmp;
4677 if (ri->type & ARM_CP_CONST) {
4678 tmp = tcg_const_i32(ri->resetvalue);
4679 } else if (ri->readfn) {
4680 TCGv_ptr tmpptr;
4681 tmp = tcg_temp_new_i32();
4682 tmpptr = tcg_const_ptr(ri);
4683 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
4684 tcg_temp_free_ptr(tmpptr);
4685 } else {
4686 tmp = load_cpu_offset(ri->fieldoffset);
4687 }
4688 if (rt == 15) {
4689 /* A destination register of r15 for 32-bit loads sets
4690 * the condition codes from the high 4 bits of the value.
4691 */
4692 gen_set_nzcv(tmp);
4693 tcg_temp_free_i32(tmp);
4694 } else {
4695 store_reg(s, rt, tmp);
4696 }
4697 }
4698 } else {
4699 /* Write */
4700 if (ri->type & ARM_CP_CONST) {
4701 /* If not forbidden by access permissions, treat as WI */
4702 return;
4703 }
4704
4705 if (is64) {
4706 TCGv_i32 tmplo, tmphi;
4707 TCGv_i64 tmp64 = tcg_temp_new_i64();
4708 tmplo = load_reg(s, rt);
4709 tmphi = load_reg(s, rt2);
4710 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4711 tcg_temp_free_i32(tmplo);
4712 tcg_temp_free_i32(tmphi);
4713 if (ri->writefn) {
4714 TCGv_ptr tmpptr = tcg_const_ptr(ri);
4715 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
4716 tcg_temp_free_ptr(tmpptr);
4717 } else {
4718 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4719 }
4720 tcg_temp_free_i64(tmp64);
4721 } else {
4722 if (ri->writefn) {
4723 TCGv_i32 tmp;
4724 TCGv_ptr tmpptr;
4725 tmp = load_reg(s, rt);
4726 tmpptr = tcg_const_ptr(ri);
4727 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
4728 tcg_temp_free_ptr(tmpptr);
4729 tcg_temp_free_i32(tmp);
4730 } else {
4731 TCGv_i32 tmp = load_reg(s, rt);
4732 store_cpu_offset(tmp, ri->fieldoffset);
4733 }
4734 }
4735 }
4736
4737 /* I/O operations must end the TB here (whether read or write) */
4738 need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
4739 (ri->type & ARM_CP_IO));
4740
4741 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4742 /*
4743 * A write to any coprocessor register that ends a TB
4744 * must rebuild the hflags for the next TB.
4745 */
4746 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
4747 if (arm_dc_feature(s, ARM_FEATURE_M)) {
4748 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
4749 } else {
4750 if (ri->type & ARM_CP_NEWEL) {
4751 gen_helper_rebuild_hflags_a32_newel(cpu_env);
4752 } else {
4753 gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
4754 }
4755 }
4756 tcg_temp_free_i32(tcg_el);
4757 /*
4758 * We default to ending the TB on a coprocessor register write,
4759 * but allow this to be suppressed by the register definition
4760 * (usually only necessary to work around guest bugs).
4761 */
4762 need_exit_tb = true;
4763 }
4764 if (need_exit_tb) {
4765 gen_lookup_tb(s);
4766 }
4767
4768 return;
4769 }
4770
4771 /* Unknown register; this might be a guest error or a feature
4772 * that QEMU does not implement.
4773 */
4774 if (is64) {
4775 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4776 "64 bit system register cp:%d opc1: %d crm:%d "
4777 "(%s)\n",
4778 isread ? "read" : "write", cpnum, opc1, crm,
4779 s->ns ? "non-secure" : "secure");
4780 } else {
4781 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4782 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
4783 "(%s)\n",
4784 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
4785 s->ns ? "non-secure" : "secure");
4786 }
4787
4788 unallocated_encoding(s);
4789 return;
4790 }
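/*
 * Editorial note (not part of the original source): a rough map of the
 * function above, which implements MRC/MCR/MRRC/MCRR once the decoder
 * has extracted the fields: look up the ARMCPRegInfo by its encoded
 * key; apply the static EL-based access check; if needed, emit a
 * runtime access-check helper call carrying a precomputed syndrome;
 * handle the ARM_CP_NOP/ARM_CP_WFI special cases; then perform the
 * read or write via resetvalue (ARM_CP_CONST), the readfn/writefn
 * helper, or a direct load/store at fieldoffset; and finally rebuild
 * the hflags and end the TB when a write (or an I/O access under
 * icount) requires it.
 */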
4791
4792 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4793 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4794 {
4795 int cpnum = (insn >> 8) & 0xf;
4796
4797 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4798 unallocated_encoding(s);
4799 } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4800 if (disas_iwmmxt_insn(s, insn)) {
4801 unallocated_encoding(s);
4802 }
4803 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4804 if (disas_dsp_insn(s, insn)) {
4805 unallocated_encoding(s);
4806 }
4807 }
4808 }
4809
4810 /* Store a 64-bit value to a register pair. Clobbers val. */
4811 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4812 {
4813 TCGv_i32 tmp;
4814 tmp = tcg_temp_new_i32();
4815 tcg_gen_extrl_i64_i32(tmp, val);
4816 store_reg(s, rlow, tmp);
4817 tmp = tcg_temp_new_i32();
4818 tcg_gen_extrh_i64_i32(tmp, val);
4819 store_reg(s, rhigh, tmp);
4820 }
4821
4822 /* Load a 64-bit value from a register pair and add it to val. */
4823 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4824 {
4825 TCGv_i64 tmp;
4826 TCGv_i32 tmpl;
4827 TCGv_i32 tmph;
4828
4829 /* Load the 64-bit value rhigh:rlow. */
4830 tmpl = load_reg(s, rlow);
4831 tmph = load_reg(s, rhigh);
4832 tmp = tcg_temp_new_i64();
4833 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4834 tcg_temp_free_i32(tmpl);
4835 tcg_temp_free_i32(tmph);
4836 tcg_gen_add_i64(val, val, tmp);
4837 tcg_temp_free_i64(tmp);
4838 }
4839
4840 /* Set N and Z flags from hi|lo. */
4841 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4842 {
4843 tcg_gen_mov_i32(cpu_NF, hi);
4844 tcg_gen_or_i32(cpu_ZF, lo, hi);
4845 }
4846
4847 /* Load/Store exclusive instructions are implemented by remembering
4848 the value/address loaded, and seeing if these are the same
4849 when the store is performed. This should be sufficient to implement
4850 the architecturally mandated semantics, and avoids having to monitor
4851 regular stores. The compare vs the remembered value is done during
4852 the cmpxchg operation, but we must compare the addresses manually. */
4853 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4854 TCGv_i32 addr, int size)
4855 {
4856 TCGv_i32 tmp = tcg_temp_new_i32();
4857 MemOp opc = size | MO_ALIGN | s->be_data;
4858
4859 s->is_ldex = true;
4860
4861 if (size == 3) {
4862 TCGv_i32 tmp2 = tcg_temp_new_i32();
4863 TCGv_i64 t64 = tcg_temp_new_i64();
4864
4865 /* For AArch32, architecturally the 32-bit word at the lowest
4866 * address is always Rt and the one at addr+4 is Rt2, even if
4867 * the CPU is big-endian. That means we don't want to do a
4868 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
4869 * for an architecturally 64-bit access, but instead do a
4870 * 64-bit access using MO_BE if appropriate and then split
4871 * the two halves.
4872 * This only makes a difference for BE32 user-mode, where
4873 * frob64() must not flip the two halves of the 64-bit data
4874 * but this code must treat BE32 user-mode like BE32 system.
4875 */
4876 TCGv taddr = gen_aa32_addr(s, addr, opc);
4877
4878 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4879 tcg_temp_free(taddr);
4880 tcg_gen_mov_i64(cpu_exclusive_val, t64);
4881 if (s->be_data == MO_BE) {
4882 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4883 } else {
4884 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4885 }
4886 tcg_temp_free_i64(t64);
4887
4888 store_reg(s, rt2, tmp2);
4889 } else {
4890 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
4891 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
4892 }
4893
4894 store_reg(s, rt, tmp);
4895 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
4896 }
4897
4898 static void gen_clrex(DisasContext *s)
4899 {
4900 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4901 }
4902
4903 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
4904 TCGv_i32 addr, int size)
4905 {
4906 TCGv_i32 t0, t1, t2;
4907 TCGv_i64 extaddr;
4908 TCGv taddr;
4909 TCGLabel *done_label;
4910 TCGLabel *fail_label;
4911 MemOp opc = size | MO_ALIGN | s->be_data;
4912
4913 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
4914 [addr] = {Rt};
4915 {Rd} = 0;
4916 } else {
4917 {Rd} = 1;
4918 } */
4919 fail_label = gen_new_label();
4920 done_label = gen_new_label();
4921 extaddr = tcg_temp_new_i64();
4922 tcg_gen_extu_i32_i64(extaddr, addr);
4923 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
4924 tcg_temp_free_i64(extaddr);
4925
4926 taddr = gen_aa32_addr(s, addr, opc);
4927 t0 = tcg_temp_new_i32();
4928 t1 = load_reg(s, rt);
4929 if (size == 3) {
4930 TCGv_i64 o64 = tcg_temp_new_i64();
4931 TCGv_i64 n64 = tcg_temp_new_i64();
4932
4933 t2 = load_reg(s, rt2);
4934 /* For AArch32, architecturally the 32-bit word at the lowest
4935 * address is always Rt and the one at addr+4 is Rt2, even if
4936 * the CPU is big-endian. Since we're going to treat this as a
4937 * single 64-bit BE store, we need to put the two halves in the
4938 * opposite order for BE to LE, so that they end up in the right
4939 * places.
4940 * We don't want gen_aa32_frob64() because that does the wrong
4941 * thing for BE32 usermode.
4942 */
4943 if (s->be_data == MO_BE) {
4944 tcg_gen_concat_i32_i64(n64, t2, t1);
4945 } else {
4946 tcg_gen_concat_i32_i64(n64, t1, t2);
4947 }
4948 tcg_temp_free_i32(t2);
4949
4950 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
4951 get_mem_index(s), opc);
4952 tcg_temp_free_i64(n64);
4953
4954 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
4955 tcg_gen_extrl_i64_i32(t0, o64);
4956
4957 tcg_temp_free_i64(o64);
4958 } else {
4959 t2 = tcg_temp_new_i32();
4960 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
4961 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
4962 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
4963 tcg_temp_free_i32(t2);
4964 }
4965 tcg_temp_free_i32(t1);
4966 tcg_temp_free(taddr);
4967 tcg_gen_mov_i32(cpu_R[rd], t0);
4968 tcg_temp_free_i32(t0);
4969 tcg_gen_br(done_label);
4970
4971 gen_set_label(fail_label);
4972 tcg_gen_movi_i32(cpu_R[rd], 1);
4973 gen_set_label(done_label);
4974 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4975 }
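/*
 * Editorial note (illustrative, not part of the original source):
 * putting the two functions above together, a typical guest sequence
 * such as
 *     loop: LDREX r1, [r0]
 *           ADD   r1, r1, #1
 *           STREX r2, r1, [r0]
 *           CMP   r2, #0
 *           BNE   loop
 * translates to: LDREX records r0 in cpu_exclusive_addr and the loaded
 * value in cpu_exclusive_val; STREX branches to the fail path if the
 * address no longer matches, otherwise performs an atomic cmpxchg of
 * the remembered value against memory, writing 0 to r2 on success and
 * 1 on failure, and clears cpu_exclusive_addr (sets it to -1) on
 * either path.
 */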
4976
4977 /* gen_srs:
4978 * @env: CPUARMState
4979 * @s: DisasContext
4980 * @mode: mode field from insn (which stack to store to)
4981 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
4982 * @writeback: true if writeback bit set
4983 *
4984 * Generate code for the SRS (Store Return State) insn.
4985 */
4986 static void gen_srs(DisasContext *s,
4987 uint32_t mode, uint32_t amode, bool writeback)
4988 {
4989 int32_t offset;
4990 TCGv_i32 addr, tmp;
4991 bool undef = false;
4992
4993 /* SRS is:
4994 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
4995 * and specified mode is monitor mode
4996 * - UNDEFINED in Hyp mode
4997 * - UNPREDICTABLE in User or System mode
4998 * - UNPREDICTABLE if the specified mode is:
4999 * -- not implemented
5000 * -- not a valid mode number
5001 * -- a mode that's at a higher exception level
5002 * -- Monitor, if we are Non-secure
5003 * For the UNPREDICTABLE cases we choose to UNDEF.
5004 */
5005 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5006 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
5007 return;
5008 }
5009
5010 if (s->current_el == 0 || s->current_el == 2) {
5011 undef = true;
5012 }
5013
5014 switch (mode) {
5015 case ARM_CPU_MODE_USR:
5016 case ARM_CPU_MODE_FIQ:
5017 case ARM_CPU_MODE_IRQ:
5018 case ARM_CPU_MODE_SVC:
5019 case ARM_CPU_MODE_ABT:
5020 case ARM_CPU_MODE_UND:
5021 case ARM_CPU_MODE_SYS:
5022 break;
5023 case ARM_CPU_MODE_HYP:
5024 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5025 undef = true;
5026 }
5027 break;
5028 case ARM_CPU_MODE_MON:
5029 /* No need to check specifically for "are we non-secure" because
5030 * we've already made EL0 UNDEF and handled the trap for S-EL1;
5031 * so if this isn't EL3 then we must be non-secure.
5032 */
5033 if (s->current_el != 3) {
5034 undef = true;
5035 }
5036 break;
5037 default:
5038 undef = true;
5039 }
5040
5041 if (undef) {
5042 unallocated_encoding(s);
5043 return;
5044 }
5045
5046 addr = tcg_temp_new_i32();
5047 tmp = tcg_const_i32(mode);
5048 /* get_r13_banked() will raise an exception if called from System mode */
5049 gen_set_condexec(s);
5050 gen_set_pc_im(s, s->pc_curr);
5051 gen_helper_get_r13_banked(addr, cpu_env, tmp);
5052 tcg_temp_free_i32(tmp);
5053 switch (amode) {
5054 case 0: /* DA */
5055 offset = -4;
5056 break;
5057 case 1: /* IA */
5058 offset = 0;
5059 break;
5060 case 2: /* DB */
5061 offset = -8;
5062 break;
5063 case 3: /* IB */
5064 offset = 4;
5065 break;
5066 default:
5067 abort();
5068 }
5069 tcg_gen_addi_i32(addr, addr, offset);
5070 tmp = load_reg(s, 14);
5071 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
5072 tcg_temp_free_i32(tmp);
5073 tmp = load_cpu_field(spsr);
5074 tcg_gen_addi_i32(addr, addr, 4);
5075 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
5076 tcg_temp_free_i32(tmp);
5077 if (writeback) {
5078 switch (amode) {
5079 case 0:
5080 offset = -8;
5081 break;
5082 case 1:
5083 offset = 4;
5084 break;
5085 case 2:
5086 offset = -4;
5087 break;
5088 case 3:
5089 offset = 0;
5090 break;
5091 default:
5092 abort();
5093 }
5094 tcg_gen_addi_i32(addr, addr, offset);
5095 tmp = tcg_const_i32(mode);
5096 gen_helper_set_r13_banked(cpu_env, tmp, addr);
5097 tcg_temp_free_i32(tmp);
5098 }
5099 tcg_temp_free_i32(addr);
5100 s->base.is_jmp = DISAS_UPDATE_EXIT;
5101 }
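/*
 * Editorial note (illustrative, not part of the original source):
 * the two offset switches in gen_srs() together implement the four SRS
 * addressing modes.  With SP_<mode> as the banked stack pointer, the
 * function stores LR then SPSR and, with writeback, leaves SP as:
 *   DA: words at SP-4, SP     final SP-8
 *   IA: words at SP,   SP+4   final SP+8
 *   DB: words at SP-8, SP-4   final SP-8
 *   IB: words at SP+4, SP+8   final SP+8
 */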
5102
5103 /* Generate a label used for skipping this instruction */
5104 static void arm_gen_condlabel(DisasContext *s)
5105 {
5106 if (!s->condjmp) {
5107 s->condlabel = gen_new_label();
5108 s->condjmp = 1;
5109 }
5110 }
5111
5112 /* Skip this instruction if the ARM condition is false */
5113 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5114 {
5115 arm_gen_condlabel(s);
5116 arm_gen_test_cc(cond ^ 1, s->condlabel);
5117 }
5118
5119
5120 /*
5121 * Constant expanders for the decoders.
5122 */
5123
5124 static int negate(DisasContext *s, int x)
5125 {
5126 return -x;
5127 }
5128
5129 static int plus_2(DisasContext *s, int x)
5130 {
5131 return x + 2;
5132 }
5133
5134 static int times_2(DisasContext *s, int x)
5135 {
5136 return x * 2;
5137 }
5138
5139 static int times_4(DisasContext *s, int x)
5140 {
5141 return x * 4;
5142 }
5143
5144 /* Return only the rotation part of T32ExpandImm. */
5145 static int t32_expandimm_rot(DisasContext *s, int x)
5146 {
5147 return x & 0xc00 ? extract32(x, 7, 5) : 0;
5148 }
5149
5150 /* Return the unrotated immediate from T32ExpandImm. */
5151 static int t32_expandimm_imm(DisasContext *s, int x)
5152 {
5153 int imm = extract32(x, 0, 8);
5154
5155 switch (extract32(x, 8, 4)) {
5156 case 0: /* XY */
5157 /* Nothing to do. */
5158 break;
5159 case 1: /* 00XY00XY */
5160 imm *= 0x00010001;
5161 break;
5162 case 2: /* XY00XY00 */
5163 imm *= 0x01000100;
5164 break;
5165 case 3: /* XYXYXYXY */
5166 imm *= 0x01010101;
5167 break;
5168 default:
5169 /* Rotated constant. */
5170 imm |= 0x80;
5171 break;
5172 }
5173 return imm;
5174 }
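/*
 * Editorial note (illustrative, not part of the original source):
 * together with t32_expandimm_rot() above, this implements T32ExpandImm
 * for the 12-bit i:imm3:imm8 field.  Some example expansions of the
 * raw field value x:
 *   x = 0x0ab -> 0x000000ab
 *   x = 0x1ab -> 0x00ab00ab
 *   x = 0x2ab -> 0xab00ab00
 *   x = 0x3ab -> 0xabababab
 *   x = 0x46a -> unrotated byte 0xea (bit 7 forced), rotation 8,
 *                i.e. ror32(0xea, 8) = 0xea000000 once op_s_rri_rot()
 *                applies the rotation extracted by t32_expandimm_rot().
 */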
5175
5176 static int t32_branch24(DisasContext *s, int x)
5177 {
5178 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
5179 x ^= !(x < 0) * (3 << 21);
5180 /* Append the final zero. */
5181 return x << 1;
5182 }
5183
5184 static int t16_setflags(DisasContext *s)
5185 {
5186 return s->condexec_mask == 0;
5187 }
5188
5189 static int t16_push_list(DisasContext *s, int x)
5190 {
5191 return (x & 0xff) | (x & 0x100) << (14 - 8);
5192 }
5193
5194 static int t16_pop_list(DisasContext *s, int x)
5195 {
5196 return (x & 0xff) | (x & 0x100) << (15 - 8);
5197 }
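/*
 * Editorial note (not part of the original source): in the 16-bit
 * Thumb PUSH/POP encodings, bit 8 of the register list is the M/P bit,
 * meaning "also push LR" or "also pop PC" respectively; the two
 * expanders above move that bit into position 14 (LR) or 15 (PC) of
 * the full 16-bit register list used by the common LDM/STM code.
 */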
5198
5199 /*
5200 * Include the generated decoders.
5201 */
5202
5203 #include "decode-a32.c.inc"
5204 #include "decode-a32-uncond.c.inc"
5205 #include "decode-t32.c.inc"
5206 #include "decode-t16.c.inc"
5207
5208 static bool valid_cp(DisasContext *s, int cp)
5209 {
5210 /*
5211 * Return true if this coprocessor field indicates something
5212 * that's really a possible coprocessor.
5213 * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5214 * and of those only cp14 and cp15 were used for registers.
5215 * cp10 and cp11 were used for VFP and Neon, whose decode is
5216 * dealt with elsewhere. With the advent of fp16, cp9 is also
5217 * now part of VFP.
5218 * For v8A and later, the encoding has been tightened so that
5219 * only cp14 and cp15 are valid, and other values aren't considered
5220 * to be in the coprocessor-instruction space at all. v8M still
5221 * permits coprocessors 0..7.
5222 */
5223 if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5224 !arm_dc_feature(s, ARM_FEATURE_M)) {
5225 return cp >= 14;
5226 }
5227 return cp < 8 || cp >= 14;
5228 }
5229
5230 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5231 {
5232 if (!valid_cp(s, a->cp)) {
5233 return false;
5234 }
5235 do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5236 false, a->rt, 0);
5237 return true;
5238 }
5239
5240 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5241 {
5242 if (!valid_cp(s, a->cp)) {
5243 return false;
5244 }
5245 do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5246 true, a->rt, 0);
5247 return true;
5248 }
5249
5250 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5251 {
5252 if (!valid_cp(s, a->cp)) {
5253 return false;
5254 }
5255 do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5256 false, a->rt, a->rt2);
5257 return true;
5258 }
5259
5260 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5261 {
5262 if (!valid_cp(s, a->cp)) {
5263 return false;
5264 }
5265 do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5266 true, a->rt, a->rt2);
5267 return true;
5268 }
5269
5270 /* Helpers to swap operands for reverse-subtract. */
5271 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5272 {
5273 tcg_gen_sub_i32(dst, b, a);
5274 }
5275
5276 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5277 {
5278 gen_sub_CC(dst, b, a);
5279 }
5280
5281 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5282 {
5283 gen_sub_carry(dest, b, a);
5284 }
5285
5286 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5287 {
5288 gen_sbc_CC(dest, b, a);
5289 }
5290
5291 /*
5292 * Helpers for the data processing routines.
5293 *
5294 * After the computation, store the result back.
5295 * This may be suppressed altogether (STREG_NONE), require a runtime
5296 * check against the stack limits (STREG_SP_CHECK), generate an
5297 * exception return (STREG_EXC_RET), or store into a register (STREG_NORMAL).
5298 *
5299 * Always return true, indicating success for a trans_* function.
5300 */
5301 typedef enum {
5302 STREG_NONE,
5303 STREG_NORMAL,
5304 STREG_SP_CHECK,
5305 STREG_EXC_RET,
5306 } StoreRegKind;
5307
5308 static bool store_reg_kind(DisasContext *s, int rd,
5309 TCGv_i32 val, StoreRegKind kind)
5310 {
5311 switch (kind) {
5312 case STREG_NONE:
5313 tcg_temp_free_i32(val);
5314 return true;
5315 case STREG_NORMAL:
5316 /* See ALUWritePC: Interworking only from a32 mode. */
5317 if (s->thumb) {
5318 store_reg(s, rd, val);
5319 } else {
5320 store_reg_bx(s, rd, val);
5321 }
5322 return true;
5323 case STREG_SP_CHECK:
5324 store_sp_checked(s, val);
5325 return true;
5326 case STREG_EXC_RET:
5327 gen_exception_return(s, val);
5328 return true;
5329 }
5330 g_assert_not_reached();
5331 }
5332
5333 /*
5334 * Data Processing (register)
5335 *
5336 * Operate, with set flags, one register source,
5337 * one immediate shifted register source, and a destination.
5338 */
5339 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5340 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5341 int logic_cc, StoreRegKind kind)
5342 {
5343 TCGv_i32 tmp1, tmp2;
5344
5345 tmp2 = load_reg(s, a->rm);
5346 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5347 tmp1 = load_reg(s, a->rn);
5348
5349 gen(tmp1, tmp1, tmp2);
5350 tcg_temp_free_i32(tmp2);
5351
5352 if (logic_cc) {
5353 gen_logic_CC(tmp1);
5354 }
5355 return store_reg_kind(s, a->rd, tmp1, kind);
5356 }
5357
5358 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5359 void (*gen)(TCGv_i32, TCGv_i32),
5360 int logic_cc, StoreRegKind kind)
5361 {
5362 TCGv_i32 tmp;
5363
5364 tmp = load_reg(s, a->rm);
5365 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5366
5367 gen(tmp, tmp);
5368 if (logic_cc) {
5369 gen_logic_CC(tmp);
5370 }
5371 return store_reg_kind(s, a->rd, tmp, kind);
5372 }
5373
5374 /*
5375 * Data-processing (register-shifted register)
5376 *
5377 * Operate, with set flags, one register source,
5378 * one register shifted register source, and a destination.
5379 */
5380 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5381 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5382 int logic_cc, StoreRegKind kind)
5383 {
5384 TCGv_i32 tmp1, tmp2;
5385
5386 tmp1 = load_reg(s, a->rs);
5387 tmp2 = load_reg(s, a->rm);
5388 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5389 tmp1 = load_reg(s, a->rn);
5390
5391 gen(tmp1, tmp1, tmp2);
5392 tcg_temp_free_i32(tmp2);
5393
5394 if (logic_cc) {
5395 gen_logic_CC(tmp1);
5396 }
5397 return store_reg_kind(s, a->rd, tmp1, kind);
5398 }
5399
5400 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5401 void (*gen)(TCGv_i32, TCGv_i32),
5402 int logic_cc, StoreRegKind kind)
5403 {
5404 TCGv_i32 tmp1, tmp2;
5405
5406 tmp1 = load_reg(s, a->rs);
5407 tmp2 = load_reg(s, a->rm);
5408 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5409
5410 gen(tmp2, tmp2);
5411 if (logic_cc) {
5412 gen_logic_CC(tmp2);
5413 }
5414 return store_reg_kind(s, a->rd, tmp2, kind);
5415 }
5416
5417 /*
5418 * Data-processing (immediate)
5419 *
5420 * Operate, with set flags, one register source,
5421 * one rotated immediate, and a destination.
5422 *
5423 * Note that the need for logic_cc && a->rot to set CF from the msb
5424 * of the rotated immediate is the reason why the unrotated form of
5425 * the immediate (together with the rotation) must be passed in.
5426 */
5427 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5428 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5429 int logic_cc, StoreRegKind kind)
5430 {
5431 TCGv_i32 tmp1, tmp2;
5432 uint32_t imm;
5433
5434 imm = ror32(a->imm, a->rot);
5435 if (logic_cc && a->rot) {
5436 tcg_gen_movi_i32(cpu_CF, imm >> 31);
5437 }
5438 tmp2 = tcg_const_i32(imm);
5439 tmp1 = load_reg(s, a->rn);
5440
5441 gen(tmp1, tmp1, tmp2);
5442 tcg_temp_free_i32(tmp2);
5443
5444 if (logic_cc) {
5445 gen_logic_CC(tmp1);
5446 }
5447 return store_reg_kind(s, a->rd, tmp1, kind);
5448 }
5449
5450 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5451 void (*gen)(TCGv_i32, TCGv_i32),
5452 int logic_cc, StoreRegKind kind)
5453 {
5454 TCGv_i32 tmp;
5455 uint32_t imm;
5456
5457 imm = ror32(a->imm, a->rot);
5458 if (logic_cc && a->rot) {
5459 tcg_gen_movi_i32(cpu_CF, imm >> 31);
5460 }
5461 tmp = tcg_const_i32(imm);
5462
5463 gen(tmp, tmp);
5464 if (logic_cc) {
5465 gen_logic_CC(tmp);
5466 }
5467 return store_reg_kind(s, a->rd, tmp, kind);
5468 }
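/*
 * Editorial note (illustrative, not part of the original source):
 * the cpu_CF update in the two functions above corresponds to the
 * carry-out of ExpandImm_C: for the flag-setting logical ops, a
 * non-zero rotation makes the carry out equal to bit 31 of the rotated
 * immediate.  For example imm = 0xff with rot = 8 expands to
 * 0xff000000, so CF is set to 1; with rot = 0 the carry is left
 * unchanged, which is why the movi is guarded by a->rot.
 */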
5469
5470 #define DO_ANY3(NAME, OP, L, K) \
5471 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
5472 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
5473 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
5474 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
5475 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
5476 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5477
5478 #define DO_ANY2(NAME, OP, L, K) \
5479 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
5480 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
5481 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
5482 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
5483 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
5484 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5485
5486 #define DO_CMP2(NAME, OP, L) \
5487 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
5488 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
5489 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
5490 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
5491 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
5492 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
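/*
 * Editorial note (not part of the original source): the suffixes of
 * the generated trans_* functions encode the operand form: _rrri is a
 * register shifted by an immediate, _rrrr a register shifted by a
 * register, and _rri a rotated immediate.  An 'x' replacing a letter
 * marks an omitted operand: the DO_ANY2 forms (_rxri, _rxrr, _rxi)
 * have no Rn source, and the DO_CMP2 forms (_xrri, _xrrr, _xri) have
 * no destination.
 */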
5493
5494 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5495 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5496 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5497 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5498
5499 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5500 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5501 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5502 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5503
5504 DO_CMP2(TST, tcg_gen_and_i32, true)
5505 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5506 DO_CMP2(CMN, gen_add_CC, false)
5507 DO_CMP2(CMP, gen_sub_CC, false)
5508
5509 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5510 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5511
5512 /*
5513 * Note that for the computation of StoreRegKind we may return out of
5514 * the middle of the functions that are expanded by DO_ANY3, and that
5515 * we modify a->s via that parameter before OP uses it.
5516 */
5517 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5518 ({
5519 StoreRegKind ret = STREG_NORMAL;
5520 if (a->rd == 15 && a->s) {
5521 /*
5522 * See ALUExceptionReturn:
5523 * In User mode, UNPREDICTABLE; we choose UNDEF.
5524 * In Hyp mode, UNDEFINED.
5525 */
5526 if (IS_USER(s) || s->current_el == 2) {
5527 unallocated_encoding(s);
5528 return true;
5529 }
5530 /* There is no writeback of nzcv to PSTATE. */
5531 a->s = 0;
5532 ret = STREG_EXC_RET;
5533 } else if (a->rd == 13 && a->rn == 13) {
5534 ret = STREG_SP_CHECK;
5535 }
5536 ret;
5537 }))
5538
5539 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5540 ({
5541 StoreRegKind ret = STREG_NORMAL;
5542 if (a->rd == 15 && a->s) {
5543 /*
5544 * See ALUExceptionReturn:
5545 * In User mode, UNPREDICTABLE; we choose UNDEF.
5546 * In Hyp mode, UNDEFINED.
5547 */
5548 if (IS_USER(s) || s->current_el == 2) {
5549 unallocated_encoding(s);
5550 return true;
5551 }
5552 /* There is no writeback of nzcv to PSTATE. */
5553 a->s = 0;
5554 ret = STREG_EXC_RET;
5555 } else if (a->rd == 13) {
5556 ret = STREG_SP_CHECK;
5557 }
5558 ret;
5559 }))
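/*
 * Editorial note (not part of the original source): the rd == 15 && s
 * cases above correspond to the exception-return forms of these
 * instructions (e.g. SUBS PC, LR, #imm and MOVS PC, LR), which copy
 * SPSR to CPSR instead of setting NZCV; hence STREG_EXC_RET and the
 * clearing of a->s so that no flag writeback is generated.
 */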
5560
5561 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5562
5563 /*
5564 * ORN is only available with T32, so there is no register-shifted-register
5565 * form of the insn. Using the DO_ANY3 macro would create an unused function.
5566 */
5567 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5568 {
5569 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5570 }
5571
5572 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5573 {
5574 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5575 }
5576
5577 #undef DO_ANY3
5578 #undef DO_ANY2
5579 #undef DO_CMP2
5580
5581 static bool trans_ADR(DisasContext *s, arg_ri *a)
5582 {
5583 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5584 return true;
5585 }
5586
5587 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5588 {
5589 TCGv_i32 tmp;
5590
5591 if (!ENABLE_ARCH_6T2) {
5592 return false;
5593 }
5594
5595 tmp = tcg_const_i32(a->imm);
5596 store_reg(s, a->rd, tmp);
5597 return true;
5598 }
5599
5600 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5601 {
5602 TCGv_i32 tmp;
5603
5604 if (!ENABLE_ARCH_6T2) {
5605 return false;
5606 }
5607
5608 tmp = load_reg(s, a->rd);
5609 tcg_gen_ext16u_i32(tmp, tmp);
5610 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5611 store_reg(s, a->rd, tmp);
5612 return true;
5613 }
5614
5615 /*
5616 * Multiply and multiply accumulate
5617 */
5618
5619 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5620 {
5621 TCGv_i32 t1, t2;
5622
5623 t1 = load_reg(s, a->rn);
5624 t2 = load_reg(s, a->rm);
5625 tcg_gen_mul_i32(t1, t1, t2);
5626 tcg_temp_free_i32(t2);
5627 if (add) {
5628 t2 = load_reg(s, a->ra);
5629 tcg_gen_add_i32(t1, t1, t2);
5630 tcg_temp_free_i32(t2);
5631 }
5632 if (a->s) {
5633 gen_logic_CC(t1);
5634 }
5635 store_reg(s, a->rd, t1);
5636 return true;
5637 }
5638
5639 static bool trans_MUL(DisasContext *s, arg_MUL *a)
5640 {
5641 return op_mla(s, a, false);
5642 }
5643
5644 static bool trans_MLA(DisasContext *s, arg_MLA *a)
5645 {
5646 return op_mla(s, a, true);
5647 }
5648
5649 static bool trans_MLS(DisasContext *s, arg_MLS *a)
5650 {
5651 TCGv_i32 t1, t2;
5652
5653 if (!ENABLE_ARCH_6T2) {
5654 return false;
5655 }
5656 t1 = load_reg(s, a->rn);
5657 t2 = load_reg(s, a->rm);
5658 tcg_gen_mul_i32(t1, t1, t2);
5659 tcg_temp_free_i32(t2);
5660 t2 = load_reg(s, a->ra);
5661 tcg_gen_sub_i32(t1, t2, t1);
5662 tcg_temp_free_i32(t2);
5663 store_reg(s, a->rd, t1);
5664 return true;
5665 }
5666
5667 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5668 {
5669 TCGv_i32 t0, t1, t2, t3;
5670
5671 t0 = load_reg(s, a->rm);
5672 t1 = load_reg(s, a->rn);
5673 if (uns) {
5674 tcg_gen_mulu2_i32(t0, t1, t0, t1);
5675 } else {
5676 tcg_gen_muls2_i32(t0, t1, t0, t1);
5677 }
5678 if (add) {
5679 t2 = load_reg(s, a->ra);
5680 t3 = load_reg(s, a->rd);
5681 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5682 tcg_temp_free_i32(t2);
5683 tcg_temp_free_i32(t3);
5684 }
5685 if (a->s) {
5686 gen_logicq_cc(t0, t1);
5687 }
5688 store_reg(s, a->ra, t0);
5689 store_reg(s, a->rd, t1);
5690 return true;
5691 }
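/*
 * Editorial note (illustrative, not part of the original source):
 * mulu2/muls2 produce the full 64-bit product as a (low, high) pair of
 * 32-bit temporaries, and add2 then performs the 64-bit accumulate
 * with carry propagation between the halves.  For these insns a->ra is
 * RdLo and a->rd is RdHi, which is why the low half (t0) is stored to
 * ra and the high half (t1) to rd.
 */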
5692
5693 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5694 {
5695 return op_mlal(s, a, true, false);
5696 }
5697
5698 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5699 {
5700 return op_mlal(s, a, false, false);
5701 }
5702
5703 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5704 {
5705 return op_mlal(s, a, true, true);
5706 }
5707
5708 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5709 {
5710 return op_mlal(s, a, false, true);
5711 }
5712
5713 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5714 {
5715 TCGv_i32 t0, t1, t2, zero;
5716
5717 if (s->thumb
5718 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5719 : !ENABLE_ARCH_6) {
5720 return false;
5721 }
5722
5723 t0 = load_reg(s, a->rm);
5724 t1 = load_reg(s, a->rn);
5725 tcg_gen_mulu2_i32(t0, t1, t0, t1);
5726 zero = tcg_const_i32(0);
5727 t2 = load_reg(s, a->ra);
5728 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5729 tcg_temp_free_i32(t2);
5730 t2 = load_reg(s, a->rd);
5731 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5732 tcg_temp_free_i32(t2);
5733 tcg_temp_free_i32(zero);
5734 store_reg(s, a->ra, t0);
5735 store_reg(s, a->rd, t1);
5736 return true;
5737 }
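
/*
 * Worked bound for UMAAL (illustrative): the result rm * rn + ra + rd
 * always fits in 64 bits, since (2^32 - 1)^2 + 2 * (2^32 - 1) = 2^64 - 1.
 * That is why the two 32-bit accumulations above can be chained with
 * add2 and a zero high word, with no further overflow handling needed.
 */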
5738
5739 /*
5740 * Saturating addition and subtraction
5741 */
5742
5743 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5744 {
5745 TCGv_i32 t0, t1;
5746
5747 if (s->thumb
5748 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5749 : !ENABLE_ARCH_5TE) {
5750 return false;
5751 }
5752
5753 t0 = load_reg(s, a->rm);
5754 t1 = load_reg(s, a->rn);
5755 if (doub) {
5756 gen_helper_add_saturate(t1, cpu_env, t1, t1);
5757 }
5758 if (add) {
5759 gen_helper_add_saturate(t0, cpu_env, t0, t1);
5760 } else {
5761 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
5762 }
5763 tcg_temp_free_i32(t1);
5764 store_reg(s, a->rd, t0);
5765 return true;
5766 }
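
/*
 * Note on the 'doub' path above (QDADD/QDSUB): the doubling of rn is
 * itself a saturating operation, so Q can be set either by the doubling
 * or by the final add/subtract; that is why the doubling step reuses
 * gen_helper_add_saturate rather than a plain add.
 */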
5767
5768 #define DO_QADDSUB(NAME, ADD, DOUB) \
5769 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
5770 { \
5771 return op_qaddsub(s, a, ADD, DOUB); \
5772 }
5773
5774 DO_QADDSUB(QADD, true, false)
5775 DO_QADDSUB(QSUB, false, false)
5776 DO_QADDSUB(QDADD, true, true)
5777 DO_QADDSUB(QDSUB, false, true)
5778
5779 #undef DO_QADDSUB
5780
5781 /*
5782 * Halfword multiply and multiply accumulate
5783 */
5784
5785 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5786 int add_long, bool nt, bool mt)
5787 {
5788 TCGv_i32 t0, t1, tl, th;
5789
5790 if (s->thumb
5791 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5792 : !ENABLE_ARCH_5TE) {
5793 return false;
5794 }
5795
5796 t0 = load_reg(s, a->rn);
5797 t1 = load_reg(s, a->rm);
5798 gen_mulxy(t0, t1, nt, mt);
5799 tcg_temp_free_i32(t1);
5800
5801 switch (add_long) {
5802 case 0:
5803 store_reg(s, a->rd, t0);
5804 break;
5805 case 1:
5806 t1 = load_reg(s, a->ra);
5807 gen_helper_add_setq(t0, cpu_env, t0, t1);
5808 tcg_temp_free_i32(t1);
5809 store_reg(s, a->rd, t0);
5810 break;
5811 case 2:
5812 tl = load_reg(s, a->ra);
5813 th = load_reg(s, a->rd);
5814 /* Sign-extend the 32-bit product to 64 bits. */
5815 t1 = tcg_temp_new_i32();
5816 tcg_gen_sari_i32(t1, t0, 31);
5817 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
5818 tcg_temp_free_i32(t0);
5819 tcg_temp_free_i32(t1);
5820 store_reg(s, a->ra, tl);
5821 store_reg(s, a->rd, th);
5822 break;
5823 default:
5824 g_assert_not_reached();
5825 }
5826 return true;
5827 }
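
/*
 * The 'add_long' argument above selects the accumulate form:
 *   0: SMULxy  - 32-bit result, no accumulate
 *   1: SMLAxy  - 32-bit accumulate, setting Q on signed overflow
 *   2: SMLALxy - 64-bit accumulate, with 'ra' as the low word (RdLo)
 *                and 'rd' as the high word (RdHi)
 */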
5828
5829 #define DO_SMLAX(NAME, add, nt, mt) \
5830 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
5831 { \
5832 return op_smlaxxx(s, a, add, nt, mt); \
5833 }
5834
5835 DO_SMLAX(SMULBB, 0, 0, 0)
5836 DO_SMLAX(SMULBT, 0, 0, 1)
5837 DO_SMLAX(SMULTB, 0, 1, 0)
5838 DO_SMLAX(SMULTT, 0, 1, 1)
5839
5840 DO_SMLAX(SMLABB, 1, 0, 0)
5841 DO_SMLAX(SMLABT, 1, 0, 1)
5842 DO_SMLAX(SMLATB, 1, 1, 0)
5843 DO_SMLAX(SMLATT, 1, 1, 1)
5844
5845 DO_SMLAX(SMLALBB, 2, 0, 0)
5846 DO_SMLAX(SMLALBT, 2, 0, 1)
5847 DO_SMLAX(SMLALTB, 2, 1, 0)
5848 DO_SMLAX(SMLALTT, 2, 1, 1)
5849
5850 #undef DO_SMLAX
5851
5852 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
5853 {
5854 TCGv_i32 t0, t1;
5855
5856 if (!ENABLE_ARCH_5TE) {
5857 return false;
5858 }
5859
5860 t0 = load_reg(s, a->rn);
5861 t1 = load_reg(s, a->rm);
5862 /*
5863 * Since the nominal result is product<47:16>, shift the 16-bit
5864 * input up by 16 bits, so that the result is at product<63:32>.
5865 */
5866 if (mt) {
5867 tcg_gen_andi_i32(t1, t1, 0xffff0000);
5868 } else {
5869 tcg_gen_shli_i32(t1, t1, 16);
5870 }
5871 tcg_gen_muls2_i32(t0, t1, t0, t1);
5872 tcg_temp_free_i32(t0);
5873 if (add) {
5874 t0 = load_reg(s, a->ra);
5875 gen_helper_add_setq(t1, cpu_env, t1, t0);
5876 tcg_temp_free_i32(t0);
5877 }
5878 store_reg(s, a->rd, t1);
5879 return true;
5880 }
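
/*
 * Worked example for the shift trick above (illustrative): for SMULWB
 * the architectural result is (rn * sext(rm<15:0>))<47:16>.  Multiplying
 * by (rm<15:0> << 16) instead yields a 64-bit product whose bits <63:32>
 * are exactly those bits <47:16>, so the high output of muls2 is already
 * the final result and the low output can be discarded.
 */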
5881
5882 #define DO_SMLAWX(NAME, add, mt) \
5883 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
5884 { \
5885 return op_smlawx(s, a, add, mt); \
5886 }
5887
5888 DO_SMLAWX(SMULWB, 0, 0)
5889 DO_SMLAWX(SMULWT, 0, 1)
5890 DO_SMLAWX(SMLAWB, 1, 0)
5891 DO_SMLAWX(SMLAWT, 1, 1)
5892
5893 #undef DO_SMLAWX
5894
5895 /*
5896 * MSR (immediate) and hints
5897 */
5898
5899 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
5900 {
5901 /*
5902 * When running single-threaded TCG code, use the helper to ensure that
5903 * the next round-robin scheduled vCPU gets a crack. When running in
5904 * MTTCG we don't generate jumps to the helper as it won't affect the
5905 * scheduling of other vCPUs.
5906 */
5907 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
5908 gen_set_pc_im(s, s->base.pc_next);
5909 s->base.is_jmp = DISAS_YIELD;
5910 }
5911 return true;
5912 }
5913
5914 static bool trans_WFE(DisasContext *s, arg_WFE *a)
5915 {
5916 /*
5917 * When running single-threaded TCG code, use the helper to ensure that
5918 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
5919 * just skip this instruction. Currently the SEV/SEVL instructions,
5920 * which are *one* of many ways to wake the CPU from WFE, are not
5921 * implemented so we can't sleep like WFI does.
5922 */
5923 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
5924 gen_set_pc_im(s, s->base.pc_next);
5925 s->base.is_jmp = DISAS_WFE;
5926 }
5927 return true;
5928 }
5929
5930 static bool trans_WFI(DisasContext *s, arg_WFI *a)
5931 {
5932 /* For WFI, halt the vCPU until an IRQ. */
5933 gen_set_pc_im(s, s->base.pc_next);
5934 s->base.is_jmp = DISAS_WFI;
5935 return true;
5936 }
5937
5938 static bool trans_NOP(DisasContext *s, arg_NOP *a)
5939 {
5940 return true;
5941 }
5942
5943 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
5944 {
5945 uint32_t val = ror32(a->imm, a->rot * 2);
5946 uint32_t mask = msr_mask(s, a->mask, a->r);
5947
5948 if (gen_set_psr_im(s, mask, a->r, val)) {
5949 unallocated_encoding(s);
5950 }
5951 return true;
5952 }
5953
5954 /*
5955 * Cyclic Redundancy Check
5956 */
5957
5958 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
5959 {
5960 TCGv_i32 t1, t2, t3;
5961
5962 if (!dc_isar_feature(aa32_crc32, s)) {
5963 return false;
5964 }
5965
5966 t1 = load_reg(s, a->rn);
5967 t2 = load_reg(s, a->rm);
5968 switch (sz) {
5969 case MO_8:
5970 gen_uxtb(t2);
5971 break;
5972 case MO_16:
5973 gen_uxth(t2);
5974 break;
5975 case MO_32:
5976 break;
5977 default:
5978 g_assert_not_reached();
5979 }
5980 t3 = tcg_const_i32(1 << sz);
5981 if (c) {
5982 gen_helper_crc32c(t1, t1, t2, t3);
5983 } else {
5984 gen_helper_crc32(t1, t1, t2, t3);
5985 }
5986 tcg_temp_free_i32(t2);
5987 tcg_temp_free_i32(t3);
5988 store_reg(s, a->rd, t1);
5989 return true;
5990 }
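
/*
 * The third operand passed to the crc32/crc32c helpers above is the
 * number of bytes of rm that participate (1 << sz), which lets a single
 * helper serve the byte, halfword and word variants generated below.
 */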
5991
5992 #define DO_CRC32(NAME, c, sz) \
5993 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
5994 { return op_crc32(s, a, c, sz); }
5995
5996 DO_CRC32(CRC32B, false, MO_8)
5997 DO_CRC32(CRC32H, false, MO_16)
5998 DO_CRC32(CRC32W, false, MO_32)
5999 DO_CRC32(CRC32CB, true, MO_8)
6000 DO_CRC32(CRC32CH, true, MO_16)
6001 DO_CRC32(CRC32CW, true, MO_32)
6002
6003 #undef DO_CRC32
6004
6005 /*
6006 * Miscellaneous instructions
6007 */
6008
6009 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6010 {
6011 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6012 return false;
6013 }
6014 gen_mrs_banked(s, a->r, a->sysm, a->rd);
6015 return true;
6016 }
6017
6018 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6019 {
6020 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6021 return false;
6022 }
6023 gen_msr_banked(s, a->r, a->sysm, a->rn);
6024 return true;
6025 }
6026
6027 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6028 {
6029 TCGv_i32 tmp;
6030
6031 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6032 return false;
6033 }
6034 if (a->r) {
6035 if (IS_USER(s)) {
6036 unallocated_encoding(s);
6037 return true;
6038 }
6039 tmp = load_cpu_field(spsr);
6040 } else {
6041 tmp = tcg_temp_new_i32();
6042 gen_helper_cpsr_read(tmp, cpu_env);
6043 }
6044 store_reg(s, a->rd, tmp);
6045 return true;
6046 }
6047
6048 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6049 {
6050 TCGv_i32 tmp;
6051 uint32_t mask = msr_mask(s, a->mask, a->r);
6052
6053 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6054 return false;
6055 }
6056 tmp = load_reg(s, a->rn);
6057 if (gen_set_psr(s, mask, a->r, tmp)) {
6058 unallocated_encoding(s);
6059 }
6060 return true;
6061 }
6062
6063 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6064 {
6065 TCGv_i32 tmp;
6066
6067 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6068 return false;
6069 }
6070 tmp = tcg_const_i32(a->sysm);
6071 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
6072 store_reg(s, a->rd, tmp);
6073 return true;
6074 }
6075
6076 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6077 {
6078 TCGv_i32 addr, reg;
6079
6080 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6081 return false;
6082 }
6083 addr = tcg_const_i32((a->mask << 10) | a->sysm);
6084 reg = load_reg(s, a->rn);
6085 gen_helper_v7m_msr(cpu_env, addr, reg);
6086 tcg_temp_free_i32(addr);
6087 tcg_temp_free_i32(reg);
6088 /* If we wrote to CONTROL, the EL might have changed */
6089 gen_helper_rebuild_hflags_m32_newel(cpu_env);
6090 gen_lookup_tb(s);
6091 return true;
6092 }
6093
6094 static bool trans_BX(DisasContext *s, arg_BX *a)
6095 {
6096 if (!ENABLE_ARCH_4T) {
6097 return false;
6098 }
6099 gen_bx_excret(s, load_reg(s, a->rm));
6100 return true;
6101 }
6102
6103 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6104 {
6105 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6106 return false;
6107 }
6108 /* Trivial implementation equivalent to bx. */
6109 gen_bx(s, load_reg(s, a->rm));
6110 return true;
6111 }
6112
6113 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6114 {
6115 TCGv_i32 tmp;
6116
6117 if (!ENABLE_ARCH_5) {
6118 return false;
6119 }
6120 tmp = load_reg(s, a->rm);
6121 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
6122 gen_bx(s, tmp);
6123 return true;
6124 }
6125
6126 /*
6127 * BXNS/BLXNS: only exist for v8M with the security extensions,
6128 * and always UNDEF if NonSecure. We don't implement these in
6129 * the user-only mode either (in theory you can use them from
6130 * Secure User mode but they are too tied in to system emulation).
6131 */
6132 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6133 {
6134 if (!s->v8m_secure || IS_USER_ONLY) {
6135 unallocated_encoding(s);
6136 } else {
6137 gen_bxns(s, a->rm);
6138 }
6139 return true;
6140 }
6141
6142 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6143 {
6144 if (!s->v8m_secure || IS_USER_ONLY) {
6145 unallocated_encoding(s);
6146 } else {
6147 gen_blxns(s, a->rm);
6148 }
6149 return true;
6150 }
6151
6152 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6153 {
6154 TCGv_i32 tmp;
6155
6156 if (!ENABLE_ARCH_5) {
6157 return false;
6158 }
6159 tmp = load_reg(s, a->rm);
6160 tcg_gen_clzi_i32(tmp, tmp, 32);
6161 store_reg(s, a->rd, tmp);
6162 return true;
6163 }
6164
6165 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6166 {
6167 TCGv_i32 tmp;
6168
6169 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6170 return false;
6171 }
6172 if (IS_USER(s)) {
6173 unallocated_encoding(s);
6174 return true;
6175 }
6176 if (s->current_el == 2) {
6177 /* ERET from Hyp uses ELR_Hyp, not LR */
6178 tmp = load_cpu_field(elr_el[2]);
6179 } else {
6180 tmp = load_reg(s, 14);
6181 }
6182 gen_exception_return(s, tmp);
6183 return true;
6184 }
6185
6186 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6187 {
6188 gen_hlt(s, a->imm);
6189 return true;
6190 }
6191
6192 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6193 {
6194 if (!ENABLE_ARCH_5) {
6195 return false;
6196 }
6197 if (arm_dc_feature(s, ARM_FEATURE_M) &&
6198 semihosting_enabled() &&
6199 #ifndef CONFIG_USER_ONLY
6200 !IS_USER(s) &&
6201 #endif
6202 (a->imm == 0xab)) {
6203 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
6204 } else {
6205 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6206 }
6207 return true;
6208 }
6209
6210 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6211 {
6212 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6213 return false;
6214 }
6215 if (IS_USER(s)) {
6216 unallocated_encoding(s);
6217 } else {
6218 gen_hvc(s, a->imm);
6219 }
6220 return true;
6221 }
6222
6223 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6224 {
6225 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6226 return false;
6227 }
6228 if (IS_USER(s)) {
6229 unallocated_encoding(s);
6230 } else {
6231 gen_smc(s);
6232 }
6233 return true;
6234 }
6235
6236 static bool trans_SG(DisasContext *s, arg_SG *a)
6237 {
6238 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6239 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6240 return false;
6241 }
6242 /*
6243 * SG (v8M only)
6244 * The bulk of the behaviour for this instruction is implemented
6245 * in v7m_handle_execute_nsc(), which deals with the insn when
6246 * it is executed by a CPU in non-secure state from memory
6247 * which is Secure & NonSecure-Callable.
6248 * Here we only need to handle the remaining cases:
6249 * * in NS memory (including the "security extension not
6250 * implemented" case) : NOP
6251 * * in S memory but CPU already secure (clear IT bits)
6252 * We know that the attribute for the memory this insn is
6253 * in must match the current CPU state, because otherwise
6254 * get_phys_addr_pmsav8 would have generated an exception.
6255 */
6256 if (s->v8m_secure) {
6257 /* Like the IT insn, we don't need to generate any code */
6258 s->condexec_cond = 0;
6259 s->condexec_mask = 0;
6260 }
6261 return true;
6262 }
6263
6264 static bool trans_TT(DisasContext *s, arg_TT *a)
6265 {
6266 TCGv_i32 addr, tmp;
6267
6268 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6269 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6270 return false;
6271 }
6272 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6273 /* We UNDEF for these UNPREDICTABLE cases */
6274 unallocated_encoding(s);
6275 return true;
6276 }
6277 if (a->A && !s->v8m_secure) {
6278 /* This case is UNDEFINED. */
6279 unallocated_encoding(s);
6280 return true;
6281 }
6282
6283 addr = load_reg(s, a->rn);
6284 tmp = tcg_const_i32((a->A << 1) | a->T);
6285 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
6286 tcg_temp_free_i32(addr);
6287 store_reg(s, a->rd, tmp);
6288 return true;
6289 }
6290
6291 /*
6292 * Load/store register index
6293 */
6294
6295 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6296 {
6297 ISSInfo ret;
6298
6299 /* ISS not valid if writeback */
6300 if (p && !w) {
6301 ret = rd;
6302 if (s->base.pc_next - s->pc_curr == 2) {
6303 ret |= ISSIs16Bit;
6304 }
6305 } else {
6306 ret = ISSInvalid;
6307 }
6308 return ret;
6309 }
6310
6311 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6312 {
6313 TCGv_i32 addr = load_reg(s, a->rn);
6314
6315 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6316 gen_helper_v8m_stackcheck(cpu_env, addr);
6317 }
6318
6319 if (a->p) {
6320 TCGv_i32 ofs = load_reg(s, a->rm);
6321 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6322 if (a->u) {
6323 tcg_gen_add_i32(addr, addr, ofs);
6324 } else {
6325 tcg_gen_sub_i32(addr, addr, ofs);
6326 }
6327 tcg_temp_free_i32(ofs);
6328 }
6329 return addr;
6330 }
6331
6332 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6333 TCGv_i32 addr, int address_offset)
6334 {
6335 if (!a->p) {
6336 TCGv_i32 ofs = load_reg(s, a->rm);
6337 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6338 if (a->u) {
6339 tcg_gen_add_i32(addr, addr, ofs);
6340 } else {
6341 tcg_gen_sub_i32(addr, addr, ofs);
6342 }
6343 tcg_temp_free_i32(ofs);
6344 } else if (!a->w) {
6345 tcg_temp_free_i32(addr);
6346 return;
6347 }
6348 tcg_gen_addi_i32(addr, addr, address_offset);
6349 store_reg(s, a->rn, addr);
6350 }
6351
6352 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6353 MemOp mop, int mem_idx)
6354 {
6355 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6356 TCGv_i32 addr, tmp;
6357
6358 addr = op_addr_rr_pre(s, a);
6359
6360 tmp = tcg_temp_new_i32();
6361 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6362 disas_set_da_iss(s, mop, issinfo);
6363
6364 /*
6365 * Perform base writeback before the loaded value to
6366 * ensure correct behavior with overlapping index registers.
6367 */
6368 op_addr_rr_post(s, a, addr, 0);
6369 store_reg_from_load(s, a->rt, tmp);
6370 return true;
6371 }
6372
6373 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6374 MemOp mop, int mem_idx)
6375 {
6376 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6377 TCGv_i32 addr, tmp;
6378
6379 addr = op_addr_rr_pre(s, a);
6380
6381 tmp = load_reg(s, a->rt);
6382 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6383 disas_set_da_iss(s, mop, issinfo);
6384 tcg_temp_free_i32(tmp);
6385
6386 op_addr_rr_post(s, a, addr, 0);
6387 return true;
6388 }
6389
6390 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6391 {
6392 int mem_idx = get_mem_index(s);
6393 TCGv_i32 addr, tmp;
6394
6395 if (!ENABLE_ARCH_5TE) {
6396 return false;
6397 }
6398 if (a->rt & 1) {
6399 unallocated_encoding(s);
6400 return true;
6401 }
6402 addr = op_addr_rr_pre(s, a);
6403
6404 tmp = tcg_temp_new_i32();
6405 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6406 store_reg(s, a->rt, tmp);
6407
6408 tcg_gen_addi_i32(addr, addr, 4);
6409
6410 tmp = tcg_temp_new_i32();
6411 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6412 store_reg(s, a->rt + 1, tmp);
6413
6414 /* LDRD w/ base writeback is undefined if the registers overlap. */
6415 op_addr_rr_post(s, a, addr, -4);
6416 return true;
6417 }
6418
6419 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6420 {
6421 int mem_idx = get_mem_index(s);
6422 TCGv_i32 addr, tmp;
6423
6424 if (!ENABLE_ARCH_5TE) {
6425 return false;
6426 }
6427 if (a->rt & 1) {
6428 unallocated_encoding(s);
6429 return true;
6430 }
6431 addr = op_addr_rr_pre(s, a);
6432
6433 tmp = load_reg(s, a->rt);
6434 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6435 tcg_temp_free_i32(tmp);
6436
6437 tcg_gen_addi_i32(addr, addr, 4);
6438
6439 tmp = load_reg(s, a->rt + 1);
6440 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6441 tcg_temp_free_i32(tmp);
6442
6443 op_addr_rr_post(s, a, addr, -4);
6444 return true;
6445 }
6446
6447 /*
6448 * Load/store immediate index
6449 */
6450
6451 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6452 {
6453 int ofs = a->imm;
6454
6455 if (!a->u) {
6456 ofs = -ofs;
6457 }
6458
6459 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6460 /*
6461 * Stackcheck. Here we know 'addr' is the current SP;
6462 * U is set if we're moving SP up, else down. It is
6463 * UNKNOWN whether the limit check triggers when SP starts
6464 * below the limit and ends up above it; we chose to do so.
6465 */
6466 if (!a->u) {
6467 TCGv_i32 newsp = tcg_temp_new_i32();
6468 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6469 gen_helper_v8m_stackcheck(cpu_env, newsp);
6470 tcg_temp_free_i32(newsp);
6471 } else {
6472 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6473 }
6474 }
6475
6476 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6477 }
6478
6479 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6480 TCGv_i32 addr, int address_offset)
6481 {
6482 if (!a->p) {
6483 if (a->u) {
6484 address_offset += a->imm;
6485 } else {
6486 address_offset -= a->imm;
6487 }
6488 } else if (!a->w) {
6489 tcg_temp_free_i32(addr);
6490 return;
6491 }
6492 tcg_gen_addi_i32(addr, addr, address_offset);
6493 store_reg(s, a->rn, addr);
6494 }
6495
6496 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6497 MemOp mop, int mem_idx)
6498 {
6499 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6500 TCGv_i32 addr, tmp;
6501
6502 addr = op_addr_ri_pre(s, a);
6503
6504 tmp = tcg_temp_new_i32();
6505 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6506 disas_set_da_iss(s, mop, issinfo);
6507
6508 /*
6509 * Perform base writeback before the loaded value to
6510 * ensure correct behavior with overlapping index registers.
6511 */
6512 op_addr_ri_post(s, a, addr, 0);
6513 store_reg_from_load(s, a->rt, tmp);
6514 return true;
6515 }
6516
6517 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6518 MemOp mop, int mem_idx)
6519 {
6520 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6521 TCGv_i32 addr, tmp;
6522
6523 addr = op_addr_ri_pre(s, a);
6524
6525 tmp = load_reg(s, a->rt);
6526 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6527 disas_set_da_iss(s, mop, issinfo);
6528 tcg_temp_free_i32(tmp);
6529
6530 op_addr_ri_post(s, a, addr, 0);
6531 return true;
6532 }
6533
6534 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6535 {
6536 int mem_idx = get_mem_index(s);
6537 TCGv_i32 addr, tmp;
6538
6539 addr = op_addr_ri_pre(s, a);
6540
6541 tmp = tcg_temp_new_i32();
6542 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6543 store_reg(s, a->rt, tmp);
6544
6545 tcg_gen_addi_i32(addr, addr, 4);
6546
6547 tmp = tcg_temp_new_i32();
6548 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6549 store_reg(s, rt2, tmp);
6550
6551 /* LDRD w/ base writeback is undefined if the registers overlap. */
6552 op_addr_ri_post(s, a, addr, -4);
6553 return true;
6554 }
6555
6556 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6557 {
6558 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6559 return false;
6560 }
6561 return op_ldrd_ri(s, a, a->rt + 1);
6562 }
6563
6564 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6565 {
6566 arg_ldst_ri b = {
6567 .u = a->u, .w = a->w, .p = a->p,
6568 .rn = a->rn, .rt = a->rt, .imm = a->imm
6569 };
6570 return op_ldrd_ri(s, &b, a->rt2);
6571 }
6572
6573 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6574 {
6575 int mem_idx = get_mem_index(s);
6576 TCGv_i32 addr, tmp;
6577
6578 addr = op_addr_ri_pre(s, a);
6579
6580 tmp = load_reg(s, a->rt);
6581 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6582 tcg_temp_free_i32(tmp);
6583
6584 tcg_gen_addi_i32(addr, addr, 4);
6585
6586 tmp = load_reg(s, rt2);
6587 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6588 tcg_temp_free_i32(tmp);
6589
6590 op_addr_ri_post(s, a, addr, -4);
6591 return true;
6592 }
6593
6594 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6595 {
6596 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6597 return false;
6598 }
6599 return op_strd_ri(s, a, a->rt + 1);
6600 }
6601
6602 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6603 {
6604 arg_ldst_ri b = {
6605 .u = a->u, .w = a->w, .p = a->p,
6606 .rn = a->rn, .rt = a->rt, .imm = a->imm
6607 };
6608 return op_strd_ri(s, &b, a->rt2);
6609 }
6610
6611 #define DO_LDST(NAME, WHICH, MEMOP) \
6612 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
6613 { \
6614 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
6615 } \
6616 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
6617 { \
6618 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
6619 } \
6620 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
6621 { \
6622 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
6623 } \
6624 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
6625 { \
6626 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
6627 }
6628
6629 DO_LDST(LDR, load, MO_UL)
6630 DO_LDST(LDRB, load, MO_UB)
6631 DO_LDST(LDRH, load, MO_UW)
6632 DO_LDST(LDRSB, load, MO_SB)
6633 DO_LDST(LDRSH, load, MO_SW)
6634
6635 DO_LDST(STR, store, MO_UL)
6636 DO_LDST(STRB, store, MO_UB)
6637 DO_LDST(STRH, store, MO_UW)
6638
6639 #undef DO_LDST
6640
6641 /*
6642 * Synchronization primitives
6643 */
6644
6645 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6646 {
6647 TCGv_i32 addr, tmp;
6648 TCGv taddr;
6649
6650 opc |= s->be_data;
6651 addr = load_reg(s, a->rn);
6652 taddr = gen_aa32_addr(s, addr, opc);
6653 tcg_temp_free_i32(addr);
6654
6655 tmp = load_reg(s, a->rt2);
6656 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6657 tcg_temp_free(taddr);
6658
6659 store_reg(s, a->rt, tmp);
6660 return true;
6661 }
6662
6663 static bool trans_SWP(DisasContext *s, arg_SWP *a)
6664 {
6665 return op_swp(s, a, MO_UL | MO_ALIGN);
6666 }
6667
6668 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6669 {
6670 return op_swp(s, a, MO_UB);
6671 }
6672
6673 /*
6674 * Load/Store Exclusive and Load-Acquire/Store-Release
6675 */
6676
6677 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6678 {
6679 TCGv_i32 addr;
6680 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6681 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6682
6683 /* We UNDEF for these UNPREDICTABLE cases. */
6684 if (a->rd == 15 || a->rn == 15 || a->rt == 15
6685 || a->rd == a->rn || a->rd == a->rt
6686 || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6687 || (mop == MO_64
6688 && (a->rt2 == 15
6689 || a->rd == a->rt2
6690 || (!v8a && s->thumb && a->rt2 == 13)))) {
6691 unallocated_encoding(s);
6692 return true;
6693 }
6694
6695 if (rel) {
6696 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6697 }
6698
6699 addr = tcg_temp_local_new_i32();
6700 load_reg_var(s, addr, a->rn);
6701 tcg_gen_addi_i32(addr, addr, a->imm);
6702
6703 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6704 tcg_temp_free_i32(addr);
6705 return true;
6706 }
6707
6708 static bool trans_STREX(DisasContext *s, arg_STREX *a)
6709 {
6710 if (!ENABLE_ARCH_6) {
6711 return false;
6712 }
6713 return op_strex(s, a, MO_32, false);
6714 }
6715
6716 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6717 {
6718 if (!ENABLE_ARCH_6K) {
6719 return false;
6720 }
6721 /* We UNDEF for these UNPREDICTABLE cases. */
6722 if (a->rt & 1) {
6723 unallocated_encoding(s);
6724 return true;
6725 }
6726 a->rt2 = a->rt + 1;
6727 return op_strex(s, a, MO_64, false);
6728 }
6729
6730 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6731 {
6732 return op_strex(s, a, MO_64, false);
6733 }
6734
6735 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6736 {
6737 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6738 return false;
6739 }
6740 return op_strex(s, a, MO_8, false);
6741 }
6742
6743 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6744 {
6745 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6746 return false;
6747 }
6748 return op_strex(s, a, MO_16, false);
6749 }
6750
6751 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6752 {
6753 if (!ENABLE_ARCH_8) {
6754 return false;
6755 }
6756 return op_strex(s, a, MO_32, true);
6757 }
6758
6759 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6760 {
6761 if (!ENABLE_ARCH_8) {
6762 return false;
6763 }
6764 /* We UNDEF for these UNPREDICTABLE cases. */
6765 if (a->rt & 1) {
6766 unallocated_encoding(s);
6767 return true;
6768 }
6769 a->rt2 = a->rt + 1;
6770 return op_strex(s, a, MO_64, true);
6771 }
6772
6773 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6774 {
6775 if (!ENABLE_ARCH_8) {
6776 return false;
6777 }
6778 return op_strex(s, a, MO_64, true);
6779 }
6780
6781 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
6782 {
6783 if (!ENABLE_ARCH_8) {
6784 return false;
6785 }
6786 return op_strex(s, a, MO_8, true);
6787 }
6788
6789 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
6790 {
6791 if (!ENABLE_ARCH_8) {
6792 return false;
6793 }
6794 return op_strex(s, a, MO_16, true);
6795 }
6796
6797 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
6798 {
6799 TCGv_i32 addr, tmp;
6800
6801 if (!ENABLE_ARCH_8) {
6802 return false;
6803 }
6804 /* We UNDEF for these UNPREDICTABLE cases. */
6805 if (a->rn == 15 || a->rt == 15) {
6806 unallocated_encoding(s);
6807 return true;
6808 }
6809
6810 addr = load_reg(s, a->rn);
6811 tmp = load_reg(s, a->rt);
6812 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6813 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
6814 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
6815
6816 tcg_temp_free_i32(tmp);
6817 tcg_temp_free_i32(addr);
6818 return true;
6819 }
6820
6821 static bool trans_STL(DisasContext *s, arg_STL *a)
6822 {
6823 return op_stl(s, a, MO_UL);
6824 }
6825
6826 static bool trans_STLB(DisasContext *s, arg_STL *a)
6827 {
6828 return op_stl(s, a, MO_UB);
6829 }
6830
6831 static bool trans_STLH(DisasContext *s, arg_STL *a)
6832 {
6833 return op_stl(s, a, MO_UW);
6834 }
6835
6836 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
6837 {
6838 TCGv_i32 addr;
6839 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6840 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6841
6842 /* We UNDEF for these UNPREDICTABLE cases. */
6843 if (a->rn == 15 || a->rt == 15
6844 || (!v8a && s->thumb && a->rt == 13)
6845 || (mop == MO_64
6846 && (a->rt2 == 15 || a->rt == a->rt2
6847 || (!v8a && s->thumb && a->rt2 == 13)))) {
6848 unallocated_encoding(s);
6849 return true;
6850 }
6851
6852 addr = tcg_temp_local_new_i32();
6853 load_reg_var(s, addr, a->rn);
6854 tcg_gen_addi_i32(addr, addr, a->imm);
6855
6856 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
6857 tcg_temp_free_i32(addr);
6858
6859 if (acq) {
6860 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
6861 }
6862 return true;
6863 }
6864
6865 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
6866 {
6867 if (!ENABLE_ARCH_6) {
6868 return false;
6869 }
6870 return op_ldrex(s, a, MO_32, false);
6871 }
6872
6873 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
6874 {
6875 if (!ENABLE_ARCH_6K) {
6876 return false;
6877 }
6878 /* We UNDEF for these UNPREDICTABLE cases. */
6879 if (a->rt & 1) {
6880 unallocated_encoding(s);
6881 return true;
6882 }
6883 a->rt2 = a->rt + 1;
6884 return op_ldrex(s, a, MO_64, false);
6885 }
6886
6887 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
6888 {
6889 return op_ldrex(s, a, MO_64, false);
6890 }
6891
6892 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
6893 {
6894 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6895 return false;
6896 }
6897 return op_ldrex(s, a, MO_8, false);
6898 }
6899
6900 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
6901 {
6902 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6903 return false;
6904 }
6905 return op_ldrex(s, a, MO_16, false);
6906 }
6907
6908 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
6909 {
6910 if (!ENABLE_ARCH_8) {
6911 return false;
6912 }
6913 return op_ldrex(s, a, MO_32, true);
6914 }
6915
6916 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
6917 {
6918 if (!ENABLE_ARCH_8) {
6919 return false;
6920 }
6921 /* We UNDEF for these UNPREDICTABLE cases. */
6922 if (a->rt & 1) {
6923 unallocated_encoding(s);
6924 return true;
6925 }
6926 a->rt2 = a->rt + 1;
6927 return op_ldrex(s, a, MO_64, true);
6928 }
6929
6930 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
6931 {
6932 if (!ENABLE_ARCH_8) {
6933 return false;
6934 }
6935 return op_ldrex(s, a, MO_64, true);
6936 }
6937
6938 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
6939 {
6940 if (!ENABLE_ARCH_8) {
6941 return false;
6942 }
6943 return op_ldrex(s, a, MO_8, true);
6944 }
6945
6946 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
6947 {
6948 if (!ENABLE_ARCH_8) {
6949 return false;
6950 }
6951 return op_ldrex(s, a, MO_16, true);
6952 }
6953
6954 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
6955 {
6956 TCGv_i32 addr, tmp;
6957
6958 if (!ENABLE_ARCH_8) {
6959 return false;
6960 }
6961 /* We UNDEF for these UNPREDICTABLE cases. */
6962 if (a->rn == 15 || a->rt == 15) {
6963 unallocated_encoding(s);
6964 return true;
6965 }
6966
6967 addr = load_reg(s, a->rn);
6968 tmp = tcg_temp_new_i32();
6969 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
6970 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
6971 tcg_temp_free_i32(addr);
6972
6973 store_reg(s, a->rt, tmp);
6974 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6975 return true;
6976 }
6977
6978 static bool trans_LDA(DisasContext *s, arg_LDA *a)
6979 {
6980 return op_lda(s, a, MO_UL);
6981 }
6982
6983 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
6984 {
6985 return op_lda(s, a, MO_UB);
6986 }
6987
6988 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
6989 {
6990 return op_lda(s, a, MO_UW);
6991 }
6992
6993 /*
6994 * Media instructions
6995 */
6996
6997 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
6998 {
6999 TCGv_i32 t1, t2;
7000
7001 if (!ENABLE_ARCH_6) {
7002 return false;
7003 }
7004
7005 t1 = load_reg(s, a->rn);
7006 t2 = load_reg(s, a->rm);
7007 gen_helper_usad8(t1, t1, t2);
7008 tcg_temp_free_i32(t2);
7009 if (a->ra != 15) {
7010 t2 = load_reg(s, a->ra);
7011 tcg_gen_add_i32(t1, t1, t2);
7012 tcg_temp_free_i32(t2);
7013 }
7014 store_reg(s, a->rd, t1);
7015 return true;
7016 }
7017
7018 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7019 {
7020 TCGv_i32 tmp;
7021 int width = a->widthm1 + 1;
7022 int shift = a->lsb;
7023
7024 if (!ENABLE_ARCH_6T2) {
7025 return false;
7026 }
7027 if (shift + width > 32) {
7028 /* UNPREDICTABLE; we choose to UNDEF */
7029 unallocated_encoding(s);
7030 return true;
7031 }
7032
7033 tmp = load_reg(s, a->rn);
7034 if (u) {
7035 tcg_gen_extract_i32(tmp, tmp, shift, width);
7036 } else {
7037 tcg_gen_sextract_i32(tmp, tmp, shift, width);
7038 }
7039 store_reg(s, a->rd, tmp);
7040 return true;
7041 }
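
/*
 * Worked example for the field extract above (illustrative): UBFX with
 * lsb = 8 and widthm1 = 7 copies bits <15:8> of rn into bits <7:0> of
 * rd and zero-extends; SBFX sign-extends the same field instead.
 */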
7042
7043 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7044 {
7045 return op_bfx(s, a, false);
7046 }
7047
7048 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7049 {
7050 return op_bfx(s, a, true);
7051 }
7052
7053 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7054 {
7055 TCGv_i32 tmp;
7056 int msb = a->msb, lsb = a->lsb;
7057 int width;
7058
7059 if (!ENABLE_ARCH_6T2) {
7060 return false;
7061 }
7062 if (msb < lsb) {
7063 /* UNPREDICTABLE; we choose to UNDEF */
7064 unallocated_encoding(s);
7065 return true;
7066 }
7067
7068 width = msb + 1 - lsb;
7069 if (a->rn == 15) {
7070 /* BFC */
7071 tmp = tcg_const_i32(0);
7072 } else {
7073 /* BFI */
7074 tmp = load_reg(s, a->rn);
7075 }
7076 if (width != 32) {
7077 TCGv_i32 tmp2 = load_reg(s, a->rd);
7078 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
7079 tcg_temp_free_i32(tmp2);
7080 }
7081 store_reg(s, a->rd, tmp);
7082 return true;
7083 }
7084
7085 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7086 {
7087 unallocated_encoding(s);
7088 return true;
7089 }
7090
7091 /*
7092 * Parallel addition and subtraction
7093 */
7094
7095 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7096 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7097 {
7098 TCGv_i32 t0, t1;
7099
7100 if (s->thumb
7101 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7102 : !ENABLE_ARCH_6) {
7103 return false;
7104 }
7105
7106 t0 = load_reg(s, a->rn);
7107 t1 = load_reg(s, a->rm);
7108
7109 gen(t0, t0, t1);
7110
7111 tcg_temp_free_i32(t1);
7112 store_reg(s, a->rd, t0);
7113 return true;
7114 }
7115
7116 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7117 void (*gen)(TCGv_i32, TCGv_i32,
7118 TCGv_i32, TCGv_ptr))
7119 {
7120 TCGv_i32 t0, t1;
7121 TCGv_ptr ge;
7122
7123 if (s->thumb
7124 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7125 : !ENABLE_ARCH_6) {
7126 return false;
7127 }
7128
7129 t0 = load_reg(s, a->rn);
7130 t1 = load_reg(s, a->rm);
7131
7132 ge = tcg_temp_new_ptr();
7133 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7134 gen(t0, t0, t1, ge);
7135
7136 tcg_temp_free_ptr(ge);
7137 tcg_temp_free_i32(t1);
7138 store_reg(s, a->rd, t0);
7139 return true;
7140 }
7141
7142 #define DO_PAR_ADDSUB(NAME, helper) \
7143 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
7144 { \
7145 return op_par_addsub(s, a, helper); \
7146 }
7147
7148 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7149 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
7150 { \
7151 return op_par_addsub_ge(s, a, helper); \
7152 }
7153
7154 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7155 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7156 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7157 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7158 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7159 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7160
7161 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7162 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7163 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7164 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7165 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7166 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7167
7168 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7169 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7170 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7171 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7172 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7173 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7174
7175 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7176 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7177 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7178 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7179 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7180 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7181
7182 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7183 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7184 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7185 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7186 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7187 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7188
7189 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7190 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7191 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7192 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7193 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7194 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7195
7196 #undef DO_PAR_ADDSUB
7197 #undef DO_PAR_ADDSUB_GE
7198
7199 /*
7200 * Packing, unpacking, saturation, and reversal
7201 */
7202
7203 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7204 {
7205 TCGv_i32 tn, tm;
7206 int shift = a->imm;
7207
7208 if (s->thumb
7209 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7210 : !ENABLE_ARCH_6) {
7211 return false;
7212 }
7213
7214 tn = load_reg(s, a->rn);
7215 tm = load_reg(s, a->rm);
7216 if (a->tb) {
7217 /* PKHTB */
7218 if (shift == 0) {
7219 shift = 31;
7220 }
7221 tcg_gen_sari_i32(tm, tm, shift);
7222 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7223 } else {
7224 /* PKHBT */
7225 tcg_gen_shli_i32(tm, tm, shift);
7226 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7227 }
7228 tcg_temp_free_i32(tm);
7229 store_reg(s, a->rd, tn);
7230 return true;
7231 }
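
/*
 * Packing summary for the code above (illustrative):
 *   PKHBT: rd = (rm LSL shift)<31:16> : rn<15:0>
 *   PKHTB: rd = rn<31:16> : (rm ASR shift)<15:0>
 * For PKHTB an immediate of 0 encodes ASR #32; ASR #31 produces the
 * same low halfword (all sign bits), hence the shift = 31 fixup.
 */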
7232
7233 static bool op_sat(DisasContext *s, arg_sat *a,
7234 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7235 {
7236 TCGv_i32 tmp, satimm;
7237 int shift = a->imm;
7238
7239 if (!ENABLE_ARCH_6) {
7240 return false;
7241 }
7242
7243 tmp = load_reg(s, a->rn);
7244 if (a->sh) {
7245 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7246 } else {
7247 tcg_gen_shli_i32(tmp, tmp, shift);
7248 }
7249
7250 satimm = tcg_const_i32(a->satimm);
7251 gen(tmp, cpu_env, tmp, satimm);
7252 tcg_temp_free_i32(satimm);
7253
7254 store_reg(s, a->rd, tmp);
7255 return true;
7256 }
7257
7258 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7259 {
7260 return op_sat(s, a, gen_helper_ssat);
7261 }
7262
7263 static bool trans_USAT(DisasContext *s, arg_sat *a)
7264 {
7265 return op_sat(s, a, gen_helper_usat);
7266 }
7267
7268 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7269 {
7270 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7271 return false;
7272 }
7273 return op_sat(s, a, gen_helper_ssat16);
7274 }
7275
7276 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7277 {
7278 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7279 return false;
7280 }
7281 return op_sat(s, a, gen_helper_usat16);
7282 }
7283
7284 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7285 void (*gen_extract)(TCGv_i32, TCGv_i32),
7286 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7287 {
7288 TCGv_i32 tmp;
7289
7290 if (!ENABLE_ARCH_6) {
7291 return false;
7292 }
7293
7294 tmp = load_reg(s, a->rm);
7295 /*
7296 * TODO: In many cases we could do a shift instead of a rotate.
7297 * Combined with a simple extend, that becomes an extract.
7298 */
7299 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7300 gen_extract(tmp, tmp);
7301
7302 if (a->rn != 15) {
7303 TCGv_i32 tmp2 = load_reg(s, a->rn);
7304 gen_add(tmp, tmp, tmp2);
7305 tcg_temp_free_i32(tmp2);
7306 }
7307 store_reg(s, a->rd, tmp);
7308 return true;
7309 }
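
/*
 * When rn is 15 the accumulate step above is skipped; that is how the
 * plain extend forms (SXTB, SXTH, SXTB16, UXTB, UXTH, UXTB16) are
 * handled, since they share their encoding with the extend-and-add
 * forms but with the rn field set to 15.
 */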
7310
7311 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7312 {
7313 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7314 }
7315
7316 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7317 {
7318 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7319 }
7320
7321 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7322 {
7323 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7324 return false;
7325 }
7326 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7327 }
7328
7329 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7330 {
7331 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7332 }
7333
7334 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7335 {
7336 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7337 }
7338
7339 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7340 {
7341 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7342 return false;
7343 }
7344 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7345 }
7346
7347 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7348 {
7349 TCGv_i32 t1, t2, t3;
7350
7351 if (s->thumb
7352 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7353 : !ENABLE_ARCH_6) {
7354 return false;
7355 }
7356
7357 t1 = load_reg(s, a->rn);
7358 t2 = load_reg(s, a->rm);
7359 t3 = tcg_temp_new_i32();
7360 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7361 gen_helper_sel_flags(t1, t3, t1, t2);
7362 tcg_temp_free_i32(t3);
7363 tcg_temp_free_i32(t2);
7364 store_reg(s, a->rd, t1);
7365 return true;
7366 }
7367
7368 static bool op_rr(DisasContext *s, arg_rr *a,
7369 void (*gen)(TCGv_i32, TCGv_i32))
7370 {
7371 TCGv_i32 tmp;
7372
7373 tmp = load_reg(s, a->rm);
7374 gen(tmp, tmp);
7375 store_reg(s, a->rd, tmp);
7376 return true;
7377 }
7378
7379 static bool trans_REV(DisasContext *s, arg_rr *a)
7380 {
7381 if (!ENABLE_ARCH_6) {
7382 return false;
7383 }
7384 return op_rr(s, a, tcg_gen_bswap32_i32);
7385 }
7386
7387 static bool trans_REV16(DisasContext *s, arg_rr *a)
7388 {
7389 if (!ENABLE_ARCH_6) {
7390 return false;
7391 }
7392 return op_rr(s, a, gen_rev16);
7393 }
7394
7395 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7396 {
7397 if (!ENABLE_ARCH_6) {
7398 return false;
7399 }
7400 return op_rr(s, a, gen_revsh);
7401 }
7402
7403 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7404 {
7405 if (!ENABLE_ARCH_6T2) {
7406 return false;
7407 }
7408 return op_rr(s, a, gen_helper_rbit);
7409 }
7410
7411 /*
7412 * Signed multiply, signed and unsigned divide
7413 */
7414
7415 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7416 {
7417 TCGv_i32 t1, t2;
7418
7419 if (!ENABLE_ARCH_6) {
7420 return false;
7421 }
7422
7423 t1 = load_reg(s, a->rn);
7424 t2 = load_reg(s, a->rm);
7425 if (m_swap) {
7426 gen_swap_half(t2, t2);
7427 }
7428 gen_smul_dual(t1, t2);
7429
7430 if (sub) {
7431 /* This subtraction cannot overflow. */
7432 tcg_gen_sub_i32(t1, t1, t2);
7433 } else {
7434 /*
7435 * This addition cannot overflow 32 bits; however it may
7436 * overflow considered as a signed operation, in which case
7437 * we must set the Q flag.
7438 */
7439 gen_helper_add_setq(t1, cpu_env, t1, t2);
7440 }
7441 tcg_temp_free_i32(t2);
7442
7443 if (a->ra != 15) {
7444 t2 = load_reg(s, a->ra);
7445 gen_helper_add_setq(t1, cpu_env, t1, t2);
7446 tcg_temp_free_i32(t2);
7447 }
7448 store_reg(s, a->rd, t1);
7449 return true;
7450 }
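
/*
 * Worked example for the signed-overflow note above (illustrative):
 * each 16x16 product has magnitude at most 0x40000000, so the sum of
 * the two products always fits in 32 bits, but the extreme case
 * (-0x8000 * -0x8000) + (-0x8000 * -0x8000) = 0x80000000 does overflow
 * as a signed value, which is why the sum goes through add_setq.
 */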
7451
7452 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7453 {
7454 return op_smlad(s, a, false, false);
7455 }
7456
7457 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7458 {
7459 return op_smlad(s, a, true, false);
7460 }
7461
7462 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7463 {
7464 return op_smlad(s, a, false, true);
7465 }
7466
7467 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7468 {
7469 return op_smlad(s, a, true, true);
7470 }
7471
7472 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7473 {
7474 TCGv_i32 t1, t2;
7475 TCGv_i64 l1, l2;
7476
7477 if (!ENABLE_ARCH_6) {
7478 return false;
7479 }
7480
7481 t1 = load_reg(s, a->rn);
7482 t2 = load_reg(s, a->rm);
7483 if (m_swap) {
7484 gen_swap_half(t2, t2);
7485 }
7486 gen_smul_dual(t1, t2);
7487
7488 l1 = tcg_temp_new_i64();
7489 l2 = tcg_temp_new_i64();
7490 tcg_gen_ext_i32_i64(l1, t1);
7491 tcg_gen_ext_i32_i64(l2, t2);
7492 tcg_temp_free_i32(t1);
7493 tcg_temp_free_i32(t2);
7494
7495 if (sub) {
7496 tcg_gen_sub_i64(l1, l1, l2);
7497 } else {
7498 tcg_gen_add_i64(l1, l1, l2);
7499 }
7500 tcg_temp_free_i64(l2);
7501
7502 gen_addq(s, l1, a->ra, a->rd);
7503 gen_storeq_reg(s, a->ra, a->rd, l1);
7504 tcg_temp_free_i64(l1);
7505 return true;
7506 }
7507
7508 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7509 {
7510 return op_smlald(s, a, false, false);
7511 }
7512
7513 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7514 {
7515 return op_smlald(s, a, true, false);
7516 }
7517
7518 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7519 {
7520 return op_smlald(s, a, false, true);
7521 }
7522
7523 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7524 {
7525 return op_smlald(s, a, true, true);
7526 }
7527
7528 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7529 {
7530 TCGv_i32 t1, t2;
7531
7532 if (s->thumb
7533 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7534 : !ENABLE_ARCH_6) {
7535 return false;
7536 }
7537
7538 t1 = load_reg(s, a->rn);
7539 t2 = load_reg(s, a->rm);
7540 tcg_gen_muls2_i32(t2, t1, t1, t2);
7541
7542 if (a->ra != 15) {
7543 TCGv_i32 t3 = load_reg(s, a->ra);
7544 if (sub) {
7545 /*
7546 * For SMMLS, we need a 64-bit subtract: a non-zero product
7547 * lowpart must propagate a borrow into the high word, and the
7548 * result lowpart must stay correct for the rounding step below.
7549 */
7550 TCGv_i32 zero = tcg_const_i32(0);
7551 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
7552 tcg_temp_free_i32(zero);
7553 } else {
7554 tcg_gen_add_i32(t1, t1, t3);
7555 }
7556 tcg_temp_free_i32(t3);
7557 }
7558 if (round) {
7559 /*
7560 * Adding 0x80000000 to the 64-bit quantity means that we get a
7561 * carry into the high word exactly when the low word has its msb set.
7562 */
7563 tcg_gen_shri_i32(t2, t2, 31);
7564 tcg_gen_add_i32(t1, t1, t2);
7565 }
7566 tcg_temp_free_i32(t2);
7567 store_reg(s, a->rd, t1);
7568 return true;
7569 }
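
/*
 * Rounding sketch for SMMLAR/SMMLSR (illustrative): architecturally
 * 0x80000000 is added to the 64-bit value before its high word is
 * taken.  Since only the high word is kept, that is equivalent to
 * adding bit 31 of the low word into the high word, which is what the
 * shri/add pair above implements.
 */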
7570
7571 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7572 {
7573 return op_smmla(s, a, false, false);
7574 }
7575
7576 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7577 {
7578 return op_smmla(s, a, true, false);
7579 }
7580
7581 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7582 {
7583 return op_smmla(s, a, false, true);
7584 }
7585
7586 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7587 {
7588 return op_smmla(s, a, true, true);
7589 }
7590
7591 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7592 {
7593 TCGv_i32 t1, t2;
7594
7595 if (s->thumb
7596 ? !dc_isar_feature(aa32_thumb_div, s)
7597 : !dc_isar_feature(aa32_arm_div, s)) {
7598 return false;
7599 }
7600
7601 t1 = load_reg(s, a->rn);
7602 t2 = load_reg(s, a->rm);
7603 if (u) {
7604 gen_helper_udiv(t1, t1, t2);
7605 } else {
7606 gen_helper_sdiv(t1, t1, t2);
7607 }
7608 tcg_temp_free_i32(t2);
7609 store_reg(s, a->rd, t1);
7610 return true;
7611 }
7612
7613 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7614 {
7615 return op_div(s, a, false);
7616 }
7617
7618 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7619 {
7620 return op_div(s, a, true);
7621 }
7622
7623 /*
7624 * Block data transfer
7625 */
7626
7627 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7628 {
7629 TCGv_i32 addr = load_reg(s, a->rn);
7630
7631 if (a->b) {
7632 if (a->i) {
7633 /* pre increment */
7634 tcg_gen_addi_i32(addr, addr, 4);
7635 } else {
7636 /* pre decrement */
7637 tcg_gen_addi_i32(addr, addr, -(n * 4));
7638 }
7639 } else if (!a->i && n != 1) {
7640 /* post decrement */
7641 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7642 }
7643
7644 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7645 /*
7646 * If the writeback is incrementing SP rather than
7647 * decrementing it, and the initial SP is below the
7648 * stack limit but the final written-back SP would
7649 * be above, then we must not perform any memory
7650 * accesses, but it is IMPDEF whether we generate
7651 * an exception. We choose to do so in this case.
7652 * At this point 'addr' is the lowest address, so
7653 * either the original SP (if incrementing) or our
7654 * final SP (if decrementing), so that's what we check.
7655 */
7656 gen_helper_v8m_stackcheck(cpu_env, addr);
7657 }
7658
7659 return addr;
7660 }
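
/*
 * Start addresses computed above for a block of n registers
 * (illustrative summary): IA: rn, IB: rn + 4, DA: rn - 4n + 4,
 * DB: rn - 4n.  In every case the transfers then proceed upwards in
 * steps of 4, matching the architectural lowest-address-first order.
 */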
7661
7662 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7663 TCGv_i32 addr, int n)
7664 {
7665 if (a->w) {
7666 /* write back */
7667 if (!a->b) {
7668 if (a->i) {
7669 /* post increment */
7670 tcg_gen_addi_i32(addr, addr, 4);
7671 } else {
7672 /* post decrement */
7673 tcg_gen_addi_i32(addr, addr, -(n * 4));
7674 }
7675 } else if (!a->i && n != 1) {
7676 /* pre decrement */
7677 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7678 }
7679 store_reg(s, a->rn, addr);
7680 } else {
7681 tcg_temp_free_i32(addr);
7682 }
7683 }
7684
7685 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
7686 {
7687 int i, j, n, list, mem_idx;
7688 bool user = a->u;
7689 TCGv_i32 addr, tmp, tmp2;
7690
7691 if (user) {
7692 /* STM (user) */
7693 if (IS_USER(s)) {
7694 /* Only usable in supervisor mode. */
7695 unallocated_encoding(s);
7696 return true;
7697 }
7698 }
7699
7700 list = a->list;
7701 n = ctpop16(list);
7702 if (n < min_n || a->rn == 15) {
7703 unallocated_encoding(s);
7704 return true;
7705 }
7706
7707 addr = op_addr_block_pre(s, a, n);
7708 mem_idx = get_mem_index(s);
7709
7710 for (i = j = 0; i < 16; i++) {
7711 if (!(list & (1 << i))) {
7712 continue;
7713 }
7714
7715 if (user && i != 15) {
7716 tmp = tcg_temp_new_i32();
7717 tmp2 = tcg_const_i32(i);
7718 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
7719 tcg_temp_free_i32(tmp2);
7720 } else {
7721 tmp = load_reg(s, i);
7722 }
7723 gen_aa32_st32(s, tmp, addr, mem_idx);
7724 tcg_temp_free_i32(tmp);
7725
7726 /* No need to add after the last transfer. */
7727 if (++j != n) {
7728 tcg_gen_addi_i32(addr, addr, 4);
7729 }
7730 }
7731
7732 op_addr_block_post(s, a, addr, n);
7733 return true;
7734 }
7735
7736 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7737 {
7738 /* BitCount(list) < 1 is UNPREDICTABLE */
7739 return op_stm(s, a, 1);
7740 }
7741
7742 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7743 {
7744 /* Writeback register in register list is UNPREDICTABLE for T32. */
7745 if (a->w && (a->list & (1 << a->rn))) {
7746 unallocated_encoding(s);
7747 return true;
7748 }
7749 /* BitCount(list) < 2 is UNPREDICTABLE */
7750 return op_stm(s, a, 2);
7751 }
7752
7753 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
7754 {
7755 int i, j, n, list, mem_idx;
7756 bool loaded_base;
7757 bool user = a->u;
7758 bool exc_return = false;
7759 TCGv_i32 addr, tmp, tmp2, loaded_var;
7760
7761 if (user) {
7762 /* LDM (user), LDM (exception return) */
7763 if (IS_USER(s)) {
7764 /* Only usable in supervisor mode. */
7765 unallocated_encoding(s);
7766 return true;
7767 }
7768 if (extract32(a->list, 15, 1)) {
7769 exc_return = true;
7770 user = false;
7771 } else {
7772 /* LDM (user) does not allow writeback. */
7773 if (a->w) {
7774 unallocated_encoding(s);
7775 return true;
7776 }
7777 }
7778 }
7779
7780 list = a->list;
7781 n = ctpop16(list);
7782 if (n < min_n || a->rn == 15) {
7783 unallocated_encoding(s);
7784 return true;
7785 }
7786
7787 addr = op_addr_block_pre(s, a, n);
7788 mem_idx = get_mem_index(s);
7789 loaded_base = false;
7790 loaded_var = NULL;
7791
7792 for (i = j = 0; i < 16; i++) {
7793 if (!(list & (1 << i))) {
7794 continue;
7795 }
7796
7797 tmp = tcg_temp_new_i32();
7798 gen_aa32_ld32u(s, tmp, addr, mem_idx);
7799 if (user) {
7800 tmp2 = tcg_const_i32(i);
7801 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
7802 tcg_temp_free_i32(tmp2);
7803 tcg_temp_free_i32(tmp);
7804 } else if (i == a->rn) {
7805 loaded_var = tmp;
7806 loaded_base = true;
7807 } else if (i == 15 && exc_return) {
7808 store_pc_exc_ret(s, tmp);
7809 } else {
7810 store_reg_from_load(s, i, tmp);
7811 }
7812
7813 /* No need to add after the last transfer. */
7814 if (++j != n) {
7815 tcg_gen_addi_i32(addr, addr, 4);
7816 }
7817 }
7818
7819 op_addr_block_post(s, a, addr, n);
7820
7821 if (loaded_base) {
7822 /* Note that we reject base == pc above. */
7823 store_reg(s, a->rn, loaded_var);
7824 }
7825
7826 if (exc_return) {
7827 /* Restore CPSR from SPSR. */
7828 tmp = load_cpu_field(spsr);
7829 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7830 gen_io_start();
7831 }
7832 gen_helper_cpsr_write_eret(cpu_env, tmp);
7833 tcg_temp_free_i32(tmp);
7834 /* Must exit loop to check un-masked IRQs */
7835 s->base.is_jmp = DISAS_EXIT;
7836 }
7837 return true;
7838 }
7839
7840 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
7841 {
7842 /*
7843 * Writeback register in register list is UNPREDICTABLE
7844 * for ArchVersion() >= 7. Prior to v7, A32 would write
7845 * an UNKNOWN value to the base register.
7846 */
7847 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
7848 unallocated_encoding(s);
7849 return true;
7850 }
7851 /* BitCount(list) < 1 is UNPREDICTABLE */
7852 return do_ldm(s, a, 1);
7853 }
7854
7855 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
7856 {
7857 /* Writeback register in register list is UNPREDICTABLE for T32. */
7858 if (a->w && (a->list & (1 << a->rn))) {
7859 unallocated_encoding(s);
7860 return true;
7861 }
7862 /* BitCount(list) < 2 is UNPREDICTABLE */
7863 return do_ldm(s, a, 2);
7864 }
7865
7866 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
7867 {
7868 /* Writeback is conditional on the base register not being loaded. */
7869 a->w = !(a->list & (1 << a->rn));
7870 /* BitCount(list) < 1 is UNPREDICTABLE */
7871 return do_ldm(s, a, 1);
7872 }
7873
7874 /*
7875 * Branch, branch with link
7876 */
7877
7878 static bool trans_B(DisasContext *s, arg_i *a)
7879 {
7880 gen_jmp(s, read_pc(s) + a->imm);
7881 return true;
7882 }
7883
7884 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
7885 {
7886 /* This has cond from encoding, required to be outside IT block. */
7887 if (a->cond >= 0xe) {
7888 return false;
7889 }
7890 if (s->condexec_mask) {
7891 unallocated_encoding(s);
7892 return true;
7893 }
7894 arm_skip_unless(s, a->cond);
7895 gen_jmp(s, read_pc(s) + a->imm);
7896 return true;
7897 }
7898
7899 static bool trans_BL(DisasContext *s, arg_i *a)
7900 {
7901 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
7902 gen_jmp(s, read_pc(s) + a->imm);
7903 return true;
7904 }
7905
7906 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
7907 {
7908 TCGv_i32 tmp;
7909
7910 /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
7911 if (s->thumb && (a->imm & 2)) {
7912 return false;
7913 }
7914 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
7915 tmp = tcg_const_i32(!s->thumb);
7916 store_cpu_field(tmp, thumb);
7917 gen_jmp(s, (read_pc(s) & ~3) + a->imm);
7918 return true;
7919 }
7920
7921 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
7922 {
7923 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
7924 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
7925 return true;
7926 }
7927
7928 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
7929 {
7930 TCGv_i32 tmp = tcg_temp_new_i32();
7931
7932 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
7933 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
7934 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
7935 gen_bx(s, tmp);
7936 return true;
7937 }
7938
7939 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
7940 {
7941 TCGv_i32 tmp;
7942
7943 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
7944 if (!ENABLE_ARCH_5) {
7945 return false;
7946 }
7947 tmp = tcg_temp_new_i32();
7948 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
7949 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
7950 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
7951 gen_bx(s, tmp);
7952 return true;
7953 }
7954
7955 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
7956 {
7957 TCGv_i32 addr, tmp;
7958
7959 tmp = load_reg(s, a->rm);
7960 if (half) {
7961 tcg_gen_add_i32(tmp, tmp, tmp);
7962 }
7963 addr = load_reg(s, a->rn);
7964 tcg_gen_add_i32(addr, addr, tmp);
7965
7966 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
7967 half ? MO_UW | s->be_data : MO_UB);
7968 tcg_temp_free_i32(addr);
7969
7970 tcg_gen_add_i32(tmp, tmp, tmp);
7971 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
7972 store_reg(s, 15, tmp);
7973 return true;
7974 }
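/*
 * Worked example of the shared helper above: for TBB [rn, rm] the byte
 * table entry at rn + rm is loaded and the branch target is
 * read_pc(s) + 2 * entry; for TBH [rn, rm, LSL #1] the halfword entry at
 * rn + 2 * rm is used instead. So a TBB entry value of 5 branches
 * 10 bytes past the architectural PC.
 */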
7975
7976 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
7977 {
7978 return op_tbranch(s, a, false);
7979 }
7980
7981 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
7982 {
7983 return op_tbranch(s, a, true);
7984 }
7985
7986 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
7987 {
7988 TCGv_i32 tmp = load_reg(s, a->rn);
7989
7990 arm_gen_condlabel(s);
7991 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
7992 tmp, 0, s->condlabel);
7993 tcg_temp_free_i32(tmp);
7994 gen_jmp(s, read_pc(s) + a->imm);
7995 return true;
7996 }
7997
7998 /*
7999 * Supervisor call - both T32 & A32 come here so we need to check
8000 * which mode we are in when checking for semihosting.
8001 */
8002
8003 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8004 {
8005 const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8006
8007 if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
8008 #ifndef CONFIG_USER_ONLY
8009 !IS_USER(s) &&
8010 #endif
8011 (a->imm == semihost_imm)) {
8012 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8013 } else {
8014 gen_set_pc_im(s, s->base.pc_next);
8015 s->svc_imm = a->imm;
8016 s->base.is_jmp = DISAS_SWI;
8017 }
8018 return true;
8019 }
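/*
 * For example, on a non-M-profile core with semihosting enabled and
 * outside user mode, a Thumb "SVC 0xAB" or an A32 "SVC 0x123456" raises
 * the EXCP_SEMIHOST internal exception; any other immediate takes the
 * DISAS_SWI path and becomes a normal supervisor call.
 */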
8020
8021 /*
8022 * Unconditional system instructions
8023 */
8024
8025 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8026 {
8027 static const int8_t pre_offset[4] = {
8028 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8029 };
8030 static const int8_t post_offset[4] = {
8031 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8032 };
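/*
 * For example, RFEIA with writeback (a->pu == 1, a->w == 1) on base
 * register rn: pre_offset is 0, so the PC is loaded from [rn] and the
 * CPSR from [rn + 4], and post_offset 4 leaves rn + 8 in the base
 * register (increment-after form).
 */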
8033 TCGv_i32 addr, t1, t2;
8034
8035 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8036 return false;
8037 }
8038 if (IS_USER(s)) {
8039 unallocated_encoding(s);
8040 return true;
8041 }
8042
8043 addr = load_reg(s, a->rn);
8044 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8045
8046 /* Load the PC into t1 and the CPSR into t2. */
8047 t1 = tcg_temp_new_i32();
8048 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
8049 tcg_gen_addi_i32(addr, addr, 4);
8050 t2 = tcg_temp_new_i32();
8051 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
8052
8053 if (a->w) {
8054 /* Base writeback. */
8055 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8056 store_reg(s, a->rn, addr);
8057 } else {
8058 tcg_temp_free_i32(addr);
8059 }
8060 gen_rfe(s, t1, t2);
8061 return true;
8062 }
8063
8064 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8065 {
8066 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8067 return false;
8068 }
8069 gen_srs(s, a->mode, a->pu, a->w);
8070 return true;
8071 }
8072
8073 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8074 {
8075 uint32_t mask, val;
8076
8077 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8078 return false;
8079 }
8080 if (IS_USER(s)) {
8081 /* Implemented as NOP in user mode. */
8082 return true;
8083 }
8084 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8085
8086 mask = val = 0;
8087 if (a->imod & 2) {
8088 if (a->A) {
8089 mask |= CPSR_A;
8090 }
8091 if (a->I) {
8092 mask |= CPSR_I;
8093 }
8094 if (a->F) {
8095 mask |= CPSR_F;
8096 }
8097 if (a->imod & 1) {
8098 val |= mask;
8099 }
8100 }
8101 if (a->M) {
8102 mask |= CPSR_M;
8103 val |= a->mode;
8104 }
8105 if (mask) {
8106 gen_set_psr_im(s, mask, 0, val);
8107 }
8108 return true;
8109 }
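/*
 * Example of the mask/val construction above: "CPSID i" has imod == 0b11
 * and I set, giving mask = CPSR_I and val = CPSR_I (set the I bit, i.e.
 * mask IRQs), while "CPSIE i" has imod == 0b10, giving mask = CPSR_I and
 * val = 0 (clear the I bit and re-enable IRQs).
 */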
8110
8111 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8112 {
8113 TCGv_i32 tmp, addr, el;
8114
8115 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8116 return false;
8117 }
8118 if (IS_USER(s)) {
8119 /* Implemented as NOP in user mode. */
8120 return true;
8121 }
8122
8123 tmp = tcg_const_i32(a->im);
8124 /* FAULTMASK */
8125 if (a->F) {
8126 addr = tcg_const_i32(19);
8127 gen_helper_v7m_msr(cpu_env, addr, tmp);
8128 tcg_temp_free_i32(addr);
8129 }
8130 /* PRIMASK */
8131 if (a->I) {
8132 addr = tcg_const_i32(16);
8133 gen_helper_v7m_msr(cpu_env, addr, tmp);
8134 tcg_temp_free_i32(addr);
8135 }
8136 el = tcg_const_i32(s->current_el);
8137 gen_helper_rebuild_hflags_m32(cpu_env, el);
8138 tcg_temp_free_i32(el);
8139 tcg_temp_free_i32(tmp);
8140 gen_lookup_tb(s);
8141 return true;
8142 }
8143
8144 /*
8145 * Clear-Exclusive, Barriers
8146 */
8147
8148 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8149 {
8150 if (s->thumb
8151 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8152 : !ENABLE_ARCH_6K) {
8153 return false;
8154 }
8155 gen_clrex(s);
8156 return true;
8157 }
8158
8159 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8160 {
8161 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8162 return false;
8163 }
8164 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8165 return true;
8166 }
8167
8168 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8169 {
8170 return trans_DSB(s, NULL);
8171 }
8172
8173 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8174 {
8175 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8176 return false;
8177 }
8178 /*
8179 * We need to break the TB after this insn to execute
8180 * self-modifying code correctly and also to take
8181 * any pending interrupts immediately.
8182 */
8183 gen_goto_tb(s, 0, s->base.pc_next);
8184 return true;
8185 }
8186
8187 static bool trans_SB(DisasContext *s, arg_SB *a)
8188 {
8189 if (!dc_isar_feature(aa32_sb, s)) {
8190 return false;
8191 }
8192 /*
8193 * TODO: There is no speculation barrier opcode
8194 * for TCG; MB and end the TB instead.
8195 */
8196 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8197 gen_goto_tb(s, 0, s->base.pc_next);
8198 return true;
8199 }
8200
8201 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8202 {
8203 if (!ENABLE_ARCH_6) {
8204 return false;
8205 }
8206 if (a->E != (s->be_data == MO_BE)) {
8207 gen_helper_setend(cpu_env);
8208 s->base.is_jmp = DISAS_UPDATE_EXIT;
8209 }
8210 return true;
8211 }
8212
8213 /*
8214 * Preload instructions
8215 * All are nops, contingent on the appropriate arch level.
8216 */
8217
8218 static bool trans_PLD(DisasContext *s, arg_PLD *a)
8219 {
8220 return ENABLE_ARCH_5TE;
8221 }
8222
8223 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8224 {
8225 return arm_dc_feature(s, ARM_FEATURE_V7MP);
8226 }
8227
8228 static bool trans_PLI(DisasContext *s, arg_PLD *a)
8229 {
8230 return ENABLE_ARCH_7;
8231 }
8232
8233 /*
8234 * If-then
8235 */
8236
8237 static bool trans_IT(DisasContext *s, arg_IT *a)
8238 {
8239 int cond_mask = a->cond_mask;
8240
8241 /*
8242 * No actual code generated for this insn, just setup state.
8243 *
8244 * Combinations of firstcond and mask which set up a 0b1111
8245 * condition are UNPREDICTABLE; we take the CONSTRAINED
8246 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8247 * i.e. both meaning "execute always".
8248 */
8249 s->condexec_cond = (cond_mask >> 4) & 0xe;
8250 s->condexec_mask = cond_mask & 0x1f;
8251 return true;
8252 }
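/*
 * Worked example (tracing this function together with the advance step in
 * thumb_tr_translate_insn below): "ITE EQ" has firstcond 0x0 and mask 0xC,
 * so cond_mask is 0x0C. This sets condexec_cond = 0x0 and
 * condexec_mask = 0x0C; the advance after the IT insn itself leaves
 * cond 0x0 (EQ), mask 0x18. The first insn in the block is then gated on
 * EQ and the advance yields cond 0x1 (NE), mask 0x10, so the second insn
 * is gated on NE; the next advance clears the mask and ends the block.
 */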
8253
8254 /*
8255 * Legacy decoder.
8256 */
8257
8258 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8259 {
8260 unsigned int cond = insn >> 28;
8261
8262 /* M variants do not implement ARM mode; this must raise the INVSTATE
8263 * UsageFault exception.
8264 */
8265 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8266 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
8267 default_exception_el(s));
8268 return;
8269 }
8270
8271 if (cond == 0xf) {
8272 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8273 * choose to UNDEF. In ARMv5 and above the space is used
8274 * for miscellaneous unconditional instructions.
8275 */
8276 if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8277 unallocated_encoding(s);
8278 return;
8279 }
8280
8281 /* Unconditional instructions. */
8282 /* TODO: Perhaps merge these into one decodetree output file. */
8283 if (disas_a32_uncond(s, insn) ||
8284 disas_vfp_uncond(s, insn) ||
8285 disas_neon_dp(s, insn) ||
8286 disas_neon_ls(s, insn) ||
8287 disas_neon_shared(s, insn)) {
8288 return;
8289 }
8290 /* fall back to legacy decoder */
8291
8292 if ((insn & 0x0e000f00) == 0x0c000100) {
8293 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8294 /* iWMMXt register transfer. */
8295 if (extract32(s->c15_cpar, 1, 1)) {
8296 if (!disas_iwmmxt_insn(s, insn)) {
8297 return;
8298 }
8299 }
8300 }
8301 }
8302 goto illegal_op;
8303 }
8304 if (cond != 0xe) {
8305 /* If the condition is not "always", generate a conditional jump
8306 to the next instruction. */
8307 arm_skip_unless(s, cond);
8308 }
8309
8310 /* TODO: Perhaps merge these into one decodetree output file. */
8311 if (disas_a32(s, insn) ||
8312 disas_vfp(s, insn)) {
8313 return;
8314 }
8315 /* fall back to legacy decoder */
8316 /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8317 if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8318 if (((insn & 0x0c000e00) == 0x0c000000)
8319 && ((insn & 0x03000000) != 0x03000000)) {
8320 /* Coprocessor insn, coprocessor 0 or 1 */
8321 disas_xscale_insn(s, insn);
8322 return;
8323 }
8324 }
8325
8326 illegal_op:
8327 unallocated_encoding(s);
8328 }
8329
8330 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8331 {
8332 /*
8333 * Return true if this is a 16-bit instruction. We must be precise
8334 * about this (matching the decode).
8335 */
8336 if ((insn >> 11) < 0x1d) {
8337 /* Definitely a 16-bit instruction */
8338 return true;
8339 }
8340
8341 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8342 * first half of a 32-bit Thumb insn. Thumb-1 cores might
8343 * end up actually treating this as two 16-bit insns, though,
8344 * if it's half of a bl/blx pair that might span a page boundary.
8345 */
8346 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8347 arm_dc_feature(s, ARM_FEATURE_M)) {
8348 /* Thumb2 cores (including all M profile ones) always treat
8349 * 32-bit insns as 32-bit.
8350 */
8351 return false;
8352 }
8353
8354 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8355 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8356 * is not on the next page; we merge this into a 32-bit
8357 * insn.
8358 */
8359 return false;
8360 }
8361 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8362 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8363 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8364 * -- handle as single 16 bit insn
8365 */
8366 return true;
8367 }
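/*
 * For example, a halfword of 0x2100 (MOVS r1, #0) has its top five bits
 * below 0b11101 and is always 16 bits, while 0xe92d (the first half of a
 * 32-bit STMDB/PUSH encoding) is treated as the start of a 32-bit insn on
 * any Thumb-2 or M-profile core.
 */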
8368
8369 /* Translate a 32-bit thumb instruction. */
8370 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8371 {
8372 /*
8373 * ARMv6-M supports a limited subset of Thumb2 instructions.
8374 * Other Thumb1 architectures allow only 32-bit
8375 * combined BL/BLX prefix and suffix.
8376 */
8377 if (arm_dc_feature(s, ARM_FEATURE_M) &&
8378 !arm_dc_feature(s, ARM_FEATURE_V7)) {
8379 int i;
8380 bool found = false;
8381 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
8382 0xf3b08040 /* dsb */,
8383 0xf3b08050 /* dmb */,
8384 0xf3b08060 /* isb */,
8385 0xf3e08000 /* mrs */,
8386 0xf000d000 /* bl */};
8387 static const uint32_t armv6m_mask[] = {0xffe0d000,
8388 0xfff0d0f0,
8389 0xfff0d0f0,
8390 0xfff0d0f0,
8391 0xffe0d000,
8392 0xf800d000};
8393
8394 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
8395 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
8396 found = true;
8397 break;
8398 }
8399 }
8400 if (!found) {
8401 goto illegal_op;
8402 }
8403 } else if ((insn & 0xf800e800) != 0xf000e800) {
8404 if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
8405 unallocated_encoding(s);
8406 return;
8407 }
8408 }
8409
8410 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8411 /*
8412 * NOCP takes precedence over any UNDEF for (almost) the
8413 * entire wide range of coprocessor-space encodings, so check
8414 * for it first before proceeding to actually decode eg VFP
8415 * insns. This decode also handles the few insns which are
8416 * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
8417 */
8418 if (disas_m_nocp(s, insn)) {
8419 return;
8420 }
8421 }
8422
8423 if ((insn & 0xef000000) == 0xef000000) {
8424 /*
8425 * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
8426 * transform into
8427 * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
8428 */
8429 uint32_t a32_insn = (insn & 0xe2ffffff) |
8430 ((insn & (1 << 28)) >> 4) | (1 << 28);
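/*
 * e.g. T32 0xef012345 (bit 28 clear) maps to A32 0xf2012345, and
 * T32 0xff012345 (bit 28 set) maps to A32 0xf3012345.
 */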
8431
8432 if (disas_neon_dp(s, a32_insn)) {
8433 return;
8434 }
8435 }
8436
8437 if ((insn & 0xff100000) == 0xf9000000) {
8438 /*
8439 * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
8440 * transform into
8441 * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
8442 */
8443 uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
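/* e.g. T32 0xf9012345 maps to A32 0xf4012345. */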
8444
8445 if (disas_neon_ls(s, a32_insn)) {
8446 return;
8447 }
8448 }
8449
8450 /*
8451 * TODO: Perhaps merge these into one decodetree output file.
8452 * Note disas_vfp is written for a32 with cond field in the
8453 * top nibble. The t32 encoding requires 0xe in the top nibble.
8454 */
8455 if (disas_t32(s, insn) ||
8456 disas_vfp_uncond(s, insn) ||
8457 disas_neon_shared(s, insn) ||
8458 ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
8459 return;
8460 }
8461
8462 illegal_op:
8463 unallocated_encoding(s);
8464 }
8465
8466 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
8467 {
8468 if (!disas_t16(s, insn)) {
8469 unallocated_encoding(s);
8470 }
8471 }
8472
8473 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
8474 {
8475 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
8476 * (False positives are OK, false negatives are not.)
8477 * We know this is a Thumb insn, and our caller ensures we are
8478 * only called if dc->base.pc_next is less than 4 bytes from the page
8479 * boundary, so we cross the page if the first 16 bits indicate
8480 * that this is a 32 bit insn.
8481 */
8482 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
8483
8484 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
8485 }
8486
8487 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
8488 {
8489 DisasContext *dc = container_of(dcbase, DisasContext, base);
8490 CPUARMState *env = cs->env_ptr;
8491 ARMCPU *cpu = env_archcpu(env);
8492 uint32_t tb_flags = dc->base.tb->flags;
8493 uint32_t condexec, core_mmu_idx;
8494
8495 dc->isar = &cpu->isar;
8496 dc->condjmp = 0;
8497
8498 dc->aarch64 = 0;
8499 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
8500 * there is no secure EL1, so we route exceptions to EL3.
8501 */
8502 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
8503 !arm_el_is_aa64(env, 3);
8504 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
8505 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
8506 condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
8507 dc->condexec_mask = (condexec & 0xf) << 1;
8508 dc->condexec_cond = condexec >> 4;
8509
8510 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
8511 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
8512 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
8513 #if !defined(CONFIG_USER_ONLY)
8514 dc->user = (dc->current_el == 0);
8515 #endif
8516 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
8517
8518 if (arm_feature(env, ARM_FEATURE_M)) {
8519 dc->vfp_enabled = 1;
8520 dc->be_data = MO_TE;
8521 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
8522 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
8523 regime_is_secure(env, dc->mmu_idx);
8524 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
8525 dc->v8m_fpccr_s_wrong =
8526 FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
8527 dc->v7m_new_fp_ctxt_needed =
8528 FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
8529 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
8530 } else {
8531 dc->be_data =
8532 FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
8533 dc->debug_target_el =
8534 FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
8535 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
8536 dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
8537 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
8538 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
8539 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
8540 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
8541 } else {
8542 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
8543 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
8544 }
8545 }
8546 dc->cp_regs = cpu->cp_regs;
8547 dc->features = env->features;
8548
8549 /* Single step state. The code-generation logic here is:
8550 * SS_ACTIVE == 0:
8551 * generate code with no special handling for single-stepping (except
8552 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
8553 * this happens anyway because those changes are all system register or
8554 * PSTATE writes).
8555 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
8556 * emit code for one insn
8557 * emit code to clear PSTATE.SS
8558 * emit code to generate software step exception for completed step
8559 * end TB (as usual for having generated an exception)
8560 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
8561 * emit code to generate a software step exception
8562 * end the TB
8563 */
8564 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
8565 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
8566 dc->is_ldex = false;
8567
8568 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
8569
8570 /* If architectural single step active, limit to 1. */
8571 if (is_singlestepping(dc)) {
8572 dc->base.max_insns = 1;
8573 }
8574
8575 /* ARM is a fixed-length ISA. Bound the number of insns to execute
8576 to those left on the page. */
8577 if (!dc->thumb) {
8578 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
8579 dc->base.max_insns = MIN(dc->base.max_insns, bound);
8580 }
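/*
 * For example, with 4 KiB pages and pc_first ending in 0x0f0, there are
 * 0xf10 bytes left on the page, i.e. a bound of 964 A32 insns.
 */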
8581
8582 cpu_V0 = tcg_temp_new_i64();
8583 cpu_V1 = tcg_temp_new_i64();
8584 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
8585 cpu_M0 = tcg_temp_new_i64();
8586 }
8587
8588 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
8589 {
8590 DisasContext *dc = container_of(dcbase, DisasContext, base);
8591
8592 /* A note on handling of the condexec (IT) bits:
8593 *
8594 * We want to avoid the overhead of having to write the updated condexec
8595 * bits back to the CPUARMState for every instruction in an IT block. So:
8596 * (1) if the condexec bits are not already zero then we write
8597 * zero back into the CPUARMState now. This avoids complications trying
8598 * to do it at the end of the block. (For example if we don't do this
8599 * it's hard to identify whether we can safely skip writing condexec
8600 * at the end of the TB, which we definitely want to do for the case
8601 * where a TB doesn't do anything with the IT state at all.)
8602 * (2) if we are going to leave the TB then we call gen_set_condexec()
8603 * which will write the correct value into CPUARMState if zero is wrong.
8604 * This is done both for leaving the TB at the end, and for leaving
8605 * it because of an exception we know will happen, which is done in
8606 * gen_exception_insn(). The latter is necessary because we need to
8607 * leave the TB with the PC/IT state just prior to execution of the
8608 * instruction which caused the exception.
8609 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
8610 * then the CPUARMState will be wrong and we need to reset it.
8611 * This is handled in the same way as restoration of the
8612 * PC in these situations; we save the value of the condexec bits
8613 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
8614 * then uses this to restore them after an exception.
8615 *
8616 * Note that there are no instructions which can read the condexec
8617 * bits, and none which can write non-static values to them, so
8618 * we don't need to care about whether CPUARMState is correct in the
8619 * middle of a TB.
8620 */
8621
8622 /* Reset the conditional execution bits immediately. This avoids
8623 complications trying to do it at the end of the block. */
8624 if (dc->condexec_mask || dc->condexec_cond) {
8625 TCGv_i32 tmp = tcg_temp_new_i32();
8626 tcg_gen_movi_i32(tmp, 0);
8627 store_cpu_field(tmp, condexec_bits);
8628 }
8629 }
8630
8631 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8632 {
8633 DisasContext *dc = container_of(dcbase, DisasContext, base);
8634
8635 tcg_gen_insn_start(dc->base.pc_next,
8636 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
8637 0);
8638 dc->insn_start = tcg_last_op();
8639 }
8640
8641 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8642 const CPUBreakpoint *bp)
8643 {
8644 DisasContext *dc = container_of(dcbase, DisasContext, base);
8645
8646 if (bp->flags & BP_CPU) {
8647 gen_set_condexec(dc);
8648 gen_set_pc_im(dc, dc->base.pc_next);
8649 gen_helper_check_breakpoints(cpu_env);
8650 /* End the TB early; it's likely not going to be executed */
8651 dc->base.is_jmp = DISAS_TOO_MANY;
8652 } else {
8653 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
8654 /* The address covered by the breakpoint must be
8655 included in [tb->pc, tb->pc + tb->size) in order
8656 for it to be properly cleared -- thus we
8657 increment the PC here so that the logic setting
8658 tb->size below does the right thing. */
8659 /* TODO: Advance PC by correct instruction length to
8660 * avoid disassembler error messages */
8661 dc->base.pc_next += 2;
8662 dc->base.is_jmp = DISAS_NORETURN;
8663 }
8664
8665 return true;
8666 }
8667
8668 static bool arm_pre_translate_insn(DisasContext *dc)
8669 {
8670 #ifdef CONFIG_USER_ONLY
8671 /* Intercept jump to the magic kernel page. */
8672 if (dc->base.pc_next >= 0xffff0000) {
8673 /* We always get here via a jump, so we know we are not in a
8674 conditional execution block. */
8675 gen_exception_internal(EXCP_KERNEL_TRAP);
8676 dc->base.is_jmp = DISAS_NORETURN;
8677 return true;
8678 }
8679 #endif
8680
8681 if (dc->ss_active && !dc->pstate_ss) {
8682 /* Singlestep state is Active-pending.
8683 * If we're in this state at the start of a TB then either
8684 * a) we just took an exception to an EL which is being debugged
8685 * and this is the first insn in the exception handler
8686 * b) debug exceptions were masked and we just unmasked them
8687 * without changing EL (eg by clearing PSTATE.D)
8688 * In either case we're going to take a swstep exception in the
8689 * "did not step an insn" case, and so the syndrome ISV and EX
8690 * bits should be zero.
8691 */
8692 assert(dc->base.num_insns == 1);
8693 gen_swstep_exception(dc, 0, 0);
8694 dc->base.is_jmp = DISAS_NORETURN;
8695 return true;
8696 }
8697
8698 return false;
8699 }
8700
8701 static void arm_post_translate_insn(DisasContext *dc)
8702 {
8703 if (dc->condjmp && !dc->base.is_jmp) {
8704 gen_set_label(dc->condlabel);
8705 dc->condjmp = 0;
8706 }
8707 translator_loop_temp_check(&dc->base);
8708 }
8709
8710 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8711 {
8712 DisasContext *dc = container_of(dcbase, DisasContext, base);
8713 CPUARMState *env = cpu->env_ptr;
8714 unsigned int insn;
8715
8716 if (arm_pre_translate_insn(dc)) {
8717 return;
8718 }
8719
8720 dc->pc_curr = dc->base.pc_next;
8721 insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
8722 dc->insn = insn;
8723 dc->base.pc_next += 4;
8724 disas_arm_insn(dc, insn);
8725
8726 arm_post_translate_insn(dc);
8727
8728 /* ARM is a fixed-length ISA. We performed the cross-page check
8729 in init_disas_context by adjusting max_insns. */
8730 }
8731
8732 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
8733 {
8734 /* Return true if this Thumb insn is always unconditional,
8735 * even inside an IT block. This is true of only a very few
8736 * instructions: BKPT, HLT, and SG.
8737 *
8738 * A larger class of instructions is UNPREDICTABLE if used
8739 * inside an IT block; we do not need to detect those here, because
8740 * what we do by default (perform the cc check and update the IT
8741 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
8742 * choice for those situations.
8743 *
8744 * insn is either a 16-bit or a 32-bit instruction; the two are
8745 * distinguishable because for the 16-bit case the top 16 bits
8746 * are zeroes, and that isn't a valid 32-bit encoding.
8747 */
8748 if ((insn & 0xffffff00) == 0xbe00) {
8749 /* BKPT */
8750 return true;
8751 }
8752
8753 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
8754 !arm_dc_feature(s, ARM_FEATURE_M)) {
8755 /* HLT: v8A only. This is unconditional even when it is going to
8756 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
8757 * For v7 cores this was a plain old undefined encoding and so
8758 * honours its cc check. (We might be using the encoding as
8759 * a semihosting trap, but we don't change the cc check behaviour
8760 * on that account, because a debugger connected to a real v7A
8761 * core and emulating semihosting traps by catching the UNDEF
8762 * exception would also only see cases where the cc check passed.
8763 * No guest code should be trying to do a HLT semihosting trap
8764 * in an IT block anyway.
8765 */
8766 return true;
8767 }
8768
8769 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
8770 arm_dc_feature(s, ARM_FEATURE_M)) {
8771 /* SG: v8M only */
8772 return true;
8773 }
8774
8775 return false;
8776 }
8777
8778 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8779 {
8780 DisasContext *dc = container_of(dcbase, DisasContext, base);
8781 CPUARMState *env = cpu->env_ptr;
8782 uint32_t insn;
8783 bool is_16bit;
8784
8785 if (arm_pre_translate_insn(dc)) {
8786 return;
8787 }
8788
8789 dc->pc_curr = dc->base.pc_next;
8790 insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
8791 is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
8792 dc->base.pc_next += 2;
8793 if (!is_16bit) {
8794 uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
8795
8796 insn = insn << 16 | insn2;
8797 dc->base.pc_next += 2;
8798 }
8799 dc->insn = insn;
8800
8801 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
8802 uint32_t cond = dc->condexec_cond;
8803
8804 /*
8805 * Conditionally skip the insn. Note that both 0xe and 0xf mean
8806 * "always"; 0xf is not "never".
8807 */
8808 if (cond < 0x0e) {
8809 arm_skip_unless(dc, cond);
8810 }
8811 }
8812
8813 if (is_16bit) {
8814 disas_thumb_insn(dc, insn);
8815 } else {
8816 disas_thumb2_insn(dc, insn);
8817 }
8818
8819 /* Advance the Thumb condexec condition. */
8820 if (dc->condexec_mask) {
8821 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
8822 ((dc->condexec_mask >> 4) & 1));
8823 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
8824 if (dc->condexec_mask == 0) {
8825 dc->condexec_cond = 0;
8826 }
8827 }
8828
8829 arm_post_translate_insn(dc);
8830
8831 /* Thumb is a variable-length ISA. Stop translation when the next insn
8832 * will touch a new page. This ensures that prefetch aborts occur at
8833 * the right place.
8834 *
8835 * We want to stop the TB if the next insn starts in a new page,
8836 * or if it spans between this page and the next. This means that
8837 * if we're looking at the last halfword in the page we need to
8838 * see if it's a 16-bit Thumb insn (which will fit in this TB)
8839 * or a 32-bit Thumb insn (which won't).
8840 * This is to avoid generating a silly TB with a single 16-bit insn
8841 * in it at the end of this page (which would execute correctly
8842 * but isn't very efficient).
8843 */
8844 if (dc->base.is_jmp == DISAS_NEXT
8845 && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
8846 || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
8847 && insn_crosses_page(env, dc)))) {
8848 dc->base.is_jmp = DISAS_TOO_MANY;
8849 }
8850 }
8851
8852 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8853 {
8854 DisasContext *dc = container_of(dcbase, DisasContext, base);
8855
8856 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
8857 /* FIXME: This can theoretically happen with self-modifying code. */
8858 cpu_abort(cpu, "IO on conditional branch instruction");
8859 }
8860
8861 /* At this stage dc->condjmp will only be set when the skipped
8862 instruction was a conditional branch or trap, and the PC has
8863 already been written. */
8864 gen_set_condexec(dc);
8865 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
8866 /* Exception return branches need some special case code at the
8867 * end of the TB, which is complex enough that it has to
8868 * handle the single-step vs not and the condition-failed
8869 * insn codepath itself.
8870 */
8871 gen_bx_excret_final_code(dc);
8872 } else if (unlikely(is_singlestepping(dc))) {
8873 /* Unconditional and "condition passed" instruction codepath. */
8874 switch (dc->base.is_jmp) {
8875 case DISAS_SWI:
8876 gen_ss_advance(dc);
8877 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
8878 default_exception_el(dc));
8879 break;
8880 case DISAS_HVC:
8881 gen_ss_advance(dc);
8882 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
8883 break;
8884 case DISAS_SMC:
8885 gen_ss_advance(dc);
8886 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
8887 break;
8888 case DISAS_NEXT:
8889 case DISAS_TOO_MANY:
8890 case DISAS_UPDATE_EXIT:
8891 case DISAS_UPDATE_NOCHAIN:
8892 gen_set_pc_im(dc, dc->base.pc_next);
8893 /* fall through */
8894 default:
8895 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
8896 gen_singlestep_exception(dc);
8897 break;
8898 case DISAS_NORETURN:
8899 break;
8900 }
8901 } else {
8902 /* While branches must always occur at the end of an IT block,
8903 there are a few other things that can cause us to terminate
8904 the TB in the middle of an IT block:
8905 - Exception generating instructions (bkpt, swi, undefined).
8906 - Page boundaries.
8907 - Hardware watchpoints.
8908 Hardware breakpoints have already been handled and skip this code.
8909 */
8910 switch (dc->base.is_jmp) {
8911 case DISAS_NEXT:
8912 case DISAS_TOO_MANY:
8913 gen_goto_tb(dc, 1, dc->base.pc_next);
8914 break;
8915 case DISAS_UPDATE_NOCHAIN:
8916 gen_set_pc_im(dc, dc->base.pc_next);
8917 /* fall through */
8918 case DISAS_JUMP:
8919 gen_goto_ptr();
8920 break;
8921 case DISAS_UPDATE_EXIT:
8922 gen_set_pc_im(dc, dc->base.pc_next);
8923 /* fall through */
8924 default:
8925 /* indicate that the hash table must be used to find the next TB */
8926 tcg_gen_exit_tb(NULL, 0);
8927 break;
8928 case DISAS_NORETURN:
8929 /* nothing more to generate */
8930 break;
8931 case DISAS_WFI:
8932 {
8933 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
8934 !(dc->insn & (1U << 31))) ? 2 : 4);
8935
8936 gen_helper_wfi(cpu_env, tmp);
8937 tcg_temp_free_i32(tmp);
8938 /* The helper doesn't necessarily throw an exception, but we
8939 * must go back to the main loop to check for interrupts anyway.
8940 */
8941 tcg_gen_exit_tb(NULL, 0);
8942 break;
8943 }
8944 case DISAS_WFE:
8945 gen_helper_wfe(cpu_env);
8946 break;
8947 case DISAS_YIELD:
8948 gen_helper_yield(cpu_env);
8949 break;
8950 case DISAS_SWI:
8951 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
8952 default_exception_el(dc));
8953 break;
8954 case DISAS_HVC:
8955 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
8956 break;
8957 case DISAS_SMC:
8958 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
8959 break;
8960 }
8961 }
8962
8963 if (dc->condjmp) {
8964 /* "Condition failed" instruction codepath for the branch/trap insn */
8965 gen_set_label(dc->condlabel);
8966 gen_set_condexec(dc);
8967 if (unlikely(is_singlestepping(dc))) {
8968 gen_set_pc_im(dc, dc->base.pc_next);
8969 gen_singlestep_exception(dc);
8970 } else {
8971 gen_goto_tb(dc, 1, dc->base.pc_next);
8972 }
8973 }
8974 }
8975
8976 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
8977 {
8978 DisasContext *dc = container_of(dcbase, DisasContext, base);
8979
8980 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8981 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8982 }
8983
8984 static const TranslatorOps arm_translator_ops = {
8985 .init_disas_context = arm_tr_init_disas_context,
8986 .tb_start = arm_tr_tb_start,
8987 .insn_start = arm_tr_insn_start,
8988 .breakpoint_check = arm_tr_breakpoint_check,
8989 .translate_insn = arm_tr_translate_insn,
8990 .tb_stop = arm_tr_tb_stop,
8991 .disas_log = arm_tr_disas_log,
8992 };
8993
8994 static const TranslatorOps thumb_translator_ops = {
8995 .init_disas_context = arm_tr_init_disas_context,
8996 .tb_start = arm_tr_tb_start,
8997 .insn_start = arm_tr_insn_start,
8998 .breakpoint_check = arm_tr_breakpoint_check,
8999 .translate_insn = thumb_tr_translate_insn,
9000 .tb_stop = arm_tr_tb_stop,
9001 .disas_log = arm_tr_disas_log,
9002 };
9003
9004 /* generate intermediate code for basic block 'tb'. */
9005 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
9006 {
9007 DisasContext dc = { };
9008 const TranslatorOps *ops = &arm_translator_ops;
9009
9010 if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
9011 ops = &thumb_translator_ops;
9012 }
9013 #ifdef TARGET_AARCH64
9014 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
9015 ops = &aarch64_translator_ops;
9016 }
9017 #endif
9018
9019 translator_loop(ops, &dc.base, cpu, tb, max_insns);
9020 }
9021
9022 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
9023 target_ulong *data)
9024 {
9025 if (is_a64(env)) {
9026 env->pc = data[0];
9027 env->condexec_bits = 0;
9028 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
9029 } else {
9030 env->regs[15] = data[0];
9031 env->condexec_bits = data[1];
9032 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
9033 }
9034 }