1 /*
2 * ARM translation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "qemu/osdep.h"
22
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "hw/semihosting/semihost.h"
33
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
36
37 #include "trace-tcg.h"
38 #include "exec/log.h"
39
40
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
51
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
53
54 #include "translate.h"
55
56 #if defined(CONFIG_USER_ONLY)
57 #define IS_USER(s) 1
58 #else
59 #define IS_USER(s) (s->user)
60 #endif
61
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
68
69 #include "exec/gen-icount.h"
70
71 static const char * const regnames[] =
72 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
73 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
74
75 /* Function prototypes for gen_ functions calling Neon helpers. */
76 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
77 TCGv_i32, TCGv_i32);
78 /* Function prototypes for gen_ functions for fix point conversions */
79 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
80
81 /* initialize TCG globals. */
82 void arm_translate_init(void)
83 {
84 int i;
85
86 for (i = 0; i < 16; i++) {
87 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
88 offsetof(CPUARMState, regs[i]),
89 regnames[i]);
90 }
91 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
92 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
93 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
94 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
95
96 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
97 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
98 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_val), "exclusive_val");
100
101 a64_translate_init();
102 }
103
104 /* Flags for the disas_set_da_iss info argument:
105 * lower bits hold the Rt register number, higher bits are flags.
106 */
107 typedef enum ISSInfo {
108 ISSNone = 0,
109 ISSRegMask = 0x1f,
110 ISSInvalid = (1 << 5),
111 ISSIsAcqRel = (1 << 6),
112 ISSIsWrite = (1 << 7),
113 ISSIs16Bit = (1 << 8),
114 } ISSInfo;
115
116 /* Save the syndrome information for a Data Abort */
117 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
118 {
119 uint32_t syn;
120 int sas = memop & MO_SIZE;
121 bool sse = memop & MO_SIGN;
122 bool is_acqrel = issinfo & ISSIsAcqRel;
123 bool is_write = issinfo & ISSIsWrite;
124 bool is_16bit = issinfo & ISSIs16Bit;
125 int srt = issinfo & ISSRegMask;
126
127 if (issinfo & ISSInvalid) {
128 /* Some callsites want to conditionally provide ISS info,
129 * eg "only if this was not a writeback"
130 */
131 return;
132 }
133
134 if (srt == 15) {
135 /* For AArch32, insns where the src/dest is R15 never generate
136 * ISS information. Catching that here saves checking at all
137 * the call sites.
138 */
139 return;
140 }
141
142 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
143 0, 0, 0, is_write, 0, is_16bit);
144 disas_set_insn_syndrome(s, syn);
145 }
146
147 static inline int get_a32_user_mem_index(DisasContext *s)
148 {
149 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
150 * insns:
151 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
152 * otherwise, access as if at PL0.
153 */
154 switch (s->mmu_idx) {
155 case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
156 case ARMMMUIdx_E10_0:
157 case ARMMMUIdx_E10_1:
158 case ARMMMUIdx_E10_1_PAN:
159 return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
160 case ARMMMUIdx_SE3:
161 case ARMMMUIdx_SE10_0:
162 case ARMMMUIdx_SE10_1:
163 case ARMMMUIdx_SE10_1_PAN:
164 return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
177 default:
178 g_assert_not_reached();
179 }
180 }
181
182 static inline TCGv_i32 load_cpu_offset(int offset)
183 {
184 TCGv_i32 tmp = tcg_temp_new_i32();
185 tcg_gen_ld_i32(tmp, cpu_env, offset);
186 return tmp;
187 }
188
189 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
190
191 static inline void store_cpu_offset(TCGv_i32 var, int offset)
192 {
193 tcg_gen_st_i32(var, cpu_env, offset);
194 tcg_temp_free_i32(var);
195 }
196
197 #define store_cpu_field(var, name) \
198 store_cpu_offset(var, offsetof(CPUARMState, name))
199
200 /* The architectural value of PC. */
201 static uint32_t read_pc(DisasContext *s)
202 {
203 return s->pc_curr + (s->thumb ? 4 : 8);
204 }
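/*
 * Illustrative example: for an ARM-state instruction at 0x8000,
 * read_pc() returns 0x8008; for a Thumb instruction at the same
 * address it returns 0x8004. This matches the architectural rule
 * that reads of the PC observe the current instruction address
 * plus 8 (ARM) or plus 4 (Thumb).
 */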
205
206 /* Set a variable to the value of a CPU register. */
207 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
208 {
209 if (reg == 15) {
210 tcg_gen_movi_i32(var, read_pc(s));
211 } else {
212 tcg_gen_mov_i32(var, cpu_R[reg]);
213 }
214 }
215
216 /* Create a new temporary and set it to the value of a CPU register. */
217 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
218 {
219 TCGv_i32 tmp = tcg_temp_new_i32();
220 load_reg_var(s, tmp, reg);
221 return tmp;
222 }
223
224 /*
225 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
226 * This is used for load/store for which use of PC implies (literal),
227 * or ADD that implies ADR.
228 */
229 static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
230 {
231 TCGv_i32 tmp = tcg_temp_new_i32();
232
233 if (reg == 15) {
234 tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
235 } else {
236 tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
237 }
238 return tmp;
239 }
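/*
 * Illustrative example: a Thumb LDR (literal) at pc_curr 0x1002 with
 * ofs 8 computes ((0x1002 + 4) & ~3) + 8 = 0x100c, i.e. the offset is
 * applied to the word-aligned PC, as the ALIGN(PC, 4) rule above
 * requires.
 */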
240
241 /* Set a CPU register. The source must be a temporary and will be
242 marked as dead. */
243 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
244 {
245 if (reg == 15) {
246 /* In Thumb mode, we must ignore bit 0.
247 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
248 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
249 * We choose to ignore [1:0] in ARM mode for all architecture versions.
250 */
251 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
252 s->base.is_jmp = DISAS_JUMP;
253 }
254 tcg_gen_mov_i32(cpu_R[reg], var);
255 tcg_temp_free_i32(var);
256 }
257
258 /*
259 * Variant of store_reg which applies v8M stack-limit checks before updating
260 * SP. If the check fails this will result in an exception being taken.
261 * We disable the stack checks for CONFIG_USER_ONLY because we have
262 * no idea what the stack limits should be in that case.
263 * If stack checking is not being done this just acts like store_reg().
264 */
265 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
266 {
267 #ifndef CONFIG_USER_ONLY
268 if (s->v8m_stackcheck) {
269 gen_helper_v8m_stackcheck(cpu_env, var);
270 }
271 #endif
272 store_reg(s, 13, var);
273 }
274
275 /* Value extensions. */
276 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
277 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
278 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
279 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
280
281 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
282 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
283
284
285 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
286 {
287 TCGv_i32 tmp_mask = tcg_const_i32(mask);
288 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
289 tcg_temp_free_i32(tmp_mask);
290 }
291 /* Set NZCV flags from the high 4 bits of var. */
292 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
293
294 static void gen_exception_internal(int excp)
295 {
296 TCGv_i32 tcg_excp = tcg_const_i32(excp);
297
298 assert(excp_is_internal(excp));
299 gen_helper_exception_internal(cpu_env, tcg_excp);
300 tcg_temp_free_i32(tcg_excp);
301 }
302
303 static void gen_step_complete_exception(DisasContext *s)
304 {
305 /* We have just completed a step of an insn. Move from Active-not-pending
306 * to Active-pending, and then also take the swstep exception.
307 * This corresponds to making the (IMPDEF) choice to prioritize
308 * swstep exceptions over asynchronous exceptions taken to an exception
309 * level where debug is disabled. This choice has the advantage that
310 * we do not need to maintain internal state corresponding to the
311 * ISV/EX syndrome bits between completion of the step and generation
312 * of the exception, and our syndrome information is always correct.
313 */
314 gen_ss_advance(s);
315 gen_swstep_exception(s, 1, s->is_ldex);
316 s->base.is_jmp = DISAS_NORETURN;
317 }
318
319 static void gen_singlestep_exception(DisasContext *s)
320 {
321 /* Generate the right kind of exception for singlestep, which is
322 * either the architectural singlestep or EXCP_DEBUG for QEMU's
323 * gdb singlestepping.
324 */
325 if (s->ss_active) {
326 gen_step_complete_exception(s);
327 } else {
328 gen_exception_internal(EXCP_DEBUG);
329 }
330 }
331
332 static inline bool is_singlestepping(DisasContext *s)
333 {
334 /* Return true if we are singlestepping either because of
335 * architectural singlestep or QEMU gdbstub singlestep. This does
336 * not include the command line '-singlestep' mode which is rather
337 * misnamed as it only means "one instruction per TB" and doesn't
338 * affect the code we generate.
339 */
340 return s->base.singlestep_enabled || s->ss_active;
341 }
342
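/*
 * Dual signed 16x16->32 multiply: on return, a holds the product of
 * the two low halfwords and b holds the product of the two high
 * halfwords (operands sign-extended). The inputs are modified in
 * place.
 */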
343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
344 {
345 TCGv_i32 tmp1 = tcg_temp_new_i32();
346 TCGv_i32 tmp2 = tcg_temp_new_i32();
347 tcg_gen_ext16s_i32(tmp1, a);
348 tcg_gen_ext16s_i32(tmp2, b);
349 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
350 tcg_temp_free_i32(tmp2);
351 tcg_gen_sari_i32(a, a, 16);
352 tcg_gen_sari_i32(b, b, 16);
353 tcg_gen_mul_i32(b, b, a);
354 tcg_gen_mov_i32(a, tmp1);
355 tcg_temp_free_i32(tmp1);
356 }
357
358 /* Byteswap each halfword. */
359 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
360 {
361 TCGv_i32 tmp = tcg_temp_new_i32();
362 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
363 tcg_gen_shri_i32(tmp, var, 8);
364 tcg_gen_and_i32(tmp, tmp, mask);
365 tcg_gen_and_i32(var, var, mask);
366 tcg_gen_shli_i32(var, var, 8);
367 tcg_gen_or_i32(dest, var, tmp);
368 tcg_temp_free_i32(mask);
369 tcg_temp_free_i32(tmp);
370 }
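/*
 * Worked example: 0xaabbccdd becomes 0xbbaaddcc, i.e. the bytes
 * within each 16-bit half are swapped while the two halves stay in
 * place.
 */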
371
372 /* Byteswap low halfword and sign extend. */
373 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
374 {
375 tcg_gen_ext16u_i32(var, var);
376 tcg_gen_bswap16_i32(var, var);
377 tcg_gen_ext16s_i32(dest, var);
378 }
379
380 /* 32x32->64 multiply. Marks inputs as dead. */
381 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
382 {
383 TCGv_i32 lo = tcg_temp_new_i32();
384 TCGv_i32 hi = tcg_temp_new_i32();
385 TCGv_i64 ret;
386
387 tcg_gen_mulu2_i32(lo, hi, a, b);
388 tcg_temp_free_i32(a);
389 tcg_temp_free_i32(b);
390
391 ret = tcg_temp_new_i64();
392 tcg_gen_concat_i32_i64(ret, lo, hi);
393 tcg_temp_free_i32(lo);
394 tcg_temp_free_i32(hi);
395
396 return ret;
397 }
398
399 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
400 {
401 TCGv_i32 lo = tcg_temp_new_i32();
402 TCGv_i32 hi = tcg_temp_new_i32();
403 TCGv_i64 ret;
404
405 tcg_gen_muls2_i32(lo, hi, a, b);
406 tcg_temp_free_i32(a);
407 tcg_temp_free_i32(b);
408
409 ret = tcg_temp_new_i64();
410 tcg_gen_concat_i32_i64(ret, lo, hi);
411 tcg_temp_free_i32(lo);
412 tcg_temp_free_i32(hi);
413
414 return ret;
415 }
416
417 /* Swap low and high halfwords. */
418 static void gen_swap_half(TCGv_i32 var)
419 {
420 tcg_gen_rotri_i32(var, var, 16);
421 }
422
423 /* Dual 16-bit add. Result placed in dest; t0 and t1 are clobbered.
424 tmp = (t0 ^ t1) & 0x8000;
425 t0 &= ~0x8000;
426 t1 &= ~0x8000;
427 t0 = (t0 + t1) ^ tmp;
428 */
429
430 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
431 {
432 TCGv_i32 tmp = tcg_temp_new_i32();
433 tcg_gen_xor_i32(tmp, t0, t1);
434 tcg_gen_andi_i32(tmp, tmp, 0x8000);
435 tcg_gen_andi_i32(t0, t0, ~0x8000);
436 tcg_gen_andi_i32(t1, t1, ~0x8000);
437 tcg_gen_add_i32(t0, t0, t1);
438 tcg_gen_xor_i32(dest, t0, tmp);
439 tcg_temp_free_i32(tmp);
440 }
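/*
 * Why this works: clearing bit 15 of both operands guarantees that the
 * low-halfword addition cannot carry into bit 16, so the high halfwords
 * add independently. XORing the saved (t0 ^ t1) & 0x8000 back in
 * restores the correct bit 15 of the low halfword, since with the carry
 * path cut that bit is just the carry-less sum of the original bit-15
 * values and the carry coming up from bits [14:0].
 */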
441
442 /* Set N and Z flags from var. */
443 static inline void gen_logic_CC(TCGv_i32 var)
444 {
445 tcg_gen_mov_i32(cpu_NF, var);
446 tcg_gen_mov_i32(cpu_ZF, var);
447 }
448
449 /* dest = T0 + T1 + CF. */
450 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
451 {
452 tcg_gen_add_i32(dest, t0, t1);
453 tcg_gen_add_i32(dest, dest, cpu_CF);
454 }
455
456 /* dest = T0 - T1 + CF - 1. */
457 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
458 {
459 tcg_gen_sub_i32(dest, t0, t1);
460 tcg_gen_add_i32(dest, dest, cpu_CF);
461 tcg_gen_subi_i32(dest, dest, 1);
462 }
463
464 /* dest = T0 + T1. Compute C, N, V and Z flags */
465 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
466 {
467 TCGv_i32 tmp = tcg_temp_new_i32();
468 tcg_gen_movi_i32(tmp, 0);
469 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
470 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
471 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
472 tcg_gen_xor_i32(tmp, t0, t1);
473 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
474 tcg_temp_free_i32(tmp);
475 tcg_gen_mov_i32(dest, cpu_NF);
476 }
477
478 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
479 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
480 {
481 TCGv_i32 tmp = tcg_temp_new_i32();
482 if (TCG_TARGET_HAS_add2_i32) {
483 tcg_gen_movi_i32(tmp, 0);
484 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
485 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
486 } else {
487 TCGv_i64 q0 = tcg_temp_new_i64();
488 TCGv_i64 q1 = tcg_temp_new_i64();
489 tcg_gen_extu_i32_i64(q0, t0);
490 tcg_gen_extu_i32_i64(q1, t1);
491 tcg_gen_add_i64(q0, q0, q1);
492 tcg_gen_extu_i32_i64(q1, cpu_CF);
493 tcg_gen_add_i64(q0, q0, q1);
494 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
495 tcg_temp_free_i64(q0);
496 tcg_temp_free_i64(q1);
497 }
498 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
499 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
500 tcg_gen_xor_i32(tmp, t0, t1);
501 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
502 tcg_temp_free_i32(tmp);
503 tcg_gen_mov_i32(dest, cpu_NF);
504 }
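/*
 * The add2 path computes t0 + CF and then adds t1, letting cpu_CF
 * accumulate the carry-out across both steps; the fallback widens
 * everything to 64 bits, adds, and then splits the result into
 * cpu_NF (low word) and cpu_CF (carry) with tcg_gen_extr_i64_i32.
 */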
505
506 /* dest = T0 - T1. Compute C, N, V and Z flags */
507 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
508 {
509 TCGv_i32 tmp;
510 tcg_gen_sub_i32(cpu_NF, t0, t1);
511 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
512 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
513 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
514 tmp = tcg_temp_new_i32();
515 tcg_gen_xor_i32(tmp, t0, t1);
516 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
517 tcg_temp_free_i32(tmp);
518 tcg_gen_mov_i32(dest, cpu_NF);
519 }
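/*
 * Note that the ARM C flag after a subtraction is "NOT borrow":
 * C is set iff t0 >= t1 as unsigned values, which is why the carry
 * is computed with TCG_COND_GEU rather than by inverting a borrow.
 */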
520
521 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
522 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
523 {
524 TCGv_i32 tmp = tcg_temp_new_i32();
525 tcg_gen_not_i32(tmp, t1);
526 gen_adc_CC(dest, t0, tmp);
527 tcg_temp_free_i32(tmp);
528 }
529
530 #define GEN_SHIFT(name) \
531 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
532 { \
533 TCGv_i32 tmp1, tmp2, tmp3; \
534 tmp1 = tcg_temp_new_i32(); \
535 tcg_gen_andi_i32(tmp1, t1, 0xff); \
536 tmp2 = tcg_const_i32(0); \
537 tmp3 = tcg_const_i32(0x1f); \
538 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
539 tcg_temp_free_i32(tmp3); \
540 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
541 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
542 tcg_temp_free_i32(tmp2); \
543 tcg_temp_free_i32(tmp1); \
544 }
545 GEN_SHIFT(shl)
546 GEN_SHIFT(shr)
547 #undef GEN_SHIFT
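/*
 * The generated gen_shl()/gen_shr() implement the ARM semantics for
 * register-specified shifts: only the low 8 bits of t1 are used, and a
 * shift amount of 32 or more produces 0. Since TCG shifts are only
 * defined for counts 0..31, the movcond substitutes a zero source when
 * the amount exceeds 31 and then shifts by (amount & 31). gen_sar()
 * below instead clamps the amount to 31, because an arithmetic shift
 * right by 32 or more gives the same result as a shift by 31.
 */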
548
549 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
550 {
551 TCGv_i32 tmp1, tmp2;
552 tmp1 = tcg_temp_new_i32();
553 tcg_gen_andi_i32(tmp1, t1, 0xff);
554 tmp2 = tcg_const_i32(0x1f);
555 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
556 tcg_temp_free_i32(tmp2);
557 tcg_gen_sar_i32(dest, t0, tmp1);
558 tcg_temp_free_i32(tmp1);
559 }
560
561 static void shifter_out_im(TCGv_i32 var, int shift)
562 {
563 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
564 }
565
566 /* Shift by immediate. Includes special handling for shift == 0. */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568 int shift, int flags)
569 {
570 switch (shiftop) {
571 case 0: /* LSL */
572 if (shift != 0) {
573 if (flags)
574 shifter_out_im(var, 32 - shift);
575 tcg_gen_shli_i32(var, var, shift);
576 }
577 break;
578 case 1: /* LSR */
579 if (shift == 0) {
580 if (flags) {
581 tcg_gen_shri_i32(cpu_CF, var, 31);
582 }
583 tcg_gen_movi_i32(var, 0);
584 } else {
585 if (flags)
586 shifter_out_im(var, shift - 1);
587 tcg_gen_shri_i32(var, var, shift);
588 }
589 break;
590 case 2: /* ASR */
591 if (shift == 0)
592 shift = 32;
593 if (flags)
594 shifter_out_im(var, shift - 1);
595 if (shift == 32)
596 shift = 31;
597 tcg_gen_sari_i32(var, var, shift);
598 break;
599 case 3: /* ROR/RRX */
600 if (shift != 0) {
601 if (flags)
602 shifter_out_im(var, shift - 1);
603 tcg_gen_rotri_i32(var, var, shift); break;
604 } else {
605 TCGv_i32 tmp = tcg_temp_new_i32();
606 tcg_gen_shli_i32(tmp, cpu_CF, 31);
607 if (flags)
608 shifter_out_im(var, 0);
609 tcg_gen_shri_i32(var, var, 1);
610 tcg_gen_or_i32(var, var, tmp);
611 tcg_temp_free_i32(tmp);
612 }
613 }
614 }
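/*
 * The shift == 0 special cases correspond to the immediate-shift
 * encodings: for LSR and ASR an immediate field of 0 encodes a shift
 * by 32, and for ROR it encodes RRX (rotate right by one through the
 * carry flag). LSL #0 is a plain move and needs no special handling.
 */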
615
616 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
617 TCGv_i32 shift, int flags)
618 {
619 if (flags) {
620 switch (shiftop) {
621 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
622 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
623 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
624 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
625 }
626 } else {
627 switch (shiftop) {
628 case 0:
629 gen_shl(var, var, shift);
630 break;
631 case 1:
632 gen_shr(var, var, shift);
633 break;
634 case 2:
635 gen_sar(var, var, shift);
636 break;
637 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
638 tcg_gen_rotr_i32(var, var, shift); break;
639 }
640 }
641 tcg_temp_free_i32(shift);
642 }
643
644 /*
645 * Generate a conditional based on ARM condition code cc.
646 * This is common between ARM and Aarch64 targets.
647 */
648 void arm_test_cc(DisasCompare *cmp, int cc)
649 {
650 TCGv_i32 value;
651 TCGCond cond;
652 bool global = true;
653
654 switch (cc) {
655 case 0: /* eq: Z */
656 case 1: /* ne: !Z */
657 cond = TCG_COND_EQ;
658 value = cpu_ZF;
659 break;
660
661 case 2: /* cs: C */
662 case 3: /* cc: !C */
663 cond = TCG_COND_NE;
664 value = cpu_CF;
665 break;
666
667 case 4: /* mi: N */
668 case 5: /* pl: !N */
669 cond = TCG_COND_LT;
670 value = cpu_NF;
671 break;
672
673 case 6: /* vs: V */
674 case 7: /* vc: !V */
675 cond = TCG_COND_LT;
676 value = cpu_VF;
677 break;
678
679 case 8: /* hi: C && !Z */
680 case 9: /* ls: !C || Z -> !(C && !Z) */
681 cond = TCG_COND_NE;
682 value = tcg_temp_new_i32();
683 global = false;
684 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
685 ZF is non-zero for !Z; so AND the two subexpressions. */
686 tcg_gen_neg_i32(value, cpu_CF);
687 tcg_gen_and_i32(value, value, cpu_ZF);
688 break;
689
690 case 10: /* ge: N == V -> N ^ V == 0 */
691 case 11: /* lt: N != V -> N ^ V != 0 */
692 /* Since we're only interested in the sign bit, == 0 is >= 0. */
693 cond = TCG_COND_GE;
694 value = tcg_temp_new_i32();
695 global = false;
696 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
697 break;
698
699 case 12: /* gt: !Z && N == V */
700 case 13: /* le: Z || N != V */
701 cond = TCG_COND_NE;
702 value = tcg_temp_new_i32();
703 global = false;
704 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
705 * the sign bit then AND with ZF to yield the result. */
706 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
707 tcg_gen_sari_i32(value, value, 31);
708 tcg_gen_andc_i32(value, cpu_ZF, value);
709 break;
710
711 case 14: /* always */
712 case 15: /* always */
713 /* Use the ALWAYS condition, which will fold early.
714 * It doesn't matter what we use for the value. */
715 cond = TCG_COND_ALWAYS;
716 value = cpu_ZF;
717 goto no_invert;
718
719 default:
720 fprintf(stderr, "Bad condition code 0x%x\n", cc);
721 abort();
722 }
723
724 if (cc & 1) {
725 cond = tcg_invert_cond(cond);
726 }
727
728 no_invert:
729 cmp->cond = cond;
730 cmp->value = value;
731 cmp->value_global = global;
732 }
733
734 void arm_free_cc(DisasCompare *cmp)
735 {
736 if (!cmp->value_global) {
737 tcg_temp_free_i32(cmp->value);
738 }
739 }
740
741 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
742 {
743 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
744 }
745
746 void arm_gen_test_cc(int cc, TCGLabel *label)
747 {
748 DisasCompare cmp;
749 arm_test_cc(&cmp, cc);
750 arm_jump_cc(&cmp, label);
751 arm_free_cc(&cmp);
752 }
753
754 static inline void gen_set_condexec(DisasContext *s)
755 {
756 if (s->condexec_mask) {
757 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
758 TCGv_i32 tmp = tcg_temp_new_i32();
759 tcg_gen_movi_i32(tmp, val);
760 store_cpu_field(tmp, condexec_bits);
761 }
762 }
763
764 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
765 {
766 tcg_gen_movi_i32(cpu_R[15], val);
767 }
768
769 /* Set PC and Thumb state from var. var is marked as dead. */
770 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
771 {
772 s->base.is_jmp = DISAS_JUMP;
773 tcg_gen_andi_i32(cpu_R[15], var, ~1);
774 tcg_gen_andi_i32(var, var, 1);
775 store_cpu_field(var, thumb);
776 }
777
778 /*
779 * Set PC and Thumb state from var. var is marked as dead.
780 * For M-profile CPUs, include logic to detect exception-return
781 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
782 * and BX reg, and no others, and happens only for code in Handler mode.
783 * The Security Extension also requires us to check for the FNC_RETURN
784 * which signals a function return from non-secure state; this can happen
785 * in both Handler and Thread mode.
786 * To avoid having to do multiple comparisons in inline generated code,
787 * we make the check we do here loose, so it will match for EXC_RETURN
788 * in Thread mode. For system emulation do_v7m_exception_exit() checks
789 * for these spurious cases and returns without doing anything (giving
790 * the same behaviour as for a branch to a non-magic address).
791 *
792 * In linux-user mode it is unclear what the right behaviour for an
793 * attempted FNC_RETURN should be, because in real hardware this will go
794 * directly to Secure code (ie not the Linux kernel) which will then treat
795 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
796 * attempt behave the way it would on a CPU without the security extension,
797 * which is to say "like a normal branch". That means we can simply treat
798 * all branches as normal with no magic address behaviour.
799 */
800 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
801 {
802 /* Generate the same code here as for a simple bx, but flag via
803 * s->base.is_jmp that we need to do the rest of the work later.
804 */
805 gen_bx(s, var);
806 #ifndef CONFIG_USER_ONLY
807 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
808 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
809 s->base.is_jmp = DISAS_BX_EXCRET;
810 }
811 #endif
812 }
813
814 static inline void gen_bx_excret_final_code(DisasContext *s)
815 {
816 /* Generate the code to finish possible exception return and end the TB */
817 TCGLabel *excret_label = gen_new_label();
818 uint32_t min_magic;
819
820 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
821 /* Covers FNC_RETURN and EXC_RETURN magic */
822 min_magic = FNC_RETURN_MIN_MAGIC;
823 } else {
824 /* EXC_RETURN magic only */
825 min_magic = EXC_RETURN_MIN_MAGIC;
826 }
827
828 /* Is the new PC value in the magic range indicating exception return? */
829 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
830 /* No: end the TB as we would for a DISAS_JMP */
831 if (is_singlestepping(s)) {
832 gen_singlestep_exception(s);
833 } else {
834 tcg_gen_exit_tb(NULL, 0);
835 }
836 gen_set_label(excret_label);
837 /* Yes: this is an exception return.
838 * At this point in runtime env->regs[15] and env->thumb will hold
839 * the exception-return magic number, which do_v7m_exception_exit()
840 * will read. Nothing else will be able to see those values because
841 * the cpu-exec main loop guarantees that we will always go straight
842 * from raising the exception to the exception-handling code.
843 *
844 * gen_ss_advance(s) does nothing on M profile currently but
845 * calling it is conceptually the right thing as we have executed
846 * this instruction (compare SWI, HVC, SMC handling).
847 */
848 gen_ss_advance(s);
849 gen_exception_internal(EXCP_EXCEPTION_EXIT);
850 }
851
852 static inline void gen_bxns(DisasContext *s, int rm)
853 {
854 TCGv_i32 var = load_reg(s, rm);
855
856 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
857 * we need to sync state before calling it, but:
858 * - we don't need to do gen_set_pc_im() because the bxns helper will
859 * always set the PC itself
860 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
861 * unless it's outside an IT block or the last insn in an IT block,
862 * so we know that condexec == 0 (already set at the top of the TB)
863 * is correct in the non-UNPREDICTABLE cases, and we can choose
864 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
865 */
866 gen_helper_v7m_bxns(cpu_env, var);
867 tcg_temp_free_i32(var);
868 s->base.is_jmp = DISAS_EXIT;
869 }
870
871 static inline void gen_blxns(DisasContext *s, int rm)
872 {
873 TCGv_i32 var = load_reg(s, rm);
874
875 /* We don't need to sync condexec state, for the same reason as bxns.
876 * We do however need to set the PC, because the blxns helper reads it.
877 * The blxns helper may throw an exception.
878 */
879 gen_set_pc_im(s, s->base.pc_next);
880 gen_helper_v7m_blxns(cpu_env, var);
881 tcg_temp_free_i32(var);
882 s->base.is_jmp = DISAS_EXIT;
883 }
884
885 /* Variant of store_reg which uses branch&exchange logic when storing
886 to r15 in ARM architecture v7 and above. The source must be a temporary
887 and will be marked as dead. */
888 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
889 {
890 if (reg == 15 && ENABLE_ARCH_7) {
891 gen_bx(s, var);
892 } else {
893 store_reg(s, reg, var);
894 }
895 }
896
897 /* Variant of store_reg which uses branch&exchange logic when storing
898 * to r15 in ARM architecture v5T and above. This is used for storing
899 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
900 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
901 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
902 {
903 if (reg == 15 && ENABLE_ARCH_5) {
904 gen_bx_excret(s, var);
905 } else {
906 store_reg(s, reg, var);
907 }
908 }
909
910 #ifdef CONFIG_USER_ONLY
911 #define IS_USER_ONLY 1
912 #else
913 #define IS_USER_ONLY 0
914 #endif
915
916 /* Abstractions of "generate code to do a guest load/store for
917 * AArch32", where a vaddr is always 32 bits (and is zero
918 * extended if we're a 64 bit core) and data is also
919 * 32 bits unless specifically doing a 64 bit access.
920 * These functions work like tcg_gen_qemu_{ld,st}* except
921 * that the address argument is TCGv_i32 rather than TCGv.
922 */
923
924 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
925 {
926 TCGv addr = tcg_temp_new();
927 tcg_gen_extu_i32_tl(addr, a32);
928
929 /* Not needed for user-mode BE32, where we use MO_BE instead. */
930 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
932 }
933 return addr;
934 }
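/*
 * Example of the BE32 address munging: with SCTLR.B set, a byte access
 * XORs the address with 3 and a halfword access XORs it with 2, which
 * reproduces the legacy BE32 byte-lane layout on top of a
 * little-endian memory image; word and larger accesses are left
 * unchanged.
 */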
935
936 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
937 int index, MemOp opc)
938 {
939 TCGv addr;
940
941 if (arm_dc_feature(s, ARM_FEATURE_M) &&
942 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
943 opc |= MO_ALIGN;
944 }
945
946 addr = gen_aa32_addr(s, a32, opc);
947 tcg_gen_qemu_ld_i32(val, addr, index, opc);
948 tcg_temp_free(addr);
949 }
950
951 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
952 int index, MemOp opc)
953 {
954 TCGv addr;
955
956 if (arm_dc_feature(s, ARM_FEATURE_M) &&
957 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
958 opc |= MO_ALIGN;
959 }
960
961 addr = gen_aa32_addr(s, a32, opc);
962 tcg_gen_qemu_st_i32(val, addr, index, opc);
963 tcg_temp_free(addr);
964 }
965
966 #define DO_GEN_LD(SUFF, OPC) \
967 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
968 TCGv_i32 a32, int index) \
969 { \
970 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
971 }
972
973 #define DO_GEN_ST(SUFF, OPC) \
974 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
975 TCGv_i32 a32, int index) \
976 { \
977 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
978 }
979
980 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
981 {
982 /* Not needed for user-mode BE32, where we use MO_BE instead. */
983 if (!IS_USER_ONLY && s->sctlr_b) {
984 tcg_gen_rotri_i64(val, val, 32);
985 }
986 }
987
988 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
989 int index, MemOp opc)
990 {
991 TCGv addr = gen_aa32_addr(s, a32, opc);
992 tcg_gen_qemu_ld_i64(val, addr, index, opc);
993 gen_aa32_frob64(s, val);
994 tcg_temp_free(addr);
995 }
996
997 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
998 TCGv_i32 a32, int index)
999 {
1000 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1001 }
1002
1003 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1004 int index, MemOp opc)
1005 {
1006 TCGv addr = gen_aa32_addr(s, a32, opc);
1007
1008 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1009 if (!IS_USER_ONLY && s->sctlr_b) {
1010 TCGv_i64 tmp = tcg_temp_new_i64();
1011 tcg_gen_rotri_i64(tmp, val, 32);
1012 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1013 tcg_temp_free_i64(tmp);
1014 } else {
1015 tcg_gen_qemu_st_i64(val, addr, index, opc);
1016 }
1017 tcg_temp_free(addr);
1018 }
1019
1020 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1021 TCGv_i32 a32, int index)
1022 {
1023 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1024 }
1025
1026 DO_GEN_LD(8u, MO_UB)
1027 DO_GEN_LD(16u, MO_UW)
1028 DO_GEN_LD(32u, MO_UL)
1029 DO_GEN_ST(8, MO_UB)
1030 DO_GEN_ST(16, MO_UW)
1031 DO_GEN_ST(32, MO_UL)
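/*
 * For reference, DO_GEN_LD(8u, MO_UB) above expands to:
 *
 *   static inline void gen_aa32_ld8u(DisasContext *s, TCGv_i32 val,
 *                                    TCGv_i32 a32, int index)
 *   {
 *       gen_aa32_ld_i32(s, val, a32, index, MO_UB | s->be_data);
 *   }
 *
 * i.e. each helper is just gen_aa32_ld_i32/gen_aa32_st_i32 with the
 * access size ORed with the dynamic endianness in s->be_data.
 */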
1032
1033 static inline void gen_hvc(DisasContext *s, int imm16)
1034 {
1035 /* The pre HVC helper handles cases when HVC gets trapped
1036 * as an undefined insn by runtime configuration (i.e. before
1037 * the insn really executes).
1038 */
1039 gen_set_pc_im(s, s->pc_curr);
1040 gen_helper_pre_hvc(cpu_env);
1041 /* Otherwise we will treat this as a real exception which
1042 * happens after execution of the insn. (The distinction matters
1043 * for the PC value reported to the exception handler and also
1044 * for single stepping.)
1045 */
1046 s->svc_imm = imm16;
1047 gen_set_pc_im(s, s->base.pc_next);
1048 s->base.is_jmp = DISAS_HVC;
1049 }
1050
1051 static inline void gen_smc(DisasContext *s)
1052 {
1053 /* As with HVC, we may take an exception either before or after
1054 * the insn executes.
1055 */
1056 TCGv_i32 tmp;
1057
1058 gen_set_pc_im(s, s->pc_curr);
1059 tmp = tcg_const_i32(syn_aa32_smc());
1060 gen_helper_pre_smc(cpu_env, tmp);
1061 tcg_temp_free_i32(tmp);
1062 gen_set_pc_im(s, s->base.pc_next);
1063 s->base.is_jmp = DISAS_SMC;
1064 }
1065
1066 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1067 {
1068 gen_set_condexec(s);
1069 gen_set_pc_im(s, pc);
1070 gen_exception_internal(excp);
1071 s->base.is_jmp = DISAS_NORETURN;
1072 }
1073
1074 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1075 int syn, uint32_t target_el)
1076 {
1077 gen_set_condexec(s);
1078 gen_set_pc_im(s, pc);
1079 gen_exception(excp, syn, target_el);
1080 s->base.is_jmp = DISAS_NORETURN;
1081 }
1082
1083 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1084 {
1085 TCGv_i32 tcg_syn;
1086
1087 gen_set_condexec(s);
1088 gen_set_pc_im(s, s->pc_curr);
1089 tcg_syn = tcg_const_i32(syn);
1090 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1091 tcg_temp_free_i32(tcg_syn);
1092 s->base.is_jmp = DISAS_NORETURN;
1093 }
1094
1095 static void unallocated_encoding(DisasContext *s)
1096 {
1097 /* Unallocated and reserved encodings are uncategorized */
1098 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1099 default_exception_el(s));
1100 }
1101
1102 /* Force a TB lookup after an instruction that changes the CPU state. */
1103 static inline void gen_lookup_tb(DisasContext *s)
1104 {
1105 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1106 s->base.is_jmp = DISAS_EXIT;
1107 }
1108
1109 static inline void gen_hlt(DisasContext *s, int imm)
1110 {
1111 /* HLT. This has two purposes.
1112 * Architecturally, it is an external halting debug instruction.
1113 * Since QEMU doesn't implement external debug, we treat this as
1114 * the architecture requires when halting debug is disabled: it will UNDEF.
1115 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1116 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1117 * must trigger semihosting even for ARMv7 and earlier, where
1118 * HLT was an undefined encoding.
1119 * In system mode, we don't allow userspace access to
1120 * semihosting, to provide some semblance of security
1121 * (and for consistency with our 32-bit semihosting).
1122 */
1123 if (semihosting_enabled() &&
1124 #ifndef CONFIG_USER_ONLY
1125 s->current_el != 0 &&
1126 #endif
1127 (imm == (s->thumb ? 0x3c : 0xf000))) {
1128 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1129 return;
1130 }
1131
1132 unallocated_encoding(s);
1133 }
1134
1135 static TCGv_ptr get_fpstatus_ptr(int neon)
1136 {
1137 TCGv_ptr statusptr = tcg_temp_new_ptr();
1138 int offset;
1139 if (neon) {
1140 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1141 } else {
1142 offset = offsetof(CPUARMState, vfp.fp_status);
1143 }
1144 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1145 return statusptr;
1146 }
1147
1148 static inline long vfp_reg_offset(bool dp, unsigned reg)
1149 {
1150 if (dp) {
1151 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1152 } else {
1153 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1154 if (reg & 1) {
1155 ofs += offsetof(CPU_DoubleU, l.upper);
1156 } else {
1157 ofs += offsetof(CPU_DoubleU, l.lower);
1158 }
1159 return ofs;
1160 }
1161 }
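/*
 * Illustrative examples: vfp_reg_offset(true, 1) is D1, which lives in
 * zregs[0].d[1]; vfp_reg_offset(false, 3) is S3, i.e. the
 * architecturally-upper 32 bits of that same double (the CPU_DoubleU
 * union hides the host byte order).
 */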
1162
1163 /* Return the offset of a 32-bit piece of a NEON register.
1164 where 0 is the least significant end of the register. */
1165 static inline long
1166 neon_reg_offset (int reg, int n)
1167 {
1168 int sreg;
1169 sreg = reg * 2 + n;
1170 return vfp_reg_offset(0, sreg);
1171 }
1172
1173 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1174 * where 0 is the least significant end of the register.
1175 */
1176 static inline long
1177 neon_element_offset(int reg, int element, MemOp size)
1178 {
1179 int element_size = 1 << size;
1180 int ofs = element * element_size;
1181 #ifdef HOST_WORDS_BIGENDIAN
1182 /* Calculate the offset assuming fully little-endian,
1183 * then XOR to account for the order of the 8-byte units.
1184 */
1185 if (element_size < 8) {
1186 ofs ^= 8 - element_size;
1187 }
1188 #endif
1189 return neon_reg_offset(reg, 0) + ofs;
1190 }
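/*
 * Illustrative example: on a big-endian host, element 0 of a 16-bit
 * access has ofs = 0 ^ (8 - 2) = 6, which is where the least
 * significant halfword of the 8-byte unit sits in host memory; on a
 * little-endian host it is simply 0.
 */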
1191
1192 static TCGv_i32 neon_load_reg(int reg, int pass)
1193 {
1194 TCGv_i32 tmp = tcg_temp_new_i32();
1195 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1196 return tmp;
1197 }
1198
1199 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1200 {
1201 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1202
1203 switch (mop) {
1204 case MO_UB:
1205 tcg_gen_ld8u_i32(var, cpu_env, offset);
1206 break;
1207 case MO_UW:
1208 tcg_gen_ld16u_i32(var, cpu_env, offset);
1209 break;
1210 case MO_UL:
1211 tcg_gen_ld_i32(var, cpu_env, offset);
1212 break;
1213 default:
1214 g_assert_not_reached();
1215 }
1216 }
1217
1218 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1219 {
1220 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1221
1222 switch (mop) {
1223 case MO_UB:
1224 tcg_gen_ld8u_i64(var, cpu_env, offset);
1225 break;
1226 case MO_UW:
1227 tcg_gen_ld16u_i64(var, cpu_env, offset);
1228 break;
1229 case MO_UL:
1230 tcg_gen_ld32u_i64(var, cpu_env, offset);
1231 break;
1232 case MO_Q:
1233 tcg_gen_ld_i64(var, cpu_env, offset);
1234 break;
1235 default:
1236 g_assert_not_reached();
1237 }
1238 }
1239
1240 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1241 {
1242 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1243 tcg_temp_free_i32(var);
1244 }
1245
1246 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1247 {
1248 long offset = neon_element_offset(reg, ele, size);
1249
1250 switch (size) {
1251 case MO_8:
1252 tcg_gen_st8_i32(var, cpu_env, offset);
1253 break;
1254 case MO_16:
1255 tcg_gen_st16_i32(var, cpu_env, offset);
1256 break;
1257 case MO_32:
1258 tcg_gen_st_i32(var, cpu_env, offset);
1259 break;
1260 default:
1261 g_assert_not_reached();
1262 }
1263 }
1264
1265 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1266 {
1267 long offset = neon_element_offset(reg, ele, size);
1268
1269 switch (size) {
1270 case MO_8:
1271 tcg_gen_st8_i64(var, cpu_env, offset);
1272 break;
1273 case MO_16:
1274 tcg_gen_st16_i64(var, cpu_env, offset);
1275 break;
1276 case MO_32:
1277 tcg_gen_st32_i64(var, cpu_env, offset);
1278 break;
1279 case MO_64:
1280 tcg_gen_st_i64(var, cpu_env, offset);
1281 break;
1282 default:
1283 g_assert_not_reached();
1284 }
1285 }
1286
1287 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1288 {
1289 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1290 }
1291
1292 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1293 {
1294 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1295 }
1296
1297 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1298 {
1299 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1300 }
1301
1302 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1303 {
1304 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1305 }
1306
1307 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1308 {
1309 TCGv_ptr ret = tcg_temp_new_ptr();
1310 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1311 return ret;
1312 }
1313
1314 #define ARM_CP_RW_BIT (1 << 20)
1315
1316 /* Include the VFP and Neon decoders */
1317 #include "translate-vfp.inc.c"
1318 #include "translate-neon.inc.c"
1319
1320 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1321 {
1322 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1323 }
1324
1325 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1326 {
1327 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1328 }
1329
1330 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1331 {
1332 TCGv_i32 var = tcg_temp_new_i32();
1333 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1334 return var;
1335 }
1336
1337 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1338 {
1339 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1340 tcg_temp_free_i32(var);
1341 }
1342
1343 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1344 {
1345 iwmmxt_store_reg(cpu_M0, rn);
1346 }
1347
1348 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1349 {
1350 iwmmxt_load_reg(cpu_M0, rn);
1351 }
1352
1353 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1354 {
1355 iwmmxt_load_reg(cpu_V1, rn);
1356 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1357 }
1358
1359 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1360 {
1361 iwmmxt_load_reg(cpu_V1, rn);
1362 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1363 }
1364
1365 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1366 {
1367 iwmmxt_load_reg(cpu_V1, rn);
1368 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1369 }
1370
1371 #define IWMMXT_OP(name) \
1372 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1373 { \
1374 iwmmxt_load_reg(cpu_V1, rn); \
1375 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1376 }
1377
1378 #define IWMMXT_OP_ENV(name) \
1379 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1380 { \
1381 iwmmxt_load_reg(cpu_V1, rn); \
1382 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1383 }
1384
1385 #define IWMMXT_OP_ENV_SIZE(name) \
1386 IWMMXT_OP_ENV(name##b) \
1387 IWMMXT_OP_ENV(name##w) \
1388 IWMMXT_OP_ENV(name##l)
1389
1390 #define IWMMXT_OP_ENV1(name) \
1391 static inline void gen_op_iwmmxt_##name##_M0(void) \
1392 { \
1393 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1394 }
1395
1396 IWMMXT_OP(maddsq)
1397 IWMMXT_OP(madduq)
1398 IWMMXT_OP(sadb)
1399 IWMMXT_OP(sadw)
1400 IWMMXT_OP(mulslw)
1401 IWMMXT_OP(mulshw)
1402 IWMMXT_OP(mululw)
1403 IWMMXT_OP(muluhw)
1404 IWMMXT_OP(macsw)
1405 IWMMXT_OP(macuw)
1406
1407 IWMMXT_OP_ENV_SIZE(unpackl)
1408 IWMMXT_OP_ENV_SIZE(unpackh)
1409
1410 IWMMXT_OP_ENV1(unpacklub)
1411 IWMMXT_OP_ENV1(unpackluw)
1412 IWMMXT_OP_ENV1(unpacklul)
1413 IWMMXT_OP_ENV1(unpackhub)
1414 IWMMXT_OP_ENV1(unpackhuw)
1415 IWMMXT_OP_ENV1(unpackhul)
1416 IWMMXT_OP_ENV1(unpacklsb)
1417 IWMMXT_OP_ENV1(unpacklsw)
1418 IWMMXT_OP_ENV1(unpacklsl)
1419 IWMMXT_OP_ENV1(unpackhsb)
1420 IWMMXT_OP_ENV1(unpackhsw)
1421 IWMMXT_OP_ENV1(unpackhsl)
1422
1423 IWMMXT_OP_ENV_SIZE(cmpeq)
1424 IWMMXT_OP_ENV_SIZE(cmpgtu)
1425 IWMMXT_OP_ENV_SIZE(cmpgts)
1426
1427 IWMMXT_OP_ENV_SIZE(mins)
1428 IWMMXT_OP_ENV_SIZE(minu)
1429 IWMMXT_OP_ENV_SIZE(maxs)
1430 IWMMXT_OP_ENV_SIZE(maxu)
1431
1432 IWMMXT_OP_ENV_SIZE(subn)
1433 IWMMXT_OP_ENV_SIZE(addn)
1434 IWMMXT_OP_ENV_SIZE(subu)
1435 IWMMXT_OP_ENV_SIZE(addu)
1436 IWMMXT_OP_ENV_SIZE(subs)
1437 IWMMXT_OP_ENV_SIZE(adds)
1438
1439 IWMMXT_OP_ENV(avgb0)
1440 IWMMXT_OP_ENV(avgb1)
1441 IWMMXT_OP_ENV(avgw0)
1442 IWMMXT_OP_ENV(avgw1)
1443
1444 IWMMXT_OP_ENV(packuw)
1445 IWMMXT_OP_ENV(packul)
1446 IWMMXT_OP_ENV(packuq)
1447 IWMMXT_OP_ENV(packsw)
1448 IWMMXT_OP_ENV(packsl)
1449 IWMMXT_OP_ENV(packsq)
1450
1451 static void gen_op_iwmmxt_set_mup(void)
1452 {
1453 TCGv_i32 tmp;
1454 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455 tcg_gen_ori_i32(tmp, tmp, 2);
1456 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1457 }
1458
1459 static void gen_op_iwmmxt_set_cup(void)
1460 {
1461 TCGv_i32 tmp;
1462 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463 tcg_gen_ori_i32(tmp, tmp, 1);
1464 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1465 }
1466
1467 static void gen_op_iwmmxt_setpsr_nz(void)
1468 {
1469 TCGv_i32 tmp = tcg_temp_new_i32();
1470 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1471 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1472 }
1473
1474 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1475 {
1476 iwmmxt_load_reg(cpu_V1, rn);
1477 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1478 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1479 }
1480
1481 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1482 TCGv_i32 dest)
1483 {
1484 int rd;
1485 uint32_t offset;
1486 TCGv_i32 tmp;
1487
1488 rd = (insn >> 16) & 0xf;
1489 tmp = load_reg(s, rd);
1490
1491 offset = (insn & 0xff) << ((insn >> 7) & 2);
1492 if (insn & (1 << 24)) {
1493 /* Pre indexed */
1494 if (insn & (1 << 23))
1495 tcg_gen_addi_i32(tmp, tmp, offset);
1496 else
1497 tcg_gen_addi_i32(tmp, tmp, -offset);
1498 tcg_gen_mov_i32(dest, tmp);
1499 if (insn & (1 << 21))
1500 store_reg(s, rd, tmp);
1501 else
1502 tcg_temp_free_i32(tmp);
1503 } else if (insn & (1 << 21)) {
1504 /* Post indexed */
1505 tcg_gen_mov_i32(dest, tmp);
1506 if (insn & (1 << 23))
1507 tcg_gen_addi_i32(tmp, tmp, offset);
1508 else
1509 tcg_gen_addi_i32(tmp, tmp, -offset);
1510 store_reg(s, rd, tmp);
1511 } else if (!(insn & (1 << 23)))
1512 return 1;
1513 return 0;
1514 }
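/*
 * The 8-bit immediate above is scaled by 4 when insn bit 8 is set
 * ((insn >> 7) & 2 selects a shift of 0 or 2), while bits 24, 23 and
 * 21 select pre- vs post-indexing, add vs subtract, and writeback, in
 * the usual ARM coprocessor load/store layout.
 */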
1515
1516 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1517 {
1518 int rd = (insn >> 0) & 0xf;
1519 TCGv_i32 tmp;
1520
1521 if (insn & (1 << 8)) {
1522 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1523 return 1;
1524 } else {
1525 tmp = iwmmxt_load_creg(rd);
1526 }
1527 } else {
1528 tmp = tcg_temp_new_i32();
1529 iwmmxt_load_reg(cpu_V0, rd);
1530 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1531 }
1532 tcg_gen_andi_i32(tmp, tmp, mask);
1533 tcg_gen_mov_i32(dest, tmp);
1534 tcg_temp_free_i32(tmp);
1535 return 0;
1536 }
1537
1538 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1539 (i.e. an undefined instruction). */
1540 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1541 {
1542 int rd, wrd;
1543 int rdhi, rdlo, rd0, rd1, i;
1544 TCGv_i32 addr;
1545 TCGv_i32 tmp, tmp2, tmp3;
1546
1547 if ((insn & 0x0e000e00) == 0x0c000000) {
1548 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1549 wrd = insn & 0xf;
1550 rdlo = (insn >> 12) & 0xf;
1551 rdhi = (insn >> 16) & 0xf;
1552 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1553 iwmmxt_load_reg(cpu_V0, wrd);
1554 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1555 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1556 } else { /* TMCRR */
1557 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1558 iwmmxt_store_reg(cpu_V0, wrd);
1559 gen_op_iwmmxt_set_mup();
1560 }
1561 return 0;
1562 }
1563
1564 wrd = (insn >> 12) & 0xf;
1565 addr = tcg_temp_new_i32();
1566 if (gen_iwmmxt_address(s, insn, addr)) {
1567 tcg_temp_free_i32(addr);
1568 return 1;
1569 }
1570 if (insn & ARM_CP_RW_BIT) {
1571 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1572 tmp = tcg_temp_new_i32();
1573 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1574 iwmmxt_store_creg(wrd, tmp);
1575 } else {
1576 i = 1;
1577 if (insn & (1 << 8)) {
1578 if (insn & (1 << 22)) { /* WLDRD */
1579 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1580 i = 0;
1581 } else { /* WLDRW wRd */
1582 tmp = tcg_temp_new_i32();
1583 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1584 }
1585 } else {
1586 tmp = tcg_temp_new_i32();
1587 if (insn & (1 << 22)) { /* WLDRH */
1588 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1589 } else { /* WLDRB */
1590 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1591 }
1592 }
1593 if (i) {
1594 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1595 tcg_temp_free_i32(tmp);
1596 }
1597 gen_op_iwmmxt_movq_wRn_M0(wrd);
1598 }
1599 } else {
1600 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1601 tmp = iwmmxt_load_creg(wrd);
1602 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1603 } else {
1604 gen_op_iwmmxt_movq_M0_wRn(wrd);
1605 tmp = tcg_temp_new_i32();
1606 if (insn & (1 << 8)) {
1607 if (insn & (1 << 22)) { /* WSTRD */
1608 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1609 } else { /* WSTRW wRd */
1610 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1611 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1612 }
1613 } else {
1614 if (insn & (1 << 22)) { /* WSTRH */
1615 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1616 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1617 } else { /* WSTRB */
1618 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1619 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1620 }
1621 }
1622 }
1623 tcg_temp_free_i32(tmp);
1624 }
1625 tcg_temp_free_i32(addr);
1626 return 0;
1627 }
1628
1629 if ((insn & 0x0f000000) != 0x0e000000)
1630 return 1;
1631
1632 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1633 case 0x000: /* WOR */
1634 wrd = (insn >> 12) & 0xf;
1635 rd0 = (insn >> 0) & 0xf;
1636 rd1 = (insn >> 16) & 0xf;
1637 gen_op_iwmmxt_movq_M0_wRn(rd0);
1638 gen_op_iwmmxt_orq_M0_wRn(rd1);
1639 gen_op_iwmmxt_setpsr_nz();
1640 gen_op_iwmmxt_movq_wRn_M0(wrd);
1641 gen_op_iwmmxt_set_mup();
1642 gen_op_iwmmxt_set_cup();
1643 break;
1644 case 0x011: /* TMCR */
1645 if (insn & 0xf)
1646 return 1;
1647 rd = (insn >> 12) & 0xf;
1648 wrd = (insn >> 16) & 0xf;
1649 switch (wrd) {
1650 case ARM_IWMMXT_wCID:
1651 case ARM_IWMMXT_wCASF:
1652 break;
1653 case ARM_IWMMXT_wCon:
1654 gen_op_iwmmxt_set_cup();
1655 /* Fall through. */
1656 case ARM_IWMMXT_wCSSF:
1657 tmp = iwmmxt_load_creg(wrd);
1658 tmp2 = load_reg(s, rd);
1659 tcg_gen_andc_i32(tmp, tmp, tmp2);
1660 tcg_temp_free_i32(tmp2);
1661 iwmmxt_store_creg(wrd, tmp);
1662 break;
1663 case ARM_IWMMXT_wCGR0:
1664 case ARM_IWMMXT_wCGR1:
1665 case ARM_IWMMXT_wCGR2:
1666 case ARM_IWMMXT_wCGR3:
1667 gen_op_iwmmxt_set_cup();
1668 tmp = load_reg(s, rd);
1669 iwmmxt_store_creg(wrd, tmp);
1670 break;
1671 default:
1672 return 1;
1673 }
1674 break;
1675 case 0x100: /* WXOR */
1676 wrd = (insn >> 12) & 0xf;
1677 rd0 = (insn >> 0) & 0xf;
1678 rd1 = (insn >> 16) & 0xf;
1679 gen_op_iwmmxt_movq_M0_wRn(rd0);
1680 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1681 gen_op_iwmmxt_setpsr_nz();
1682 gen_op_iwmmxt_movq_wRn_M0(wrd);
1683 gen_op_iwmmxt_set_mup();
1684 gen_op_iwmmxt_set_cup();
1685 break;
1686 case 0x111: /* TMRC */
1687 if (insn & 0xf)
1688 return 1;
1689 rd = (insn >> 12) & 0xf;
1690 wrd = (insn >> 16) & 0xf;
1691 tmp = iwmmxt_load_creg(wrd);
1692 store_reg(s, rd, tmp);
1693 break;
1694 case 0x300: /* WANDN */
1695 wrd = (insn >> 12) & 0xf;
1696 rd0 = (insn >> 0) & 0xf;
1697 rd1 = (insn >> 16) & 0xf;
1698 gen_op_iwmmxt_movq_M0_wRn(rd0);
1699 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1700 gen_op_iwmmxt_andq_M0_wRn(rd1);
1701 gen_op_iwmmxt_setpsr_nz();
1702 gen_op_iwmmxt_movq_wRn_M0(wrd);
1703 gen_op_iwmmxt_set_mup();
1704 gen_op_iwmmxt_set_cup();
1705 break;
1706 case 0x200: /* WAND */
1707 wrd = (insn >> 12) & 0xf;
1708 rd0 = (insn >> 0) & 0xf;
1709 rd1 = (insn >> 16) & 0xf;
1710 gen_op_iwmmxt_movq_M0_wRn(rd0);
1711 gen_op_iwmmxt_andq_M0_wRn(rd1);
1712 gen_op_iwmmxt_setpsr_nz();
1713 gen_op_iwmmxt_movq_wRn_M0(wrd);
1714 gen_op_iwmmxt_set_mup();
1715 gen_op_iwmmxt_set_cup();
1716 break;
1717 case 0x810: case 0xa10: /* WMADD */
1718 wrd = (insn >> 12) & 0xf;
1719 rd0 = (insn >> 0) & 0xf;
1720 rd1 = (insn >> 16) & 0xf;
1721 gen_op_iwmmxt_movq_M0_wRn(rd0);
1722 if (insn & (1 << 21))
1723 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1724 else
1725 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1726 gen_op_iwmmxt_movq_wRn_M0(wrd);
1727 gen_op_iwmmxt_set_mup();
1728 break;
1729 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1730 wrd = (insn >> 12) & 0xf;
1731 rd0 = (insn >> 16) & 0xf;
1732 rd1 = (insn >> 0) & 0xf;
1733 gen_op_iwmmxt_movq_M0_wRn(rd0);
1734 switch ((insn >> 22) & 3) {
1735 case 0:
1736 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1737 break;
1738 case 1:
1739 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1740 break;
1741 case 2:
1742 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1743 break;
1744 case 3:
1745 return 1;
1746 }
1747 gen_op_iwmmxt_movq_wRn_M0(wrd);
1748 gen_op_iwmmxt_set_mup();
1749 gen_op_iwmmxt_set_cup();
1750 break;
1751 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1752 wrd = (insn >> 12) & 0xf;
1753 rd0 = (insn >> 16) & 0xf;
1754 rd1 = (insn >> 0) & 0xf;
1755 gen_op_iwmmxt_movq_M0_wRn(rd0);
1756 switch ((insn >> 22) & 3) {
1757 case 0:
1758 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1759 break;
1760 case 1:
1761 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1762 break;
1763 case 2:
1764 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1765 break;
1766 case 3:
1767 return 1;
1768 }
1769 gen_op_iwmmxt_movq_wRn_M0(wrd);
1770 gen_op_iwmmxt_set_mup();
1771 gen_op_iwmmxt_set_cup();
1772 break;
1773 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1774 wrd = (insn >> 12) & 0xf;
1775 rd0 = (insn >> 16) & 0xf;
1776 rd1 = (insn >> 0) & 0xf;
1777 gen_op_iwmmxt_movq_M0_wRn(rd0);
1778 if (insn & (1 << 22))
1779 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1780 else
1781 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1782 if (!(insn & (1 << 20)))
1783 gen_op_iwmmxt_addl_M0_wRn(wrd);
1784 gen_op_iwmmxt_movq_wRn_M0(wrd);
1785 gen_op_iwmmxt_set_mup();
1786 break;
1787 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1788 wrd = (insn >> 12) & 0xf;
1789 rd0 = (insn >> 16) & 0xf;
1790 rd1 = (insn >> 0) & 0xf;
1791 gen_op_iwmmxt_movq_M0_wRn(rd0);
1792 if (insn & (1 << 21)) {
1793 if (insn & (1 << 20))
1794 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1795 else
1796 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1797 } else {
1798 if (insn & (1 << 20))
1799 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1800 else
1801 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1802 }
1803 gen_op_iwmmxt_movq_wRn_M0(wrd);
1804 gen_op_iwmmxt_set_mup();
1805 break;
1806 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1807 wrd = (insn >> 12) & 0xf;
1808 rd0 = (insn >> 16) & 0xf;
1809 rd1 = (insn >> 0) & 0xf;
1810 gen_op_iwmmxt_movq_M0_wRn(rd0);
1811 if (insn & (1 << 21))
1812 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1813 else
1814 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1815 if (!(insn & (1 << 20))) {
1816 iwmmxt_load_reg(cpu_V1, wrd);
1817 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1818 }
1819 gen_op_iwmmxt_movq_wRn_M0(wrd);
1820 gen_op_iwmmxt_set_mup();
1821 break;
1822 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1823 wrd = (insn >> 12) & 0xf;
1824 rd0 = (insn >> 16) & 0xf;
1825 rd1 = (insn >> 0) & 0xf;
1826 gen_op_iwmmxt_movq_M0_wRn(rd0);
1827 switch ((insn >> 22) & 3) {
1828 case 0:
1829 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1830 break;
1831 case 1:
1832 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1833 break;
1834 case 2:
1835 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1836 break;
1837 case 3:
1838 return 1;
1839 }
1840 gen_op_iwmmxt_movq_wRn_M0(wrd);
1841 gen_op_iwmmxt_set_mup();
1842 gen_op_iwmmxt_set_cup();
1843 break;
1844 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1845 wrd = (insn >> 12) & 0xf;
1846 rd0 = (insn >> 16) & 0xf;
1847 rd1 = (insn >> 0) & 0xf;
1848 gen_op_iwmmxt_movq_M0_wRn(rd0);
1849 if (insn & (1 << 22)) {
1850 if (insn & (1 << 20))
1851 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1852 else
1853 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1854 } else {
1855 if (insn & (1 << 20))
1856 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1857 else
1858 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1859 }
1860 gen_op_iwmmxt_movq_wRn_M0(wrd);
1861 gen_op_iwmmxt_set_mup();
1862 gen_op_iwmmxt_set_cup();
1863 break;
1864 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1865 wrd = (insn >> 12) & 0xf;
1866 rd0 = (insn >> 16) & 0xf;
1867 rd1 = (insn >> 0) & 0xf;
1868 gen_op_iwmmxt_movq_M0_wRn(rd0);
1869 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1870 tcg_gen_andi_i32(tmp, tmp, 7);
1871 iwmmxt_load_reg(cpu_V1, rd1);
1872 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1873 tcg_temp_free_i32(tmp);
1874 gen_op_iwmmxt_movq_wRn_M0(wrd);
1875 gen_op_iwmmxt_set_mup();
1876 break;
1877 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1878 if (((insn >> 6) & 3) == 3)
1879 return 1;
1880 rd = (insn >> 12) & 0xf;
1881 wrd = (insn >> 16) & 0xf;
1882 tmp = load_reg(s, rd);
1883 gen_op_iwmmxt_movq_M0_wRn(wrd);
1884 switch ((insn >> 6) & 3) {
1885 case 0:
1886 tmp2 = tcg_const_i32(0xff);
1887 tmp3 = tcg_const_i32((insn & 7) << 3);
1888 break;
1889 case 1:
1890 tmp2 = tcg_const_i32(0xffff);
1891 tmp3 = tcg_const_i32((insn & 3) << 4);
1892 break;
1893 case 2:
1894 tmp2 = tcg_const_i32(0xffffffff);
1895 tmp3 = tcg_const_i32((insn & 1) << 5);
1896 break;
1897 default:
1898 tmp2 = NULL;
1899 tmp3 = NULL;
1900 }
1901 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1902 tcg_temp_free_i32(tmp3);
1903 tcg_temp_free_i32(tmp2);
1904 tcg_temp_free_i32(tmp);
1905 gen_op_iwmmxt_movq_wRn_M0(wrd);
1906 gen_op_iwmmxt_set_mup();
1907 break;
1908 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1909 rd = (insn >> 12) & 0xf;
1910 wrd = (insn >> 16) & 0xf;
1911 if (rd == 15 || ((insn >> 22) & 3) == 3)
1912 return 1;
1913 gen_op_iwmmxt_movq_M0_wRn(wrd);
1914 tmp = tcg_temp_new_i32();
1915 switch ((insn >> 22) & 3) {
1916 case 0:
1917 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1918 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1919 if (insn & 8) {
1920 tcg_gen_ext8s_i32(tmp, tmp);
1921 } else {
1922 tcg_gen_andi_i32(tmp, tmp, 0xff);
1923 }
1924 break;
1925 case 1:
1926 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1927 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1928 if (insn & 8) {
1929 tcg_gen_ext16s_i32(tmp, tmp);
1930 } else {
1931 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1932 }
1933 break;
1934 case 2:
1935 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1936 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1937 break;
1938 }
1939 store_reg(s, rd, tmp);
1940 break;
1941 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1942 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1943 return 1;
1944 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1945 switch ((insn >> 22) & 3) {
1946 case 0:
1947 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1948 break;
1949 case 1:
1950 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1951 break;
1952 case 2:
1953 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1954 break;
1955 }
1956 tcg_gen_shli_i32(tmp, tmp, 28);
1957 gen_set_nzcv(tmp);
1958 tcg_temp_free_i32(tmp);
1959 break;
1960 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1961 if (((insn >> 6) & 3) == 3)
1962 return 1;
1963 rd = (insn >> 12) & 0xf;
1964 wrd = (insn >> 16) & 0xf;
1965 tmp = load_reg(s, rd);
1966 switch ((insn >> 6) & 3) {
1967 case 0:
1968 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1969 break;
1970 case 1:
1971 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1972 break;
1973 case 2:
1974 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1975 break;
1976 }
1977 tcg_temp_free_i32(tmp);
1978 gen_op_iwmmxt_movq_wRn_M0(wrd);
1979 gen_op_iwmmxt_set_mup();
1980 break;
1981 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1982 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1983 return 1;
1984 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1985 tmp2 = tcg_temp_new_i32();
1986 tcg_gen_mov_i32(tmp2, tmp);
1987 switch ((insn >> 22) & 3) {
1988 case 0:
1989 for (i = 0; i < 7; i ++) {
1990 tcg_gen_shli_i32(tmp2, tmp2, 4);
1991 tcg_gen_and_i32(tmp, tmp, tmp2);
1992 }
1993 break;
1994 case 1:
1995 for (i = 0; i < 3; i ++) {
1996 tcg_gen_shli_i32(tmp2, tmp2, 8);
1997 tcg_gen_and_i32(tmp, tmp, tmp2);
1998 }
1999 break;
2000 case 2:
2001 tcg_gen_shli_i32(tmp2, tmp2, 16);
2002 tcg_gen_and_i32(tmp, tmp, tmp2);
2003 break;
2004 }
2005 gen_set_nzcv(tmp);
2006 tcg_temp_free_i32(tmp2);
2007 tcg_temp_free_i32(tmp);
2008 break;
2009 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2010 wrd = (insn >> 12) & 0xf;
2011 rd0 = (insn >> 16) & 0xf;
2012 gen_op_iwmmxt_movq_M0_wRn(rd0);
2013 switch ((insn >> 22) & 3) {
2014 case 0:
2015 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2016 break;
2017 case 1:
2018 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2019 break;
2020 case 2:
2021 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2022 break;
2023 case 3:
2024 return 1;
2025 }
2026 gen_op_iwmmxt_movq_wRn_M0(wrd);
2027 gen_op_iwmmxt_set_mup();
2028 break;
2029 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2030 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2031 return 1;
2032 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2033 tmp2 = tcg_temp_new_i32();
2034 tcg_gen_mov_i32(tmp2, tmp);
2035 switch ((insn >> 22) & 3) {
2036 case 0:
2037 for (i = 0; i < 7; i ++) {
2038 tcg_gen_shli_i32(tmp2, tmp2, 4);
2039 tcg_gen_or_i32(tmp, tmp, tmp2);
2040 }
2041 break;
2042 case 1:
2043 for (i = 0; i < 3; i ++) {
2044 tcg_gen_shli_i32(tmp2, tmp2, 8);
2045 tcg_gen_or_i32(tmp, tmp, tmp2);
2046 }
2047 break;
2048 case 2:
2049 tcg_gen_shli_i32(tmp2, tmp2, 16);
2050 tcg_gen_or_i32(tmp, tmp, tmp2);
2051 break;
2052 }
2053 gen_set_nzcv(tmp);
2054 tcg_temp_free_i32(tmp2);
2055 tcg_temp_free_i32(tmp);
2056 break;
2057 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2058 rd = (insn >> 12) & 0xf;
2059 rd0 = (insn >> 16) & 0xf;
2060 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2061 return 1;
2062 gen_op_iwmmxt_movq_M0_wRn(rd0);
2063 tmp = tcg_temp_new_i32();
2064 switch ((insn >> 22) & 3) {
2065 case 0:
2066 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2067 break;
2068 case 1:
2069 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2070 break;
2071 case 2:
2072 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2073 break;
2074 }
2075 store_reg(s, rd, tmp);
2076 break;
2077 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2078 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2079 wrd = (insn >> 12) & 0xf;
2080 rd0 = (insn >> 16) & 0xf;
2081 rd1 = (insn >> 0) & 0xf;
2082 gen_op_iwmmxt_movq_M0_wRn(rd0);
2083 switch ((insn >> 22) & 3) {
2084 case 0:
2085 if (insn & (1 << 21))
2086 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2087 else
2088 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2089 break;
2090 case 1:
2091 if (insn & (1 << 21))
2092 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2093 else
2094 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2095 break;
2096 case 2:
2097 if (insn & (1 << 21))
2098 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2099 else
2100 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2101 break;
2102 case 3:
2103 return 1;
2104 }
2105 gen_op_iwmmxt_movq_wRn_M0(wrd);
2106 gen_op_iwmmxt_set_mup();
2107 gen_op_iwmmxt_set_cup();
2108 break;
2109 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2110 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2111 wrd = (insn >> 12) & 0xf;
2112 rd0 = (insn >> 16) & 0xf;
2113 gen_op_iwmmxt_movq_M0_wRn(rd0);
2114 switch ((insn >> 22) & 3) {
2115 case 0:
2116 if (insn & (1 << 21))
2117 gen_op_iwmmxt_unpacklsb_M0();
2118 else
2119 gen_op_iwmmxt_unpacklub_M0();
2120 break;
2121 case 1:
2122 if (insn & (1 << 21))
2123 gen_op_iwmmxt_unpacklsw_M0();
2124 else
2125 gen_op_iwmmxt_unpackluw_M0();
2126 break;
2127 case 2:
2128 if (insn & (1 << 21))
2129 gen_op_iwmmxt_unpacklsl_M0();
2130 else
2131 gen_op_iwmmxt_unpacklul_M0();
2132 break;
2133 case 3:
2134 return 1;
2135 }
2136 gen_op_iwmmxt_movq_wRn_M0(wrd);
2137 gen_op_iwmmxt_set_mup();
2138 gen_op_iwmmxt_set_cup();
2139 break;
2140 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2141 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2142 wrd = (insn >> 12) & 0xf;
2143 rd0 = (insn >> 16) & 0xf;
2144 gen_op_iwmmxt_movq_M0_wRn(rd0);
2145 switch ((insn >> 22) & 3) {
2146 case 0:
2147 if (insn & (1 << 21))
2148 gen_op_iwmmxt_unpackhsb_M0();
2149 else
2150 gen_op_iwmmxt_unpackhub_M0();
2151 break;
2152 case 1:
2153 if (insn & (1 << 21))
2154 gen_op_iwmmxt_unpackhsw_M0();
2155 else
2156 gen_op_iwmmxt_unpackhuw_M0();
2157 break;
2158 case 2:
2159 if (insn & (1 << 21))
2160 gen_op_iwmmxt_unpackhsl_M0();
2161 else
2162 gen_op_iwmmxt_unpackhul_M0();
2163 break;
2164 case 3:
2165 return 1;
2166 }
2167 gen_op_iwmmxt_movq_wRn_M0(wrd);
2168 gen_op_iwmmxt_set_mup();
2169 gen_op_iwmmxt_set_cup();
2170 break;
2171 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2172 case 0x214: case 0x614: case 0xa14: case 0xe14:
2173 if (((insn >> 22) & 3) == 0)
2174 return 1;
2175 wrd = (insn >> 12) & 0xf;
2176 rd0 = (insn >> 16) & 0xf;
2177 gen_op_iwmmxt_movq_M0_wRn(rd0);
2178 tmp = tcg_temp_new_i32();
2179 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2180 tcg_temp_free_i32(tmp);
2181 return 1;
2182 }
2183 switch ((insn >> 22) & 3) {
2184 case 1:
2185 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2186 break;
2187 case 2:
2188 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2189 break;
2190 case 3:
2191 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2192 break;
2193 }
2194 tcg_temp_free_i32(tmp);
2195 gen_op_iwmmxt_movq_wRn_M0(wrd);
2196 gen_op_iwmmxt_set_mup();
2197 gen_op_iwmmxt_set_cup();
2198 break;
2199 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2200 case 0x014: case 0x414: case 0x814: case 0xc14:
2201 if (((insn >> 22) & 3) == 0)
2202 return 1;
2203 wrd = (insn >> 12) & 0xf;
2204 rd0 = (insn >> 16) & 0xf;
2205 gen_op_iwmmxt_movq_M0_wRn(rd0);
2206 tmp = tcg_temp_new_i32();
2207 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2208 tcg_temp_free_i32(tmp);
2209 return 1;
2210 }
2211 switch ((insn >> 22) & 3) {
2212 case 1:
2213 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2214 break;
2215 case 2:
2216 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2217 break;
2218 case 3:
2219 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2220 break;
2221 }
2222 tcg_temp_free_i32(tmp);
2223 gen_op_iwmmxt_movq_wRn_M0(wrd);
2224 gen_op_iwmmxt_set_mup();
2225 gen_op_iwmmxt_set_cup();
2226 break;
2227 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2228 case 0x114: case 0x514: case 0x914: case 0xd14:
2229 if (((insn >> 22) & 3) == 0)
2230 return 1;
2231 wrd = (insn >> 12) & 0xf;
2232 rd0 = (insn >> 16) & 0xf;
2233 gen_op_iwmmxt_movq_M0_wRn(rd0);
2234 tmp = tcg_temp_new_i32();
2235 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2236 tcg_temp_free_i32(tmp);
2237 return 1;
2238 }
2239 switch ((insn >> 22) & 3) {
2240 case 1:
2241 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2242 break;
2243 case 2:
2244 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2245 break;
2246 case 3:
2247 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2248 break;
2249 }
2250 tcg_temp_free_i32(tmp);
2251 gen_op_iwmmxt_movq_wRn_M0(wrd);
2252 gen_op_iwmmxt_set_mup();
2253 gen_op_iwmmxt_set_cup();
2254 break;
2255 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2256 case 0x314: case 0x714: case 0xb14: case 0xf14:
2257 if (((insn >> 22) & 3) == 0)
2258 return 1;
2259 wrd = (insn >> 12) & 0xf;
2260 rd0 = (insn >> 16) & 0xf;
2261 gen_op_iwmmxt_movq_M0_wRn(rd0);
2262 tmp = tcg_temp_new_i32();
2263 switch ((insn >> 22) & 3) {
2264 case 1:
2265 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2266 tcg_temp_free_i32(tmp);
2267 return 1;
2268 }
2269 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2270 break;
2271 case 2:
2272 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2273 tcg_temp_free_i32(tmp);
2274 return 1;
2275 }
2276 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2277 break;
2278 case 3:
2279 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2280 tcg_temp_free_i32(tmp);
2281 return 1;
2282 }
2283 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2284 break;
2285 }
2286 tcg_temp_free_i32(tmp);
2287 gen_op_iwmmxt_movq_wRn_M0(wrd);
2288 gen_op_iwmmxt_set_mup();
2289 gen_op_iwmmxt_set_cup();
2290 break;
2291 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2292 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2293 wrd = (insn >> 12) & 0xf;
2294 rd0 = (insn >> 16) & 0xf;
2295 rd1 = (insn >> 0) & 0xf;
2296 gen_op_iwmmxt_movq_M0_wRn(rd0);
2297 switch ((insn >> 22) & 3) {
2298 case 0:
2299 if (insn & (1 << 21))
2300 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2301 else
2302 gen_op_iwmmxt_minub_M0_wRn(rd1);
2303 break;
2304 case 1:
2305 if (insn & (1 << 21))
2306 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2307 else
2308 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2309 break;
2310 case 2:
2311 if (insn & (1 << 21))
2312 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2313 else
2314 gen_op_iwmmxt_minul_M0_wRn(rd1);
2315 break;
2316 case 3:
2317 return 1;
2318 }
2319 gen_op_iwmmxt_movq_wRn_M0(wrd);
2320 gen_op_iwmmxt_set_mup();
2321 break;
2322 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2323 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2324 wrd = (insn >> 12) & 0xf;
2325 rd0 = (insn >> 16) & 0xf;
2326 rd1 = (insn >> 0) & 0xf;
2327 gen_op_iwmmxt_movq_M0_wRn(rd0);
2328 switch ((insn >> 22) & 3) {
2329 case 0:
2330 if (insn & (1 << 21))
2331 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2332 else
2333 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2334 break;
2335 case 1:
2336 if (insn & (1 << 21))
2337 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2338 else
2339 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2340 break;
2341 case 2:
2342 if (insn & (1 << 21))
2343 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2344 else
2345 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2346 break;
2347 case 3:
2348 return 1;
2349 }
2350 gen_op_iwmmxt_movq_wRn_M0(wrd);
2351 gen_op_iwmmxt_set_mup();
2352 break;
2353 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2354 case 0x402: case 0x502: case 0x602: case 0x702:
2355 wrd = (insn >> 12) & 0xf;
2356 rd0 = (insn >> 16) & 0xf;
2357 rd1 = (insn >> 0) & 0xf;
2358 gen_op_iwmmxt_movq_M0_wRn(rd0);
2359 tmp = tcg_const_i32((insn >> 20) & 3);
2360 iwmmxt_load_reg(cpu_V1, rd1);
2361 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2362 tcg_temp_free_i32(tmp);
2363 gen_op_iwmmxt_movq_wRn_M0(wrd);
2364 gen_op_iwmmxt_set_mup();
2365 break;
2366 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2367 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2368 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2369 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2370 wrd = (insn >> 12) & 0xf;
2371 rd0 = (insn >> 16) & 0xf;
2372 rd1 = (insn >> 0) & 0xf;
2373 gen_op_iwmmxt_movq_M0_wRn(rd0);
2374 switch ((insn >> 20) & 0xf) {
2375 case 0x0:
2376 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2377 break;
2378 case 0x1:
2379 gen_op_iwmmxt_subub_M0_wRn(rd1);
2380 break;
2381 case 0x3:
2382 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2383 break;
2384 case 0x4:
2385 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2386 break;
2387 case 0x5:
2388 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2389 break;
2390 case 0x7:
2391 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2392 break;
2393 case 0x8:
2394 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2395 break;
2396 case 0x9:
2397 gen_op_iwmmxt_subul_M0_wRn(rd1);
2398 break;
2399 case 0xb:
2400 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2401 break;
2402 default:
2403 return 1;
2404 }
2405 gen_op_iwmmxt_movq_wRn_M0(wrd);
2406 gen_op_iwmmxt_set_mup();
2407 gen_op_iwmmxt_set_cup();
2408 break;
2409 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2410 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2411 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2412 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2413 wrd = (insn >> 12) & 0xf;
2414 rd0 = (insn >> 16) & 0xf;
2415 gen_op_iwmmxt_movq_M0_wRn(rd0);
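/* The 8-bit WSHUFH immediate is split across the encoding: the high
 * nibble comes from insn[23:20] and the low nibble from insn[3:0].
 */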
2416 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2417 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2418 tcg_temp_free_i32(tmp);
2419 gen_op_iwmmxt_movq_wRn_M0(wrd);
2420 gen_op_iwmmxt_set_mup();
2421 gen_op_iwmmxt_set_cup();
2422 break;
2423 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2424 case 0x418: case 0x518: case 0x618: case 0x718:
2425 case 0x818: case 0x918: case 0xa18: case 0xb18:
2426 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2427 wrd = (insn >> 12) & 0xf;
2428 rd0 = (insn >> 16) & 0xf;
2429 rd1 = (insn >> 0) & 0xf;
2430 gen_op_iwmmxt_movq_M0_wRn(rd0);
2431 switch ((insn >> 20) & 0xf) {
2432 case 0x0:
2433 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2434 break;
2435 case 0x1:
2436 gen_op_iwmmxt_addub_M0_wRn(rd1);
2437 break;
2438 case 0x3:
2439 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2440 break;
2441 case 0x4:
2442 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2443 break;
2444 case 0x5:
2445 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2446 break;
2447 case 0x7:
2448 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2449 break;
2450 case 0x8:
2451 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2452 break;
2453 case 0x9:
2454 gen_op_iwmmxt_addul_M0_wRn(rd1);
2455 break;
2456 case 0xb:
2457 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2458 break;
2459 default:
2460 return 1;
2461 }
2462 gen_op_iwmmxt_movq_wRn_M0(wrd);
2463 gen_op_iwmmxt_set_mup();
2464 gen_op_iwmmxt_set_cup();
2465 break;
2466 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2467 case 0x408: case 0x508: case 0x608: case 0x708:
2468 case 0x808: case 0x908: case 0xa08: case 0xb08:
2469 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2470 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2471 return 1;
2472 wrd = (insn >> 12) & 0xf;
2473 rd0 = (insn >> 16) & 0xf;
2474 rd1 = (insn >> 0) & 0xf;
2475 gen_op_iwmmxt_movq_M0_wRn(rd0);
2476 switch ((insn >> 22) & 3) {
2477 case 1:
2478 if (insn & (1 << 21))
2479 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2480 else
2481 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2482 break;
2483 case 2:
2484 if (insn & (1 << 21))
2485 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2486 else
2487 gen_op_iwmmxt_packul_M0_wRn(rd1);
2488 break;
2489 case 3:
2490 if (insn & (1 << 21))
2491 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2492 else
2493 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2494 break;
2495 }
2496 gen_op_iwmmxt_movq_wRn_M0(wrd);
2497 gen_op_iwmmxt_set_mup();
2498 gen_op_iwmmxt_set_cup();
2499 break;
2500 case 0x201: case 0x203: case 0x205: case 0x207:
2501 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2502 case 0x211: case 0x213: case 0x215: case 0x217:
2503 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2504 wrd = (insn >> 5) & 0xf;
2505 rd0 = (insn >> 12) & 0xf;
2506 rd1 = (insn >> 0) & 0xf;
2507 if (rd0 == 0xf || rd1 == 0xf)
2508 return 1;
2509 gen_op_iwmmxt_movq_M0_wRn(wrd);
2510 tmp = load_reg(s, rd0);
2511 tmp2 = load_reg(s, rd1);
2512 switch ((insn >> 16) & 0xf) {
2513 case 0x0: /* TMIA */
2514 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2515 break;
2516 case 0x8: /* TMIAPH */
2517 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2518 break;
2519 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2520 if (insn & (1 << 16))
2521 tcg_gen_shri_i32(tmp, tmp, 16);
2522 if (insn & (1 << 17))
2523 tcg_gen_shri_i32(tmp2, tmp2, 16);
2524 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2525 break;
2526 default:
2527 tcg_temp_free_i32(tmp2);
2528 tcg_temp_free_i32(tmp);
2529 return 1;
2530 }
2531 tcg_temp_free_i32(tmp2);
2532 tcg_temp_free_i32(tmp);
2533 gen_op_iwmmxt_movq_wRn_M0(wrd);
2534 gen_op_iwmmxt_set_mup();
2535 break;
2536 default:
2537 return 1;
2538 }
2539
2540 return 0;
2541 }
2542
2543 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2544 (i.e. an undefined instruction). */
2545 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2546 {
2547 int acc, rd0, rd1, rdhi, rdlo;
2548 TCGv_i32 tmp, tmp2;
2549
2550 if ((insn & 0x0ff00f10) == 0x0e200010) {
2551 /* Multiply with Internal Accumulate Format */
2552 rd0 = (insn >> 12) & 0xf;
2553 rd1 = insn & 0xf;
2554 acc = (insn >> 5) & 7;
2555
2556 if (acc != 0)
2557 return 1;
2558
2559 tmp = load_reg(s, rd0);
2560 tmp2 = load_reg(s, rd1);
2561 switch ((insn >> 16) & 0xf) {
2562 case 0x0: /* MIA */
2563 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2564 break;
2565 case 0x8: /* MIAPH */
2566 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2567 break;
2568 case 0xc: /* MIABB */
2569 case 0xd: /* MIABT */
2570 case 0xe: /* MIATB */
2571 case 0xf: /* MIATT */
2572 if (insn & (1 << 16))
2573 tcg_gen_shri_i32(tmp, tmp, 16);
2574 if (insn & (1 << 17))
2575 tcg_gen_shri_i32(tmp2, tmp2, 16);
2576 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2577 break;
2578 default:
2579 return 1;
2580 }
2581 tcg_temp_free_i32(tmp2);
2582 tcg_temp_free_i32(tmp);
2583
2584 gen_op_iwmmxt_movq_wRn_M0(acc);
2585 return 0;
2586 }
2587
2588 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2589 /* Internal Accumulator Access Format */
2590 rdhi = (insn >> 16) & 0xf;
2591 rdlo = (insn >> 12) & 0xf;
2592 acc = insn & 7;
2593
2594 if (acc != 0)
2595 return 1;
2596
2597 if (insn & ARM_CP_RW_BIT) { /* MRA */
2598 iwmmxt_load_reg(cpu_V0, acc);
2599 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2600 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
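/* The internal accumulator is 40 bits wide, so only bits [39:32] are
 * returned in rdhi.
 */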
2601 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2602 } else { /* MAR */
2603 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2604 iwmmxt_store_reg(cpu_V0, acc);
2605 }
2606 return 0;
2607 }
2608
2609 return 1;
2610 }
2611
2612 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2613 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2614 if (dc_isar_feature(aa32_simd_r32, s)) { \
2615 reg = (((insn) >> (bigbit)) & 0x0f) \
2616 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2617 } else { \
2618 if (insn & (1 << (smallbit))) \
2619 return 1; \
2620 reg = ((insn) >> (bigbit)) & 0x0f; \
2621 }} while (0)
2622
2623 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2624 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2625 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
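/* Worked example (illustrative): with aa32_simd_r32 present, VFP_DREG_D
 * on an insn with bits [15:12] == 0x5 and the D bit (bit 22) set yields
 * register 21 (0x5 | 0x10). Without the 32-register set, a set D bit
 * makes the enclosing decoder return 1, i.e. the insn UNDEFs.
 */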
2626
2627 static void gen_neon_dup_low16(TCGv_i32 var)
2628 {
2629 TCGv_i32 tmp = tcg_temp_new_i32();
2630 tcg_gen_ext16u_i32(var, var);
2631 tcg_gen_shli_i32(tmp, var, 16);
2632 tcg_gen_or_i32(var, var, tmp);
2633 tcg_temp_free_i32(tmp);
2634 }
2635
2636 static void gen_neon_dup_high16(TCGv_i32 var)
2637 {
2638 TCGv_i32 tmp = tcg_temp_new_i32();
2639 tcg_gen_andi_i32(var, var, 0xffff0000);
2640 tcg_gen_shri_i32(tmp, var, 16);
2641 tcg_gen_or_i32(var, var, tmp);
2642 tcg_temp_free_i32(tmp);
2643 }
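/* Worked example: for var == 0x1234abcd, gen_neon_dup_low16() produces
 * 0xabcdabcd and gen_neon_dup_high16() produces 0x12341234.
 */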
2644
2645 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2646 {
2647 #ifndef CONFIG_USER_ONLY
2648 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2649 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2650 #else
2651 return true;
2652 #endif
2653 }
2654
2655 static void gen_goto_ptr(void)
2656 {
2657 tcg_gen_lookup_and_goto_ptr();
2658 }
2659
2660 /* This will end the TB but doesn't guarantee we'll return to
2661 * cpu_loop_exec. Any live exit_requests will be processed as we
2662 * enter the next TB.
2663 */
2664 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2665 {
2666 if (use_goto_tb(s, dest)) {
2667 tcg_gen_goto_tb(n);
2668 gen_set_pc_im(s, dest);
2669 tcg_gen_exit_tb(s->base.tb, n);
2670 } else {
2671 gen_set_pc_im(s, dest);
2672 gen_goto_ptr();
2673 }
2674 s->base.is_jmp = DISAS_NORETURN;
2675 }
2676
2677 static inline void gen_jmp(DisasContext *s, uint32_t dest)
2678 {
2679 if (unlikely(is_singlestepping(s))) {
2680 /* An indirect jump so that we still trigger the debug exception. */
2681 gen_set_pc_im(s, dest);
2682 s->base.is_jmp = DISAS_JUMP;
2683 } else {
2684 gen_goto_tb(s, 0, dest);
2685 }
2686 }
2687
2688 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2689 {
2690 if (x)
2691 tcg_gen_sari_i32(t0, t0, 16);
2692 else
2693 gen_sxth(t0);
2694 if (y)
2695 tcg_gen_sari_i32(t1, t1, 16);
2696 else
2697 gen_sxth(t1);
2698 tcg_gen_mul_i32(t0, t0, t1);
2699 }
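/* gen_mulxy() is the 16x16->32 signed multiply used by the halfword
 * multiplies (SMULxy/SMLAxy): x and y select the top (1) or bottom (0)
 * halfword of t0 and t1 respectively, e.g. x == 1, y == 0 multiplies
 * t0[31:16] by the sign-extended t1[15:0].
 */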
2700
2701 /* Return the mask of PSR bits set by a MSR instruction. */
2702 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2703 {
2704 uint32_t mask = 0;
2705
2706 if (flags & (1 << 0)) {
2707 mask |= 0xff;
2708 }
2709 if (flags & (1 << 1)) {
2710 mask |= 0xff00;
2711 }
2712 if (flags & (1 << 2)) {
2713 mask |= 0xff0000;
2714 }
2715 if (flags & (1 << 3)) {
2716 mask |= 0xff000000;
2717 }
2718
2719 /* Mask out undefined and reserved bits. */
2720 mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2721
2722 /* Mask out execution state. */
2723 if (!spsr) {
2724 mask &= ~CPSR_EXEC;
2725 }
2726
2727 /* Mask out privileged bits. */
2728 if (IS_USER(s)) {
2729 mask &= CPSR_USER;
2730 }
2731 return mask;
2732 }
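/* Worked example: an MSR with flags == 0x9 (the c and f fields) yields
 * a raw mask of 0xff0000ff before the validity, execution-state and
 * privilege filtering above.
 */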
2733
2734 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2735 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2736 {
2737 TCGv_i32 tmp;
2738 if (spsr) {
2739 /* ??? This is also undefined in system mode. */
2740 if (IS_USER(s))
2741 return 1;
2742
2743 tmp = load_cpu_field(spsr);
2744 tcg_gen_andi_i32(tmp, tmp, ~mask);
2745 tcg_gen_andi_i32(t0, t0, mask);
2746 tcg_gen_or_i32(tmp, tmp, t0);
2747 store_cpu_field(tmp, spsr);
2748 } else {
2749 gen_set_cpsr(t0, mask);
2750 }
2751 tcg_temp_free_i32(t0);
2752 gen_lookup_tb(s);
2753 return 0;
2754 }
2755
2756 /* Returns nonzero if access to the PSR is not permitted. */
2757 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2758 {
2759 TCGv_i32 tmp;
2760 tmp = tcg_temp_new_i32();
2761 tcg_gen_movi_i32(tmp, val);
2762 return gen_set_psr(s, mask, spsr, tmp);
2763 }
2764
2765 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2766 int *tgtmode, int *regno)
2767 {
2768 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2769 * the target mode and register number, and identify the various
2770 * unpredictable cases.
2771 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2772 * + executed in user mode
2773 * + using R15 as the src/dest register
2774 * + accessing an unimplemented register
2775 * + accessing a register that's inaccessible at current PL/security state*
2776 * + accessing a register that you could access with a different insn
2777 * We choose to UNDEF in all these cases.
2778 * Since we don't know which of the various AArch32 modes we are in
2779 * we have to defer some checks to runtime.
2780 * Accesses to Monitor mode registers from Secure EL1 (which implies
2781 * that EL3 is AArch64) must trap to EL3.
2782 *
2783 * If the access checks fail this function will emit code to take
2784 * an exception and return false. Otherwise it will return true,
2785 * and set *tgtmode and *regno appropriately.
2786 */
2787 int exc_target = default_exception_el(s);
2788
2789 /* These instructions are present only in ARMv8, or in ARMv7 with the
2790 * Virtualization Extensions.
2791 */
2792 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2793 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2794 goto undef;
2795 }
2796
2797 if (IS_USER(s) || rn == 15) {
2798 goto undef;
2799 }
2800
2801 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2802 * of registers into (r, sysm).
2803 */
2804 if (r) {
2805 /* SPSRs for other modes */
2806 switch (sysm) {
2807 case 0xe: /* SPSR_fiq */
2808 *tgtmode = ARM_CPU_MODE_FIQ;
2809 break;
2810 case 0x10: /* SPSR_irq */
2811 *tgtmode = ARM_CPU_MODE_IRQ;
2812 break;
2813 case 0x12: /* SPSR_svc */
2814 *tgtmode = ARM_CPU_MODE_SVC;
2815 break;
2816 case 0x14: /* SPSR_abt */
2817 *tgtmode = ARM_CPU_MODE_ABT;
2818 break;
2819 case 0x16: /* SPSR_und */
2820 *tgtmode = ARM_CPU_MODE_UND;
2821 break;
2822 case 0x1c: /* SPSR_mon */
2823 *tgtmode = ARM_CPU_MODE_MON;
2824 break;
2825 case 0x1e: /* SPSR_hyp */
2826 *tgtmode = ARM_CPU_MODE_HYP;
2827 break;
2828 default: /* unallocated */
2829 goto undef;
2830 }
2831 /* We arbitrarily assign SPSR a register number of 16. */
2832 *regno = 16;
2833 } else {
2834 /* general purpose registers for other modes */
2835 switch (sysm) {
2836 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2837 *tgtmode = ARM_CPU_MODE_USR;
2838 *regno = sysm + 8;
2839 break;
2840 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2841 *tgtmode = ARM_CPU_MODE_FIQ;
2842 *regno = sysm;
2843 break;
2844 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2845 *tgtmode = ARM_CPU_MODE_IRQ;
2846 *regno = sysm & 1 ? 13 : 14;
2847 break;
2848 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2849 *tgtmode = ARM_CPU_MODE_SVC;
2850 *regno = sysm & 1 ? 13 : 14;
2851 break;
2852 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2853 *tgtmode = ARM_CPU_MODE_ABT;
2854 *regno = sysm & 1 ? 13 : 14;
2855 break;
2856 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2857 *tgtmode = ARM_CPU_MODE_UND;
2858 *regno = sysm & 1 ? 13 : 14;
2859 break;
2860 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2861 *tgtmode = ARM_CPU_MODE_MON;
2862 *regno = sysm & 1 ? 13 : 14;
2863 break;
2864 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2865 *tgtmode = ARM_CPU_MODE_HYP;
2866 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2867 *regno = sysm & 1 ? 13 : 17;
2868 break;
2869 default: /* unallocated */
2870 goto undef;
2871 }
2872 }
2873
2874 /* Catch the 'accessing inaccessible register' cases we can detect
2875 * at translate time.
2876 */
2877 switch (*tgtmode) {
2878 case ARM_CPU_MODE_MON:
2879 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2880 goto undef;
2881 }
2882 if (s->current_el == 1) {
2883 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2884 * then accesses to Mon registers trap to EL3
2885 */
2886 exc_target = 3;
2887 goto undef;
2888 }
2889 break;
2890 case ARM_CPU_MODE_HYP:
2891 /*
2892 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2893 * (and so we can forbid accesses from EL2 or below). elr_hyp
2894 * can be accessed also from Hyp mode, so forbid accesses from
2895 * EL0 or EL1.
2896 */
2897 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2898 (s->current_el < 3 && *regno != 17)) {
2899 goto undef;
2900 }
2901 break;
2902 default:
2903 break;
2904 }
2905
2906 return true;
2907
2908 undef:
2909 /* If we get here then some access check did not pass */
2910 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2911 syn_uncategorized(), exc_target);
2912 return false;
2913 }
2914
2915 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2916 {
2917 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2918 int tgtmode = 0, regno = 0;
2919
2920 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2921 return;
2922 }
2923
2924 /* Sync state because msr_banked() can raise exceptions */
2925 gen_set_condexec(s);
2926 gen_set_pc_im(s, s->pc_curr);
2927 tcg_reg = load_reg(s, rn);
2928 tcg_tgtmode = tcg_const_i32(tgtmode);
2929 tcg_regno = tcg_const_i32(regno);
2930 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2931 tcg_temp_free_i32(tcg_tgtmode);
2932 tcg_temp_free_i32(tcg_regno);
2933 tcg_temp_free_i32(tcg_reg);
2934 s->base.is_jmp = DISAS_UPDATE;
2935 }
2936
2937 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2938 {
2939 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2940 int tgtmode = 0, regno = 0;
2941
2942 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2943 return;
2944 }
2945
2946 /* Sync state because mrs_banked() can raise exceptions */
2947 gen_set_condexec(s);
2948 gen_set_pc_im(s, s->pc_curr);
2949 tcg_reg = tcg_temp_new_i32();
2950 tcg_tgtmode = tcg_const_i32(tgtmode);
2951 tcg_regno = tcg_const_i32(regno);
2952 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2953 tcg_temp_free_i32(tcg_tgtmode);
2954 tcg_temp_free_i32(tcg_regno);
2955 store_reg(s, rn, tcg_reg);
2956 s->base.is_jmp = DISAS_UPDATE;
2957 }
2958
2959 /* Store value to PC as for an exception return (i.e. don't
2960 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2961 * will do the masking based on the new value of the Thumb bit.
2962 */
2963 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2964 {
2965 tcg_gen_mov_i32(cpu_R[15], pc);
2966 tcg_temp_free_i32(pc);
2967 }
2968
2969 /* Generate a v6 exception return. Marks both values as dead. */
2970 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2971 {
2972 store_pc_exc_ret(s, pc);
2973 /* The cpsr_write_eret helper will mask the low bits of PC
2974 * appropriately depending on the new Thumb bit, so it must
2975 * be called after storing the new PC.
2976 */
2977 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2978 gen_io_start();
2979 }
2980 gen_helper_cpsr_write_eret(cpu_env, cpsr);
2981 tcg_temp_free_i32(cpsr);
2982 /* Must exit loop to check un-masked IRQs */
2983 s->base.is_jmp = DISAS_EXIT;
2984 }
2985
2986 /* Generate an old-style exception return. Marks pc as dead. */
2987 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2988 {
2989 gen_rfe(s, pc, load_cpu_field(spsr));
2990 }
2991
2992 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
2993
2994 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
2995 {
2996 switch (size) {
2997 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
2998 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
2999 case 2: tcg_gen_add_i32(t0, t0, t1); break;
3000 default: abort();
3001 }
3002 }
3003
3004 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3005 {
3006 switch (size) {
3007 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3008 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3009 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3010 default: return;
3011 }
3012 }
3013
3014 /* 32-bit pairwise ops end up the same as the elementwise versions. */
3015 #define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
3016 #define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
3017 #define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
3018 #define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
3019
3020 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3021 switch ((size << 1) | u) { \
3022 case 0: \
3023 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3024 break; \
3025 case 1: \
3026 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3027 break; \
3028 case 2: \
3029 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3030 break; \
3031 case 3: \
3032 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3033 break; \
3034 case 4: \
3035 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3036 break; \
3037 case 5: \
3038 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3039 break; \
3040 default: return 1; \
3041 }} while (0)
3042
3043 #define GEN_NEON_INTEGER_OP(name) do { \
3044 switch ((size << 1) | u) { \
3045 case 0: \
3046 gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3047 break; \
3048 case 1: \
3049 gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3050 break; \
3051 case 2: \
3052 gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3053 break; \
3054 case 3: \
3055 gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3056 break; \
3057 case 4: \
3058 gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3059 break; \
3060 case 5: \
3061 gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3062 break; \
3063 default: return 1; \
3064 }} while (0)
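/* Both macros select a helper by pasting the size/signedness suffix,
 * e.g. GEN_NEON_INTEGER_OP(hadd) with size == 1 and u == 1 expands to
 * gen_helper_neon_hadd_u16(tmp, tmp, tmp2).
 */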
3065
3066 static TCGv_i32 neon_load_scratch(int scratch)
3067 {
3068 TCGv_i32 tmp = tcg_temp_new_i32();
3069 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3070 return tmp;
3071 }
3072
3073 static void neon_store_scratch(int scratch, TCGv_i32 var)
3074 {
3075 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3076 tcg_temp_free_i32(var);
3077 }
3078
3079 static inline TCGv_i32 neon_get_scalar(int size, int reg)
3080 {
3081 TCGv_i32 tmp;
3082 if (size == 1) {
3083 tmp = neon_load_reg(reg & 7, reg >> 4);
3084 if (reg & 8) {
3085 gen_neon_dup_high16(tmp);
3086 } else {
3087 gen_neon_dup_low16(tmp);
3088 }
3089 } else {
3090 tmp = neon_load_reg(reg & 15, reg >> 4);
3091 }
3092 return tmp;
3093 }
3094
3095 static int gen_neon_unzip(int rd, int rm, int size, int q)
3096 {
3097 TCGv_ptr pd, pm;
3098
3099 if (!q && size == 2) {
3100 return 1;
3101 }
3102 pd = vfp_reg_ptr(true, rd);
3103 pm = vfp_reg_ptr(true, rm);
3104 if (q) {
3105 switch (size) {
3106 case 0:
3107 gen_helper_neon_qunzip8(pd, pm);
3108 break;
3109 case 1:
3110 gen_helper_neon_qunzip16(pd, pm);
3111 break;
3112 case 2:
3113 gen_helper_neon_qunzip32(pd, pm);
3114 break;
3115 default:
3116 abort();
3117 }
3118 } else {
3119 switch (size) {
3120 case 0:
3121 gen_helper_neon_unzip8(pd, pm);
3122 break;
3123 case 1:
3124 gen_helper_neon_unzip16(pd, pm);
3125 break;
3126 default:
3127 abort();
3128 }
3129 }
3130 tcg_temp_free_ptr(pd);
3131 tcg_temp_free_ptr(pm);
3132 return 0;
3133 }
3134
3135 static int gen_neon_zip(int rd, int rm, int size, int q)
3136 {
3137 TCGv_ptr pd, pm;
3138
3139 if (!q && size == 2) {
3140 return 1;
3141 }
3142 pd = vfp_reg_ptr(true, rd);
3143 pm = vfp_reg_ptr(true, rm);
3144 if (q) {
3145 switch (size) {
3146 case 0:
3147 gen_helper_neon_qzip8(pd, pm);
3148 break;
3149 case 1:
3150 gen_helper_neon_qzip16(pd, pm);
3151 break;
3152 case 2:
3153 gen_helper_neon_qzip32(pd, pm);
3154 break;
3155 default:
3156 abort();
3157 }
3158 } else {
3159 switch (size) {
3160 case 0:
3161 gen_helper_neon_zip8(pd, pm);
3162 break;
3163 case 1:
3164 gen_helper_neon_zip16(pd, pm);
3165 break;
3166 default:
3167 abort();
3168 }
3169 }
3170 tcg_temp_free_ptr(pd);
3171 tcg_temp_free_ptr(pm);
3172 return 0;
3173 }
3174
3175 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3176 {
3177 TCGv_i32 rd, tmp;
3178
3179 rd = tcg_temp_new_i32();
3180 tmp = tcg_temp_new_i32();
3181
3182 tcg_gen_shli_i32(rd, t0, 8);
3183 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3184 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3185 tcg_gen_or_i32(rd, rd, tmp);
3186
3187 tcg_gen_shri_i32(t1, t1, 8);
3188 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3189 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3190 tcg_gen_or_i32(t1, t1, tmp);
3191 tcg_gen_mov_i32(t0, rd);
3192
3193 tcg_temp_free_i32(tmp);
3194 tcg_temp_free_i32(rd);
3195 }
3196
3197 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3198 {
3199 TCGv_i32 rd, tmp;
3200
3201 rd = tcg_temp_new_i32();
3202 tmp = tcg_temp_new_i32();
3203
3204 tcg_gen_shli_i32(rd, t0, 16);
3205 tcg_gen_andi_i32(tmp, t1, 0xffff);
3206 tcg_gen_or_i32(rd, rd, tmp);
3207 tcg_gen_shri_i32(t1, t1, 16);
3208 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3209 tcg_gen_or_i32(t1, t1, tmp);
3210 tcg_gen_mov_i32(t0, rd);
3211
3212 tcg_temp_free_i32(tmp);
3213 tcg_temp_free_i32(rd);
3214 }
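/* Worked example for gen_neon_trn_u16(): t0 == 0xAAAABBBB and
 * t1 == 0xCCCCDDDD become t0 == 0xBBBBDDDD and t1 == 0xAAAACCCC.
 */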
3215
3216 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3217 {
3218 switch (size) {
3219 case 0: gen_helper_neon_narrow_u8(dest, src); break;
3220 case 1: gen_helper_neon_narrow_u16(dest, src); break;
3221 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3222 default: abort();
3223 }
3224 }
3225
3226 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3227 {
3228 switch (size) {
3229 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3230 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3231 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3232 default: abort();
3233 }
3234 }
3235
3236 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3237 {
3238 switch (size) {
3239 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3240 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3241 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3242 default: abort();
3243 }
3244 }
3245
3246 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3247 {
3248 switch (size) {
3249 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3250 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3251 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3252 default: abort();
3253 }
3254 }
3255
3256 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
3257 int q, int u)
3258 {
3259 if (q) {
3260 if (u) {
3261 switch (size) {
3262 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3263 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3264 default: abort();
3265 }
3266 } else {
3267 switch (size) {
3268 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3269 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3270 default: abort();
3271 }
3272 }
3273 } else {
3274 if (u) {
3275 switch (size) {
3276 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
3277 case 2: gen_ushl_i32(var, var, shift); break;
3278 default: abort();
3279 }
3280 } else {
3281 switch (size) {
3282 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3283 case 2: gen_sshl_i32(var, var, shift); break;
3284 default: abort();
3285 }
3286 }
3287 }
3288 }
3289
3290 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3291 {
3292 if (u) {
3293 switch (size) {
3294 case 0: gen_helper_neon_widen_u8(dest, src); break;
3295 case 1: gen_helper_neon_widen_u16(dest, src); break;
3296 case 2: tcg_gen_extu_i32_i64(dest, src); break;
3297 default: abort();
3298 }
3299 } else {
3300 switch (size) {
3301 case 0: gen_helper_neon_widen_s8(dest, src); break;
3302 case 1: gen_helper_neon_widen_s16(dest, src); break;
3303 case 2: tcg_gen_ext_i32_i64(dest, src); break;
3304 default: abort();
3305 }
3306 }
3307 tcg_temp_free_i32(src);
3308 }
3309
3310 static inline void gen_neon_addl(int size)
3311 {
3312 switch (size) {
3313 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3314 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3315 case 2: tcg_gen_add_i64(CPU_V001); break;
3316 default: abort();
3317 }
3318 }
3319
3320 static inline void gen_neon_subl(int size)
3321 {
3322 switch (size) {
3323 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
3324 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
3325 case 2: tcg_gen_sub_i64(CPU_V001); break;
3326 default: abort();
3327 }
3328 }
3329
3330 static inline void gen_neon_negl(TCGv_i64 var, int size)
3331 {
3332 switch (size) {
3333 case 0: gen_helper_neon_negl_u16(var, var); break;
3334 case 1: gen_helper_neon_negl_u32(var, var); break;
3335 case 2:
3336 tcg_gen_neg_i64(var, var);
3337 break;
3338 default: abort();
3339 }
3340 }
3341
3342 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3343 {
3344 switch (size) {
3345 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3346 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3347 default: abort();
3348 }
3349 }
3350
3351 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3352 int size, int u)
3353 {
3354 TCGv_i64 tmp;
3355
3356 switch ((size << 1) | u) {
3357 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3358 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3359 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3360 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3361 case 4:
3362 tmp = gen_muls_i64_i32(a, b);
3363 tcg_gen_mov_i64(dest, tmp);
3364 tcg_temp_free_i64(tmp);
3365 break;
3366 case 5:
3367 tmp = gen_mulu_i64_i32(a, b);
3368 tcg_gen_mov_i64(dest, tmp);
3369 tcg_temp_free_i64(tmp);
3370 break;
3371 default: abort();
3372 }
3373
3374 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
3375 Don't forget to clean them now. */
3376 if (size < 2) {
3377 tcg_temp_free_i32(a);
3378 tcg_temp_free_i32(b);
3379 }
3380 }
3381
3382 static void gen_neon_narrow_op(int op, int u, int size,
3383 TCGv_i32 dest, TCGv_i64 src)
3384 {
3385 if (op) {
3386 if (u) {
3387 gen_neon_unarrow_sats(size, dest, src);
3388 } else {
3389 gen_neon_narrow(size, dest, src);
3390 }
3391 } else {
3392 if (u) {
3393 gen_neon_narrow_satu(size, dest, src);
3394 } else {
3395 gen_neon_narrow_sats(size, dest, src);
3396 }
3397 }
3398 }
3399
3400 /* Symbolic constants for op fields for Neon 3-register same-length.
3401 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
3402 * table A7-9.
3403 */
3404 #define NEON_3R_VHADD 0
3405 #define NEON_3R_VQADD 1
3406 #define NEON_3R_VRHADD 2
3407 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
3408 #define NEON_3R_VHSUB 4
3409 #define NEON_3R_VQSUB 5
3410 #define NEON_3R_VCGT 6
3411 #define NEON_3R_VCGE 7
3412 #define NEON_3R_VSHL 8
3413 #define NEON_3R_VQSHL 9
3414 #define NEON_3R_VRSHL 10
3415 #define NEON_3R_VQRSHL 11
3416 #define NEON_3R_VMAX 12
3417 #define NEON_3R_VMIN 13
3418 #define NEON_3R_VABD 14
3419 #define NEON_3R_VABA 15
3420 #define NEON_3R_VADD_VSUB 16
3421 #define NEON_3R_VTST_VCEQ 17
3422 #define NEON_3R_VML 18 /* VMLA, VMLS */
3423 #define NEON_3R_VMUL 19
3424 #define NEON_3R_VPMAX 20
3425 #define NEON_3R_VPMIN 21
3426 #define NEON_3R_VQDMULH_VQRDMULH 22
3427 #define NEON_3R_VPADD_VQRDMLAH 23
3428 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
3429 #define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
3430 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
3431 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
3432 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
3433 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
3434 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
3435 #define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
3436
3437 static const uint8_t neon_3r_sizes[] = {
3438 [NEON_3R_VHADD] = 0x7,
3439 [NEON_3R_VQADD] = 0xf,
3440 [NEON_3R_VRHADD] = 0x7,
3441 [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
3442 [NEON_3R_VHSUB] = 0x7,
3443 [NEON_3R_VQSUB] = 0xf,
3444 [NEON_3R_VCGT] = 0x7,
3445 [NEON_3R_VCGE] = 0x7,
3446 [NEON_3R_VSHL] = 0xf,
3447 [NEON_3R_VQSHL] = 0xf,
3448 [NEON_3R_VRSHL] = 0xf,
3449 [NEON_3R_VQRSHL] = 0xf,
3450 [NEON_3R_VMAX] = 0x7,
3451 [NEON_3R_VMIN] = 0x7,
3452 [NEON_3R_VABD] = 0x7,
3453 [NEON_3R_VABA] = 0x7,
3454 [NEON_3R_VADD_VSUB] = 0xf,
3455 [NEON_3R_VTST_VCEQ] = 0x7,
3456 [NEON_3R_VML] = 0x7,
3457 [NEON_3R_VMUL] = 0x7,
3458 [NEON_3R_VPMAX] = 0x7,
3459 [NEON_3R_VPMIN] = 0x7,
3460 [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
3461 [NEON_3R_VPADD_VQRDMLAH] = 0x7,
3462 [NEON_3R_SHA] = 0xf, /* size field encodes op type */
3463 [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
3464 [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
3465 [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
3466 [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
3467 [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
3468 [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
3469 [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
3470 };
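/* The table is consulted as a bitmask of permitted size encodings; the
 * Neon decoder is expected to do something like
 *   if (!(neon_3r_sizes[op] & (1 << size))) { ...UNDEF... }
 * (illustrative sketch; the actual check lives in the decode path).
 */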
3471
3472 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
3473 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
3474 * table A7-13.
3475 */
3476 #define NEON_2RM_VREV64 0
3477 #define NEON_2RM_VREV32 1
3478 #define NEON_2RM_VREV16 2
3479 #define NEON_2RM_VPADDL 4
3480 #define NEON_2RM_VPADDL_U 5
3481 #define NEON_2RM_AESE 6 /* Includes AESD */
3482 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
3483 #define NEON_2RM_VCLS 8
3484 #define NEON_2RM_VCLZ 9
3485 #define NEON_2RM_VCNT 10
3486 #define NEON_2RM_VMVN 11
3487 #define NEON_2RM_VPADAL 12
3488 #define NEON_2RM_VPADAL_U 13
3489 #define NEON_2RM_VQABS 14
3490 #define NEON_2RM_VQNEG 15
3491 #define NEON_2RM_VCGT0 16
3492 #define NEON_2RM_VCGE0 17
3493 #define NEON_2RM_VCEQ0 18
3494 #define NEON_2RM_VCLE0 19
3495 #define NEON_2RM_VCLT0 20
3496 #define NEON_2RM_SHA1H 21
3497 #define NEON_2RM_VABS 22
3498 #define NEON_2RM_VNEG 23
3499 #define NEON_2RM_VCGT0_F 24
3500 #define NEON_2RM_VCGE0_F 25
3501 #define NEON_2RM_VCEQ0_F 26
3502 #define NEON_2RM_VCLE0_F 27
3503 #define NEON_2RM_VCLT0_F 28
3504 #define NEON_2RM_VABS_F 30
3505 #define NEON_2RM_VNEG_F 31
3506 #define NEON_2RM_VSWP 32
3507 #define NEON_2RM_VTRN 33
3508 #define NEON_2RM_VUZP 34
3509 #define NEON_2RM_VZIP 35
3510 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3511 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3512 #define NEON_2RM_VSHLL 38
3513 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3514 #define NEON_2RM_VRINTN 40
3515 #define NEON_2RM_VRINTX 41
3516 #define NEON_2RM_VRINTA 42
3517 #define NEON_2RM_VRINTZ 43
3518 #define NEON_2RM_VCVT_F16_F32 44
3519 #define NEON_2RM_VRINTM 45
3520 #define NEON_2RM_VCVT_F32_F16 46
3521 #define NEON_2RM_VRINTP 47
3522 #define NEON_2RM_VCVTAU 48
3523 #define NEON_2RM_VCVTAS 49
3524 #define NEON_2RM_VCVTNU 50
3525 #define NEON_2RM_VCVTNS 51
3526 #define NEON_2RM_VCVTPU 52
3527 #define NEON_2RM_VCVTPS 53
3528 #define NEON_2RM_VCVTMU 54
3529 #define NEON_2RM_VCVTMS 55
3530 #define NEON_2RM_VRECPE 56
3531 #define NEON_2RM_VRSQRTE 57
3532 #define NEON_2RM_VRECPE_F 58
3533 #define NEON_2RM_VRSQRTE_F 59
3534 #define NEON_2RM_VCVT_FS 60
3535 #define NEON_2RM_VCVT_FU 61
3536 #define NEON_2RM_VCVT_SF 62
3537 #define NEON_2RM_VCVT_UF 63
3538
3539 static bool neon_2rm_is_v8_op(int op)
3540 {
3541 /* Return true if this neon 2reg-misc op is ARMv8 and up */
3542 switch (op) {
3543 case NEON_2RM_VRINTN:
3544 case NEON_2RM_VRINTA:
3545 case NEON_2RM_VRINTM:
3546 case NEON_2RM_VRINTP:
3547 case NEON_2RM_VRINTZ:
3548 case NEON_2RM_VRINTX:
3549 case NEON_2RM_VCVTAU:
3550 case NEON_2RM_VCVTAS:
3551 case NEON_2RM_VCVTNU:
3552 case NEON_2RM_VCVTNS:
3553 case NEON_2RM_VCVTPU:
3554 case NEON_2RM_VCVTPS:
3555 case NEON_2RM_VCVTMU:
3556 case NEON_2RM_VCVTMS:
3557 return true;
3558 default:
3559 return false;
3560 }
3561 }
3562
3563 /* Each entry in this array has bit n set if the insn allows
3564 * size value n (otherwise it will UNDEF). Since unallocated
3565 * op values will have no bits set they always UNDEF.
3566 */
3567 static const uint8_t neon_2rm_sizes[] = {
3568 [NEON_2RM_VREV64] = 0x7,
3569 [NEON_2RM_VREV32] = 0x3,
3570 [NEON_2RM_VREV16] = 0x1,
3571 [NEON_2RM_VPADDL] = 0x7,
3572 [NEON_2RM_VPADDL_U] = 0x7,
3573 [NEON_2RM_AESE] = 0x1,
3574 [NEON_2RM_AESMC] = 0x1,
3575 [NEON_2RM_VCLS] = 0x7,
3576 [NEON_2RM_VCLZ] = 0x7,
3577 [NEON_2RM_VCNT] = 0x1,
3578 [NEON_2RM_VMVN] = 0x1,
3579 [NEON_2RM_VPADAL] = 0x7,
3580 [NEON_2RM_VPADAL_U] = 0x7,
3581 [NEON_2RM_VQABS] = 0x7,
3582 [NEON_2RM_VQNEG] = 0x7,
3583 [NEON_2RM_VCGT0] = 0x7,
3584 [NEON_2RM_VCGE0] = 0x7,
3585 [NEON_2RM_VCEQ0] = 0x7,
3586 [NEON_2RM_VCLE0] = 0x7,
3587 [NEON_2RM_VCLT0] = 0x7,
3588 [NEON_2RM_SHA1H] = 0x4,
3589 [NEON_2RM_VABS] = 0x7,
3590 [NEON_2RM_VNEG] = 0x7,
3591 [NEON_2RM_VCGT0_F] = 0x4,
3592 [NEON_2RM_VCGE0_F] = 0x4,
3593 [NEON_2RM_VCEQ0_F] = 0x4,
3594 [NEON_2RM_VCLE0_F] = 0x4,
3595 [NEON_2RM_VCLT0_F] = 0x4,
3596 [NEON_2RM_VABS_F] = 0x4,
3597 [NEON_2RM_VNEG_F] = 0x4,
3598 [NEON_2RM_VSWP] = 0x1,
3599 [NEON_2RM_VTRN] = 0x7,
3600 [NEON_2RM_VUZP] = 0x7,
3601 [NEON_2RM_VZIP] = 0x7,
3602 [NEON_2RM_VMOVN] = 0x7,
3603 [NEON_2RM_VQMOVN] = 0x7,
3604 [NEON_2RM_VSHLL] = 0x7,
3605 [NEON_2RM_SHA1SU1] = 0x4,
3606 [NEON_2RM_VRINTN] = 0x4,
3607 [NEON_2RM_VRINTX] = 0x4,
3608 [NEON_2RM_VRINTA] = 0x4,
3609 [NEON_2RM_VRINTZ] = 0x4,
3610 [NEON_2RM_VCVT_F16_F32] = 0x2,
3611 [NEON_2RM_VRINTM] = 0x4,
3612 [NEON_2RM_VCVT_F32_F16] = 0x2,
3613 [NEON_2RM_VRINTP] = 0x4,
3614 [NEON_2RM_VCVTAU] = 0x4,
3615 [NEON_2RM_VCVTAS] = 0x4,
3616 [NEON_2RM_VCVTNU] = 0x4,
3617 [NEON_2RM_VCVTNS] = 0x4,
3618 [NEON_2RM_VCVTPU] = 0x4,
3619 [NEON_2RM_VCVTPS] = 0x4,
3620 [NEON_2RM_VCVTMU] = 0x4,
3621 [NEON_2RM_VCVTMS] = 0x4,
3622 [NEON_2RM_VRECPE] = 0x4,
3623 [NEON_2RM_VRSQRTE] = 0x4,
3624 [NEON_2RM_VRECPE_F] = 0x4,
3625 [NEON_2RM_VRSQRTE_F] = 0x4,
3626 [NEON_2RM_VCVT_FS] = 0x4,
3627 [NEON_2RM_VCVT_FU] = 0x4,
3628 [NEON_2RM_VCVT_SF] = 0x4,
3629 [NEON_2RM_VCVT_UF] = 0x4,
3630 };
3631
3632
3633 /* Expand v8.1 simd helper. */
3634 static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
3635 int q, int rd, int rn, int rm)
3636 {
3637 if (dc_isar_feature(aa32_rdm, s)) {
3638 int opr_sz = (1 + q) * 8;
3639 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
3640 vfp_reg_offset(1, rn),
3641 vfp_reg_offset(1, rm), cpu_env,
3642 opr_sz, opr_sz, 0, fn);
3643 return 0;
3644 }
3645 return 1;
3646 }
3647
3648 static void gen_ceq0_i32(TCGv_i32 d, TCGv_i32 a)
3649 {
3650 tcg_gen_setcondi_i32(TCG_COND_EQ, d, a, 0);
3651 tcg_gen_neg_i32(d, d);
3652 }
3653
3654 static void gen_ceq0_i64(TCGv_i64 d, TCGv_i64 a)
3655 {
3656 tcg_gen_setcondi_i64(TCG_COND_EQ, d, a, 0);
3657 tcg_gen_neg_i64(d, d);
3658 }
3659
3660 static void gen_ceq0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3661 {
3662 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3663 tcg_gen_cmp_vec(TCG_COND_EQ, vece, d, a, zero);
3664 tcg_temp_free_vec(zero);
3665 }
3666
3667 static const TCGOpcode vecop_list_cmp[] = {
3668 INDEX_op_cmp_vec, 0
3669 };
3670
3671 const GVecGen2 ceq0_op[4] = {
3672 { .fno = gen_helper_gvec_ceq0_b,
3673 .fniv = gen_ceq0_vec,
3674 .opt_opc = vecop_list_cmp,
3675 .vece = MO_8 },
3676 { .fno = gen_helper_gvec_ceq0_h,
3677 .fniv = gen_ceq0_vec,
3678 .opt_opc = vecop_list_cmp,
3679 .vece = MO_16 },
3680 { .fni4 = gen_ceq0_i32,
3681 .fniv = gen_ceq0_vec,
3682 .opt_opc = vecop_list_cmp,
3683 .vece = MO_32 },
3684 { .fni8 = gen_ceq0_i64,
3685 .fniv = gen_ceq0_vec,
3686 .opt_opc = vecop_list_cmp,
3687 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3688 .vece = MO_64 },
3689 };
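/* This and the following c{le,ge,lt,gt}0_op tables implement the Neon
 * compare-against-zero operations: setcond produces 0/1 and the
 * negation turns it into the 0/all-ones mask the architecture requires,
 * e.g. for the i32 case a == 0 gives d == 0xffffffff.
 */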
3690
3691 static void gen_cle0_i32(TCGv_i32 d, TCGv_i32 a)
3692 {
3693 tcg_gen_setcondi_i32(TCG_COND_LE, d, a, 0);
3694 tcg_gen_neg_i32(d, d);
3695 }
3696
3697 static void gen_cle0_i64(TCGv_i64 d, TCGv_i64 a)
3698 {
3699 tcg_gen_setcondi_i64(TCG_COND_LE, d, a, 0);
3700 tcg_gen_neg_i64(d, d);
3701 }
3702
3703 static void gen_cle0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3704 {
3705 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3706 tcg_gen_cmp_vec(TCG_COND_LE, vece, d, a, zero);
3707 tcg_temp_free_vec(zero);
3708 }
3709
3710 const GVecGen2 cle0_op[4] = {
3711 { .fno = gen_helper_gvec_cle0_b,
3712 .fniv = gen_cle0_vec,
3713 .opt_opc = vecop_list_cmp,
3714 .vece = MO_8 },
3715 { .fno = gen_helper_gvec_cle0_h,
3716 .fniv = gen_cle0_vec,
3717 .opt_opc = vecop_list_cmp,
3718 .vece = MO_16 },
3719 { .fni4 = gen_cle0_i32,
3720 .fniv = gen_cle0_vec,
3721 .opt_opc = vecop_list_cmp,
3722 .vece = MO_32 },
3723 { .fni8 = gen_cle0_i64,
3724 .fniv = gen_cle0_vec,
3725 .opt_opc = vecop_list_cmp,
3726 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3727 .vece = MO_64 },
3728 };
3729
3730 static void gen_cge0_i32(TCGv_i32 d, TCGv_i32 a)
3731 {
3732 tcg_gen_setcondi_i32(TCG_COND_GE, d, a, 0);
3733 tcg_gen_neg_i32(d, d);
3734 }
3735
3736 static void gen_cge0_i64(TCGv_i64 d, TCGv_i64 a)
3737 {
3738 tcg_gen_setcondi_i64(TCG_COND_GE, d, a, 0);
3739 tcg_gen_neg_i64(d, d);
3740 }
3741
3742 static void gen_cge0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3743 {
3744 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3745 tcg_gen_cmp_vec(TCG_COND_GE, vece, d, a, zero);
3746 tcg_temp_free_vec(zero);
3747 }
3748
3749 const GVecGen2 cge0_op[4] = {
3750 { .fno = gen_helper_gvec_cge0_b,
3751 .fniv = gen_cge0_vec,
3752 .opt_opc = vecop_list_cmp,
3753 .vece = MO_8 },
3754 { .fno = gen_helper_gvec_cge0_h,
3755 .fniv = gen_cge0_vec,
3756 .opt_opc = vecop_list_cmp,
3757 .vece = MO_16 },
3758 { .fni4 = gen_cge0_i32,
3759 .fniv = gen_cge0_vec,
3760 .opt_opc = vecop_list_cmp,
3761 .vece = MO_32 },
3762 { .fni8 = gen_cge0_i64,
3763 .fniv = gen_cge0_vec,
3764 .opt_opc = vecop_list_cmp,
3765 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3766 .vece = MO_64 },
3767 };
3768
3769 static void gen_clt0_i32(TCGv_i32 d, TCGv_i32 a)
3770 {
3771 tcg_gen_setcondi_i32(TCG_COND_LT, d, a, 0);
3772 tcg_gen_neg_i32(d, d);
3773 }
3774
3775 static void gen_clt0_i64(TCGv_i64 d, TCGv_i64 a)
3776 {
3777 tcg_gen_setcondi_i64(TCG_COND_LT, d, a, 0);
3778 tcg_gen_neg_i64(d, d);
3779 }
3780
3781 static void gen_clt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3782 {
3783 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3784 tcg_gen_cmp_vec(TCG_COND_LT, vece, d, a, zero);
3785 tcg_temp_free_vec(zero);
3786 }
3787
3788 const GVecGen2 clt0_op[4] = {
3789 { .fno = gen_helper_gvec_clt0_b,
3790 .fniv = gen_clt0_vec,
3791 .opt_opc = vecop_list_cmp,
3792 .vece = MO_8 },
3793 { .fno = gen_helper_gvec_clt0_h,
3794 .fniv = gen_clt0_vec,
3795 .opt_opc = vecop_list_cmp,
3796 .vece = MO_16 },
3797 { .fni4 = gen_clt0_i32,
3798 .fniv = gen_clt0_vec,
3799 .opt_opc = vecop_list_cmp,
3800 .vece = MO_32 },
3801 { .fni8 = gen_clt0_i64,
3802 .fniv = gen_clt0_vec,
3803 .opt_opc = vecop_list_cmp,
3804 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3805 .vece = MO_64 },
3806 };
3807
3808 static void gen_cgt0_i32(TCGv_i32 d, TCGv_i32 a)
3809 {
3810 tcg_gen_setcondi_i32(TCG_COND_GT, d, a, 0);
3811 tcg_gen_neg_i32(d, d);
3812 }
3813
3814 static void gen_cgt0_i64(TCGv_i64 d, TCGv_i64 a)
3815 {
3816 tcg_gen_setcondi_i64(TCG_COND_GT, d, a, 0);
3817 tcg_gen_neg_i64(d, d);
3818 }
3819
3820 static void gen_cgt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3821 {
3822 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3823 tcg_gen_cmp_vec(TCG_COND_GT, vece, d, a, zero);
3824 tcg_temp_free_vec(zero);
3825 }
3826
3827 const GVecGen2 cgt0_op[4] = {
3828 { .fno = gen_helper_gvec_cgt0_b,
3829 .fniv = gen_cgt0_vec,
3830 .opt_opc = vecop_list_cmp,
3831 .vece = MO_8 },
3832 { .fno = gen_helper_gvec_cgt0_h,
3833 .fniv = gen_cgt0_vec,
3834 .opt_opc = vecop_list_cmp,
3835 .vece = MO_16 },
3836 { .fni4 = gen_cgt0_i32,
3837 .fniv = gen_cgt0_vec,
3838 .opt_opc = vecop_list_cmp,
3839 .vece = MO_32 },
3840 { .fni8 = gen_cgt0_i64,
3841 .fniv = gen_cgt0_vec,
3842 .opt_opc = vecop_list_cmp,
3843 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3844 .vece = MO_64 },
3845 };
3846
3847 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3848 {
3849 tcg_gen_vec_sar8i_i64(a, a, shift);
3850 tcg_gen_vec_add8_i64(d, d, a);
3851 }
3852
3853 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3854 {
3855 tcg_gen_vec_sar16i_i64(a, a, shift);
3856 tcg_gen_vec_add16_i64(d, d, a);
3857 }
3858
3859 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3860 {
3861 tcg_gen_sari_i32(a, a, shift);
3862 tcg_gen_add_i32(d, d, a);
3863 }
3864
3865 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3866 {
3867 tcg_gen_sari_i64(a, a, shift);
3868 tcg_gen_add_i64(d, d, a);
3869 }
3870
3871 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3872 {
3873 tcg_gen_sari_vec(vece, a, a, sh);
3874 tcg_gen_add_vec(vece, d, d, a);
3875 }
3876
3877 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3878 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3879 {
3880 static const TCGOpcode vecop_list[] = {
3881 INDEX_op_sari_vec, INDEX_op_add_vec, 0
3882 };
3883 static const GVecGen2i ops[4] = {
3884 { .fni8 = gen_ssra8_i64,
3885 .fniv = gen_ssra_vec,
3886 .fno = gen_helper_gvec_ssra_b,
3887 .load_dest = true,
3888 .opt_opc = vecop_list,
3889 .vece = MO_8 },
3890 { .fni8 = gen_ssra16_i64,
3891 .fniv = gen_ssra_vec,
3892 .fno = gen_helper_gvec_ssra_h,
3893 .load_dest = true,
3894 .opt_opc = vecop_list,
3895 .vece = MO_16 },
3896 { .fni4 = gen_ssra32_i32,
3897 .fniv = gen_ssra_vec,
3898 .fno = gen_helper_gvec_ssra_s,
3899 .load_dest = true,
3900 .opt_opc = vecop_list,
3901 .vece = MO_32 },
3902 { .fni8 = gen_ssra64_i64,
3903 .fniv = gen_ssra_vec,
3904 .fno = gen_helper_gvec_ssra_d,
3905 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3906 .opt_opc = vecop_list,
3907 .load_dest = true,
3908 .vece = MO_64 },
3909 };
3910
3911 /* tszimm encoding produces immediates in the range [1..esize]. */
3912 tcg_debug_assert(shift > 0);
3913 tcg_debug_assert(shift <= (8 << vece));
3914
3915 /*
3916 * Shifts larger than the element size are architecturally valid.
3917 * Signed results in all sign bits.
3918 */
3919 shift = MIN(shift, (8 << vece) - 1);
3920 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3921 }
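
/*
 * Minimal scalar sketch of the clamp above for an 8-bit lane (hypothetical
 * reference name, not used by the translator; assumes the usual arithmetic
 * right shift of negative values, as QEMU does elsewhere): an architectural
 * SSRA shift of 8 adds -1 for negative inputs and 0 otherwise, which is
 * exactly what a shift by 7 produces.
 */
static inline int8_t ssra8_ref(int8_t acc, int8_t x, int sh) /* 1 <= sh <= 8 */
{
    int clamped = sh > 7 ? 7 : sh;           /* MIN(shift, esize - 1) */
    return (int8_t)(acc + (x >> clamped));
}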
3922
3923 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3924 {
3925 tcg_gen_vec_shr8i_i64(a, a, shift);
3926 tcg_gen_vec_add8_i64(d, d, a);
3927 }
3928
3929 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3930 {
3931 tcg_gen_vec_shr16i_i64(a, a, shift);
3932 tcg_gen_vec_add16_i64(d, d, a);
3933 }
3934
3935 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3936 {
3937 tcg_gen_shri_i32(a, a, shift);
3938 tcg_gen_add_i32(d, d, a);
3939 }
3940
3941 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3942 {
3943 tcg_gen_shri_i64(a, a, shift);
3944 tcg_gen_add_i64(d, d, a);
3945 }
3946
3947 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3948 {
3949 tcg_gen_shri_vec(vece, a, a, sh);
3950 tcg_gen_add_vec(vece, d, d, a);
3951 }
3952
3953 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3954 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3955 {
3956 static const TCGOpcode vecop_list[] = {
3957 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3958 };
3959 static const GVecGen2i ops[4] = {
3960 { .fni8 = gen_usra8_i64,
3961 .fniv = gen_usra_vec,
3962 .fno = gen_helper_gvec_usra_b,
3963 .load_dest = true,
3964 .opt_opc = vecop_list,
3965 .vece = MO_8, },
3966 { .fni8 = gen_usra16_i64,
3967 .fniv = gen_usra_vec,
3968 .fno = gen_helper_gvec_usra_h,
3969 .load_dest = true,
3970 .opt_opc = vecop_list,
3971 .vece = MO_16, },
3972 { .fni4 = gen_usra32_i32,
3973 .fniv = gen_usra_vec,
3974 .fno = gen_helper_gvec_usra_s,
3975 .load_dest = true,
3976 .opt_opc = vecop_list,
3977 .vece = MO_32, },
3978 { .fni8 = gen_usra64_i64,
3979 .fniv = gen_usra_vec,
3980 .fno = gen_helper_gvec_usra_d,
3981 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3982 .load_dest = true,
3983 .opt_opc = vecop_list,
3984 .vece = MO_64, },
3985 };
3986
3987 /* tszimm encoding produces immediates in the range [1..esize]. */
3988 tcg_debug_assert(shift > 0);
3989 tcg_debug_assert(shift <= (8 << vece));
3990
3991 /*
3992 * Shifts larger than the element size are architecturally valid.
3993 * Unsigned results in all zeros as input to accumulate: nop.
3994 */
3995 if (shift < (8 << vece)) {
3996 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3997 } else {
3998 /* Nop, but we do need to clear the tail. */
3999 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4000 }
4001 }
4002
4003 /*
4004 * Shift one less than the requested amount, and the low bit is
4005 * the rounding bit. For the 8 and 16-bit operations, because we
4006 * mask the low bit, we can perform a normal integer shift instead
4007 * of a vector shift.
4008 */
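
/*
 * Minimal scalar sketch of the identity used by the helpers below
 * (hypothetical name, illustrative only; assumes <stdint.h> types and
 * arithmetic right shift of negative values): the rounded shift
 * (x + (1 << (sh - 1))) >> sh equals the plain shift plus the last bit
 * shifted out, i.e. "shift one less and keep the low bit".
 */
static inline int32_t srshr32_ref(int32_t x, int sh) /* 1 <= sh <= 31 */
{
    int32_t rounding_bit = (x >> (sh - 1)) & 1;
    return (x >> sh) + rounding_bit;
}
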
4009 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4010 {
4011 TCGv_i64 t = tcg_temp_new_i64();
4012
4013 tcg_gen_shri_i64(t, a, sh - 1);
4014 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
4015 tcg_gen_vec_sar8i_i64(d, a, sh);
4016 tcg_gen_vec_add8_i64(d, d, t);
4017 tcg_temp_free_i64(t);
4018 }
4019
4020 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4021 {
4022 TCGv_i64 t = tcg_temp_new_i64();
4023
4024 tcg_gen_shri_i64(t, a, sh - 1);
4025 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
4026 tcg_gen_vec_sar16i_i64(d, a, sh);
4027 tcg_gen_vec_add16_i64(d, d, t);
4028 tcg_temp_free_i64(t);
4029 }
4030
4031 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4032 {
4033 TCGv_i32 t = tcg_temp_new_i32();
4034
4035 tcg_gen_extract_i32(t, a, sh - 1, 1);
4036 tcg_gen_sari_i32(d, a, sh);
4037 tcg_gen_add_i32(d, d, t);
4038 tcg_temp_free_i32(t);
4039 }
4040
4041 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4042 {
4043 TCGv_i64 t = tcg_temp_new_i64();
4044
4045 tcg_gen_extract_i64(t, a, sh - 1, 1);
4046 tcg_gen_sari_i64(d, a, sh);
4047 tcg_gen_add_i64(d, d, t);
4048 tcg_temp_free_i64(t);
4049 }
4050
4051 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4052 {
4053 TCGv_vec t = tcg_temp_new_vec_matching(d);
4054 TCGv_vec ones = tcg_temp_new_vec_matching(d);
4055
4056 tcg_gen_shri_vec(vece, t, a, sh - 1);
4057 tcg_gen_dupi_vec(vece, ones, 1);
4058 tcg_gen_and_vec(vece, t, t, ones);
4059 tcg_gen_sari_vec(vece, d, a, sh);
4060 tcg_gen_add_vec(vece, d, d, t);
4061
4062 tcg_temp_free_vec(t);
4063 tcg_temp_free_vec(ones);
4064 }
4065
4066 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4067 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4068 {
4069 static const TCGOpcode vecop_list[] = {
4070 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
4071 };
4072 static const GVecGen2i ops[4] = {
4073 { .fni8 = gen_srshr8_i64,
4074 .fniv = gen_srshr_vec,
4075 .fno = gen_helper_gvec_srshr_b,
4076 .opt_opc = vecop_list,
4077 .vece = MO_8 },
4078 { .fni8 = gen_srshr16_i64,
4079 .fniv = gen_srshr_vec,
4080 .fno = gen_helper_gvec_srshr_h,
4081 .opt_opc = vecop_list,
4082 .vece = MO_16 },
4083 { .fni4 = gen_srshr32_i32,
4084 .fniv = gen_srshr_vec,
4085 .fno = gen_helper_gvec_srshr_s,
4086 .opt_opc = vecop_list,
4087 .vece = MO_32 },
4088 { .fni8 = gen_srshr64_i64,
4089 .fniv = gen_srshr_vec,
4090 .fno = gen_helper_gvec_srshr_d,
4091 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4092 .opt_opc = vecop_list,
4093 .vece = MO_64 },
4094 };
4095
4096 /* tszimm encoding produces immediates in the range [1..esize] */
4097 tcg_debug_assert(shift > 0);
4098 tcg_debug_assert(shift <= (8 << vece));
4099
4100 if (shift == (8 << vece)) {
4101 /*
4102 * Shifts larger than the element size are architecturally valid.
4103 * Signed results in all sign bits. With rounding, this produces
4104 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
4105 * I.e. always zero.
4106 */
4107 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
4108 } else {
4109 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4110 }
4111 }
4112
4113 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4114 {
4115 TCGv_i64 t = tcg_temp_new_i64();
4116
4117 gen_srshr8_i64(t, a, sh);
4118 tcg_gen_vec_add8_i64(d, d, t);
4119 tcg_temp_free_i64(t);
4120 }
4121
4122 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4123 {
4124 TCGv_i64 t = tcg_temp_new_i64();
4125
4126 gen_srshr16_i64(t, a, sh);
4127 tcg_gen_vec_add16_i64(d, d, t);
4128 tcg_temp_free_i64(t);
4129 }
4130
4131 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4132 {
4133 TCGv_i32 t = tcg_temp_new_i32();
4134
4135 gen_srshr32_i32(t, a, sh);
4136 tcg_gen_add_i32(d, d, t);
4137 tcg_temp_free_i32(t);
4138 }
4139
4140 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4141 {
4142 TCGv_i64 t = tcg_temp_new_i64();
4143
4144 gen_srshr64_i64(t, a, sh);
4145 tcg_gen_add_i64(d, d, t);
4146 tcg_temp_free_i64(t);
4147 }
4148
4149 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4150 {
4151 TCGv_vec t = tcg_temp_new_vec_matching(d);
4152
4153 gen_srshr_vec(vece, t, a, sh);
4154 tcg_gen_add_vec(vece, d, d, t);
4155 tcg_temp_free_vec(t);
4156 }
4157
4158 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4159 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4160 {
4161 static const TCGOpcode vecop_list[] = {
4162 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
4163 };
4164 static const GVecGen2i ops[4] = {
4165 { .fni8 = gen_srsra8_i64,
4166 .fniv = gen_srsra_vec,
4167 .fno = gen_helper_gvec_srsra_b,
4168 .opt_opc = vecop_list,
4169 .load_dest = true,
4170 .vece = MO_8 },
4171 { .fni8 = gen_srsra16_i64,
4172 .fniv = gen_srsra_vec,
4173 .fno = gen_helper_gvec_srsra_h,
4174 .opt_opc = vecop_list,
4175 .load_dest = true,
4176 .vece = MO_16 },
4177 { .fni4 = gen_srsra32_i32,
4178 .fniv = gen_srsra_vec,
4179 .fno = gen_helper_gvec_srsra_s,
4180 .opt_opc = vecop_list,
4181 .load_dest = true,
4182 .vece = MO_32 },
4183 { .fni8 = gen_srsra64_i64,
4184 .fniv = gen_srsra_vec,
4185 .fno = gen_helper_gvec_srsra_d,
4186 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4187 .opt_opc = vecop_list,
4188 .load_dest = true,
4189 .vece = MO_64 },
4190 };
4191
4192 /* tszimm encoding produces immediates in the range [1..esize] */
4193 tcg_debug_assert(shift > 0);
4194 tcg_debug_assert(shift <= (8 << vece));
4195
4196 /*
4197 * Shifts larger than the element size are architecturally valid.
4198 * Signed results in all sign bits. With rounding, this produces
4199 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
4200 * I.e. always zero. With accumulation, this leaves D unchanged.
4201 */
4202 if (shift == (8 << vece)) {
4203 /* Nop, but we do need to clear the tail. */
4204 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4205 } else {
4206 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4207 }
4208 }
4209
4210 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4211 {
4212 TCGv_i64 t = tcg_temp_new_i64();
4213
4214 tcg_gen_shri_i64(t, a, sh - 1);
4215 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
4216 tcg_gen_vec_shr8i_i64(d, a, sh);
4217 tcg_gen_vec_add8_i64(d, d, t);
4218 tcg_temp_free_i64(t);
4219 }
4220
4221 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4222 {
4223 TCGv_i64 t = tcg_temp_new_i64();
4224
4225 tcg_gen_shri_i64(t, a, sh - 1);
4226 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
4227 tcg_gen_vec_shr16i_i64(d, a, sh);
4228 tcg_gen_vec_add16_i64(d, d, t);
4229 tcg_temp_free_i64(t);
4230 }
4231
4232 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4233 {
4234 TCGv_i32 t = tcg_temp_new_i32();
4235
4236 tcg_gen_extract_i32(t, a, sh - 1, 1);
4237 tcg_gen_shri_i32(d, a, sh);
4238 tcg_gen_add_i32(d, d, t);
4239 tcg_temp_free_i32(t);
4240 }
4241
4242 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4243 {
4244 TCGv_i64 t = tcg_temp_new_i64();
4245
4246 tcg_gen_extract_i64(t, a, sh - 1, 1);
4247 tcg_gen_shri_i64(d, a, sh);
4248 tcg_gen_add_i64(d, d, t);
4249 tcg_temp_free_i64(t);
4250 }
4251
4252 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
4253 {
4254 TCGv_vec t = tcg_temp_new_vec_matching(d);
4255 TCGv_vec ones = tcg_temp_new_vec_matching(d);
4256
4257 tcg_gen_shri_vec(vece, t, a, shift - 1);
4258 tcg_gen_dupi_vec(vece, ones, 1);
4259 tcg_gen_and_vec(vece, t, t, ones);
4260 tcg_gen_shri_vec(vece, d, a, shift);
4261 tcg_gen_add_vec(vece, d, d, t);
4262
4263 tcg_temp_free_vec(t);
4264 tcg_temp_free_vec(ones);
4265 }
4266
4267 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4268 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4269 {
4270 static const TCGOpcode vecop_list[] = {
4271 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4272 };
4273 static const GVecGen2i ops[4] = {
4274 { .fni8 = gen_urshr8_i64,
4275 .fniv = gen_urshr_vec,
4276 .fno = gen_helper_gvec_urshr_b,
4277 .opt_opc = vecop_list,
4278 .vece = MO_8 },
4279 { .fni8 = gen_urshr16_i64,
4280 .fniv = gen_urshr_vec,
4281 .fno = gen_helper_gvec_urshr_h,
4282 .opt_opc = vecop_list,
4283 .vece = MO_16 },
4284 { .fni4 = gen_urshr32_i32,
4285 .fniv = gen_urshr_vec,
4286 .fno = gen_helper_gvec_urshr_s,
4287 .opt_opc = vecop_list,
4288 .vece = MO_32 },
4289 { .fni8 = gen_urshr64_i64,
4290 .fniv = gen_urshr_vec,
4291 .fno = gen_helper_gvec_urshr_d,
4292 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4293 .opt_opc = vecop_list,
4294 .vece = MO_64 },
4295 };
4296
4297 /* tszimm encoding produces immediates in the range [1..esize] */
4298 tcg_debug_assert(shift > 0);
4299 tcg_debug_assert(shift <= (8 << vece));
4300
4301 if (shift == (8 << vece)) {
4302 /*
4303 * Shifts larger than the element size are architecturally valid.
4304 * Unsigned results in zero. With rounding, this produces a
4305 * copy of the most significant bit.
4306 */
4307 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
4308 } else {
4309 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4310 }
4311 }
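
/*
 * Scalar illustration of the shift == esize case above for an 8-bit lane
 * (hypothetical name, illustrative only): with rounding, an unsigned right
 * shift by the full element size reduces to a copy of the most significant
 * bit, hence the plain gvec shift by esize - 1.
 */
static inline uint8_t urshr8_by8_ref(uint8_t x)
{
    return (uint8_t)(((uint32_t)x + 128u) >> 8);    /* == x >> 7 */
}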
4312
4313 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4314 {
4315 TCGv_i64 t = tcg_temp_new_i64();
4316
4317 if (sh == 8) {
4318 tcg_gen_vec_shr8i_i64(t, a, 7);
4319 } else {
4320 gen_urshr8_i64(t, a, sh);
4321 }
4322 tcg_gen_vec_add8_i64(d, d, t);
4323 tcg_temp_free_i64(t);
4324 }
4325
4326 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4327 {
4328 TCGv_i64 t = tcg_temp_new_i64();
4329
4330 if (sh == 16) {
4331 tcg_gen_vec_shr16i_i64(t, a, 15);
4332 } else {
4333 gen_urshr16_i64(t, a, sh);
4334 }
4335 tcg_gen_vec_add16_i64(d, d, t);
4336 tcg_temp_free_i64(t);
4337 }
4338
4339 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4340 {
4341 TCGv_i32 t = tcg_temp_new_i32();
4342
4343 if (sh == 32) {
4344 tcg_gen_shri_i32(t, a, 31);
4345 } else {
4346 gen_urshr32_i32(t, a, sh);
4347 }
4348 tcg_gen_add_i32(d, d, t);
4349 tcg_temp_free_i32(t);
4350 }
4351
4352 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4353 {
4354 TCGv_i64 t = tcg_temp_new_i64();
4355
4356 if (sh == 64) {
4357 tcg_gen_shri_i64(t, a, 63);
4358 } else {
4359 gen_urshr64_i64(t, a, sh);
4360 }
4361 tcg_gen_add_i64(d, d, t);
4362 tcg_temp_free_i64(t);
4363 }
4364
4365 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4366 {
4367 TCGv_vec t = tcg_temp_new_vec_matching(d);
4368
4369 if (sh == (8 << vece)) {
4370 tcg_gen_shri_vec(vece, t, a, sh - 1);
4371 } else {
4372 gen_urshr_vec(vece, t, a, sh);
4373 }
4374 tcg_gen_add_vec(vece, d, d, t);
4375 tcg_temp_free_vec(t);
4376 }
4377
4378 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4379 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4380 {
4381 static const TCGOpcode vecop_list[] = {
4382 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4383 };
4384 static const GVecGen2i ops[4] = {
4385 { .fni8 = gen_ursra8_i64,
4386 .fniv = gen_ursra_vec,
4387 .fno = gen_helper_gvec_ursra_b,
4388 .opt_opc = vecop_list,
4389 .load_dest = true,
4390 .vece = MO_8 },
4391 { .fni8 = gen_ursra16_i64,
4392 .fniv = gen_ursra_vec,
4393 .fno = gen_helper_gvec_ursra_h,
4394 .opt_opc = vecop_list,
4395 .load_dest = true,
4396 .vece = MO_16 },
4397 { .fni4 = gen_ursra32_i32,
4398 .fniv = gen_ursra_vec,
4399 .fno = gen_helper_gvec_ursra_s,
4400 .opt_opc = vecop_list,
4401 .load_dest = true,
4402 .vece = MO_32 },
4403 { .fni8 = gen_ursra64_i64,
4404 .fniv = gen_ursra_vec,
4405 .fno = gen_helper_gvec_ursra_d,
4406 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4407 .opt_opc = vecop_list,
4408 .load_dest = true,
4409 .vece = MO_64 },
4410 };
4411
4412 /* tszimm encoding produces immediates in the range [1..esize] */
4413 tcg_debug_assert(shift > 0);
4414 tcg_debug_assert(shift <= (8 << vece));
4415
4416 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4417 }
4418
4419 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4420 {
4421 uint64_t mask = dup_const(MO_8, 0xff >> shift);
4422 TCGv_i64 t = tcg_temp_new_i64();
4423
4424 tcg_gen_shri_i64(t, a, shift);
4425 tcg_gen_andi_i64(t, t, mask);
4426 tcg_gen_andi_i64(d, d, ~mask);
4427 tcg_gen_or_i64(d, d, t);
4428 tcg_temp_free_i64(t);
4429 }
4430
4431 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4432 {
4433 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4434 TCGv_i64 t = tcg_temp_new_i64();
4435
4436 tcg_gen_shri_i64(t, a, shift);
4437 tcg_gen_andi_i64(t, t, mask);
4438 tcg_gen_andi_i64(d, d, ~mask);
4439 tcg_gen_or_i64(d, d, t);
4440 tcg_temp_free_i64(t);
4441 }
4442
4443 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4444 {
4445 tcg_gen_shri_i32(a, a, shift);
4446 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4447 }
4448
4449 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4450 {
4451 tcg_gen_shri_i64(a, a, shift);
4452 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4453 }
4454
4455 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4456 {
4457 TCGv_vec t = tcg_temp_new_vec_matching(d);
4458 TCGv_vec m = tcg_temp_new_vec_matching(d);
4459
4460 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4461 tcg_gen_shri_vec(vece, t, a, sh);
4462 tcg_gen_and_vec(vece, d, d, m);
4463 tcg_gen_or_vec(vece, d, d, t);
4464
4465 tcg_temp_free_vec(t);
4466 tcg_temp_free_vec(m);
4467 }
4468
4469 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4470 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4471 {
4472 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
4473 const GVecGen2i ops[4] = {
4474 { .fni8 = gen_shr8_ins_i64,
4475 .fniv = gen_shr_ins_vec,
4476 .fno = gen_helper_gvec_sri_b,
4477 .load_dest = true,
4478 .opt_opc = vecop_list,
4479 .vece = MO_8 },
4480 { .fni8 = gen_shr16_ins_i64,
4481 .fniv = gen_shr_ins_vec,
4482 .fno = gen_helper_gvec_sri_h,
4483 .load_dest = true,
4484 .opt_opc = vecop_list,
4485 .vece = MO_16 },
4486 { .fni4 = gen_shr32_ins_i32,
4487 .fniv = gen_shr_ins_vec,
4488 .fno = gen_helper_gvec_sri_s,
4489 .load_dest = true,
4490 .opt_opc = vecop_list,
4491 .vece = MO_32 },
4492 { .fni8 = gen_shr64_ins_i64,
4493 .fniv = gen_shr_ins_vec,
4494 .fno = gen_helper_gvec_sri_d,
4495 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4496 .load_dest = true,
4497 .opt_opc = vecop_list,
4498 .vece = MO_64 },
4499 };
4500
4501 /* tszimm encoding produces immediates in the range [1..esize]. */
4502 tcg_debug_assert(shift > 0);
4503 tcg_debug_assert(shift <= (8 << vece));
4504
4505 /* Shift of esize leaves destination unchanged. */
4506 if (shift < (8 << vece)) {
4507 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4508 } else {
4509 /* Nop, but we do need to clear the tail. */
4510 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4511 }
4512 }
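
/*
 * Scalar sketch of the insert performed above for an 8-bit lane
 * (hypothetical name, illustrative only): the top sh bits of the
 * destination survive, so a shift by the full element size leaves the
 * destination untouched, which is the nop case handled just above.
 */
static inline uint8_t sri8_ref(uint8_t d, uint8_t x, int sh) /* 1 <= sh <= 8 */
{
    uint8_t keep = (uint8_t)(0xff00u >> sh);        /* top sh bits of d */
    return (uint8_t)((d & keep) | (x >> sh));
}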
4513
4514 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4515 {
4516 uint64_t mask = dup_const(MO_8, 0xff << shift);
4517 TCGv_i64 t = tcg_temp_new_i64();
4518
4519 tcg_gen_shli_i64(t, a, shift);
4520 tcg_gen_andi_i64(t, t, mask);
4521 tcg_gen_andi_i64(d, d, ~mask);
4522 tcg_gen_or_i64(d, d, t);
4523 tcg_temp_free_i64(t);
4524 }
4525
4526 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4527 {
4528 uint64_t mask = dup_const(MO_16, 0xffff << shift);
4529 TCGv_i64 t = tcg_temp_new_i64();
4530
4531 tcg_gen_shli_i64(t, a, shift);
4532 tcg_gen_andi_i64(t, t, mask);
4533 tcg_gen_andi_i64(d, d, ~mask);
4534 tcg_gen_or_i64(d, d, t);
4535 tcg_temp_free_i64(t);
4536 }
4537
4538 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4539 {
4540 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4541 }
4542
4543 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4544 {
4545 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
4546 }
4547
4548 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4549 {
4550 TCGv_vec t = tcg_temp_new_vec_matching(d);
4551 TCGv_vec m = tcg_temp_new_vec_matching(d);
4552
4553 tcg_gen_shli_vec(vece, t, a, sh);
4554 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4555 tcg_gen_and_vec(vece, d, d, m);
4556 tcg_gen_or_vec(vece, d, d, t);
4557
4558 tcg_temp_free_vec(t);
4559 tcg_temp_free_vec(m);
4560 }
4561
4562 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4563 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4564 {
4565 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
4566 const GVecGen2i ops[4] = {
4567 { .fni8 = gen_shl8_ins_i64,
4568 .fniv = gen_shl_ins_vec,
4569 .fno = gen_helper_gvec_sli_b,
4570 .load_dest = true,
4571 .opt_opc = vecop_list,
4572 .vece = MO_8 },
4573 { .fni8 = gen_shl16_ins_i64,
4574 .fniv = gen_shl_ins_vec,
4575 .fno = gen_helper_gvec_sli_h,
4576 .load_dest = true,
4577 .opt_opc = vecop_list,
4578 .vece = MO_16 },
4579 { .fni4 = gen_shl32_ins_i32,
4580 .fniv = gen_shl_ins_vec,
4581 .fno = gen_helper_gvec_sli_s,
4582 .load_dest = true,
4583 .opt_opc = vecop_list,
4584 .vece = MO_32 },
4585 { .fni8 = gen_shl64_ins_i64,
4586 .fniv = gen_shl_ins_vec,
4587 .fno = gen_helper_gvec_sli_d,
4588 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4589 .load_dest = true,
4590 .opt_opc = vecop_list,
4591 .vece = MO_64 },
4592 };
4593
4594 /* tszimm encoding produces immediates in the range [0..esize-1]. */
4595 tcg_debug_assert(shift >= 0);
4596 tcg_debug_assert(shift < (8 << vece));
4597
4598 if (shift == 0) {
4599 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
4600 } else {
4601 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4602 }
4603 }
4604
4605 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4606 {
4607 gen_helper_neon_mul_u8(a, a, b);
4608 gen_helper_neon_add_u8(d, d, a);
4609 }
4610
4611 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4612 {
4613 gen_helper_neon_mul_u8(a, a, b);
4614 gen_helper_neon_sub_u8(d, d, a);
4615 }
4616
4617 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4618 {
4619 gen_helper_neon_mul_u16(a, a, b);
4620 gen_helper_neon_add_u16(d, d, a);
4621 }
4622
4623 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4624 {
4625 gen_helper_neon_mul_u16(a, a, b);
4626 gen_helper_neon_sub_u16(d, d, a);
4627 }
4628
4629 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4630 {
4631 tcg_gen_mul_i32(a, a, b);
4632 tcg_gen_add_i32(d, d, a);
4633 }
4634
4635 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4636 {
4637 tcg_gen_mul_i32(a, a, b);
4638 tcg_gen_sub_i32(d, d, a);
4639 }
4640
4641 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4642 {
4643 tcg_gen_mul_i64(a, a, b);
4644 tcg_gen_add_i64(d, d, a);
4645 }
4646
4647 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4648 {
4649 tcg_gen_mul_i64(a, a, b);
4650 tcg_gen_sub_i64(d, d, a);
4651 }
4652
4653 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4654 {
4655 tcg_gen_mul_vec(vece, a, a, b);
4656 tcg_gen_add_vec(vece, d, d, a);
4657 }
4658
4659 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4660 {
4661 tcg_gen_mul_vec(vece, a, a, b);
4662 tcg_gen_sub_vec(vece, d, d, a);
4663 }
4664
4665 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4666 * these tables are shared with AArch64 which does support them.
4667 */
4668
4669 static const TCGOpcode vecop_list_mla[] = {
4670 INDEX_op_mul_vec, INDEX_op_add_vec, 0
4671 };
4672
4673 static const TCGOpcode vecop_list_mls[] = {
4674 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4675 };
4676
4677 const GVecGen3 mla_op[4] = {
4678 { .fni4 = gen_mla8_i32,
4679 .fniv = gen_mla_vec,
4680 .load_dest = true,
4681 .opt_opc = vecop_list_mla,
4682 .vece = MO_8 },
4683 { .fni4 = gen_mla16_i32,
4684 .fniv = gen_mla_vec,
4685 .load_dest = true,
4686 .opt_opc = vecop_list_mla,
4687 .vece = MO_16 },
4688 { .fni4 = gen_mla32_i32,
4689 .fniv = gen_mla_vec,
4690 .load_dest = true,
4691 .opt_opc = vecop_list_mla,
4692 .vece = MO_32 },
4693 { .fni8 = gen_mla64_i64,
4694 .fniv = gen_mla_vec,
4695 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4696 .load_dest = true,
4697 .opt_opc = vecop_list_mla,
4698 .vece = MO_64 },
4699 };
4700
4701 const GVecGen3 mls_op[4] = {
4702 { .fni4 = gen_mls8_i32,
4703 .fniv = gen_mls_vec,
4704 .load_dest = true,
4705 .opt_opc = vecop_list_mls,
4706 .vece = MO_8 },
4707 { .fni4 = gen_mls16_i32,
4708 .fniv = gen_mls_vec,
4709 .load_dest = true,
4710 .opt_opc = vecop_list_mls,
4711 .vece = MO_16 },
4712 { .fni4 = gen_mls32_i32,
4713 .fniv = gen_mls_vec,
4714 .load_dest = true,
4715 .opt_opc = vecop_list_mls,
4716 .vece = MO_32 },
4717 { .fni8 = gen_mls64_i64,
4718 .fniv = gen_mls_vec,
4719 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4720 .load_dest = true,
4721 .opt_opc = vecop_list_mls,
4722 .vece = MO_64 },
4723 };
4724
4725 /* CMTST : test is "if ((X & Y) != 0)". */
4726 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4727 {
4728 tcg_gen_and_i32(d, a, b);
4729 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4730 tcg_gen_neg_i32(d, d);
4731 }
4732
4733 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4734 {
4735 tcg_gen_and_i64(d, a, b);
4736 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4737 tcg_gen_neg_i64(d, d);
4738 }
4739
4740 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4741 {
4742 tcg_gen_and_vec(vece, d, a, b);
4743 tcg_gen_dupi_vec(vece, a, 0);
4744 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4745 }
4746
4747 static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 };
4748
4749 const GVecGen3 cmtst_op[4] = {
4750 { .fni4 = gen_helper_neon_tst_u8,
4751 .fniv = gen_cmtst_vec,
4752 .opt_opc = vecop_list_cmtst,
4753 .vece = MO_8 },
4754 { .fni4 = gen_helper_neon_tst_u16,
4755 .fniv = gen_cmtst_vec,
4756 .opt_opc = vecop_list_cmtst,
4757 .vece = MO_16 },
4758 { .fni4 = gen_cmtst_i32,
4759 .fniv = gen_cmtst_vec,
4760 .opt_opc = vecop_list_cmtst,
4761 .vece = MO_32 },
4762 { .fni8 = gen_cmtst_i64,
4763 .fniv = gen_cmtst_vec,
4764 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4765 .opt_opc = vecop_list_cmtst,
4766 .vece = MO_64 },
4767 };
4768
4769 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4770 {
4771 TCGv_i32 lval = tcg_temp_new_i32();
4772 TCGv_i32 rval = tcg_temp_new_i32();
4773 TCGv_i32 lsh = tcg_temp_new_i32();
4774 TCGv_i32 rsh = tcg_temp_new_i32();
4775 TCGv_i32 zero = tcg_const_i32(0);
4776 TCGv_i32 max = tcg_const_i32(32);
4777
4778 /*
4779 * Rely on the TCG guarantee that out of range shifts produce
4780 * unspecified results, not undefined behaviour (i.e. no trap).
4781 * Discard out-of-range results after the fact.
4782 */
4783 tcg_gen_ext8s_i32(lsh, shift);
4784 tcg_gen_neg_i32(rsh, lsh);
4785 tcg_gen_shl_i32(lval, src, lsh);
4786 tcg_gen_shr_i32(rval, src, rsh);
4787 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4788 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4789
4790 tcg_temp_free_i32(lval);
4791 tcg_temp_free_i32(rval);
4792 tcg_temp_free_i32(lsh);
4793 tcg_temp_free_i32(rsh);
4794 tcg_temp_free_i32(zero);
4795 tcg_temp_free_i32(max);
4796 }
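
/*
 * Scalar sketch of the USHL.32 semantics produced above (hypothetical
 * name, illustrative only): the count is the signed low byte of the shift
 * operand, positive counts shift left, negative counts shift right, and
 * any count that moves every bit out yields zero.  The TCG code computes
 * both candidate results and discards with movcond rather than branching.
 */
static inline uint32_t ushl32_ref(uint32_t x, int8_t sh)
{
    if (sh <= -32 || sh >= 32) {
        return 0;
    }
    return sh >= 0 ? x << sh : x >> -sh;
}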
4797
4798 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4799 {
4800 TCGv_i64 lval = tcg_temp_new_i64();
4801 TCGv_i64 rval = tcg_temp_new_i64();
4802 TCGv_i64 lsh = tcg_temp_new_i64();
4803 TCGv_i64 rsh = tcg_temp_new_i64();
4804 TCGv_i64 zero = tcg_const_i64(0);
4805 TCGv_i64 max = tcg_const_i64(64);
4806
4807 /*
4808 * Rely on the TCG guarantee that out of range shifts produce
4809 * unspecified results, not undefined behaviour (i.e. no trap).
4810 * Discard out-of-range results after the fact.
4811 */
4812 tcg_gen_ext8s_i64(lsh, shift);
4813 tcg_gen_neg_i64(rsh, lsh);
4814 tcg_gen_shl_i64(lval, src, lsh);
4815 tcg_gen_shr_i64(rval, src, rsh);
4816 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4817 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4818
4819 tcg_temp_free_i64(lval);
4820 tcg_temp_free_i64(rval);
4821 tcg_temp_free_i64(lsh);
4822 tcg_temp_free_i64(rsh);
4823 tcg_temp_free_i64(zero);
4824 tcg_temp_free_i64(max);
4825 }
4826
4827 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4828 TCGv_vec src, TCGv_vec shift)
4829 {
4830 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4831 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4832 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4833 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4834 TCGv_vec msk, max;
4835
4836 tcg_gen_neg_vec(vece, rsh, shift);
4837 if (vece == MO_8) {
4838 tcg_gen_mov_vec(lsh, shift);
4839 } else {
4840 msk = tcg_temp_new_vec_matching(dst);
4841 tcg_gen_dupi_vec(vece, msk, 0xff);
4842 tcg_gen_and_vec(vece, lsh, shift, msk);
4843 tcg_gen_and_vec(vece, rsh, rsh, msk);
4844 tcg_temp_free_vec(msk);
4845 }
4846
4847 /*
4848 * Rely on the TCG guarantee that out of range shifts produce
4849 * unspecified results, not undefined behaviour (i.e. no trap).
4850 * Discard out-of-range results after the fact.
4851 */
4852 tcg_gen_shlv_vec(vece, lval, src, lsh);
4853 tcg_gen_shrv_vec(vece, rval, src, rsh);
4854
4855 max = tcg_temp_new_vec_matching(dst);
4856 tcg_gen_dupi_vec(vece, max, 8 << vece);
4857
4858 /*
4859 * The choice of LT (signed) and GEU (unsigned) is biased toward
4860 * the instructions of the x86_64 host. For MO_8, the whole byte
4861 * is significant so we must use an unsigned compare; otherwise we
4862 * have already masked to a byte and so a signed compare works.
4863 * Other tcg hosts have a full set of comparisons and do not care.
4864 */
4865 if (vece == MO_8) {
4866 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4867 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4868 tcg_gen_andc_vec(vece, lval, lval, lsh);
4869 tcg_gen_andc_vec(vece, rval, rval, rsh);
4870 } else {
4871 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4872 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4873 tcg_gen_and_vec(vece, lval, lval, lsh);
4874 tcg_gen_and_vec(vece, rval, rval, rsh);
4875 }
4876 tcg_gen_or_vec(vece, dst, lval, rval);
4877
4878 tcg_temp_free_vec(max);
4879 tcg_temp_free_vec(lval);
4880 tcg_temp_free_vec(rval);
4881 tcg_temp_free_vec(lsh);
4882 tcg_temp_free_vec(rsh);
4883 }
4884
4885 static const TCGOpcode ushl_list[] = {
4886 INDEX_op_neg_vec, INDEX_op_shlv_vec,
4887 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4888 };
4889
4890 const GVecGen3 ushl_op[4] = {
4891 { .fniv = gen_ushl_vec,
4892 .fno = gen_helper_gvec_ushl_b,
4893 .opt_opc = ushl_list,
4894 .vece = MO_8 },
4895 { .fniv = gen_ushl_vec,
4896 .fno = gen_helper_gvec_ushl_h,
4897 .opt_opc = ushl_list,
4898 .vece = MO_16 },
4899 { .fni4 = gen_ushl_i32,
4900 .fniv = gen_ushl_vec,
4901 .opt_opc = ushl_list,
4902 .vece = MO_32 },
4903 { .fni8 = gen_ushl_i64,
4904 .fniv = gen_ushl_vec,
4905 .opt_opc = ushl_list,
4906 .vece = MO_64 },
4907 };
4908
4909 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4910 {
4911 TCGv_i32 lval = tcg_temp_new_i32();
4912 TCGv_i32 rval = tcg_temp_new_i32();
4913 TCGv_i32 lsh = tcg_temp_new_i32();
4914 TCGv_i32 rsh = tcg_temp_new_i32();
4915 TCGv_i32 zero = tcg_const_i32(0);
4916 TCGv_i32 max = tcg_const_i32(31);
4917
4918 /*
4919 * Rely on the TCG guarantee that out of range shifts produce
4920 * unspecified results, not undefined behaviour (i.e. no trap).
4921 * Discard out-of-range results after the fact.
4922 */
4923 tcg_gen_ext8s_i32(lsh, shift);
4924 tcg_gen_neg_i32(rsh, lsh);
4925 tcg_gen_shl_i32(lval, src, lsh);
4926 tcg_gen_umin_i32(rsh, rsh, max);
4927 tcg_gen_sar_i32(rval, src, rsh);
4928 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4929 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4930
4931 tcg_temp_free_i32(lval);
4932 tcg_temp_free_i32(rval);
4933 tcg_temp_free_i32(lsh);
4934 tcg_temp_free_i32(rsh);
4935 tcg_temp_free_i32(zero);
4936 tcg_temp_free_i32(max);
4937 }
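
/*
 * Scalar sketch of the SSHL.32 semantics produced above (hypothetical
 * name, illustrative only; assumes arithmetic right shift of negative
 * values): a right shift by 32 or more must still return the sign,
 * which is why the generated code clamps the count with umin to 31
 * instead of discarding the result as the unsigned case does.
 */
static inline int32_t sshl32_ref(int32_t x, int8_t sh)
{
    if (sh >= 32) {
        return 0;                            /* every bit shifted out */
    }
    if (sh <= -32) {
        return x < 0 ? -1 : 0;               /* saturates at the sign bits */
    }
    if (sh >= 0) {
        return (int32_t)((uint32_t)x << sh); /* avoid signed-overflow UB */
    }
    return x >> -sh;
}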
4938
4939 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4940 {
4941 TCGv_i64 lval = tcg_temp_new_i64();
4942 TCGv_i64 rval = tcg_temp_new_i64();
4943 TCGv_i64 lsh = tcg_temp_new_i64();
4944 TCGv_i64 rsh = tcg_temp_new_i64();
4945 TCGv_i64 zero = tcg_const_i64(0);
4946 TCGv_i64 max = tcg_const_i64(63);
4947
4948 /*
4949 * Rely on the TCG guarantee that out of range shifts produce
4950 * unspecified results, not undefined behaviour (i.e. no trap).
4951 * Discard out-of-range results after the fact.
4952 */
4953 tcg_gen_ext8s_i64(lsh, shift);
4954 tcg_gen_neg_i64(rsh, lsh);
4955 tcg_gen_shl_i64(lval, src, lsh);
4956 tcg_gen_umin_i64(rsh, rsh, max);
4957 tcg_gen_sar_i64(rval, src, rsh);
4958 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4959 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4960
4961 tcg_temp_free_i64(lval);
4962 tcg_temp_free_i64(rval);
4963 tcg_temp_free_i64(lsh);
4964 tcg_temp_free_i64(rsh);
4965 tcg_temp_free_i64(zero);
4966 tcg_temp_free_i64(max);
4967 }
4968
4969 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4970 TCGv_vec src, TCGv_vec shift)
4971 {
4972 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4973 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4974 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4975 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4976 TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4977
4978 /*
4979 * Rely on the TCG guarantee that out of range shifts produce
4980 * unspecified results, not undefined behaviour (i.e. no trap).
4981 * Discard out-of-range results after the fact.
4982 */
4983 tcg_gen_neg_vec(vece, rsh, shift);
4984 if (vece == MO_8) {
4985 tcg_gen_mov_vec(lsh, shift);
4986 } else {
4987 tcg_gen_dupi_vec(vece, tmp, 0xff);
4988 tcg_gen_and_vec(vece, lsh, shift, tmp);
4989 tcg_gen_and_vec(vece, rsh, rsh, tmp);
4990 }
4991
4992 /* Bound rsh so that an out-of-range right shift yields all sign bits. */
4993 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4994 tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4995 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4996
4997 tcg_gen_shlv_vec(vece, lval, src, lsh);
4998 tcg_gen_sarv_vec(vece, rval, src, rsh);
4999
5000 /* Select in-bound left shift. */
5001 tcg_gen_andc_vec(vece, lval, lval, tmp);
5002
5003 /* Select between left and right shift. */
5004 if (vece == MO_8) {
5005 tcg_gen_dupi_vec(vece, tmp, 0);
5006 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
5007 } else {
5008 tcg_gen_dupi_vec(vece, tmp, 0x80);
5009 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
5010 }
5011
5012 tcg_temp_free_vec(lval);
5013 tcg_temp_free_vec(rval);
5014 tcg_temp_free_vec(lsh);
5015 tcg_temp_free_vec(rsh);
5016 tcg_temp_free_vec(tmp);
5017 }
5018
5019 static const TCGOpcode sshl_list[] = {
5020 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
5021 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
5022 };
5023
5024 const GVecGen3 sshl_op[4] = {
5025 { .fniv = gen_sshl_vec,
5026 .fno = gen_helper_gvec_sshl_b,
5027 .opt_opc = sshl_list,
5028 .vece = MO_8 },
5029 { .fniv = gen_sshl_vec,
5030 .fno = gen_helper_gvec_sshl_h,
5031 .opt_opc = sshl_list,
5032 .vece = MO_16 },
5033 { .fni4 = gen_sshl_i32,
5034 .fniv = gen_sshl_vec,
5035 .opt_opc = sshl_list,
5036 .vece = MO_32 },
5037 { .fni8 = gen_sshl_i64,
5038 .fniv = gen_sshl_vec,
5039 .opt_opc = sshl_list,
5040 .vece = MO_64 },
5041 };
5042
5043 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5044 TCGv_vec a, TCGv_vec b)
5045 {
5046 TCGv_vec x = tcg_temp_new_vec_matching(t);
5047 tcg_gen_add_vec(vece, x, a, b);
5048 tcg_gen_usadd_vec(vece, t, a, b);
5049 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5050 tcg_gen_or_vec(vece, sat, sat, x);
5051 tcg_temp_free_vec(x);
5052 }
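
/*
 * Scalar sketch of the saturation tracking above for an 8-bit lane
 * (hypothetical names, illustrative only): the wrapped and saturated sums
 * are compared, and any difference is OR-ed into the cumulative QC state,
 * mirroring the cmp_vec(NE) + or_vec sequence on the "sat" operand.
 */
static inline uint8_t uqadd8_ref(uint8_t a, uint8_t b, uint8_t *sat)
{
    unsigned wide = (unsigned)a + b;
    uint8_t wrapped = (uint8_t)wide;
    uint8_t saturated = wide > 0xff ? 0xff : wrapped;

    if (wrapped != saturated) {
        *sat |= 0xff;                        /* cmp_vec NE yields all ones */
    }
    return saturated;
}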
5053
5054 static const TCGOpcode vecop_list_uqadd[] = {
5055 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
5056 };
5057
5058 const GVecGen4 uqadd_op[4] = {
5059 { .fniv = gen_uqadd_vec,
5060 .fno = gen_helper_gvec_uqadd_b,
5061 .write_aofs = true,
5062 .opt_opc = vecop_list_uqadd,
5063 .vece = MO_8 },
5064 { .fniv = gen_uqadd_vec,
5065 .fno = gen_helper_gvec_uqadd_h,
5066 .write_aofs = true,
5067 .opt_opc = vecop_list_uqadd,
5068 .vece = MO_16 },
5069 { .fniv = gen_uqadd_vec,
5070 .fno = gen_helper_gvec_uqadd_s,
5071 .write_aofs = true,
5072 .opt_opc = vecop_list_uqadd,
5073 .vece = MO_32 },
5074 { .fniv = gen_uqadd_vec,
5075 .fno = gen_helper_gvec_uqadd_d,
5076 .write_aofs = true,
5077 .opt_opc = vecop_list_uqadd,
5078 .vece = MO_64 },
5079 };
5080
5081 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5082 TCGv_vec a, TCGv_vec b)
5083 {
5084 TCGv_vec x = tcg_temp_new_vec_matching(t);
5085 tcg_gen_add_vec(vece, x, a, b);
5086 tcg_gen_ssadd_vec(vece, t, a, b);
5087 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5088 tcg_gen_or_vec(vece, sat, sat, x);
5089 tcg_temp_free_vec(x);
5090 }
5091
5092 static const TCGOpcode vecop_list_sqadd[] = {
5093 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
5094 };
5095
5096 const GVecGen4 sqadd_op[4] = {
5097 { .fniv = gen_sqadd_vec,
5098 .fno = gen_helper_gvec_sqadd_b,
5099 .opt_opc = vecop_list_sqadd,
5100 .write_aofs = true,
5101 .vece = MO_8 },
5102 { .fniv = gen_sqadd_vec,
5103 .fno = gen_helper_gvec_sqadd_h,
5104 .opt_opc = vecop_list_sqadd,
5105 .write_aofs = true,
5106 .vece = MO_16 },
5107 { .fniv = gen_sqadd_vec,
5108 .fno = gen_helper_gvec_sqadd_s,
5109 .opt_opc = vecop_list_sqadd,
5110 .write_aofs = true,
5111 .vece = MO_32 },
5112 { .fniv = gen_sqadd_vec,
5113 .fno = gen_helper_gvec_sqadd_d,
5114 .opt_opc = vecop_list_sqadd,
5115 .write_aofs = true,
5116 .vece = MO_64 },
5117 };
5118
5119 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5120 TCGv_vec a, TCGv_vec b)
5121 {
5122 TCGv_vec x = tcg_temp_new_vec_matching(t);
5123 tcg_gen_sub_vec(vece, x, a, b);
5124 tcg_gen_ussub_vec(vece, t, a, b);
5125 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5126 tcg_gen_or_vec(vece, sat, sat, x);
5127 tcg_temp_free_vec(x);
5128 }
5129
5130 static const TCGOpcode vecop_list_uqsub[] = {
5131 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
5132 };
5133
5134 const GVecGen4 uqsub_op[4] = {
5135 { .fniv = gen_uqsub_vec,
5136 .fno = gen_helper_gvec_uqsub_b,
5137 .opt_opc = vecop_list_uqsub,
5138 .write_aofs = true,
5139 .vece = MO_8 },
5140 { .fniv = gen_uqsub_vec,
5141 .fno = gen_helper_gvec_uqsub_h,
5142 .opt_opc = vecop_list_uqsub,
5143 .write_aofs = true,
5144 .vece = MO_16 },
5145 { .fniv = gen_uqsub_vec,
5146 .fno = gen_helper_gvec_uqsub_s,
5147 .opt_opc = vecop_list_uqsub,
5148 .write_aofs = true,
5149 .vece = MO_32 },
5150 { .fniv = gen_uqsub_vec,
5151 .fno = gen_helper_gvec_uqsub_d,
5152 .opt_opc = vecop_list_uqsub,
5153 .write_aofs = true,
5154 .vece = MO_64 },
5155 };
5156
5157 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5158 TCGv_vec a, TCGv_vec b)
5159 {
5160 TCGv_vec x = tcg_temp_new_vec_matching(t);
5161 tcg_gen_sub_vec(vece, x, a, b);
5162 tcg_gen_sssub_vec(vece, t, a, b);
5163 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5164 tcg_gen_or_vec(vece, sat, sat, x);
5165 tcg_temp_free_vec(x);
5166 }
5167
5168 static const TCGOpcode vecop_list_sqsub[] = {
5169 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
5170 };
5171
5172 const GVecGen4 sqsub_op[4] = {
5173 { .fniv = gen_sqsub_vec,
5174 .fno = gen_helper_gvec_sqsub_b,
5175 .opt_opc = vecop_list_sqsub,
5176 .write_aofs = true,
5177 .vece = MO_8 },
5178 { .fniv = gen_sqsub_vec,
5179 .fno = gen_helper_gvec_sqsub_h,
5180 .opt_opc = vecop_list_sqsub,
5181 .write_aofs = true,
5182 .vece = MO_16 },
5183 { .fniv = gen_sqsub_vec,
5184 .fno = gen_helper_gvec_sqsub_s,
5185 .opt_opc = vecop_list_sqsub,
5186 .write_aofs = true,
5187 .vece = MO_32 },
5188 { .fniv = gen_sqsub_vec,
5189 .fno = gen_helper_gvec_sqsub_d,
5190 .opt_opc = vecop_list_sqsub,
5191 .write_aofs = true,
5192 .vece = MO_64 },
5193 };
5194
5195 /* Translate a NEON data processing instruction. Return nonzero if the
5196 instruction is invalid.
5197 We process data in a mixture of 32-bit and 64-bit chunks.
5198 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
5199
5200 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5201 {
5202 int op;
5203 int q;
5204 int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
5205 int size;
5206 int shift;
5207 int pass;
5208 int count;
5209 int pairwise;
5210 int u;
5211 int vec_size;
5212 uint32_t imm;
5213 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5214 TCGv_ptr ptr1, ptr2, ptr3;
5215 TCGv_i64 tmp64;
5216
5217 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
5218 return 1;
5219 }
5220
5221 /* FIXME: this access check should not take precedence over UNDEF
5222 * for invalid encodings; we will generate incorrect syndrome information
5223 * for attempts to execute invalid vfp/neon encodings with FP disabled.
5224 */
5225 if (s->fp_excp_el) {
5226 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
5227 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
5228 return 0;
5229 }
5230
5231 if (!s->vfp_enabled)
5232 return 1;
5233 q = (insn & (1 << 6)) != 0;
5234 u = (insn >> 24) & 1;
5235 VFP_DREG_D(rd, insn);
5236 VFP_DREG_N(rn, insn);
5237 VFP_DREG_M(rm, insn);
5238 size = (insn >> 20) & 3;
5239 vec_size = q ? 16 : 8;
5240 rd_ofs = neon_reg_offset(rd, 0);
5241 rn_ofs = neon_reg_offset(rn, 0);
5242 rm_ofs = neon_reg_offset(rm, 0);
5243
5244 if ((insn & (1 << 23)) == 0) {
5245 /* Three register same length. */
5246 op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
5247 /* Catch invalid op and bad size combinations: UNDEF */
5248 if ((neon_3r_sizes[op] & (1 << size)) == 0) {
5249 return 1;
5250 }
5251 /* All insns of this form UNDEF for either this condition or the
5252 * superset of cases "Q==1"; we catch the latter later.
5253 */
5254 if (q && ((rd | rn | rm) & 1)) {
5255 return 1;
5256 }
5257 switch (op) {
5258 case NEON_3R_SHA:
5259 /* The SHA-1/SHA-256 3-register instructions require special
5260 * treatment here, as their size field is overloaded as an
5261 * op type selector, and they all consume their input in a
5262 * single pass.
5263 */
5264 if (!q) {
5265 return 1;
5266 }
5267 if (!u) { /* SHA-1 */
5268 if (!dc_isar_feature(aa32_sha1, s)) {
5269 return 1;
5270 }
5271 ptr1 = vfp_reg_ptr(true, rd);
5272 ptr2 = vfp_reg_ptr(true, rn);
5273 ptr3 = vfp_reg_ptr(true, rm);
5274 tmp4 = tcg_const_i32(size);
5275 gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
5276 tcg_temp_free_i32(tmp4);
5277 } else { /* SHA-256 */
5278 if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
5279 return 1;
5280 }
5281 ptr1 = vfp_reg_ptr(true, rd);
5282 ptr2 = vfp_reg_ptr(true, rn);
5283 ptr3 = vfp_reg_ptr(true, rm);
5284 switch (size) {
5285 case 0:
5286 gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
5287 break;
5288 case 1:
5289 gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
5290 break;
5291 case 2:
5292 gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
5293 break;
5294 }
5295 }
5296 tcg_temp_free_ptr(ptr1);
5297 tcg_temp_free_ptr(ptr2);
5298 tcg_temp_free_ptr(ptr3);
5299 return 0;
5300
5301 case NEON_3R_VPADD_VQRDMLAH:
5302 if (!u) {
5303 break; /* VPADD */
5304 }
5305 /* VQRDMLAH */
5306 switch (size) {
5307 case 1:
5308 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
5309 q, rd, rn, rm);
5310 case 2:
5311 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
5312 q, rd, rn, rm);
5313 }
5314 return 1;
5315
5316 case NEON_3R_VFM_VQRDMLSH:
5317 if (!u) {
5318 /* VFM, VFMS */
5319 if (size == 1) {
5320 return 1;
5321 }
5322 break;
5323 }
5324 /* VQRDMLSH */
5325 switch (size) {
5326 case 1:
5327 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
5328 q, rd, rn, rm);
5329 case 2:
5330 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
5331 q, rd, rn, rm);
5332 }
5333 return 1;
5334
5335 case NEON_3R_VADD_VSUB:
5336 case NEON_3R_LOGIC:
5337 case NEON_3R_VMAX:
5338 case NEON_3R_VMIN:
5339 case NEON_3R_VTST_VCEQ:
5340 case NEON_3R_VCGT:
5341 case NEON_3R_VCGE:
5342 case NEON_3R_VQADD:
5343 case NEON_3R_VQSUB:
5344 case NEON_3R_VMUL:
5345 case NEON_3R_VML:
5346 case NEON_3R_VSHL:
5347 /* Already handled by decodetree */
5348 return 1;
5349 }
5350
5351 if (size == 3) {
5352 /* 64-bit element instructions. */
5353 for (pass = 0; pass < (q ? 2 : 1); pass++) {
5354 neon_load_reg64(cpu_V0, rn + pass);
5355 neon_load_reg64(cpu_V1, rm + pass);
5356 switch (op) {
5357 case NEON_3R_VQSHL:
5358 if (u) {
5359 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5360 cpu_V1, cpu_V0);
5361 } else {
5362 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5363 cpu_V1, cpu_V0);
5364 }
5365 break;
5366 case NEON_3R_VRSHL:
5367 if (u) {
5368 gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
5369 } else {
5370 gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
5371 }
5372 break;
5373 case NEON_3R_VQRSHL:
5374 if (u) {
5375 gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
5376 cpu_V1, cpu_V0);
5377 } else {
5378 gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
5379 cpu_V1, cpu_V0);
5380 }
5381 break;
5382 default:
5383 abort();
5384 }
5385 neon_store_reg64(cpu_V0, rd + pass);
5386 }
5387 return 0;
5388 }
5389 pairwise = 0;
5390 switch (op) {
5391 case NEON_3R_VQSHL:
5392 case NEON_3R_VRSHL:
5393 case NEON_3R_VQRSHL:
5394 {
5395 int rtmp;
5396 /* Shift instruction operands are reversed. */
5397 rtmp = rn;
5398 rn = rm;
5399 rm = rtmp;
5400 }
5401 break;
5402 case NEON_3R_VPADD_VQRDMLAH:
5403 case NEON_3R_VPMAX:
5404 case NEON_3R_VPMIN:
5405 pairwise = 1;
5406 break;
5407 case NEON_3R_FLOAT_ARITH:
5408 pairwise = (u && size < 2); /* if VPADD (float) */
5409 break;
5410 case NEON_3R_FLOAT_MINMAX:
5411 pairwise = u; /* if VPMIN/VPMAX (float) */
5412 break;
5413 case NEON_3R_FLOAT_CMP:
5414 if (!u && size) {
5415 /* no encoding for U=0 C=1x */
5416 return 1;
5417 }
5418 break;
5419 case NEON_3R_FLOAT_ACMP:
5420 if (!u) {
5421 return 1;
5422 }
5423 break;
5424 case NEON_3R_FLOAT_MISC:
5425 /* VMAXNM/VMINNM in ARMv8 */
5426 if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
5427 return 1;
5428 }
5429 break;
5430 case NEON_3R_VFM_VQRDMLSH:
5431 if (!dc_isar_feature(aa32_simdfmac, s)) {
5432 return 1;
5433 }
5434 break;
5435 default:
5436 break;
5437 }
5438
5439 if (pairwise && q) {
5440 /* All the pairwise insns UNDEF if Q is set */
5441 return 1;
5442 }
5443
5444 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5445
5446 if (pairwise) {
5447 /* Pairwise. */
5448 if (pass < 1) {
5449 tmp = neon_load_reg(rn, 0);
5450 tmp2 = neon_load_reg(rn, 1);
5451 } else {
5452 tmp = neon_load_reg(rm, 0);
5453 tmp2 = neon_load_reg(rm, 1);
5454 }
5455 } else {
5456 /* Elementwise. */
5457 tmp = neon_load_reg(rn, pass);
5458 tmp2 = neon_load_reg(rm, pass);
5459 }
5460 switch (op) {
5461 case NEON_3R_VHADD:
5462 GEN_NEON_INTEGER_OP(hadd);
5463 break;
5464 case NEON_3R_VRHADD:
5465 GEN_NEON_INTEGER_OP(rhadd);
5466 break;
5467 case NEON_3R_VHSUB:
5468 GEN_NEON_INTEGER_OP(hsub);
5469 break;
5470 case NEON_3R_VQSHL:
5471 GEN_NEON_INTEGER_OP_ENV(qshl);
5472 break;
5473 case NEON_3R_VRSHL:
5474 GEN_NEON_INTEGER_OP(rshl);
5475 break;
5476 case NEON_3R_VQRSHL:
5477 GEN_NEON_INTEGER_OP_ENV(qrshl);
5478 break;
5479 case NEON_3R_VABD:
5480 GEN_NEON_INTEGER_OP(abd);
5481 break;
5482 case NEON_3R_VABA:
5483 GEN_NEON_INTEGER_OP(abd);
5484 tcg_temp_free_i32(tmp2);
5485 tmp2 = neon_load_reg(rd, pass);
5486 gen_neon_add(size, tmp, tmp2);
5487 break;
5488 case NEON_3R_VPMAX:
5489 GEN_NEON_INTEGER_OP(pmax);
5490 break;
5491 case NEON_3R_VPMIN:
5492 GEN_NEON_INTEGER_OP(pmin);
5493 break;
5494 case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
5495 if (!u) { /* VQDMULH */
5496 switch (size) {
5497 case 1:
5498 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5499 break;
5500 case 2:
5501 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5502 break;
5503 default: abort();
5504 }
5505 } else { /* VQRDMULH */
5506 switch (size) {
5507 case 1:
5508 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5509 break;
5510 case 2:
5511 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5512 break;
5513 default: abort();
5514 }
5515 }
5516 break;
5517 case NEON_3R_VPADD_VQRDMLAH:
5518 switch (size) {
5519 case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
5520 case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
5521 case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
5522 default: abort();
5523 }
5524 break;
5525 case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
5526 {
5527 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5528 switch ((u << 2) | size) {
5529 case 0: /* VADD */
5530 case 4: /* VPADD */
5531 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5532 break;
5533 case 2: /* VSUB */
5534 gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
5535 break;
5536 case 6: /* VABD */
5537 gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
5538 break;
5539 default:
5540 abort();
5541 }
5542 tcg_temp_free_ptr(fpstatus);
5543 break;
5544 }
5545 case NEON_3R_FLOAT_MULTIPLY:
5546 {
5547 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5548 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5549 if (!u) {
5550 tcg_temp_free_i32(tmp2);
5551 tmp2 = neon_load_reg(rd, pass);
5552 if (size == 0) {
5553 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5554 } else {
5555 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5556 }
5557 }
5558 tcg_temp_free_ptr(fpstatus);
5559 break;
5560 }
5561 case NEON_3R_FLOAT_CMP:
5562 {
5563 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5564 if (!u) {
5565 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5566 } else {
5567 if (size == 0) {
5568 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5569 } else {
5570 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5571 }
5572 }
5573 tcg_temp_free_ptr(fpstatus);
5574 break;
5575 }
5576 case NEON_3R_FLOAT_ACMP:
5577 {
5578 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5579 if (size == 0) {
5580 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
5581 } else {
5582 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
5583 }
5584 tcg_temp_free_ptr(fpstatus);
5585 break;
5586 }
5587 case NEON_3R_FLOAT_MINMAX:
5588 {
5589 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5590 if (size == 0) {
5591 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5592 } else {
5593 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5594 }
5595 tcg_temp_free_ptr(fpstatus);
5596 break;
5597 }
5598 case NEON_3R_FLOAT_MISC:
5599 if (u) {
5600 /* VMAXNM/VMINNM */
5601 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5602 if (size == 0) {
5603 gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5604 } else {
5605 gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5606 }
5607 tcg_temp_free_ptr(fpstatus);
5608 } else {
5609 if (size == 0) {
5610 gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5611 } else {
5612 gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5613 }
5614 }
5615 break;
5616 case NEON_3R_VFM_VQRDMLSH:
5617 {
5618 /* VFMA, VFMS: fused multiply-add */
5619 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5620 TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5621 if (size) {
5622 /* VFMS */
5623 gen_helper_vfp_negs(tmp, tmp);
5624 }
5625 gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5626 tcg_temp_free_i32(tmp3);
5627 tcg_temp_free_ptr(fpstatus);
5628 break;
5629 }
5630 default:
5631 abort();
5632 }
5633 tcg_temp_free_i32(tmp2);
5634
5635 /* Save the result. For elementwise operations we can put it
5636 straight into the destination register. For pairwise operations
5637 we have to be careful to avoid clobbering the source operands. */
5638 if (pairwise && rd == rm) {
5639 neon_store_scratch(pass, tmp);
5640 } else {
5641 neon_store_reg(rd, pass, tmp);
5642 }
5643
5644 } /* for pass */
5645 if (pairwise && rd == rm) {
5646 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5647 tmp = neon_load_scratch(pass);
5648 neon_store_reg(rd, pass, tmp);
5649 }
5650 }
5651 /* End of 3 register same size operations. */
5652 } else if (insn & (1 << 4)) {
5653 if ((insn & 0x00380080) != 0) {
5654 /* Two registers and shift. */
5655 op = (insn >> 8) & 0xf;
5656 if (insn & (1 << 7)) {
5657 /* 64-bit shift. */
5658 if (op > 7) {
5659 return 1;
5660 }
5661 size = 3;
5662 } else {
5663 size = 2;
5664 while ((insn & (1 << (size + 19))) == 0)
5665 size--;
5666 }
5667 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5668 if (op < 8) {
5669 /* Shift by immediate:
5670 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
5671 if (q && ((rd | rm) & 1)) {
5672 return 1;
5673 }
5674 if (!u && (op == 4 || op == 6)) {
5675 return 1;
5676 }
5677 /* Right shifts are encoded as N - shift, where N is the
5678 element size in bits. */
5679 if (op <= 4) {
5680 shift = shift - (1 << (size + 3));
5681 }
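
/*
 * Worked example: for 8-bit elements (size == 0) an immediate field of
 * 3 encodes a right shift by 8 - 3 = 5, so shift is now -5; the right
 * shift cases below negate it back to a positive amount.
 */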
5682
5683 switch (op) {
5684 case 0: /* VSHR */
5685 /* Right shift comes here negative. */
5686 shift = -shift;
5687 /* Shifts larger than the element size are architecturally
5688 * valid. Unsigned results in all zeros; signed results
5689 * in all sign bits.
5690 */
5691 if (!u) {
5692 tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
5693 MIN(shift, (8 << size) - 1),
5694 vec_size, vec_size);
5695 } else if (shift >= 8 << size) {
5696 tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size,
5697 vec_size, 0);
5698 } else {
5699 tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
5700 vec_size, vec_size);
5701 }
5702 return 0;
5703
5704 case 1: /* VSRA */
5705 /* Right shift comes here negative. */
5706 shift = -shift;
5707 if (u) {
5708 gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
5709 vec_size, vec_size);
5710 } else {
5711 gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
5712 vec_size, vec_size);
5713 }
5714 return 0;
5715
5716 case 2: /* VRSHR */
5717 /* Right shift comes here negative. */
5718 shift = -shift;
5719 if (u) {
5720 gen_gvec_urshr(size, rd_ofs, rm_ofs, shift,
5721 vec_size, vec_size);
5722 } else {
5723 gen_gvec_srshr(size, rd_ofs, rm_ofs, shift,
5724 vec_size, vec_size);
5725 }
5726 return 0;
5727
5728 case 3: /* VRSRA */
5729 /* Right shift comes here negative. */
5730 shift = -shift;
5731 if (u) {
5732 gen_gvec_ursra(size, rd_ofs, rm_ofs, shift,
5733 vec_size, vec_size);
5734 } else {
5735 gen_gvec_srsra(size, rd_ofs, rm_ofs, shift,
5736 vec_size, vec_size);
5737 }
5738 return 0;
5739
5740 case 4: /* VSRI */
5741 if (!u) {
5742 return 1;
5743 }
5744 /* Right shift comes here negative. */
5745 shift = -shift;
5746 gen_gvec_sri(size, rd_ofs, rm_ofs, shift,
5747 vec_size, vec_size);
5748 return 0;
5749
5750 case 5: /* VSHL, VSLI */
5751 if (u) { /* VSLI */
5752 gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
5753 vec_size, vec_size);
5754 } else { /* VSHL */
5755 tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
5756 vec_size, vec_size);
5757 }
5758 return 0;
5759 }
5760
5761 if (size == 3) {
5762 count = q + 1;
5763 } else {
5764 count = q ? 4 : 2;
5765 }
5766
5767 /* To avoid excessive duplication of ops we implement shift
5768 * by immediate using the variable shift operations.
5769 */
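/*
 * dup_const() replicates the count into every lane of a 64-bit constant,
 * e.g. dup_const(MO_8, 5) == 0x0505050505050505, so each lane of the
 * variable-shift helpers below sees the same immediate.
 */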
5770 imm = dup_const(size, shift);
5771
5772 for (pass = 0; pass < count; pass++) {
5773 if (size == 3) {
5774 neon_load_reg64(cpu_V0, rm + pass);
5775 tcg_gen_movi_i64(cpu_V1, imm);
5776 switch (op) {
5777 case 6: /* VQSHLU */
5778 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5779 cpu_V0, cpu_V1);
5780 break;
5781 case 7: /* VQSHL */
5782 if (u) {
5783 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5784 cpu_V0, cpu_V1);
5785 } else {
5786 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5787 cpu_V0, cpu_V1);
5788 }
5789 break;
5790 default:
5791 g_assert_not_reached();
5792 }
5793 neon_store_reg64(cpu_V0, rd + pass);
5794 } else { /* size < 3 */
5795 /* Operands in T0 and T1. */
5796 tmp = neon_load_reg(rm, pass);
5797 tmp2 = tcg_temp_new_i32();
5798 tcg_gen_movi_i32(tmp2, imm);
5799 switch (op) {
5800 case 6: /* VQSHLU */
5801 switch (size) {
5802 case 0:
5803 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5804 tmp, tmp2);
5805 break;
5806 case 1:
5807 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5808 tmp, tmp2);
5809 break;
5810 case 2:
5811 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5812 tmp, tmp2);
5813 break;
5814 default:
5815 abort();
5816 }
5817 break;
5818 case 7: /* VQSHL */
5819 GEN_NEON_INTEGER_OP_ENV(qshl);
5820 break;
5821 default:
5822 g_assert_not_reached();
5823 }
5824 tcg_temp_free_i32(tmp2);
5825 neon_store_reg(rd, pass, tmp);
5826 }
5827 } /* for pass */
5828 } else if (op < 10) {
5829 /* Shift by immediate and narrow:
5830 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
5831 int input_unsigned = (op == 8) ? !u : u;
5832 if (rm & 1) {
5833 return 1;
5834 }
5835 shift = shift - (1 << (size + 3));
5836 size++;
5837 if (size == 3) {
5838 tmp64 = tcg_const_i64(shift);
5839 neon_load_reg64(cpu_V0, rm);
5840 neon_load_reg64(cpu_V1, rm + 1);
5841 for (pass = 0; pass < 2; pass++) {
5842 TCGv_i64 in;
5843 if (pass == 0) {
5844 in = cpu_V0;
5845 } else {
5846 in = cpu_V1;
5847 }
5848 if (q) {
5849 if (input_unsigned) {
5850 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5851 } else {
5852 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5853 }
5854 } else {
5855 if (input_unsigned) {
5856 gen_ushl_i64(cpu_V0, in, tmp64);
5857 } else {
5858 gen_sshl_i64(cpu_V0, in, tmp64);
5859 }
5860 }
5861 tmp = tcg_temp_new_i32();
5862 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5863 neon_store_reg(rd, pass, tmp);
5864 } /* for pass */
5865 tcg_temp_free_i64(tmp64);
5866 } else {
5867 if (size == 1) {
5868 imm = (uint16_t)shift;
5869 imm |= imm << 16;
5870 } else {
5871 /* size == 2 */
5872 imm = (uint32_t)shift;
5873 }
5874 tmp2 = tcg_const_i32(imm);
5875 tmp4 = neon_load_reg(rm + 1, 0);
5876 tmp5 = neon_load_reg(rm + 1, 1);
5877 for (pass = 0; pass < 2; pass++) {
5878 if (pass == 0) {
5879 tmp = neon_load_reg(rm, 0);
5880 } else {
5881 tmp = tmp4;
5882 }
5883 gen_neon_shift_narrow(size, tmp, tmp2, q,
5884 input_unsigned);
5885 if (pass == 0) {
5886 tmp3 = neon_load_reg(rm, 1);
5887 } else {
5888 tmp3 = tmp5;
5889 }
5890 gen_neon_shift_narrow(size, tmp3, tmp2, q,
5891 input_unsigned);
5892 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5893 tcg_temp_free_i32(tmp);
5894 tcg_temp_free_i32(tmp3);
5895 tmp = tcg_temp_new_i32();
5896 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5897 neon_store_reg(rd, pass, tmp);
5898 } /* for pass */
5899 tcg_temp_free_i32(tmp2);
5900 }
5901 } else if (op == 10) {
5902 /* VSHLL, VMOVL */
5903 if (q || (rd & 1)) {
5904 return 1;
5905 }
5906 tmp = neon_load_reg(rm, 0);
5907 tmp2 = neon_load_reg(rm, 1);
5908 for (pass = 0; pass < 2; pass++) {
5909 if (pass == 1)
5910 tmp = tmp2;
5911
5912 gen_neon_widen(cpu_V0, tmp, size, u);
5913
5914 if (shift != 0) {
5915 /* The shift is less than the width of the source
5916 type, so we can just shift the whole register. */
5917 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5918 /* Widen the result of the shift: we need to clear
5919 * the potential overflow bits resulting from
5920 * left bits of the narrow input appearing as
5921 * right bits of the left neighbour narrow
5922 * input. */
5923 if (size < 2 || !u) {
5924 uint64_t imm64;
5925 if (size == 0) {
5926 imm = (0xffu >> (8 - shift));
5927 imm |= imm << 16;
5928 } else if (size == 1) {
5929 imm = 0xffff >> (16 - shift);
5930 } else {
5931 /* size == 2 */
5932 imm = 0xffffffff >> (32 - shift);
5933 }
5934 if (size < 2) {
5935 imm64 = imm | (((uint64_t)imm) << 32);
5936 } else {
5937 imm64 = imm;
5938 }
5939 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5940 }
5941 }
5942 neon_store_reg64(cpu_V0, rd + pass);
5943 }
5944 } else if (op >= 14) {
5945 /* VCVT fixed-point. */
5946 TCGv_ptr fpst;
5947 TCGv_i32 shiftv;
5948 VFPGenFixPointFn *fn;
5949
5950 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5951 return 1;
5952 }
5953
5954 if (!(op & 1)) {
5955 if (u) {
5956 fn = gen_helper_vfp_ultos;
5957 } else {
5958 fn = gen_helper_vfp_sltos;
5959 }
5960 } else {
5961 if (u) {
5962 fn = gen_helper_vfp_touls_round_to_zero;
5963 } else {
5964 fn = gen_helper_vfp_tosls_round_to_zero;
5965 }
5966 }
5967
5968 /* We have already masked out the must-be-1 top bit of imm6,
5969 * hence this 32-shift where the ARM ARM has 64-imm6.
5970 */
5971 shift = 32 - shift;
5972 fpst = get_fpstatus_ptr(1);
5973 shiftv = tcg_const_i32(shift);
5974 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5975 TCGv_i32 tmpf = neon_load_reg(rm, pass);
5976 fn(tmpf, tmpf, shiftv, fpst);
5977 neon_store_reg(rd, pass, tmpf);
5978 }
5979 tcg_temp_free_ptr(fpst);
5980 tcg_temp_free_i32(shiftv);
5981 } else {
5982 return 1;
5983 }
5984 } else { /* (insn & 0x00380080) == 0 */
5985 int invert, reg_ofs, vec_size;
5986
5987 if (q && (rd & 1)) {
5988 return 1;
5989 }
5990
5991 op = (insn >> 8) & 0xf;
5992 /* One register and immediate. */
5993 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5994 invert = (insn & (1 << 5)) != 0;
5995 /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
5996 * We choose to not special-case this and will behave as if a
5997 * valid constant encoding of 0 had been given.
5998 */
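/* Expand the 8-bit encoded constant according to op (the cmode field)
 * into the 32-bit value replicated across the vector below; op 14 with
 * the invert bit set instead expands each immediate bit to a byte.
 */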
5999 switch (op) {
6000 case 0: case 1:
6001 /* no-op */
6002 break;
6003 case 2: case 3:
6004 imm <<= 8;
6005 break;
6006 case 4: case 5:
6007 imm <<= 16;
6008 break;
6009 case 6: case 7:
6010 imm <<= 24;
6011 break;
6012 case 8: case 9:
6013 imm |= imm << 16;
6014 break;
6015 case 10: case 11:
6016 imm = (imm << 8) | (imm << 24);
6017 break;
6018 case 12:
6019 imm = (imm << 8) | 0xff;
6020 break;
6021 case 13:
6022 imm = (imm << 16) | 0xffff;
6023 break;
6024 case 14:
6025 imm |= (imm << 8) | (imm << 16) | (imm << 24);
6026 if (invert) {
6027 imm = ~imm;
6028 }
6029 break;
6030 case 15:
6031 if (invert) {
6032 return 1;
6033 }
6034 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
6035 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
6036 break;
6037 }
6038 if (invert) {
6039 imm = ~imm;
6040 }
6041
6042 reg_ofs = neon_reg_offset(rd, 0);
6043 vec_size = q ? 16 : 8;
6044
6045 if (op & 1 && op < 12) {
6046 if (invert) {
6047 /* The immediate value has already been inverted,
6048 * so BIC becomes AND.
6049 */
6050 tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
6051 vec_size, vec_size);
6052 } else {
6053 tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
6054 vec_size, vec_size);
6055 }
6056 } else {
6057 /* VMOV, VMVN. */
6058 if (op == 14 && invert) {
6059 TCGv_i64 t64 = tcg_temp_new_i64();
6060
6061 for (pass = 0; pass <= q; ++pass) {
6062 uint64_t val = 0;
6063 int n;
6064
6065 for (n = 0; n < 8; n++) {
6066 if (imm & (1 << (n + pass * 8))) {
6067 val |= 0xffull << (n * 8);
6068 }
6069 }
6070 tcg_gen_movi_i64(t64, val);
6071 neon_store_reg64(t64, rd + pass);
6072 }
6073 tcg_temp_free_i64(t64);
6074 } else {
6075 tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size,
6076 vec_size, imm);
6077 }
6078 }
6079 }
6080 } else { /* (insn & 0x00800010 == 0x00800000) */
6081 if (size != 3) {
6082 op = (insn >> 8) & 0xf;
6083 if ((insn & (1 << 6)) == 0) {
6084 /* Three registers of different lengths. */
6085 int src1_wide;
6086 int src2_wide;
6087 int prewiden;
6088 /* undefreq: bit 0 : UNDEF if size == 0
6089 * bit 1 : UNDEF if size == 1
6090 * bit 2 : UNDEF if size == 2
6091 * bit 3 : UNDEF if U == 1
6092 * Note that [2:0] set implies 'always UNDEF'
6093 */
6094 int undefreq;
6095 /* prewiden, src1_wide, src2_wide, undefreq */
6096 static const int neon_3reg_wide[16][4] = {
6097 {1, 0, 0, 0}, /* VADDL */
6098 {1, 1, 0, 0}, /* VADDW */
6099 {1, 0, 0, 0}, /* VSUBL */
6100 {1, 1, 0, 0}, /* VSUBW */
6101 {0, 1, 1, 0}, /* VADDHN */
6102 {0, 0, 0, 0}, /* VABAL */
6103 {0, 1, 1, 0}, /* VSUBHN */
6104 {0, 0, 0, 0}, /* VABDL */
6105 {0, 0, 0, 0}, /* VMLAL */
6106 {0, 0, 0, 9}, /* VQDMLAL */
6107 {0, 0, 0, 0}, /* VMLSL */
6108 {0, 0, 0, 9}, /* VQDMLSL */
6109 {0, 0, 0, 0}, /* Integer VMULL */
6110 {0, 0, 0, 9}, /* VQDMULL */
6111 {0, 0, 0, 0xa}, /* Polynomial VMULL */
6112 {0, 0, 0, 7}, /* Reserved: always UNDEF */
6113 };
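/* e.g. undefreq 9 means UNDEF for size 0 or U == 1, 0xa means UNDEF
 * for size 1 or U == 1, and 7 means UNDEF for any size.
 */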
6114
6115 prewiden = neon_3reg_wide[op][0];
6116 src1_wide = neon_3reg_wide[op][1];
6117 src2_wide = neon_3reg_wide[op][2];
6118 undefreq = neon_3reg_wide[op][3];
6119
6120 if ((undefreq & (1 << size)) ||
6121 ((undefreq & 8) && u)) {
6122 return 1;
6123 }
6124 if ((src1_wide && (rn & 1)) ||
6125 (src2_wide && (rm & 1)) ||
6126 (!src2_wide && (rd & 1))) {
6127 return 1;
6128 }
6129
6130 /* Handle polynomial VMULL in a single pass. */
6131 if (op == 14) {
6132 if (size == 0) {
6133 /* VMULL.P8 */
6134 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
6135 0, gen_helper_neon_pmull_h);
6136 } else {
6137 /* VMULL.P64 */
6138 if (!dc_isar_feature(aa32_pmull, s)) {
6139 return 1;
6140 }
6141 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
6142 0, gen_helper_gvec_pmull_q);
6143 }
6144 return 0;
6145 }
6146
6147 /* Avoid overlapping operands. Wide source operands are
6148 always aligned so will never overlap with wide
6149 destinations in problematic ways. */
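/* If a narrow source overlaps the destination, its high half is saved
 * to scratch slot 2 now and re-loaded during pass 1, after pass 0 has
 * already written rd.
 */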
6150 if (rd == rm && !src2_wide) {
6151 tmp = neon_load_reg(rm, 1);
6152 neon_store_scratch(2, tmp);
6153 } else if (rd == rn && !src1_wide) {
6154 tmp = neon_load_reg(rn, 1);
6155 neon_store_scratch(2, tmp);
6156 }
6157 tmp3 = NULL;
6158 for (pass = 0; pass < 2; pass++) {
6159 if (src1_wide) {
6160 neon_load_reg64(cpu_V0, rn + pass);
6161 tmp = NULL;
6162 } else {
6163 if (pass == 1 && rd == rn) {
6164 tmp = neon_load_scratch(2);
6165 } else {
6166 tmp = neon_load_reg(rn, pass);
6167 }
6168 if (prewiden) {
6169 gen_neon_widen(cpu_V0, tmp, size, u);
6170 }
6171 }
6172 if (src2_wide) {
6173 neon_load_reg64(cpu_V1, rm + pass);
6174 tmp2 = NULL;
6175 } else {
6176 if (pass == 1 && rd == rm) {
6177 tmp2 = neon_load_scratch(2);
6178 } else {
6179 tmp2 = neon_load_reg(rm, pass);
6180 }
6181 if (prewiden) {
6182 gen_neon_widen(cpu_V1, tmp2, size, u);
6183 }
6184 }
6185 switch (op) {
6186 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
6187 gen_neon_addl(size);
6188 break;
6189 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
6190 gen_neon_subl(size);
6191 break;
6192 case 5: case 7: /* VABAL, VABDL */
6193 switch ((size << 1) | u) {
6194 case 0:
6195 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
6196 break;
6197 case 1:
6198 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
6199 break;
6200 case 2:
6201 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
6202 break;
6203 case 3:
6204 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
6205 break;
6206 case 4:
6207 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
6208 break;
6209 case 5:
6210 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
6211 break;
6212 default: abort();
6213 }
6214 tcg_temp_free_i32(tmp2);
6215 tcg_temp_free_i32(tmp);
6216 break;
6217 case 8: case 9: case 10: case 11: case 12: case 13:
6218 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
6219 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6220 break;
6221 default: /* 15 is RESERVED: caught earlier */
6222 abort();
6223 }
6224 if (op == 13) {
6225 /* VQDMULL */
6226 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6227 neon_store_reg64(cpu_V0, rd + pass);
6228 } else if (op == 5 || (op >= 8 && op <= 11)) {
6229 /* Accumulate. */
6230 neon_load_reg64(cpu_V1, rd + pass);
6231 switch (op) {
6232 case 10: /* VMLSL */
6233 gen_neon_negl(cpu_V0, size);
6234 /* Fall through */
6235 case 5: case 8: /* VABAL, VMLAL */
6236 gen_neon_addl(size);
6237 break;
6238 case 9: case 11: /* VQDMLAL, VQDMLSL */
6239 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6240 if (op == 11) {
6241 gen_neon_negl(cpu_V0, size);
6242 }
6243 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6244 break;
6245 default:
6246 abort();
6247 }
6248 neon_store_reg64(cpu_V0, rd + pass);
6249 } else if (op == 4 || op == 6) {
6250 /* Narrowing operation. */
6251 tmp = tcg_temp_new_i32();
6252 if (!u) {
6253 switch (size) {
6254 case 0:
6255 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
6256 break;
6257 case 1:
6258 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
6259 break;
6260 case 2:
6261 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
6262 break;
6263 default: abort();
6264 }
6265 } else {
6266 switch (size) {
6267 case 0:
6268 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
6269 break;
6270 case 1:
6271 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
6272 break;
6273 case 2:
6274 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
6275 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
6276 break;
6277 default: abort();
6278 }
6279 }
6280 if (pass == 0) {
6281 tmp3 = tmp;
6282 } else {
6283 neon_store_reg(rd, 0, tmp3);
6284 neon_store_reg(rd, 1, tmp);
6285 }
6286 } else {
6287 /* Write back the result. */
6288 neon_store_reg64(cpu_V0, rd + pass);
6289 }
6290 }
6291 } else {
6292 /* Two registers and a scalar. NB that for ops of this form
6293 * the ARM ARM labels bit 24 as Q, but it is in our variable
6294 * 'u', not 'q'.
6295 */
6296 if (size == 0) {
6297 return 1;
6298 }
6299 switch (op) {
6300 case 1: /* Floating point VMLA scalar */
6301 case 5: /* Floating point VMLS scalar */
6302 case 9: /* Floating point VMUL scalar */
6303 if (size == 1) {
6304 return 1;
6305 }
6306 /* fall through */
6307 case 0: /* Integer VMLA scalar */
6308 case 4: /* Integer VMLS scalar */
6309 case 8: /* Integer VMUL scalar */
6310 case 12: /* VQDMULH scalar */
6311 case 13: /* VQRDMULH scalar */
6312 if (u && ((rd | rn) & 1)) {
6313 return 1;
6314 }
6315 tmp = neon_get_scalar(size, rm);
6316 neon_store_scratch(0, tmp);
6317 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6318 tmp = neon_load_scratch(0);
6319 tmp2 = neon_load_reg(rn, pass);
6320 if (op == 12) {
6321 if (size == 1) {
6322 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6323 } else {
6324 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6325 }
6326 } else if (op == 13) {
6327 if (size == 1) {
6328 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6329 } else {
6330 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6331 }
6332 } else if (op & 1) {
6333 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6334 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6335 tcg_temp_free_ptr(fpstatus);
6336 } else {
6337 switch (size) {
6338 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6339 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6340 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6341 default: abort();
6342 }
6343 }
6344 tcg_temp_free_i32(tmp2);
6345 if (op < 8) {
6346 /* Accumulate. */
6347 tmp2 = neon_load_reg(rd, pass);
6348 switch (op) {
6349 case 0:
6350 gen_neon_add(size, tmp, tmp2);
6351 break;
6352 case 1:
6353 {
6354 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6355 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6356 tcg_temp_free_ptr(fpstatus);
6357 break;
6358 }
6359 case 4:
6360 gen_neon_rsb(size, tmp, tmp2);
6361 break;
6362 case 5:
6363 {
6364 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6365 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6366 tcg_temp_free_ptr(fpstatus);
6367 break;
6368 }
6369 default:
6370 abort();
6371 }
6372 tcg_temp_free_i32(tmp2);
6373 }
6374 neon_store_reg(rd, pass, tmp);
6375 }
6376 break;
6377 case 3: /* VQDMLAL scalar */
6378 case 7: /* VQDMLSL scalar */
6379 case 11: /* VQDMULL scalar */
6380 if (u == 1) {
6381 return 1;
6382 }
6383 /* fall through */
6384 case 2: /* VMLAL scalar */
6385 case 6: /* VMLSL scalar */
6386 case 10: /* VMULL scalar */
6387 if (rd & 1) {
6388 return 1;
6389 }
6390 tmp2 = neon_get_scalar(size, rm);
6391 /* We need a copy of tmp2 because gen_neon_mull
6392 * frees it during pass 0. */
6393 tmp4 = tcg_temp_new_i32();
6394 tcg_gen_mov_i32(tmp4, tmp2);
6395 tmp3 = neon_load_reg(rn, 1);
6396
6397 for (pass = 0; pass < 2; pass++) {
6398 if (pass == 0) {
6399 tmp = neon_load_reg(rn, 0);
6400 } else {
6401 tmp = tmp3;
6402 tmp2 = tmp4;
6403 }
6404 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6405 if (op != 11) {
6406 neon_load_reg64(cpu_V1, rd + pass);
6407 }
6408 switch (op) {
6409 case 6:
6410 gen_neon_negl(cpu_V0, size);
6411 /* Fall through */
6412 case 2:
6413 gen_neon_addl(size);
6414 break;
6415 case 3: case 7:
6416 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6417 if (op == 7) {
6418 gen_neon_negl(cpu_V0, size);
6419 }
6420 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6421 break;
6422 case 10:
6423 /* no-op */
6424 break;
6425 case 11:
6426 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6427 break;
6428 default:
6429 abort();
6430 }
6431 neon_store_reg64(cpu_V0, rd + pass);
6432 }
6433 break;
6434 case 14: /* VQRDMLAH scalar */
6435 case 15: /* VQRDMLSH scalar */
6436 {
6437 NeonGenThreeOpEnvFn *fn;
6438
6439 if (!dc_isar_feature(aa32_rdm, s)) {
6440 return 1;
6441 }
6442 if (u && ((rd | rn) & 1)) {
6443 return 1;
6444 }
6445 if (op == 14) {
6446 if (size == 1) {
6447 fn = gen_helper_neon_qrdmlah_s16;
6448 } else {
6449 fn = gen_helper_neon_qrdmlah_s32;
6450 }
6451 } else {
6452 if (size == 1) {
6453 fn = gen_helper_neon_qrdmlsh_s16;
6454 } else {
6455 fn = gen_helper_neon_qrdmlsh_s32;
6456 }
6457 }
6458
6459 tmp2 = neon_get_scalar(size, rm);
6460 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6461 tmp = neon_load_reg(rn, pass);
6462 tmp3 = neon_load_reg(rd, pass);
6463 fn(tmp, cpu_env, tmp, tmp2, tmp3);
6464 tcg_temp_free_i32(tmp3);
6465 neon_store_reg(rd, pass, tmp);
6466 }
6467 tcg_temp_free_i32(tmp2);
6468 }
6469 break;
6470 default:
6471 g_assert_not_reached();
6472 }
6473 }
6474 } else { /* size == 3 */
6475 if (!u) {
6476 /* Extract. */
6477 imm = (insn >> 8) & 0xf;
6478
6479 if (imm > 7 && !q)
6480 return 1;
6481
6482 if (q && ((rd | rn | rm) & 1)) {
6483 return 1;
6484 }
6485
6486 if (imm == 0) {
6487 neon_load_reg64(cpu_V0, rn);
6488 if (q) {
6489 neon_load_reg64(cpu_V1, rn + 1);
6490 }
6491 } else if (imm == 8) {
6492 neon_load_reg64(cpu_V0, rn + 1);
6493 if (q) {
6494 neon_load_reg64(cpu_V1, rm);
6495 }
6496 } else if (q) {
6497 tmp64 = tcg_temp_new_i64();
6498 if (imm < 8) {
6499 neon_load_reg64(cpu_V0, rn);
6500 neon_load_reg64(tmp64, rn + 1);
6501 } else {
6502 neon_load_reg64(cpu_V0, rn + 1);
6503 neon_load_reg64(tmp64, rm);
6504 }
6505 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6506 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6507 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6508 if (imm < 8) {
6509 neon_load_reg64(cpu_V1, rm);
6510 } else {
6511 neon_load_reg64(cpu_V1, rm + 1);
6512 imm -= 8;
6513 }
6514 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6515 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6516 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6517 tcg_temp_free_i64(tmp64);
6518 } else {
6519 /* BUGFIX */
6520 neon_load_reg64(cpu_V0, rn);
6521 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6522 neon_load_reg64(cpu_V1, rm);
6523 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6524 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6525 }
6526 neon_store_reg64(cpu_V0, rd);
6527 if (q) {
6528 neon_store_reg64(cpu_V1, rd + 1);
6529 }
6530 } else if ((insn & (1 << 11)) == 0) {
6531 /* Two register misc. */
6532 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6533 size = (insn >> 18) & 3;
6534 /* UNDEF for unknown op values and bad op-size combinations */
6535 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6536 return 1;
6537 }
6538 if (neon_2rm_is_v8_op(op) &&
6539 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6540 return 1;
6541 }
6542 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6543 q && ((rm | rd) & 1)) {
6544 return 1;
6545 }
6546 switch (op) {
6547 case NEON_2RM_VREV64:
6548 for (pass = 0; pass < (q ? 2 : 1); pass++) {
6549 tmp = neon_load_reg(rm, pass * 2);
6550 tmp2 = neon_load_reg(rm, pass * 2 + 1);
6551 switch (size) {
6552 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6553 case 1: gen_swap_half(tmp); break;
6554 case 2: /* no-op */ break;
6555 default: abort();
6556 }
6557 neon_store_reg(rd, pass * 2 + 1, tmp);
6558 if (size == 2) {
6559 neon_store_reg(rd, pass * 2, tmp2);
6560 } else {
6561 switch (size) {
6562 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6563 case 1: gen_swap_half(tmp2); break;
6564 default: abort();
6565 }
6566 neon_store_reg(rd, pass * 2, tmp2);
6567 }
6568 }
6569 break;
6570 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6571 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6572 for (pass = 0; pass < q + 1; pass++) {
6573 tmp = neon_load_reg(rm, pass * 2);
6574 gen_neon_widen(cpu_V0, tmp, size, op & 1);
6575 tmp = neon_load_reg(rm, pass * 2 + 1);
6576 gen_neon_widen(cpu_V1, tmp, size, op & 1);
6577 switch (size) {
6578 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6579 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6580 case 2: tcg_gen_add_i64(CPU_V001); break;
6581 default: abort();
6582 }
6583 if (op >= NEON_2RM_VPADAL) {
6584 /* Accumulate. */
6585 neon_load_reg64(cpu_V1, rd + pass);
6586 gen_neon_addl(size);
6587 }
6588 neon_store_reg64(cpu_V0, rd + pass);
6589 }
6590 break;
6591 case NEON_2RM_VTRN:
6592 if (size == 2) {
6593 int n;
6594 for (n = 0; n < (q ? 4 : 2); n += 2) {
6595 tmp = neon_load_reg(rm, n);
6596 tmp2 = neon_load_reg(rd, n + 1);
6597 neon_store_reg(rm, n, tmp2);
6598 neon_store_reg(rd, n + 1, tmp);
6599 }
6600 } else {
6601 goto elementwise;
6602 }
6603 break;
6604 case NEON_2RM_VUZP:
6605 if (gen_neon_unzip(rd, rm, size, q)) {
6606 return 1;
6607 }
6608 break;
6609 case NEON_2RM_VZIP:
6610 if (gen_neon_zip(rd, rm, size, q)) {
6611 return 1;
6612 }
6613 break;
6614 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6615 /* also VQMOVUN; op field and mnemonics don't line up */
6616 if (rm & 1) {
6617 return 1;
6618 }
6619 tmp2 = NULL;
6620 for (pass = 0; pass < 2; pass++) {
6621 neon_load_reg64(cpu_V0, rm + pass);
6622 tmp = tcg_temp_new_i32();
6623 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6624 tmp, cpu_V0);
6625 if (pass == 0) {
6626 tmp2 = tmp;
6627 } else {
6628 neon_store_reg(rd, 0, tmp2);
6629 neon_store_reg(rd, 1, tmp);
6630 }
6631 }
6632 break;
6633 case NEON_2RM_VSHLL:
6634 if (q || (rd & 1)) {
6635 return 1;
6636 }
6637 tmp = neon_load_reg(rm, 0);
6638 tmp2 = neon_load_reg(rm, 1);
6639 for (pass = 0; pass < 2; pass++) {
6640 if (pass == 1)
6641 tmp = tmp2;
6642 gen_neon_widen(cpu_V0, tmp, size, 1);
6643 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6644 neon_store_reg64(cpu_V0, rd + pass);
6645 }
6646 break;
6647 case NEON_2RM_VCVT_F16_F32:
6648 {
6649 TCGv_ptr fpst;
6650 TCGv_i32 ahp;
6651
6652 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6653 q || (rm & 1)) {
6654 return 1;
6655 }
6656 fpst = get_fpstatus_ptr(true);
6657 ahp = get_ahp_flag();
6658 tmp = neon_load_reg(rm, 0);
6659 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6660 tmp2 = neon_load_reg(rm, 1);
6661 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
6662 tcg_gen_shli_i32(tmp2, tmp2, 16);
6663 tcg_gen_or_i32(tmp2, tmp2, tmp);
6664 tcg_temp_free_i32(tmp);
6665 tmp = neon_load_reg(rm, 2);
6666 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6667 tmp3 = neon_load_reg(rm, 3);
6668 neon_store_reg(rd, 0, tmp2);
6669 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
6670 tcg_gen_shli_i32(tmp3, tmp3, 16);
6671 tcg_gen_or_i32(tmp3, tmp3, tmp);
6672 neon_store_reg(rd, 1, tmp3);
6673 tcg_temp_free_i32(tmp);
6674 tcg_temp_free_i32(ahp);
6675 tcg_temp_free_ptr(fpst);
6676 break;
6677 }
6678 case NEON_2RM_VCVT_F32_F16:
6679 {
6680 TCGv_ptr fpst;
6681 TCGv_i32 ahp;
6682 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6683 q || (rd & 1)) {
6684 return 1;
6685 }
6686 fpst = get_fpstatus_ptr(true);
6687 ahp = get_ahp_flag();
6688 tmp3 = tcg_temp_new_i32();
6689 tmp = neon_load_reg(rm, 0);
6690 tmp2 = neon_load_reg(rm, 1);
6691 tcg_gen_ext16u_i32(tmp3, tmp);
6692 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6693 neon_store_reg(rd, 0, tmp3);
6694 tcg_gen_shri_i32(tmp, tmp, 16);
6695 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
6696 neon_store_reg(rd, 1, tmp);
6697 tmp3 = tcg_temp_new_i32();
6698 tcg_gen_ext16u_i32(tmp3, tmp2);
6699 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6700 neon_store_reg(rd, 2, tmp3);
6701 tcg_gen_shri_i32(tmp2, tmp2, 16);
6702 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
6703 neon_store_reg(rd, 3, tmp2);
6704 tcg_temp_free_i32(ahp);
6705 tcg_temp_free_ptr(fpst);
6706 break;
6707 }
6708 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6709 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
6710 return 1;
6711 }
6712 ptr1 = vfp_reg_ptr(true, rd);
6713 ptr2 = vfp_reg_ptr(true, rm);
6714
6715 /* Bit 6 is the lowest opcode bit; it distinguishes between
6716 * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
6717 */
6718 tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6719
6720 if (op == NEON_2RM_AESE) {
6721 gen_helper_crypto_aese(ptr1, ptr2, tmp3);
6722 } else {
6723 gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
6724 }
6725 tcg_temp_free_ptr(ptr1);
6726 tcg_temp_free_ptr(ptr2);
6727 tcg_temp_free_i32(tmp3);
6728 break;
6729 case NEON_2RM_SHA1H:
6730 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
6731 return 1;
6732 }
6733 ptr1 = vfp_reg_ptr(true, rd);
6734 ptr2 = vfp_reg_ptr(true, rm);
6735
6736 gen_helper_crypto_sha1h(ptr1, ptr2);
6737
6738 tcg_temp_free_ptr(ptr1);
6739 tcg_temp_free_ptr(ptr2);
6740 break;
6741 case NEON_2RM_SHA1SU1:
6742 if ((rm | rd) & 1) {
6743 return 1;
6744 }
6745 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6746 if (q) {
6747 if (!dc_isar_feature(aa32_sha2, s)) {
6748 return 1;
6749 }
6750 } else if (!dc_isar_feature(aa32_sha1, s)) {
6751 return 1;
6752 }
6753 ptr1 = vfp_reg_ptr(true, rd);
6754 ptr2 = vfp_reg_ptr(true, rm);
6755 if (q) {
6756 gen_helper_crypto_sha256su0(ptr1, ptr2);
6757 } else {
6758 gen_helper_crypto_sha1su1(ptr1, ptr2);
6759 }
6760 tcg_temp_free_ptr(ptr1);
6761 tcg_temp_free_ptr(ptr2);
6762 break;
6763
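/* These two-reg-misc ops operate on the whole vector at once via the
 * gvec expanders instead of the per-pass loop used below.
 */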
6764 case NEON_2RM_VMVN:
6765 tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
6766 break;
6767 case NEON_2RM_VNEG:
6768 tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
6769 break;
6770 case NEON_2RM_VABS:
6771 tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
6772 break;
6773
6774 case NEON_2RM_VCEQ0:
6775 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6776 vec_size, &ceq0_op[size]);
6777 break;
6778 case NEON_2RM_VCGT0:
6779 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6780 vec_size, &cgt0_op[size]);
6781 break;
6782 case NEON_2RM_VCLE0:
6783 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6784 vec_size, &cle0_op[size]);
6785 break;
6786 case NEON_2RM_VCGE0:
6787 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6788 vec_size, &cge0_op[size]);
6789 break;
6790 case NEON_2RM_VCLT0:
6791 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6792 vec_size, &clt0_op[size]);
6793 break;
6794
6795 default:
6796 elementwise:
6797 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6798 tmp = neon_load_reg(rm, pass);
6799 switch (op) {
6800 case NEON_2RM_VREV32:
6801 switch (size) {
6802 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6803 case 1: gen_swap_half(tmp); break;
6804 default: abort();
6805 }
6806 break;
6807 case NEON_2RM_VREV16:
6808 gen_rev16(tmp, tmp);
6809 break;
6810 case NEON_2RM_VCLS:
6811 switch (size) {
6812 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6813 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6814 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6815 default: abort();
6816 }
6817 break;
6818 case NEON_2RM_VCLZ:
6819 switch (size) {
6820 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6821 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6822 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
6823 default: abort();
6824 }
6825 break;
6826 case NEON_2RM_VCNT:
6827 gen_helper_neon_cnt_u8(tmp, tmp);
6828 break;
6829 case NEON_2RM_VQABS:
6830 switch (size) {
6831 case 0:
6832 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6833 break;
6834 case 1:
6835 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6836 break;
6837 case 2:
6838 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6839 break;
6840 default: abort();
6841 }
6842 break;
6843 case NEON_2RM_VQNEG:
6844 switch (size) {
6845 case 0:
6846 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6847 break;
6848 case 1:
6849 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6850 break;
6851 case 2:
6852 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6853 break;
6854 default: abort();
6855 }
6856 break;
6857 case NEON_2RM_VCGT0_F:
6858 {
6859 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6860 tmp2 = tcg_const_i32(0);
6861 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6862 tcg_temp_free_i32(tmp2);
6863 tcg_temp_free_ptr(fpstatus);
6864 break;
6865 }
6866 case NEON_2RM_VCGE0_F:
6867 {
6868 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6869 tmp2 = tcg_const_i32(0);
6870 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6871 tcg_temp_free_i32(tmp2);
6872 tcg_temp_free_ptr(fpstatus);
6873 break;
6874 }
6875 case NEON_2RM_VCEQ0_F:
6876 {
6877 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6878 tmp2 = tcg_const_i32(0);
6879 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6880 tcg_temp_free_i32(tmp2);
6881 tcg_temp_free_ptr(fpstatus);
6882 break;
6883 }
6884 case NEON_2RM_VCLE0_F:
6885 {
6886 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6887 tmp2 = tcg_const_i32(0);
6888 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6889 tcg_temp_free_i32(tmp2);
6890 tcg_temp_free_ptr(fpstatus);
6891 break;
6892 }
6893 case NEON_2RM_VCLT0_F:
6894 {
6895 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6896 tmp2 = tcg_const_i32(0);
6897 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6898 tcg_temp_free_i32(tmp2);
6899 tcg_temp_free_ptr(fpstatus);
6900 break;
6901 }
6902 case NEON_2RM_VABS_F:
6903 gen_helper_vfp_abss(tmp, tmp);
6904 break;
6905 case NEON_2RM_VNEG_F:
6906 gen_helper_vfp_negs(tmp, tmp);
6907 break;
6908 case NEON_2RM_VSWP:
6909 tmp2 = neon_load_reg(rd, pass);
6910 neon_store_reg(rm, pass, tmp2);
6911 break;
6912 case NEON_2RM_VTRN:
6913 tmp2 = neon_load_reg(rd, pass);
6914 switch (size) {
6915 case 0: gen_neon_trn_u8(tmp, tmp2); break;
6916 case 1: gen_neon_trn_u16(tmp, tmp2); break;
6917 default: abort();
6918 }
6919 neon_store_reg(rm, pass, tmp2);
6920 break;
6921 case NEON_2RM_VRINTN:
6922 case NEON_2RM_VRINTA:
6923 case NEON_2RM_VRINTM:
6924 case NEON_2RM_VRINTP:
6925 case NEON_2RM_VRINTZ:
6926 {
6927 TCGv_i32 tcg_rmode;
6928 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6929 int rmode;
6930
6931 if (op == NEON_2RM_VRINTZ) {
6932 rmode = FPROUNDING_ZERO;
6933 } else {
6934 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6935 }
6936
6937 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6938 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6939 cpu_env);
6940 gen_helper_rints(tmp, tmp, fpstatus);
6941 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6942 cpu_env);
6943 tcg_temp_free_ptr(fpstatus);
6944 tcg_temp_free_i32(tcg_rmode);
6945 break;
6946 }
6947 case NEON_2RM_VRINTX:
6948 {
6949 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6950 gen_helper_rints_exact(tmp, tmp, fpstatus);
6951 tcg_temp_free_ptr(fpstatus);
6952 break;
6953 }
6954 case NEON_2RM_VCVTAU:
6955 case NEON_2RM_VCVTAS:
6956 case NEON_2RM_VCVTNU:
6957 case NEON_2RM_VCVTNS:
6958 case NEON_2RM_VCVTPU:
6959 case NEON_2RM_VCVTPS:
6960 case NEON_2RM_VCVTMU:
6961 case NEON_2RM_VCVTMS:
6962 {
6963 bool is_signed = !extract32(insn, 7, 1);
6964 TCGv_ptr fpst = get_fpstatus_ptr(1);
6965 TCGv_i32 tcg_rmode, tcg_shift;
6966 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6967
6968 tcg_shift = tcg_const_i32(0);
6969 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6970 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6971 cpu_env);
6972
6973 if (is_signed) {
6974 gen_helper_vfp_tosls(tmp, tmp,
6975 tcg_shift, fpst);
6976 } else {
6977 gen_helper_vfp_touls(tmp, tmp,
6978 tcg_shift, fpst);
6979 }
6980
6981 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6982 cpu_env);
6983 tcg_temp_free_i32(tcg_rmode);
6984 tcg_temp_free_i32(tcg_shift);
6985 tcg_temp_free_ptr(fpst);
6986 break;
6987 }
6988 case NEON_2RM_VRECPE:
6989 {
6990 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6991 gen_helper_recpe_u32(tmp, tmp, fpstatus);
6992 tcg_temp_free_ptr(fpstatus);
6993 break;
6994 }
6995 case NEON_2RM_VRSQRTE:
6996 {
6997 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6998 gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
6999 tcg_temp_free_ptr(fpstatus);
7000 break;
7001 }
7002 case NEON_2RM_VRECPE_F:
7003 {
7004 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7005 gen_helper_recpe_f32(tmp, tmp, fpstatus);
7006 tcg_temp_free_ptr(fpstatus);
7007 break;
7008 }
7009 case NEON_2RM_VRSQRTE_F:
7010 {
7011 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7012 gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
7013 tcg_temp_free_ptr(fpstatus);
7014 break;
7015 }
7016 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
7017 {
7018 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7019 gen_helper_vfp_sitos(tmp, tmp, fpstatus);
7020 tcg_temp_free_ptr(fpstatus);
7021 break;
7022 }
7023 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
7024 {
7025 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7026 gen_helper_vfp_uitos(tmp, tmp, fpstatus);
7027 tcg_temp_free_ptr(fpstatus);
7028 break;
7029 }
7030 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
7031 {
7032 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7033 gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
7034 tcg_temp_free_ptr(fpstatus);
7035 break;
7036 }
7037 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
7038 {
7039 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7040 gen_helper_vfp_touizs(tmp, tmp, fpstatus);
7041 tcg_temp_free_ptr(fpstatus);
7042 break;
7043 }
7044 default:
7045 /* Reserved op values were caught by the
7046 * neon_2rm_sizes[] check earlier.
7047 */
7048 abort();
7049 }
7050 neon_store_reg(rd, pass, tmp);
7051 }
7052 break;
7053 }
7054 } else if ((insn & (1 << 10)) == 0) {
7055 /* VTBL, VTBX. */
7056 int n = ((insn >> 8) & 3) + 1;
7057 if ((rn + n) > 32) {
7058 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
7059 * helper function running off the end of the register file.
7060 */
7061 return 1;
7062 }
7063 n <<= 3;
7064 if (insn & (1 << 6)) {
7065 tmp = neon_load_reg(rd, 0);
7066 } else {
7067 tmp = tcg_temp_new_i32();
7068 tcg_gen_movi_i32(tmp, 0);
7069 }
7070 tmp2 = neon_load_reg(rm, 0);
7071 ptr1 = vfp_reg_ptr(true, rn);
7072 tmp5 = tcg_const_i32(n);
7073 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
7074 tcg_temp_free_i32(tmp);
7075 if (insn & (1 << 6)) {
7076 tmp = neon_load_reg(rd, 1);
7077 } else {
7078 tmp = tcg_temp_new_i32();
7079 tcg_gen_movi_i32(tmp, 0);
7080 }
7081 tmp3 = neon_load_reg(rm, 1);
7082 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
7083 tcg_temp_free_i32(tmp5);
7084 tcg_temp_free_ptr(ptr1);
7085 neon_store_reg(rd, 0, tmp2);
7086 neon_store_reg(rd, 1, tmp3);
7087 tcg_temp_free_i32(tmp);
7088 } else if ((insn & 0x380) == 0) {
7089 /* VDUP */
7090 int element;
7091 MemOp size;
7092
7093 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
7094 return 1;
7095 }
7096 if (insn & (1 << 16)) {
7097 size = MO_8;
7098 element = (insn >> 17) & 7;
7099 } else if (insn & (1 << 17)) {
7100 size = MO_16;
7101 element = (insn >> 18) & 3;
7102 } else {
7103 size = MO_32;
7104 element = (insn >> 19) & 1;
7105 }
7106 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
7107 neon_element_offset(rm, element, size),
7108 q ? 16 : 8, q ? 16 : 8);
7109 } else {
7110 return 1;
7111 }
7112 }
7113 }
7114 return 0;
7115 }
7116
7117 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
7118 {
7119 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
7120 const ARMCPRegInfo *ri;
7121
7122 cpnum = (insn >> 8) & 0xf;
7123
7124 /* First check for coprocessor space used for XScale/iwMMXt insns */
7125 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
7126 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
7127 return 1;
7128 }
7129 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7130 return disas_iwmmxt_insn(s, insn);
7131 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
7132 return disas_dsp_insn(s, insn);
7133 }
7134 return 1;
7135 }
7136
7137 /* Otherwise treat as a generic register access */
7138 is64 = (insn & (1 << 25)) == 0;
7139 if (!is64 && ((insn & (1 << 4)) == 0)) {
7140 /* cdp */
7141 return 1;
7142 }
7143
7144 crm = insn & 0xf;
7145 if (is64) {
7146 crn = 0;
7147 opc1 = (insn >> 4) & 0xf;
7148 opc2 = 0;
7149 rt2 = (insn >> 16) & 0xf;
7150 } else {
7151 crn = (insn >> 16) & 0xf;
7152 opc1 = (insn >> 21) & 7;
7153 opc2 = (insn >> 5) & 7;
7154 rt2 = 0;
7155 }
7156 isread = (insn >> 20) & 1;
7157 rt = (insn >> 12) & 0xf;
7158
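/* Look the register up by its full (cp, is64, ns, crn, crm, opc1, opc2)
 * key; on a miss we fall through to the LOG_UNIMP diagnostics below.
 */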
7159 ri = get_arm_cp_reginfo(s->cp_regs,
7160 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
7161 if (ri) {
7162 bool need_exit_tb;
7163
7164 /* Check access permissions */
7165 if (!cp_access_ok(s->current_el, ri, isread)) {
7166 return 1;
7167 }
7168
7169 if (s->hstr_active || ri->accessfn ||
7170 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
7171 /* Emit code to perform further access permissions checks at
7172 * runtime; this may result in an exception.
7173 * Note that on XScale all cp0..cp13 registers do an access check
7174 * call in order to handle c15_cpar.
7175 */
7176 TCGv_ptr tmpptr;
7177 TCGv_i32 tcg_syn, tcg_isread;
7178 uint32_t syndrome;
7179
7180 /* Note that since we are an implementation which takes an
7181 * exception on a trapped conditional instruction only if the
7182 * instruction passes its condition code check, we can take
7183 * advantage of the clause in the ARM ARM that allows us to set
7184 * the COND field in the instruction to 0xE in all cases.
7185 * We could fish the actual condition out of the insn (ARM)
7186 * or the condexec bits (Thumb) but it isn't necessary.
7187 */
7188 switch (cpnum) {
7189 case 14:
7190 if (is64) {
7191 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7192 isread, false);
7193 } else {
7194 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7195 rt, isread, false);
7196 }
7197 break;
7198 case 15:
7199 if (is64) {
7200 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7201 isread, false);
7202 } else {
7203 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7204 rt, isread, false);
7205 }
7206 break;
7207 default:
7208 /* ARMv8 defines that only coprocessors 14 and 15 exist,
7209 * so this can only happen if this is an ARMv7 or earlier CPU,
7210 * in which case the syndrome information won't actually be
7211 * guest visible.
7212 */
7213 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
7214 syndrome = syn_uncategorized();
7215 break;
7216 }
7217
7218 gen_set_condexec(s);
7219 gen_set_pc_im(s, s->pc_curr);
7220 tmpptr = tcg_const_ptr(ri);
7221 tcg_syn = tcg_const_i32(syndrome);
7222 tcg_isread = tcg_const_i32(isread);
7223 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
7224 tcg_isread);
7225 tcg_temp_free_ptr(tmpptr);
7226 tcg_temp_free_i32(tcg_syn);
7227 tcg_temp_free_i32(tcg_isread);
7228 } else if (ri->type & ARM_CP_RAISES_EXC) {
7229 /*
7230 * The readfn or writefn might raise an exception;
7231 * synchronize the CPU state in case it does.
7232 */
7233 gen_set_condexec(s);
7234 gen_set_pc_im(s, s->pc_curr);
7235 }
7236
7237 /* Handle special cases first */
7238 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
7239 case ARM_CP_NOP:
7240 return 0;
7241 case ARM_CP_WFI:
7242 if (isread) {
7243 return 1;
7244 }
7245 gen_set_pc_im(s, s->base.pc_next);
7246 s->base.is_jmp = DISAS_WFI;
7247 return 0;
7248 default:
7249 break;
7250 }
7251
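/* gen_io_start() marks the start of an I/O access for icount; the
 * matching TB exit is requested below via need_exit_tb.
 */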
7252 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7253 gen_io_start();
7254 }
7255
7256 if (isread) {
7257 /* Read */
7258 if (is64) {
7259 TCGv_i64 tmp64;
7260 TCGv_i32 tmp;
7261 if (ri->type & ARM_CP_CONST) {
7262 tmp64 = tcg_const_i64(ri->resetvalue);
7263 } else if (ri->readfn) {
7264 TCGv_ptr tmpptr;
7265 tmp64 = tcg_temp_new_i64();
7266 tmpptr = tcg_const_ptr(ri);
7267 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
7268 tcg_temp_free_ptr(tmpptr);
7269 } else {
7270 tmp64 = tcg_temp_new_i64();
7271 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7272 }
7273 tmp = tcg_temp_new_i32();
7274 tcg_gen_extrl_i64_i32(tmp, tmp64);
7275 store_reg(s, rt, tmp);
7276 tmp = tcg_temp_new_i32();
7277 tcg_gen_extrh_i64_i32(tmp, tmp64);
7278 tcg_temp_free_i64(tmp64);
7279 store_reg(s, rt2, tmp);
7280 } else {
7281 TCGv_i32 tmp;
7282 if (ri->type & ARM_CP_CONST) {
7283 tmp = tcg_const_i32(ri->resetvalue);
7284 } else if (ri->readfn) {
7285 TCGv_ptr tmpptr;
7286 tmp = tcg_temp_new_i32();
7287 tmpptr = tcg_const_ptr(ri);
7288 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7289 tcg_temp_free_ptr(tmpptr);
7290 } else {
7291 tmp = load_cpu_offset(ri->fieldoffset);
7292 }
7293 if (rt == 15) {
7294 /* A destination register of r15 for 32-bit loads sets
7295 * the condition codes from the high 4 bits of the value.
7296 */
7297 gen_set_nzcv(tmp);
7298 tcg_temp_free_i32(tmp);
7299 } else {
7300 store_reg(s, rt, tmp);
7301 }
7302 }
7303 } else {
7304 /* Write */
7305 if (ri->type & ARM_CP_CONST) {
7306 /* If not forbidden by access permissions, treat as WI */
7307 return 0;
7308 }
7309
7310 if (is64) {
7311 TCGv_i32 tmplo, tmphi;
7312 TCGv_i64 tmp64 = tcg_temp_new_i64();
7313 tmplo = load_reg(s, rt);
7314 tmphi = load_reg(s, rt2);
7315 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7316 tcg_temp_free_i32(tmplo);
7317 tcg_temp_free_i32(tmphi);
7318 if (ri->writefn) {
7319 TCGv_ptr tmpptr = tcg_const_ptr(ri);
7320 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7321 tcg_temp_free_ptr(tmpptr);
7322 } else {
7323 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7324 }
7325 tcg_temp_free_i64(tmp64);
7326 } else {
7327 if (ri->writefn) {
7328 TCGv_i32 tmp;
7329 TCGv_ptr tmpptr;
7330 tmp = load_reg(s, rt);
7331 tmpptr = tcg_const_ptr(ri);
7332 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7333 tcg_temp_free_ptr(tmpptr);
7334 tcg_temp_free_i32(tmp);
7335 } else {
7336 TCGv_i32 tmp = load_reg(s, rt);
7337 store_cpu_offset(tmp, ri->fieldoffset);
7338 }
7339 }
7340 }
7341
7342 /* I/O operations must end the TB here (whether read or write) */
7343 need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
7344 (ri->type & ARM_CP_IO));
7345
7346 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7347 /*
7348 * A write to any coprocessor register that ends a TB
7349 * must rebuild the hflags for the next TB.
7350 */
7351 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
7352 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7353 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
7354 } else {
7355 if (ri->type & ARM_CP_NEWEL) {
7356 gen_helper_rebuild_hflags_a32_newel(cpu_env);
7357 } else {
7358 gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
7359 }
7360 }
7361 tcg_temp_free_i32(tcg_el);
7362 /*
7363 * We default to ending the TB on a coprocessor register write,
7364 * but allow this to be suppressed by the register definition
7365 * (usually only necessary to work around guest bugs).
7366 */
7367 need_exit_tb = true;
7368 }
7369 if (need_exit_tb) {
7370 gen_lookup_tb(s);
7371 }
7372
7373 return 0;
7374 }
7375
7376 /* Unknown register; this might be a guest error or a QEMU
7377 * unimplemented feature.
7378 */
7379 if (is64) {
7380 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7381 "64 bit system register cp:%d opc1: %d crm:%d "
7382 "(%s)\n",
7383 isread ? "read" : "write", cpnum, opc1, crm,
7384 s->ns ? "non-secure" : "secure");
7385 } else {
7386 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7387 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7388 "(%s)\n",
7389 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7390 s->ns ? "non-secure" : "secure");
7391 }
7392
7393 return 1;
7394 }
7395
7396
7397 /* Store a 64-bit value to a register pair. Clobbers val. */
7398 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7399 {
7400 TCGv_i32 tmp;
7401 tmp = tcg_temp_new_i32();
7402 tcg_gen_extrl_i64_i32(tmp, val);
7403 store_reg(s, rlow, tmp);
7404 tmp = tcg_temp_new_i32();
7405 tcg_gen_extrh_i64_i32(tmp, val);
7406 store_reg(s, rhigh, tmp);
7407 }
7408
7409 /* Load and add a 64-bit value from a register pair. */
7410 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7411 {
7412 TCGv_i64 tmp;
7413 TCGv_i32 tmpl;
7414 TCGv_i32 tmph;
7415
7416 /* Load the 64-bit value rhigh:rlow. */
7417 tmpl = load_reg(s, rlow);
7418 tmph = load_reg(s, rhigh);
7419 tmp = tcg_temp_new_i64();
7420 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7421 tcg_temp_free_i32(tmpl);
7422 tcg_temp_free_i32(tmph);
7423 tcg_gen_add_i64(val, val, tmp);
7424 tcg_temp_free_i64(tmp);
7425 }
7426
7427 /* Set N and Z flags from hi|lo. */
7428 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7429 {
7430 tcg_gen_mov_i32(cpu_NF, hi);
7431 tcg_gen_or_i32(cpu_ZF, lo, hi);
7432 }
7433
7434 /* Load/Store exclusive instructions are implemented by remembering
7435 the value/address loaded, and seeing if these are the same
7436 when the store is performed. This should be sufficient to implement
7437 the architecturally mandated semantics, and avoids having to monitor
7438 regular stores. The compare vs the remembered value is done during
7439 the cmpxchg operation, but we must compare the addresses manually. */
7440 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7441 TCGv_i32 addr, int size)
7442 {
7443 TCGv_i32 tmp = tcg_temp_new_i32();
7444 MemOp opc = size | MO_ALIGN | s->be_data;
7445
7446 s->is_ldex = true;
7447
7448 if (size == 3) {
7449 TCGv_i32 tmp2 = tcg_temp_new_i32();
7450 TCGv_i64 t64 = tcg_temp_new_i64();
7451
7452 /* For AArch32, architecturally the 32-bit word at the lowest
7453 * address is always Rt and the one at addr+4 is Rt2, even if
7454 * the CPU is big-endian. That means we don't want to do a
7455 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
7456 * for an architecturally 64-bit access, but instead do a
7457 * 64-bit access using MO_BE if appropriate and then split
7458 * the two halves.
7459 * This only makes a difference for BE32 user-mode, where
7460 * frob64() must not flip the two halves of the 64-bit data
7461 * but this code must treat BE32 user-mode like BE32 system.
7462 */
7463 TCGv taddr = gen_aa32_addr(s, addr, opc);
7464
7465 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
7466 tcg_temp_free(taddr);
7467 tcg_gen_mov_i64(cpu_exclusive_val, t64);
7468 if (s->be_data == MO_BE) {
7469 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
7470 } else {
7471 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
7472 }
7473 tcg_temp_free_i64(t64);
7474
7475 store_reg(s, rt2, tmp2);
7476 } else {
7477 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
7478 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7479 }
7480
7481 store_reg(s, rt, tmp);
7482 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7483 }
7484
7485 static void gen_clrex(DisasContext *s)
7486 {
7487 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7488 }
7489
7490 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7491 TCGv_i32 addr, int size)
7492 {
7493 TCGv_i32 t0, t1, t2;
7494 TCGv_i64 extaddr;
7495 TCGv taddr;
7496 TCGLabel *done_label;
7497 TCGLabel *fail_label;
7498 MemOp opc = size | MO_ALIGN | s->be_data;
7499
7500 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
7501 [addr] = {Rt};
7502 {Rd} = 0;
7503 } else {
7504 {Rd} = 1;
7505 } */
7506 fail_label = gen_new_label();
7507 done_label = gen_new_label();
7508 extaddr = tcg_temp_new_i64();
7509 tcg_gen_extu_i32_i64(extaddr, addr);
7510 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7511 tcg_temp_free_i64(extaddr);
7512
7513 taddr = gen_aa32_addr(s, addr, opc);
7514 t0 = tcg_temp_new_i32();
7515 t1 = load_reg(s, rt);
7516 if (size == 3) {
7517 TCGv_i64 o64 = tcg_temp_new_i64();
7518 TCGv_i64 n64 = tcg_temp_new_i64();
7519
7520 t2 = load_reg(s, rt2);
7521 /* For AArch32, architecturally the 32-bit word at the lowest
7522 * address is always Rt and the one at addr+4 is Rt2, even if
7523 * the CPU is big-endian. Since we're going to treat this as a
7524 * single 64-bit BE store, we need to put the two halves in the
7525 * opposite order for BE to LE, so that they end up in the right
7526 * places.
7527 * We don't want gen_aa32_frob64() because that does the wrong
7528 * thing for BE32 usermode.
7529 */
7530 if (s->be_data == MO_BE) {
7531 tcg_gen_concat_i32_i64(n64, t2, t1);
7532 } else {
7533 tcg_gen_concat_i32_i64(n64, t1, t2);
7534 }
7535 tcg_temp_free_i32(t2);
7536
7537 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
7538 get_mem_index(s), opc);
7539 tcg_temp_free_i64(n64);
7540
7541 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
7542 tcg_gen_extrl_i64_i32(t0, o64);
7543
7544 tcg_temp_free_i64(o64);
7545 } else {
7546 t2 = tcg_temp_new_i32();
7547 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
7548 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
7549 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
7550 tcg_temp_free_i32(t2);
7551 }
7552 tcg_temp_free_i32(t1);
7553 tcg_temp_free(taddr);
7554 tcg_gen_mov_i32(cpu_R[rd], t0);
7555 tcg_temp_free_i32(t0);
7556 tcg_gen_br(done_label);
7557
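/* Address did not match the exclusive monitor: no store is attempted
 * and Rd reports failure (1).
 */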
7558 gen_set_label(fail_label);
7559 tcg_gen_movi_i32(cpu_R[rd], 1);
7560 gen_set_label(done_label);
7561 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7562 }
7563
7564 /* gen_srs:
7565 * @env: CPUARMState
7566 * @s: DisasContext
7567 * @mode: mode field from insn (which stack to store to)
7568 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7569 * @writeback: true if writeback bit set
7570 *
7571 * Generate code for the SRS (Store Return State) insn.
7572 */
7573 static void gen_srs(DisasContext *s,
7574 uint32_t mode, uint32_t amode, bool writeback)
7575 {
7576 int32_t offset;
7577 TCGv_i32 addr, tmp;
7578 bool undef = false;
7579
7580 /* SRS is:
7581 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
7582 * and specified mode is monitor mode
7583 * - UNDEFINED in Hyp mode
7584 * - UNPREDICTABLE in User or System mode
7585 * - UNPREDICTABLE if the specified mode is:
7586 * -- not implemented
7587 * -- not a valid mode number
7588 * -- a mode that's at a higher exception level
7589 * -- Monitor, if we are Non-secure
7590 * For the UNPREDICTABLE cases we choose to UNDEF.
7591 */
7592 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
7593 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
7594 return;
7595 }
7596
7597 if (s->current_el == 0 || s->current_el == 2) {
7598 undef = true;
7599 }
7600
7601 switch (mode) {
7602 case ARM_CPU_MODE_USR:
7603 case ARM_CPU_MODE_FIQ:
7604 case ARM_CPU_MODE_IRQ:
7605 case ARM_CPU_MODE_SVC:
7606 case ARM_CPU_MODE_ABT:
7607 case ARM_CPU_MODE_UND:
7608 case ARM_CPU_MODE_SYS:
7609 break;
7610 case ARM_CPU_MODE_HYP:
7611 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
7612 undef = true;
7613 }
7614 break;
7615 case ARM_CPU_MODE_MON:
7616 /* No need to check specifically for "are we non-secure" because
7617 * we've already made EL0 UNDEF and handled the trap for S-EL1;
7618 * so if this isn't EL3 then we must be non-secure.
7619 */
7620 if (s->current_el != 3) {
7621 undef = true;
7622 }
7623 break;
7624 default:
7625 undef = true;
7626 }
7627
7628 if (undef) {
7629 unallocated_encoding(s);
7630 return;
7631 }
7632
7633 addr = tcg_temp_new_i32();
7634 tmp = tcg_const_i32(mode);
7635 /* get_r13_banked() will raise an exception if called from System mode */
7636 gen_set_condexec(s);
7637 gen_set_pc_im(s, s->pc_curr);
7638 gen_helper_get_r13_banked(addr, cpu_env, tmp);
7639 tcg_temp_free_i32(tmp);
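/* Pick the address of the lower word of the two-word frame relative
 * to the banked SP for each addressing mode (DA/IA/DB/IB).
 */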
7640 switch (amode) {
7641 case 0: /* DA */
7642 offset = -4;
7643 break;
7644 case 1: /* IA */
7645 offset = 0;
7646 break;
7647 case 2: /* DB */
7648 offset = -8;
7649 break;
7650 case 3: /* IB */
7651 offset = 4;
7652 break;
7653 default:
7654 abort();
7655 }
7656 tcg_gen_addi_i32(addr, addr, offset);
7657 tmp = load_reg(s, 14);
7658 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7659 tcg_temp_free_i32(tmp);
7660 tmp = load_cpu_field(spsr);
7661 tcg_gen_addi_i32(addr, addr, 4);
7662 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7663 tcg_temp_free_i32(tmp);
7664 if (writeback) {
7665 switch (amode) {
7666 case 0:
7667 offset = -8;
7668 break;
7669 case 1:
7670 offset = 4;
7671 break;
7672 case 2:
7673 offset = -4;
7674 break;
7675 case 3:
7676 offset = 0;
7677 break;
7678 default:
7679 abort();
7680 }
7681 tcg_gen_addi_i32(addr, addr, offset);
7682 tmp = tcg_const_i32(mode);
7683 gen_helper_set_r13_banked(cpu_env, tmp, addr);
7684 tcg_temp_free_i32(tmp);
7685 }
7686 tcg_temp_free_i32(addr);
7687 s->base.is_jmp = DISAS_UPDATE;
7688 }
7689
7690 /* Generate a label used for skipping this instruction */
7691 static void arm_gen_condlabel(DisasContext *s)
7692 {
7693 if (!s->condjmp) {
7694 s->condlabel = gen_new_label();
7695 s->condjmp = 1;
7696 }
7697 }
7698
7699 /* Skip this instruction if the ARM condition is false */
7700 static void arm_skip_unless(DisasContext *s, uint32_t cond)
7701 {
7702 arm_gen_condlabel(s);
7703 arm_gen_test_cc(cond ^ 1, s->condlabel);
7704 }
7705
7706
7707 /*
7708 * Constant expanders for the decoders.
7709 */
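/* These helpers are invoked by the generated decoders included below,
 * where they are named via !function= modifiers in the .decode files.
 */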
7710
7711 static int negate(DisasContext *s, int x)
7712 {
7713 return -x;
7714 }
7715
7716 static int plus_2(DisasContext *s, int x)
7717 {
7718 return x + 2;
7719 }
7720
7721 static int times_2(DisasContext *s, int x)
7722 {
7723 return x * 2;
7724 }
7725
7726 static int times_4(DisasContext *s, int x)
7727 {
7728 return x * 4;
7729 }
7730
7731 /* Return only the rotation part of T32ExpandImm. */
7732 static int t32_expandimm_rot(DisasContext *s, int x)
7733 {
7734 return x & 0xc00 ? extract32(x, 7, 5) : 0;
7735 }
7736
7737 /* Return the unrotated immediate from T32ExpandImm. */
7738 static int t32_expandimm_imm(DisasContext *s, int x)
7739 {
7740 int imm = extract32(x, 0, 8);
7741
7742 switch (extract32(x, 8, 4)) {
7743 case 0: /* XY */
7744 /* Nothing to do. */
7745 break;
7746 case 1: /* 00XY00XY */
7747 imm *= 0x00010001;
7748 break;
7749 case 2: /* XY00XY00 */
7750 imm *= 0x01000100;
7751 break;
7752 case 3: /* XYXYXYXY */
7753 imm *= 0x01010101;
7754 break;
7755 default:
7756 /* Rotated constant. */
7757 imm |= 0x80;
7758 break;
7759 }
7760 return imm;
7761 }
7762
7763 static int t32_branch24(DisasContext *s, int x)
7764 {
7765 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
7766 x ^= !(x < 0) * (3 << 21);
7767 /* Append the final zero. */
7768 return x << 1;
7769 }
7770
7771 static int t16_setflags(DisasContext *s)
7772 {
7773 return s->condexec_mask == 0;
7774 }
7775
7776 static int t16_push_list(DisasContext *s, int x)
7777 {
7778 return (x & 0xff) | (x & 0x100) << (14 - 8);
7779 }
7780
7781 static int t16_pop_list(DisasContext *s, int x)
7782 {
7783 return (x & 0xff) | (x & 0x100) << (15 - 8);
7784 }
7785
7786 /*
7787 * Include the generated decoders.
7788 */
7789
7790 #include "decode-a32.inc.c"
7791 #include "decode-a32-uncond.inc.c"
7792 #include "decode-t32.inc.c"
7793 #include "decode-t16.inc.c"
7794
7795 /* Helpers to swap operands for reverse-subtract. */
7796 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7797 {
7798 tcg_gen_sub_i32(dst, b, a);
7799 }
7800
7801 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7802 {
7803 gen_sub_CC(dst, b, a);
7804 }
7805
7806 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7807 {
7808 gen_sub_carry(dest, b, a);
7809 }
7810
7811 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7812 {
7813 gen_sbc_CC(dest, b, a);
7814 }
7815
7816 /*
7817 * Helpers for the data processing routines.
7818 *
7819 * After the computation, store the result back. This may be
7820 * suppressed altogether (STREG_NONE), require a runtime check
7821 * against the stack limits (STREG_SP_CHECK), generate an exception
7822 * return (STREG_EXC_RET), or simply store into a register (STREG_NORMAL).
7823 *
7824 * Always return true, indicating success for a trans_* function.
7825 */
7826 typedef enum {
7827 STREG_NONE,
7828 STREG_NORMAL,
7829 STREG_SP_CHECK,
7830 STREG_EXC_RET,
7831 } StoreRegKind;
7832
7833 static bool store_reg_kind(DisasContext *s, int rd,
7834 TCGv_i32 val, StoreRegKind kind)
7835 {
7836 switch (kind) {
7837 case STREG_NONE:
7838 tcg_temp_free_i32(val);
7839 return true;
7840 case STREG_NORMAL:
7841 /* See ALUWritePC: Interworking only from a32 mode. */
7842 if (s->thumb) {
7843 store_reg(s, rd, val);
7844 } else {
7845 store_reg_bx(s, rd, val);
7846 }
7847 return true;
7848 case STREG_SP_CHECK:
7849 store_sp_checked(s, val);
7850 return true;
7851 case STREG_EXC_RET:
7852 gen_exception_return(s, val);
7853 return true;
7854 }
7855 g_assert_not_reached();
7856 }
7857
7858 /*
7859 * Data Processing (register)
7860 *
7861 * Operate, with set flags, one register source,
7862 * one immediate shifted register source, and a destination.
7863 */
7864 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
7865 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7866 int logic_cc, StoreRegKind kind)
7867 {
7868 TCGv_i32 tmp1, tmp2;
7869
7870 tmp2 = load_reg(s, a->rm);
7871 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
7872 tmp1 = load_reg(s, a->rn);
7873
7874 gen(tmp1, tmp1, tmp2);
7875 tcg_temp_free_i32(tmp2);
7876
7877 if (logic_cc) {
7878 gen_logic_CC(tmp1);
7879 }
7880 return store_reg_kind(s, a->rd, tmp1, kind);
7881 }
7882
7883 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
7884 void (*gen)(TCGv_i32, TCGv_i32),
7885 int logic_cc, StoreRegKind kind)
7886 {
7887 TCGv_i32 tmp;
7888
7889 tmp = load_reg(s, a->rm);
7890 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
7891
7892 gen(tmp, tmp);
7893 if (logic_cc) {
7894 gen_logic_CC(tmp);
7895 }
7896 return store_reg_kind(s, a->rd, tmp, kind);
7897 }
7898
7899 /*
7900 * Data-processing (register-shifted register)
7901 *
7902 * Operate, with set flags, one register source,
7903 * one register shifted register source, and a destination.
7904 */
7905 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
7906 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7907 int logic_cc, StoreRegKind kind)
7908 {
7909 TCGv_i32 tmp1, tmp2;
7910
7911 tmp1 = load_reg(s, a->rs);
7912 tmp2 = load_reg(s, a->rm);
7913 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7914 tmp1 = load_reg(s, a->rn);
7915
7916 gen(tmp1, tmp1, tmp2);
7917 tcg_temp_free_i32(tmp2);
7918
7919 if (logic_cc) {
7920 gen_logic_CC(tmp1);
7921 }
7922 return store_reg_kind(s, a->rd, tmp1, kind);
7923 }
7924
7925 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
7926 void (*gen)(TCGv_i32, TCGv_i32),
7927 int logic_cc, StoreRegKind kind)
7928 {
7929 TCGv_i32 tmp1, tmp2;
7930
7931 tmp1 = load_reg(s, a->rs);
7932 tmp2 = load_reg(s, a->rm);
7933 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7934
7935 gen(tmp2, tmp2);
7936 if (logic_cc) {
7937 gen_logic_CC(tmp2);
7938 }
7939 return store_reg_kind(s, a->rd, tmp2, kind);
7940 }
7941
7942 /*
7943 * Data-processing (immediate)
7944 *
7945 * Operate, optionally setting flags, on one register source,
7946 * one rotated immediate, and a destination.
7947 *
7948 * Note that logic_cc && a->rot setting CF based on the msb of the
7949 * immediate is the reason why we must pass in the unrotated form
7950 * of the immediate.
7951 */
7952 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
7953 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7954 int logic_cc, StoreRegKind kind)
7955 {
7956 TCGv_i32 tmp1, tmp2;
7957 uint32_t imm;
7958
7959 imm = ror32(a->imm, a->rot);
7960 if (logic_cc && a->rot) {
7961 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7962 }
7963 tmp2 = tcg_const_i32(imm);
7964 tmp1 = load_reg(s, a->rn);
7965
7966 gen(tmp1, tmp1, tmp2);
7967 tcg_temp_free_i32(tmp2);
7968
7969 if (logic_cc) {
7970 gen_logic_CC(tmp1);
7971 }
7972 return store_reg_kind(s, a->rd, tmp1, kind);
7973 }
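/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the rotated-immediate expansion used above.  ror32() is re-implemented
 * locally just for the example; only standard C headers are assumed.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t ror32_demo(uint32_t x, unsigned r)
{
    return r ? (x >> r) | (x << (32 - r)) : x;
}

static void rotated_imm_demo(void)
{
    /* An 8-bit immediate 0xff rotated right by 8 becomes 0xff000000. */
    uint32_t imm = ror32_demo(0xff, 8);
    assert(imm == 0xff000000u);
    /* When the rotation is non-zero and the op sets flags logically,
     * CF is taken from bit 31 of the rotated immediate (here: 1). */
    assert((imm >> 31) == 1);
}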
7974
7975 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
7976 void (*gen)(TCGv_i32, TCGv_i32),
7977 int logic_cc, StoreRegKind kind)
7978 {
7979 TCGv_i32 tmp;
7980 uint32_t imm;
7981
7982 imm = ror32(a->imm, a->rot);
7983 if (logic_cc && a->rot) {
7984 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7985 }
7986 tmp = tcg_const_i32(imm);
7987
7988 gen(tmp, tmp);
7989 if (logic_cc) {
7990 gen_logic_CC(tmp);
7991 }
7992 return store_reg_kind(s, a->rd, tmp, kind);
7993 }
7994
7995 #define DO_ANY3(NAME, OP, L, K) \
7996 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
7997 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
7998 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
7999 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
8000 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
8001 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
8002
8003 #define DO_ANY2(NAME, OP, L, K) \
8004 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
8005 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
8006 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
8007 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
8008 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
8009 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
8010
8011 #define DO_CMP2(NAME, OP, L) \
8012 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
8013 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
8014 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
8015 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
8016 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
8017 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
8018
8019 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
8020 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
8021 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
8022 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
8023
8024 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
8025 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
8026 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
8027 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
8028
8029 DO_CMP2(TST, tcg_gen_and_i32, true)
8030 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
8031 DO_CMP2(CMN, gen_add_CC, false)
8032 DO_CMP2(CMP, gen_sub_CC, false)
8033
8034 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
8035 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
8036
8037 /*
8038 * Note that, in the computation of StoreRegKind below, we may return
8039 * out of the middle of the functions expanded by DO_ANY3, and that we
8040 * may modify a->s via that parameter before it is used by OP.
8041 */
8042 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
8043 ({
8044 StoreRegKind ret = STREG_NORMAL;
8045 if (a->rd == 15 && a->s) {
8046 /*
8047 * See ALUExceptionReturn:
8048 * In User mode, UNPREDICTABLE; we choose UNDEF.
8049 * In Hyp mode, UNDEFINED.
8050 */
8051 if (IS_USER(s) || s->current_el == 2) {
8052 unallocated_encoding(s);
8053 return true;
8054 }
8055 /* There is no writeback of nzcv to PSTATE. */
8056 a->s = 0;
8057 ret = STREG_EXC_RET;
8058 } else if (a->rd == 13 && a->rn == 13) {
8059 ret = STREG_SP_CHECK;
8060 }
8061 ret;
8062 }))
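/*
 * Editor's note: an illustrative sketch (not part of this file) of the
 * GNU C statement-expression used for the K argument above.  The value of
 * ({ ... }) is that of its final expression ("ret;"), and a plain "return"
 * inside it returns from the enclosing trans_* function, which is what the
 * comment above DO_ANY3(SUB, ...) refers to.
 */
static int streg_kind_demo(int rd, int rn)
{
    int kind = ({
        int ret = 0;                  /* default, like STREG_NORMAL   */
        if (rd == 13 && rn == 13) {
            ret = 1;                  /* like STREG_SP_CHECK          */
        }
        ret;                          /* value of the whole ({ ... }) */
    });
    return kind;
}
/* streg_kind_demo(13, 13) == 1; streg_kind_demo(0, 1) == 0. */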
8063
8064 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
8065 ({
8066 StoreRegKind ret = STREG_NORMAL;
8067 if (a->rd == 15 && a->s) {
8068 /*
8069 * See ALUExceptionReturn:
8070 * In User mode, UNPREDICTABLE; we choose UNDEF.
8071 * In Hyp mode, UNDEFINED.
8072 */
8073 if (IS_USER(s) || s->current_el == 2) {
8074 unallocated_encoding(s);
8075 return true;
8076 }
8077 /* There is no writeback of nzcv to PSTATE. */
8078 a->s = 0;
8079 ret = STREG_EXC_RET;
8080 } else if (a->rd == 13) {
8081 ret = STREG_SP_CHECK;
8082 }
8083 ret;
8084 }))
8085
8086 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
8087
8088 /*
8089 * ORN is only available with T32, so there is no register-shifted-register
8090 * form of the insn. Using the DO_ANY3 macro would create an unused function.
8091 */
8092 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
8093 {
8094 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
8095 }
8096
8097 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
8098 {
8099 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
8100 }
8101
8102 #undef DO_ANY3
8103 #undef DO_ANY2
8104 #undef DO_CMP2
8105
8106 static bool trans_ADR(DisasContext *s, arg_ri *a)
8107 {
8108 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
8109 return true;
8110 }
8111
8112 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
8113 {
8114 TCGv_i32 tmp;
8115
8116 if (!ENABLE_ARCH_6T2) {
8117 return false;
8118 }
8119
8120 tmp = tcg_const_i32(a->imm);
8121 store_reg(s, a->rd, tmp);
8122 return true;
8123 }
8124
8125 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
8126 {
8127 TCGv_i32 tmp;
8128
8129 if (!ENABLE_ARCH_6T2) {
8130 return false;
8131 }
8132
8133 tmp = load_reg(s, a->rd);
8134 tcg_gen_ext16u_i32(tmp, tmp);
8135 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
8136 store_reg(s, a->rd, tmp);
8137 return true;
8138 }
8139
8140 /*
8141 * Multiply and multiply accumulate
8142 */
8143
8144 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
8145 {
8146 TCGv_i32 t1, t2;
8147
8148 t1 = load_reg(s, a->rn);
8149 t2 = load_reg(s, a->rm);
8150 tcg_gen_mul_i32(t1, t1, t2);
8151 tcg_temp_free_i32(t2);
8152 if (add) {
8153 t2 = load_reg(s, a->ra);
8154 tcg_gen_add_i32(t1, t1, t2);
8155 tcg_temp_free_i32(t2);
8156 }
8157 if (a->s) {
8158 gen_logic_CC(t1);
8159 }
8160 store_reg(s, a->rd, t1);
8161 return true;
8162 }
8163
8164 static bool trans_MUL(DisasContext *s, arg_MUL *a)
8165 {
8166 return op_mla(s, a, false);
8167 }
8168
8169 static bool trans_MLA(DisasContext *s, arg_MLA *a)
8170 {
8171 return op_mla(s, a, true);
8172 }
8173
8174 static bool trans_MLS(DisasContext *s, arg_MLS *a)
8175 {
8176 TCGv_i32 t1, t2;
8177
8178 if (!ENABLE_ARCH_6T2) {
8179 return false;
8180 }
8181 t1 = load_reg(s, a->rn);
8182 t2 = load_reg(s, a->rm);
8183 tcg_gen_mul_i32(t1, t1, t2);
8184 tcg_temp_free_i32(t2);
8185 t2 = load_reg(s, a->ra);
8186 tcg_gen_sub_i32(t1, t2, t1);
8187 tcg_temp_free_i32(t2);
8188 store_reg(s, a->rd, t1);
8189 return true;
8190 }
8191
8192 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
8193 {
8194 TCGv_i32 t0, t1, t2, t3;
8195
8196 t0 = load_reg(s, a->rm);
8197 t1 = load_reg(s, a->rn);
8198 if (uns) {
8199 tcg_gen_mulu2_i32(t0, t1, t0, t1);
8200 } else {
8201 tcg_gen_muls2_i32(t0, t1, t0, t1);
8202 }
8203 if (add) {
8204 t2 = load_reg(s, a->ra);
8205 t3 = load_reg(s, a->rd);
8206 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
8207 tcg_temp_free_i32(t2);
8208 tcg_temp_free_i32(t3);
8209 }
8210 if (a->s) {
8211 gen_logicq_cc(t0, t1);
8212 }
8213 store_reg(s, a->ra, t0);
8214 store_reg(s, a->rd, t1);
8215 return true;
8216 }
8217
8218 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
8219 {
8220 return op_mlal(s, a, true, false);
8221 }
8222
8223 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
8224 {
8225 return op_mlal(s, a, false, false);
8226 }
8227
8228 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
8229 {
8230 return op_mlal(s, a, true, true);
8231 }
8232
8233 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
8234 {
8235 return op_mlal(s, a, false, true);
8236 }
8237
8238 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
8239 {
8240 TCGv_i32 t0, t1, t2, zero;
8241
8242 if (s->thumb
8243 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8244 : !ENABLE_ARCH_6) {
8245 return false;
8246 }
8247
8248 t0 = load_reg(s, a->rm);
8249 t1 = load_reg(s, a->rn);
8250 tcg_gen_mulu2_i32(t0, t1, t0, t1);
8251 zero = tcg_const_i32(0);
8252 t2 = load_reg(s, a->ra);
8253 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
8254 tcg_temp_free_i32(t2);
8255 t2 = load_reg(s, a->rd);
8256 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
8257 tcg_temp_free_i32(t2);
8258 tcg_temp_free_i32(zero);
8259 store_reg(s, a->ra, t0);
8260 store_reg(s, a->rd, t1);
8261 return true;
8262 }
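/*
 * Editor's note: an illustrative, standalone sketch (not part of this file).
 * UMAAL computes Rm*Rn + Ra + Rd as a 64-bit value; even the extreme case
 * fits exactly, which is why the two add2 steps above need no extra carry
 * word:
 *   (2^32-1)*(2^32-1) + (2^32-1) + (2^32-1) == 2^64 - 1
 */
#include <assert.h>
#include <stdint.h>

static void umaal_range_demo(void)
{
    uint64_t m = UINT32_MAX;
    assert(m * m + m + m == UINT64_MAX);
}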
8263
8264 /*
8265 * Saturating addition and subtraction
8266 */
8267
8268 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
8269 {
8270 TCGv_i32 t0, t1;
8271
8272 if (s->thumb
8273 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8274 : !ENABLE_ARCH_5TE) {
8275 return false;
8276 }
8277
8278 t0 = load_reg(s, a->rm);
8279 t1 = load_reg(s, a->rn);
8280 if (doub) {
8281 gen_helper_add_saturate(t1, cpu_env, t1, t1);
8282 }
8283 if (add) {
8284 gen_helper_add_saturate(t0, cpu_env, t0, t1);
8285 } else {
8286 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
8287 }
8288 tcg_temp_free_i32(t1);
8289 store_reg(s, a->rd, t0);
8290 return true;
8291 }
8292
8293 #define DO_QADDSUB(NAME, ADD, DOUB) \
8294 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8295 { \
8296 return op_qaddsub(s, a, ADD, DOUB); \
8297 }
8298
8299 DO_QADDSUB(QADD, true, false)
8300 DO_QADDSUB(QSUB, false, false)
8301 DO_QADDSUB(QDADD, true, true)
8302 DO_QADDSUB(QDSUB, false, true)
8303
8304 #undef DO_QADDSUB
8305
8306 /*
8307 * Halfword multiply and multiply accumulate
8308 */
8309
8310 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
8311 int add_long, bool nt, bool mt)
8312 {
8313 TCGv_i32 t0, t1, tl, th;
8314
8315 if (s->thumb
8316 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8317 : !ENABLE_ARCH_5TE) {
8318 return false;
8319 }
8320
8321 t0 = load_reg(s, a->rn);
8322 t1 = load_reg(s, a->rm);
8323 gen_mulxy(t0, t1, nt, mt);
8324 tcg_temp_free_i32(t1);
8325
8326 switch (add_long) {
8327 case 0:
8328 store_reg(s, a->rd, t0);
8329 break;
8330 case 1:
8331 t1 = load_reg(s, a->ra);
8332 gen_helper_add_setq(t0, cpu_env, t0, t1);
8333 tcg_temp_free_i32(t1);
8334 store_reg(s, a->rd, t0);
8335 break;
8336 case 2:
8337 tl = load_reg(s, a->ra);
8338 th = load_reg(s, a->rd);
8339 /* Sign-extend the 32-bit product to 64 bits. */
8340 t1 = tcg_temp_new_i32();
8341 tcg_gen_sari_i32(t1, t0, 31);
8342 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
8343 tcg_temp_free_i32(t0);
8344 tcg_temp_free_i32(t1);
8345 store_reg(s, a->ra, tl);
8346 store_reg(s, a->rd, th);
8347 break;
8348 default:
8349 g_assert_not_reached();
8350 }
8351 return true;
8352 }
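/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the SMLAL* case above: the 32-bit product is widened to 64 bits by
 * pairing it with a sign word, which is what tcg_gen_sari_i32(t1, t0, 31)
 * produces.
 */
#include <assert.h>
#include <stdint.h>

static void signext_pair_demo(int32_t t0)
{
    uint32_t lo = (uint32_t)t0;
    uint32_t hi = t0 < 0 ? 0xffffffffu : 0;   /* == arithmetic t0 >> 31 */
    uint64_t pair = ((uint64_t)hi << 32) | lo;
    assert(pair == (uint64_t)(int64_t)t0);
}
/* e.g. signext_pair_demo(-5) and signext_pair_demo(7) both hold. */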
8353
8354 #define DO_SMLAX(NAME, add, nt, mt) \
8355 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8356 { \
8357 return op_smlaxxx(s, a, add, nt, mt); \
8358 }
8359
8360 DO_SMLAX(SMULBB, 0, 0, 0)
8361 DO_SMLAX(SMULBT, 0, 0, 1)
8362 DO_SMLAX(SMULTB, 0, 1, 0)
8363 DO_SMLAX(SMULTT, 0, 1, 1)
8364
8365 DO_SMLAX(SMLABB, 1, 0, 0)
8366 DO_SMLAX(SMLABT, 1, 0, 1)
8367 DO_SMLAX(SMLATB, 1, 1, 0)
8368 DO_SMLAX(SMLATT, 1, 1, 1)
8369
8370 DO_SMLAX(SMLALBB, 2, 0, 0)
8371 DO_SMLAX(SMLALBT, 2, 0, 1)
8372 DO_SMLAX(SMLALTB, 2, 1, 0)
8373 DO_SMLAX(SMLALTT, 2, 1, 1)
8374
8375 #undef DO_SMLAX
8376
8377 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
8378 {
8379 TCGv_i32 t0, t1;
8380
8381 if (!ENABLE_ARCH_5TE) {
8382 return false;
8383 }
8384
8385 t0 = load_reg(s, a->rn);
8386 t1 = load_reg(s, a->rm);
8387 /*
8388 * Since the nominal result is product<47:16>, shift the 16-bit
8389 * input up by 16 bits, so that the result is at product<63:32>.
8390 */
8391 if (mt) {
8392 tcg_gen_andi_i32(t1, t1, 0xffff0000);
8393 } else {
8394 tcg_gen_shli_i32(t1, t1, 16);
8395 }
8396 tcg_gen_muls2_i32(t0, t1, t0, t1);
8397 tcg_temp_free_i32(t0);
8398 if (add) {
8399 t0 = load_reg(s, a->ra);
8400 gen_helper_add_setq(t1, cpu_env, t1, t0);
8401 tcg_temp_free_i32(t0);
8402 }
8403 store_reg(s, a->rd, t1);
8404 return true;
8405 }
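/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the comment above.  Multiplying by the 16-bit operand pre-shifted into
 * the top half (the shli/andi above) puts product<47:16> into the top 32
 * bits of the 64-bit product, so the high word of muls2 is the result.
 * Shown with non-negative values in plain C; the TCG ops above handle the
 * signed case the same way.
 */
#include <assert.h>
#include <stdint.h>

static void smulw_demo(void)
{
    int32_t rn = 123456789;
    int32_t rm16 = 321;                            /* 16-bit operand      */
    int64_t narrow = (int64_t)rn * rm16;           /* 48-bit product      */
    int32_t want = (int32_t)(narrow >> 16);        /* product<47:16>      */
    int64_t wide = (int64_t)rn * (rm16 * 65536);   /* operand pre-shifted */
    int32_t got = (int32_t)(wide >> 32);           /* high half           */
    assert(got == want);
}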
8406
8407 #define DO_SMLAWX(NAME, add, mt) \
8408 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8409 { \
8410 return op_smlawx(s, a, add, mt); \
8411 }
8412
8413 DO_SMLAWX(SMULWB, 0, 0)
8414 DO_SMLAWX(SMULWT, 0, 1)
8415 DO_SMLAWX(SMLAWB, 1, 0)
8416 DO_SMLAWX(SMLAWT, 1, 1)
8417
8418 #undef DO_SMLAWX
8419
8420 /*
8421 * MSR (immediate) and hints
8422 */
8423
8424 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
8425 {
8426 /*
8427 * When running single-threaded TCG code, use the helper to ensure that
8428 * the next round-robin scheduled vCPU gets a chance to run. When running
8429 * in MTTCG mode we don't generate jumps to the helper, as it won't affect
8430 * scheduling of other vCPUs.
8431 */
8432 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8433 gen_set_pc_im(s, s->base.pc_next);
8434 s->base.is_jmp = DISAS_YIELD;
8435 }
8436 return true;
8437 }
8438
8439 static bool trans_WFE(DisasContext *s, arg_WFE *a)
8440 {
8441 /*
8442 * When running single-threaded TCG code, use the helper to ensure that
8443 * the next round-robin scheduled vCPU gets a chance to run. In MTTCG mode
8444 * we just skip this instruction. Currently the SEV/SEVL instructions,
8445 * which provide *one* of the many ways to wake the CPU from WFE, are not
8446 * implemented, so we cannot sleep the way WFI does.
8447 */
8448 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8449 gen_set_pc_im(s, s->base.pc_next);
8450 s->base.is_jmp = DISAS_WFE;
8451 }
8452 return true;
8453 }
8454
8455 static bool trans_WFI(DisasContext *s, arg_WFI *a)
8456 {
8457 /* For WFI, halt the vCPU until an IRQ. */
8458 gen_set_pc_im(s, s->base.pc_next);
8459 s->base.is_jmp = DISAS_WFI;
8460 return true;
8461 }
8462
8463 static bool trans_NOP(DisasContext *s, arg_NOP *a)
8464 {
8465 return true;
8466 }
8467
8468 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
8469 {
8470 uint32_t val = ror32(a->imm, a->rot * 2);
8471 uint32_t mask = msr_mask(s, a->mask, a->r);
8472
8473 if (gen_set_psr_im(s, mask, a->r, val)) {
8474 unallocated_encoding(s);
8475 }
8476 return true;
8477 }
8478
8479 /*
8480 * Cyclic Redundancy Check
8481 */
8482
8483 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
8484 {
8485 TCGv_i32 t1, t2, t3;
8486
8487 if (!dc_isar_feature(aa32_crc32, s)) {
8488 return false;
8489 }
8490
8491 t1 = load_reg(s, a->rn);
8492 t2 = load_reg(s, a->rm);
8493 switch (sz) {
8494 case MO_8:
8495 gen_uxtb(t2);
8496 break;
8497 case MO_16:
8498 gen_uxth(t2);
8499 break;
8500 case MO_32:
8501 break;
8502 default:
8503 g_assert_not_reached();
8504 }
8505 t3 = tcg_const_i32(1 << sz);
8506 if (c) {
8507 gen_helper_crc32c(t1, t1, t2, t3);
8508 } else {
8509 gen_helper_crc32(t1, t1, t2, t3);
8510 }
8511 tcg_temp_free_i32(t2);
8512 tcg_temp_free_i32(t3);
8513 store_reg(s, a->rd, t1);
8514 return true;
8515 }
8516
8517 #define DO_CRC32(NAME, c, sz) \
8518 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8519 { return op_crc32(s, a, c, sz); }
8520
8521 DO_CRC32(CRC32B, false, MO_8)
8522 DO_CRC32(CRC32H, false, MO_16)
8523 DO_CRC32(CRC32W, false, MO_32)
8524 DO_CRC32(CRC32CB, true, MO_8)
8525 DO_CRC32(CRC32CH, true, MO_16)
8526 DO_CRC32(CRC32CW, true, MO_32)
8527
8528 #undef DO_CRC32
8529
8530 /*
8531 * Miscellaneous instructions
8532 */
8533
8534 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
8535 {
8536 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8537 return false;
8538 }
8539 gen_mrs_banked(s, a->r, a->sysm, a->rd);
8540 return true;
8541 }
8542
8543 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
8544 {
8545 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8546 return false;
8547 }
8548 gen_msr_banked(s, a->r, a->sysm, a->rn);
8549 return true;
8550 }
8551
8552 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
8553 {
8554 TCGv_i32 tmp;
8555
8556 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8557 return false;
8558 }
8559 if (a->r) {
8560 if (IS_USER(s)) {
8561 unallocated_encoding(s);
8562 return true;
8563 }
8564 tmp = load_cpu_field(spsr);
8565 } else {
8566 tmp = tcg_temp_new_i32();
8567 gen_helper_cpsr_read(tmp, cpu_env);
8568 }
8569 store_reg(s, a->rd, tmp);
8570 return true;
8571 }
8572
8573 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
8574 {
8575 TCGv_i32 tmp;
8576 uint32_t mask = msr_mask(s, a->mask, a->r);
8577
8578 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8579 return false;
8580 }
8581 tmp = load_reg(s, a->rn);
8582 if (gen_set_psr(s, mask, a->r, tmp)) {
8583 unallocated_encoding(s);
8584 }
8585 return true;
8586 }
8587
8588 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
8589 {
8590 TCGv_i32 tmp;
8591
8592 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8593 return false;
8594 }
8595 tmp = tcg_const_i32(a->sysm);
8596 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
8597 store_reg(s, a->rd, tmp);
8598 return true;
8599 }
8600
8601 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
8602 {
8603 TCGv_i32 addr, reg;
8604
8605 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8606 return false;
8607 }
8608 addr = tcg_const_i32((a->mask << 10) | a->sysm);
8609 reg = load_reg(s, a->rn);
8610 gen_helper_v7m_msr(cpu_env, addr, reg);
8611 tcg_temp_free_i32(addr);
8612 tcg_temp_free_i32(reg);
8613 /* If we wrote to CONTROL, the EL might have changed */
8614 gen_helper_rebuild_hflags_m32_newel(cpu_env);
8615 gen_lookup_tb(s);
8616 return true;
8617 }
8618
8619 static bool trans_BX(DisasContext *s, arg_BX *a)
8620 {
8621 if (!ENABLE_ARCH_4T) {
8622 return false;
8623 }
8624 gen_bx_excret(s, load_reg(s, a->rm));
8625 return true;
8626 }
8627
8628 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
8629 {
8630 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
8631 return false;
8632 }
8633 /* Trivial implementation equivalent to bx. */
8634 gen_bx(s, load_reg(s, a->rm));
8635 return true;
8636 }
8637
8638 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
8639 {
8640 TCGv_i32 tmp;
8641
8642 if (!ENABLE_ARCH_5) {
8643 return false;
8644 }
8645 tmp = load_reg(s, a->rm);
8646 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8647 gen_bx(s, tmp);
8648 return true;
8649 }
8650
8651 /*
8652 * BXNS/BLXNS: only exist for v8M with the security extensions,
8653 * and always UNDEF if NonSecure. We don't implement these in
8654 * the user-only mode either (in theory you can use them from
8655 * Secure User mode, but they are too closely tied to system emulation).
8656 */
8657 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
8658 {
8659 if (!s->v8m_secure || IS_USER_ONLY) {
8660 unallocated_encoding(s);
8661 } else {
8662 gen_bxns(s, a->rm);
8663 }
8664 return true;
8665 }
8666
8667 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
8668 {
8669 if (!s->v8m_secure || IS_USER_ONLY) {
8670 unallocated_encoding(s);
8671 } else {
8672 gen_blxns(s, a->rm);
8673 }
8674 return true;
8675 }
8676
8677 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
8678 {
8679 TCGv_i32 tmp;
8680
8681 if (!ENABLE_ARCH_5) {
8682 return false;
8683 }
8684 tmp = load_reg(s, a->rm);
8685 tcg_gen_clzi_i32(tmp, tmp, 32);
8686 store_reg(s, a->rd, tmp);
8687 return true;
8688 }
8689
8690 static bool trans_ERET(DisasContext *s, arg_ERET *a)
8691 {
8692 TCGv_i32 tmp;
8693
8694 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
8695 return false;
8696 }
8697 if (IS_USER(s)) {
8698 unallocated_encoding(s);
8699 return true;
8700 }
8701 if (s->current_el == 2) {
8702 /* ERET from Hyp uses ELR_Hyp, not LR */
8703 tmp = load_cpu_field(elr_el[2]);
8704 } else {
8705 tmp = load_reg(s, 14);
8706 }
8707 gen_exception_return(s, tmp);
8708 return true;
8709 }
8710
8711 static bool trans_HLT(DisasContext *s, arg_HLT *a)
8712 {
8713 gen_hlt(s, a->imm);
8714 return true;
8715 }
8716
8717 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
8718 {
8719 if (!ENABLE_ARCH_5) {
8720 return false;
8721 }
8722 if (arm_dc_feature(s, ARM_FEATURE_M) &&
8723 semihosting_enabled() &&
8724 #ifndef CONFIG_USER_ONLY
8725 !IS_USER(s) &&
8726 #endif
8727 (a->imm == 0xab)) {
8728 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8729 } else {
8730 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
8731 }
8732 return true;
8733 }
8734
8735 static bool trans_HVC(DisasContext *s, arg_HVC *a)
8736 {
8737 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
8738 return false;
8739 }
8740 if (IS_USER(s)) {
8741 unallocated_encoding(s);
8742 } else {
8743 gen_hvc(s, a->imm);
8744 }
8745 return true;
8746 }
8747
8748 static bool trans_SMC(DisasContext *s, arg_SMC *a)
8749 {
8750 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
8751 return false;
8752 }
8753 if (IS_USER(s)) {
8754 unallocated_encoding(s);
8755 } else {
8756 gen_smc(s);
8757 }
8758 return true;
8759 }
8760
8761 static bool trans_SG(DisasContext *s, arg_SG *a)
8762 {
8763 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8764 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8765 return false;
8766 }
8767 /*
8768 * SG (v8M only)
8769 * The bulk of the behaviour for this instruction is implemented
8770 * in v7m_handle_execute_nsc(), which deals with the insn when
8771 * it is executed by a CPU in non-secure state from memory
8772 * which is Secure & NonSecure-Callable.
8773 * Here we only need to handle the remaining cases:
8774 * * in NS memory (including the "security extension not
8775 * implemented" case) : NOP
8776 * * in S memory but CPU already secure (clear IT bits)
8777 * We know that the attribute for the memory this insn is
8778 * in must match the current CPU state, because otherwise
8779 * get_phys_addr_pmsav8 would have generated an exception.
8780 */
8781 if (s->v8m_secure) {
8782 /* Like the IT insn, we don't need to generate any code */
8783 s->condexec_cond = 0;
8784 s->condexec_mask = 0;
8785 }
8786 return true;
8787 }
8788
8789 static bool trans_TT(DisasContext *s, arg_TT *a)
8790 {
8791 TCGv_i32 addr, tmp;
8792
8793 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8794 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8795 return false;
8796 }
8797 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
8798 /* We UNDEF for these UNPREDICTABLE cases */
8799 unallocated_encoding(s);
8800 return true;
8801 }
8802 if (a->A && !s->v8m_secure) {
8803 /* This case is UNDEFINED. */
8804 unallocated_encoding(s);
8805 return true;
8806 }
8807
8808 addr = load_reg(s, a->rn);
8809 tmp = tcg_const_i32((a->A << 1) | a->T);
8810 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
8811 tcg_temp_free_i32(addr);
8812 store_reg(s, a->rd, tmp);
8813 return true;
8814 }
8815
8816 /*
8817 * Load/store register index
8818 */
8819
8820 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
8821 {
8822 ISSInfo ret;
8823
8824 /* ISS not valid if writeback */
8825 if (p && !w) {
8826 ret = rd;
8827 if (s->base.pc_next - s->pc_curr == 2) {
8828 ret |= ISSIs16Bit;
8829 }
8830 } else {
8831 ret = ISSInvalid;
8832 }
8833 return ret;
8834 }
8835
8836 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
8837 {
8838 TCGv_i32 addr = load_reg(s, a->rn);
8839
8840 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8841 gen_helper_v8m_stackcheck(cpu_env, addr);
8842 }
8843
8844 if (a->p) {
8845 TCGv_i32 ofs = load_reg(s, a->rm);
8846 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8847 if (a->u) {
8848 tcg_gen_add_i32(addr, addr, ofs);
8849 } else {
8850 tcg_gen_sub_i32(addr, addr, ofs);
8851 }
8852 tcg_temp_free_i32(ofs);
8853 }
8854 return addr;
8855 }
8856
8857 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
8858 TCGv_i32 addr, int address_offset)
8859 {
8860 if (!a->p) {
8861 TCGv_i32 ofs = load_reg(s, a->rm);
8862 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8863 if (a->u) {
8864 tcg_gen_add_i32(addr, addr, ofs);
8865 } else {
8866 tcg_gen_sub_i32(addr, addr, ofs);
8867 }
8868 tcg_temp_free_i32(ofs);
8869 } else if (!a->w) {
8870 tcg_temp_free_i32(addr);
8871 return;
8872 }
8873 tcg_gen_addi_i32(addr, addr, address_offset);
8874 store_reg(s, a->rn, addr);
8875 }
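/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the pre/post-index and writeback split that op_addr_rr_pre/_post (and
 * the _ri variants below) implement.  P selects pre- vs post-indexing, U
 * add vs subtract, W base writeback; post-indexed forms always write the
 * base back.  The extra address_offset used by LDRD/STRD is ignored here.
 */
#include <stdbool.h>
#include <stdint.h>

typedef struct {
    uint32_t access_addr;   /* address used by the load/store         */
    uint32_t new_base;      /* value written back to Rn, if writeback */
    bool     writeback;
} AddrModeDemo;

static AddrModeDemo addr_mode_demo(uint32_t rn, uint32_t ofs,
                                   bool p, bool u, bool w)
{
    uint32_t delta = u ? ofs : 0u - ofs;
    AddrModeDemo d;
    d.access_addr = p ? rn + delta : rn;
    d.new_base    = rn + delta;
    d.writeback   = !p || w;
    return d;
}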
8876
8877 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
8878 MemOp mop, int mem_idx)
8879 {
8880 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8881 TCGv_i32 addr, tmp;
8882
8883 addr = op_addr_rr_pre(s, a);
8884
8885 tmp = tcg_temp_new_i32();
8886 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8887 disas_set_da_iss(s, mop, issinfo);
8888
8889 /*
8890 * Perform the base writeback before writing the loaded value, to
8891 * ensure correct behavior when the base and destination registers overlap.
8892 */
8893 op_addr_rr_post(s, a, addr, 0);
8894 store_reg_from_load(s, a->rt, tmp);
8895 return true;
8896 }
8897
8898 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
8899 MemOp mop, int mem_idx)
8900 {
8901 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8902 TCGv_i32 addr, tmp;
8903
8904 addr = op_addr_rr_pre(s, a);
8905
8906 tmp = load_reg(s, a->rt);
8907 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8908 disas_set_da_iss(s, mop, issinfo);
8909 tcg_temp_free_i32(tmp);
8910
8911 op_addr_rr_post(s, a, addr, 0);
8912 return true;
8913 }
8914
8915 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
8916 {
8917 int mem_idx = get_mem_index(s);
8918 TCGv_i32 addr, tmp;
8919
8920 if (!ENABLE_ARCH_5TE) {
8921 return false;
8922 }
8923 if (a->rt & 1) {
8924 unallocated_encoding(s);
8925 return true;
8926 }
8927 addr = op_addr_rr_pre(s, a);
8928
8929 tmp = tcg_temp_new_i32();
8930 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8931 store_reg(s, a->rt, tmp);
8932
8933 tcg_gen_addi_i32(addr, addr, 4);
8934
8935 tmp = tcg_temp_new_i32();
8936 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8937 store_reg(s, a->rt + 1, tmp);
8938
8939 /* LDRD w/ base writeback is undefined if the registers overlap. */
8940 op_addr_rr_post(s, a, addr, -4);
8941 return true;
8942 }
8943
8944 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
8945 {
8946 int mem_idx = get_mem_index(s);
8947 TCGv_i32 addr, tmp;
8948
8949 if (!ENABLE_ARCH_5TE) {
8950 return false;
8951 }
8952 if (a->rt & 1) {
8953 unallocated_encoding(s);
8954 return true;
8955 }
8956 addr = op_addr_rr_pre(s, a);
8957
8958 tmp = load_reg(s, a->rt);
8959 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8960 tcg_temp_free_i32(tmp);
8961
8962 tcg_gen_addi_i32(addr, addr, 4);
8963
8964 tmp = load_reg(s, a->rt + 1);
8965 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8966 tcg_temp_free_i32(tmp);
8967
8968 op_addr_rr_post(s, a, addr, -4);
8969 return true;
8970 }
8971
8972 /*
8973 * Load/store immediate index
8974 */
8975
8976 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
8977 {
8978 int ofs = a->imm;
8979
8980 if (!a->u) {
8981 ofs = -ofs;
8982 }
8983
8984 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8985 /*
8986 * Stackcheck. Here we know 'addr' is the current SP;
8987 * U is set if we're moving SP up, else down. It is
8988 * UNKNOWN whether the limit check triggers when SP starts
8989 * below the limit and ends up above it; we choose to trigger it.
8990 */
8991 if (!a->u) {
8992 TCGv_i32 newsp = tcg_temp_new_i32();
8993 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
8994 gen_helper_v8m_stackcheck(cpu_env, newsp);
8995 tcg_temp_free_i32(newsp);
8996 } else {
8997 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
8998 }
8999 }
9000
9001 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
9002 }
9003
9004 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
9005 TCGv_i32 addr, int address_offset)
9006 {
9007 if (!a->p) {
9008 if (a->u) {
9009 address_offset += a->imm;
9010 } else {
9011 address_offset -= a->imm;
9012 }
9013 } else if (!a->w) {
9014 tcg_temp_free_i32(addr);
9015 return;
9016 }
9017 tcg_gen_addi_i32(addr, addr, address_offset);
9018 store_reg(s, a->rn, addr);
9019 }
9020
9021 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
9022 MemOp mop, int mem_idx)
9023 {
9024 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
9025 TCGv_i32 addr, tmp;
9026
9027 addr = op_addr_ri_pre(s, a);
9028
9029 tmp = tcg_temp_new_i32();
9030 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
9031 disas_set_da_iss(s, mop, issinfo);
9032
9033 /*
9034 * Perform the base writeback before writing the loaded value, to
9035 * ensure correct behavior when the base and destination registers overlap.
9036 */
9037 op_addr_ri_post(s, a, addr, 0);
9038 store_reg_from_load(s, a->rt, tmp);
9039 return true;
9040 }
9041
9042 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
9043 MemOp mop, int mem_idx)
9044 {
9045 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
9046 TCGv_i32 addr, tmp;
9047
9048 addr = op_addr_ri_pre(s, a);
9049
9050 tmp = load_reg(s, a->rt);
9051 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
9052 disas_set_da_iss(s, mop, issinfo);
9053 tcg_temp_free_i32(tmp);
9054
9055 op_addr_ri_post(s, a, addr, 0);
9056 return true;
9057 }
9058
9059 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
9060 {
9061 int mem_idx = get_mem_index(s);
9062 TCGv_i32 addr, tmp;
9063
9064 addr = op_addr_ri_pre(s, a);
9065
9066 tmp = tcg_temp_new_i32();
9067 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
9068 store_reg(s, a->rt, tmp);
9069
9070 tcg_gen_addi_i32(addr, addr, 4);
9071
9072 tmp = tcg_temp_new_i32();
9073 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
9074 store_reg(s, rt2, tmp);
9075
9076 /* LDRD w/ base writeback is undefined if the registers overlap. */
9077 op_addr_ri_post(s, a, addr, -4);
9078 return true;
9079 }
9080
9081 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
9082 {
9083 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
9084 return false;
9085 }
9086 return op_ldrd_ri(s, a, a->rt + 1);
9087 }
9088
9089 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
9090 {
9091 arg_ldst_ri b = {
9092 .u = a->u, .w = a->w, .p = a->p,
9093 .rn = a->rn, .rt = a->rt, .imm = a->imm
9094 };
9095 return op_ldrd_ri(s, &b, a->rt2);
9096 }
9097
9098 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
9099 {
9100 int mem_idx = get_mem_index(s);
9101 TCGv_i32 addr, tmp;
9102
9103 addr = op_addr_ri_pre(s, a);
9104
9105 tmp = load_reg(s, a->rt);
9106 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
9107 tcg_temp_free_i32(tmp);
9108
9109 tcg_gen_addi_i32(addr, addr, 4);
9110
9111 tmp = load_reg(s, rt2);
9112 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
9113 tcg_temp_free_i32(tmp);
9114
9115 op_addr_ri_post(s, a, addr, -4);
9116 return true;
9117 }
9118
9119 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
9120 {
9121 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
9122 return false;
9123 }
9124 return op_strd_ri(s, a, a->rt + 1);
9125 }
9126
9127 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
9128 {
9129 arg_ldst_ri b = {
9130 .u = a->u, .w = a->w, .p = a->p,
9131 .rn = a->rn, .rt = a->rt, .imm = a->imm
9132 };
9133 return op_strd_ri(s, &b, a->rt2);
9134 }
9135
9136 #define DO_LDST(NAME, WHICH, MEMOP) \
9137 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
9138 { \
9139 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
9140 } \
9141 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
9142 { \
9143 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
9144 } \
9145 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
9146 { \
9147 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
9148 } \
9149 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
9150 { \
9151 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
9152 }
9153
9154 DO_LDST(LDR, load, MO_UL)
9155 DO_LDST(LDRB, load, MO_UB)
9156 DO_LDST(LDRH, load, MO_UW)
9157 DO_LDST(LDRSB, load, MO_SB)
9158 DO_LDST(LDRSH, load, MO_SW)
9159
9160 DO_LDST(STR, store, MO_UL)
9161 DO_LDST(STRB, store, MO_UB)
9162 DO_LDST(STRH, store, MO_UW)
9163
9164 #undef DO_LDST
9165
9166 /*
9167 * Synchronization primitives
9168 */
9169
9170 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
9171 {
9172 TCGv_i32 addr, tmp;
9173 TCGv taddr;
9174
9175 opc |= s->be_data;
9176 addr = load_reg(s, a->rn);
9177 taddr = gen_aa32_addr(s, addr, opc);
9178 tcg_temp_free_i32(addr);
9179
9180 tmp = load_reg(s, a->rt2);
9181 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
9182 tcg_temp_free(taddr);
9183
9184 store_reg(s, a->rt, tmp);
9185 return true;
9186 }
9187
9188 static bool trans_SWP(DisasContext *s, arg_SWP *a)
9189 {
9190 return op_swp(s, a, MO_UL | MO_ALIGN);
9191 }
9192
9193 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
9194 {
9195 return op_swp(s, a, MO_UB);
9196 }
9197
9198 /*
9199 * Load/Store Exclusive and Load-Acquire/Store-Release
9200 */
9201
9202 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
9203 {
9204 TCGv_i32 addr;
9205 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
9206 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
9207
9208 /* We UNDEF for these UNPREDICTABLE cases. */
9209 if (a->rd == 15 || a->rn == 15 || a->rt == 15
9210 || a->rd == a->rn || a->rd == a->rt
9211 || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
9212 || (mop == MO_64
9213 && (a->rt2 == 15
9214 || a->rd == a->rt2
9215 || (!v8a && s->thumb && a->rt2 == 13)))) {
9216 unallocated_encoding(s);
9217 return true;
9218 }
9219
9220 if (rel) {
9221 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9222 }
9223
9224 addr = tcg_temp_local_new_i32();
9225 load_reg_var(s, addr, a->rn);
9226 tcg_gen_addi_i32(addr, addr, a->imm);
9227
9228 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
9229 tcg_temp_free_i32(addr);
9230 return true;
9231 }
9232
9233 static bool trans_STREX(DisasContext *s, arg_STREX *a)
9234 {
9235 if (!ENABLE_ARCH_6) {
9236 return false;
9237 }
9238 return op_strex(s, a, MO_32, false);
9239 }
9240
9241 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
9242 {
9243 if (!ENABLE_ARCH_6K) {
9244 return false;
9245 }
9246 /* We UNDEF for these UNPREDICTABLE cases. */
9247 if (a->rt & 1) {
9248 unallocated_encoding(s);
9249 return true;
9250 }
9251 a->rt2 = a->rt + 1;
9252 return op_strex(s, a, MO_64, false);
9253 }
9254
9255 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
9256 {
9257 return op_strex(s, a, MO_64, false);
9258 }
9259
9260 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
9261 {
9262 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9263 return false;
9264 }
9265 return op_strex(s, a, MO_8, false);
9266 }
9267
9268 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
9269 {
9270 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9271 return false;
9272 }
9273 return op_strex(s, a, MO_16, false);
9274 }
9275
9276 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
9277 {
9278 if (!ENABLE_ARCH_8) {
9279 return false;
9280 }
9281 return op_strex(s, a, MO_32, true);
9282 }
9283
9284 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
9285 {
9286 if (!ENABLE_ARCH_8) {
9287 return false;
9288 }
9289 /* We UNDEF for these UNPREDICTABLE cases. */
9290 if (a->rt & 1) {
9291 unallocated_encoding(s);
9292 return true;
9293 }
9294 a->rt2 = a->rt + 1;
9295 return op_strex(s, a, MO_64, true);
9296 }
9297
9298 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
9299 {
9300 if (!ENABLE_ARCH_8) {
9301 return false;
9302 }
9303 return op_strex(s, a, MO_64, true);
9304 }
9305
9306 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
9307 {
9308 if (!ENABLE_ARCH_8) {
9309 return false;
9310 }
9311 return op_strex(s, a, MO_8, true);
9312 }
9313
9314 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
9315 {
9316 if (!ENABLE_ARCH_8) {
9317 return false;
9318 }
9319 return op_strex(s, a, MO_16, true);
9320 }
9321
9322 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
9323 {
9324 TCGv_i32 addr, tmp;
9325
9326 if (!ENABLE_ARCH_8) {
9327 return false;
9328 }
9329 /* We UNDEF for these UNPREDICTABLE cases. */
9330 if (a->rn == 15 || a->rt == 15) {
9331 unallocated_encoding(s);
9332 return true;
9333 }
9334
9335 addr = load_reg(s, a->rn);
9336 tmp = load_reg(s, a->rt);
9337 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9338 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9339 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
9340
9341 tcg_temp_free_i32(tmp);
9342 tcg_temp_free_i32(addr);
9343 return true;
9344 }
9345
9346 static bool trans_STL(DisasContext *s, arg_STL *a)
9347 {
9348 return op_stl(s, a, MO_UL);
9349 }
9350
9351 static bool trans_STLB(DisasContext *s, arg_STL *a)
9352 {
9353 return op_stl(s, a, MO_UB);
9354 }
9355
9356 static bool trans_STLH(DisasContext *s, arg_STL *a)
9357 {
9358 return op_stl(s, a, MO_UW);
9359 }
9360
9361 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
9362 {
9363 TCGv_i32 addr;
9364 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
9365 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
9366
9367 /* We UNDEF for these UNPREDICTABLE cases. */
9368 if (a->rn == 15 || a->rt == 15
9369 || (!v8a && s->thumb && a->rt == 13)
9370 || (mop == MO_64
9371 && (a->rt2 == 15 || a->rt == a->rt2
9372 || (!v8a && s->thumb && a->rt2 == 13)))) {
9373 unallocated_encoding(s);
9374 return true;
9375 }
9376
9377 addr = tcg_temp_local_new_i32();
9378 load_reg_var(s, addr, a->rn);
9379 tcg_gen_addi_i32(addr, addr, a->imm);
9380
9381 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
9382 tcg_temp_free_i32(addr);
9383
9384 if (acq) {
9385 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
9386 }
9387 return true;
9388 }
9389
9390 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
9391 {
9392 if (!ENABLE_ARCH_6) {
9393 return false;
9394 }
9395 return op_ldrex(s, a, MO_32, false);
9396 }
9397
9398 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
9399 {
9400 if (!ENABLE_ARCH_6K) {
9401 return false;
9402 }
9403 /* We UNDEF for these UNPREDICTABLE cases. */
9404 if (a->rt & 1) {
9405 unallocated_encoding(s);
9406 return true;
9407 }
9408 a->rt2 = a->rt + 1;
9409 return op_ldrex(s, a, MO_64, false);
9410 }
9411
9412 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
9413 {
9414 return op_ldrex(s, a, MO_64, false);
9415 }
9416
9417 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
9418 {
9419 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9420 return false;
9421 }
9422 return op_ldrex(s, a, MO_8, false);
9423 }
9424
9425 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
9426 {
9427 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9428 return false;
9429 }
9430 return op_ldrex(s, a, MO_16, false);
9431 }
9432
9433 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
9434 {
9435 if (!ENABLE_ARCH_8) {
9436 return false;
9437 }
9438 return op_ldrex(s, a, MO_32, true);
9439 }
9440
9441 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
9442 {
9443 if (!ENABLE_ARCH_8) {
9444 return false;
9445 }
9446 /* We UNDEF for these UNPREDICTABLE cases. */
9447 if (a->rt & 1) {
9448 unallocated_encoding(s);
9449 return true;
9450 }
9451 a->rt2 = a->rt + 1;
9452 return op_ldrex(s, a, MO_64, true);
9453 }
9454
9455 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
9456 {
9457 if (!ENABLE_ARCH_8) {
9458 return false;
9459 }
9460 return op_ldrex(s, a, MO_64, true);
9461 }
9462
9463 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
9464 {
9465 if (!ENABLE_ARCH_8) {
9466 return false;
9467 }
9468 return op_ldrex(s, a, MO_8, true);
9469 }
9470
9471 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
9472 {
9473 if (!ENABLE_ARCH_8) {
9474 return false;
9475 }
9476 return op_ldrex(s, a, MO_16, true);
9477 }
9478
9479 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
9480 {
9481 TCGv_i32 addr, tmp;
9482
9483 if (!ENABLE_ARCH_8) {
9484 return false;
9485 }
9486 /* We UNDEF for these UNPREDICTABLE cases. */
9487 if (a->rn == 15 || a->rt == 15) {
9488 unallocated_encoding(s);
9489 return true;
9490 }
9491
9492 addr = load_reg(s, a->rn);
9493 tmp = tcg_temp_new_i32();
9494 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9495 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
9496 tcg_temp_free_i32(addr);
9497
9498 store_reg(s, a->rt, tmp);
9499 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9500 return true;
9501 }
9502
9503 static bool trans_LDA(DisasContext *s, arg_LDA *a)
9504 {
9505 return op_lda(s, a, MO_UL);
9506 }
9507
9508 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
9509 {
9510 return op_lda(s, a, MO_UB);
9511 }
9512
9513 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
9514 {
9515 return op_lda(s, a, MO_UW);
9516 }
9517
9518 /*
9519 * Media instructions
9520 */
9521
9522 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
9523 {
9524 TCGv_i32 t1, t2;
9525
9526 if (!ENABLE_ARCH_6) {
9527 return false;
9528 }
9529
9530 t1 = load_reg(s, a->rn);
9531 t2 = load_reg(s, a->rm);
9532 gen_helper_usad8(t1, t1, t2);
9533 tcg_temp_free_i32(t2);
9534 if (a->ra != 15) {
9535 t2 = load_reg(s, a->ra);
9536 tcg_gen_add_i32(t1, t1, t2);
9537 tcg_temp_free_i32(t2);
9538 }
9539 store_reg(s, a->rd, t1);
9540 return true;
9541 }
9542
9543 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
9544 {
9545 TCGv_i32 tmp;
9546 int width = a->widthm1 + 1;
9547 int shift = a->lsb;
9548
9549 if (!ENABLE_ARCH_6T2) {
9550 return false;
9551 }
9552 if (shift + width > 32) {
9553 /* UNPREDICTABLE; we choose to UNDEF */
9554 unallocated_encoding(s);
9555 return true;
9556 }
9557
9558 tmp = load_reg(s, a->rn);
9559 if (u) {
9560 tcg_gen_extract_i32(tmp, tmp, shift, width);
9561 } else {
9562 tcg_gen_sextract_i32(tmp, tmp, shift, width);
9563 }
9564 store_reg(s, a->rd, tmp);
9565 return true;
9566 }
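/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the (s)extract above in plain C: take 'width' bits starting at 'lsb',
 * zero-extended for UBFX or sign-extended for SBFX.
 */
#include <stdint.h>

static uint32_t ubfx_demo(uint32_t rn, int lsb, int width)
{
    uint32_t mask = width == 32 ? 0xffffffffu : (1u << width) - 1;
    return (rn >> lsb) & mask;
}

static int32_t sbfx_demo(uint32_t rn, int lsb, int width)
{
    uint32_t field = ubfx_demo(rn, lsb, width);
    uint32_t sign = 1u << (width - 1);
    /* Standard sign-extension trick: flip and subtract the sign bit. */
    return (int32_t)((field ^ sign) - sign);
}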
9567
9568 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
9569 {
9570 return op_bfx(s, a, false);
9571 }
9572
9573 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
9574 {
9575 return op_bfx(s, a, true);
9576 }
9577
9578 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
9579 {
9580 TCGv_i32 tmp;
9581 int msb = a->msb, lsb = a->lsb;
9582 int width;
9583
9584 if (!ENABLE_ARCH_6T2) {
9585 return false;
9586 }
9587 if (msb < lsb) {
9588 /* UNPREDICTABLE; we choose to UNDEF */
9589 unallocated_encoding(s);
9590 return true;
9591 }
9592
9593 width = msb + 1 - lsb;
9594 if (a->rn == 15) {
9595 /* BFC */
9596 tmp = tcg_const_i32(0);
9597 } else {
9598 /* BFI */
9599 tmp = load_reg(s, a->rn);
9600 }
9601 if (width != 32) {
9602 TCGv_i32 tmp2 = load_reg(s, a->rd);
9603 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
9604 tcg_temp_free_i32(tmp2);
9605 }
9606 store_reg(s, a->rd, tmp);
9607 return true;
9608 }
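/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the deposit above in plain C: insert the low 'width' bits of 'src' at
 * bit position 'lsb' of 'rd'.  BFC is the src == 0 case.
 */
#include <stdint.h>

static uint32_t bfi_demo(uint32_t rd, uint32_t src, int lsb, int width)
{
    if (width == 32) {
        return src;                       /* lsb == 0, whole register */
    }
    uint32_t mask = ((1u << width) - 1) << lsb;
    return (rd & ~mask) | ((src << lsb) & mask);
}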
9609
9610 static bool trans_UDF(DisasContext *s, arg_UDF *a)
9611 {
9612 unallocated_encoding(s);
9613 return true;
9614 }
9615
9616 /*
9617 * Parallel addition and subtraction
9618 */
9619
9620 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
9621 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
9622 {
9623 TCGv_i32 t0, t1;
9624
9625 if (s->thumb
9626 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9627 : !ENABLE_ARCH_6) {
9628 return false;
9629 }
9630
9631 t0 = load_reg(s, a->rn);
9632 t1 = load_reg(s, a->rm);
9633
9634 gen(t0, t0, t1);
9635
9636 tcg_temp_free_i32(t1);
9637 store_reg(s, a->rd, t0);
9638 return true;
9639 }
9640
9641 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
9642 void (*gen)(TCGv_i32, TCGv_i32,
9643 TCGv_i32, TCGv_ptr))
9644 {
9645 TCGv_i32 t0, t1;
9646 TCGv_ptr ge;
9647
9648 if (s->thumb
9649 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9650 : !ENABLE_ARCH_6) {
9651 return false;
9652 }
9653
9654 t0 = load_reg(s, a->rn);
9655 t1 = load_reg(s, a->rm);
9656
9657 ge = tcg_temp_new_ptr();
9658 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
9659 gen(t0, t0, t1, ge);
9660
9661 tcg_temp_free_ptr(ge);
9662 tcg_temp_free_i32(t1);
9663 store_reg(s, a->rd, t0);
9664 return true;
9665 }
9666
9667 #define DO_PAR_ADDSUB(NAME, helper) \
9668 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9669 { \
9670 return op_par_addsub(s, a, helper); \
9671 }
9672
9673 #define DO_PAR_ADDSUB_GE(NAME, helper) \
9674 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9675 { \
9676 return op_par_addsub_ge(s, a, helper); \
9677 }
9678
9679 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
9680 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
9681 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
9682 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
9683 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
9684 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
9685
9686 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
9687 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
9688 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
9689 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
9690 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
9691 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
9692
9693 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
9694 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
9695 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
9696 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
9697 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
9698 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
9699
9700 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
9701 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
9702 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
9703 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
9704 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
9705 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
9706
9707 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
9708 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
9709 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
9710 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
9711 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
9712 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
9713
9714 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
9715 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
9716 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
9717 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
9718 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
9719 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
9720
9721 #undef DO_PAR_ADDSUB
9722 #undef DO_PAR_ADDSUB_GE
9723
9724 /*
9725 * Packing, unpacking, saturation, and reversal
9726 */
9727
9728 static bool trans_PKH(DisasContext *s, arg_PKH *a)
9729 {
9730 TCGv_i32 tn, tm;
9731 int shift = a->imm;
9732
9733 if (s->thumb
9734 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9735 : !ENABLE_ARCH_6) {
9736 return false;
9737 }
9738
9739 tn = load_reg(s, a->rn);
9740 tm = load_reg(s, a->rm);
9741 if (a->tb) {
9742 /* PKHTB */
9743 if (shift == 0) {
9744 shift = 31;
9745 }
9746 tcg_gen_sari_i32(tm, tm, shift);
9747 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
9748 } else {
9749 /* PKHBT */
9750 tcg_gen_shli_i32(tm, tm, shift);
9751 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
9752 }
9753 tcg_temp_free_i32(tm);
9754 store_reg(s, a->rd, tn);
9755 return true;
9756 }
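/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the two deposit forms above in plain C.  PKHBT keeps Rn<15:0> and takes
 * the top half from Rm LSL #shift; PKHTB keeps Rn<31:16> and takes the
 * bottom half from Rm ASR #shift (the shift == 0 encoding handled as above).
 * The signed right shift assumes arithmetic shifting, as tcg_gen_sari_i32
 * guarantees.
 */
#include <stdint.h>

static uint32_t pkhbt_demo(uint32_t rn, uint32_t rm, unsigned shift)
{
    uint32_t tm = rm << shift;                           /* LSL */
    return (tm & 0xffff0000u) | (rn & 0x0000ffffu);
}

static uint32_t pkhtb_demo(uint32_t rn, int32_t rm, unsigned shift)
{
    uint32_t tm = (uint32_t)(rm >> (shift ? shift : 31)); /* ASR */
    return (rn & 0xffff0000u) | (tm & 0x0000ffffu);
}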
9757
9758 static bool op_sat(DisasContext *s, arg_sat *a,
9759 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
9760 {
9761 TCGv_i32 tmp, satimm;
9762 int shift = a->imm;
9763
9764 if (!ENABLE_ARCH_6) {
9765 return false;
9766 }
9767
9768 tmp = load_reg(s, a->rn);
9769 if (a->sh) {
9770 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
9771 } else {
9772 tcg_gen_shli_i32(tmp, tmp, shift);
9773 }
9774
9775 satimm = tcg_const_i32(a->satimm);
9776 gen(tmp, cpu_env, tmp, satimm);
9777 tcg_temp_free_i32(satimm);
9778
9779 store_reg(s, a->rd, tmp);
9780 return true;
9781 }
9782
9783 static bool trans_SSAT(DisasContext *s, arg_sat *a)
9784 {
9785 return op_sat(s, a, gen_helper_ssat);
9786 }
9787
9788 static bool trans_USAT(DisasContext *s, arg_sat *a)
9789 {
9790 return op_sat(s, a, gen_helper_usat);
9791 }
9792
9793 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
9794 {
9795 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9796 return false;
9797 }
9798 return op_sat(s, a, gen_helper_ssat16);
9799 }
9800
9801 static bool trans_USAT16(DisasContext *s, arg_sat *a)
9802 {
9803 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9804 return false;
9805 }
9806 return op_sat(s, a, gen_helper_usat16);
9807 }
9808
9809 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
9810 void (*gen_extract)(TCGv_i32, TCGv_i32),
9811 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
9812 {
9813 TCGv_i32 tmp;
9814
9815 if (!ENABLE_ARCH_6) {
9816 return false;
9817 }
9818
9819 tmp = load_reg(s, a->rm);
9820 /*
9821 * TODO: In many cases we could do a shift instead of a rotate.
9822 * Combined with a simple extend, that becomes an extract.
9823 */
9824 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
9825 gen_extract(tmp, tmp);
9826
9827 if (a->rn != 15) {
9828 TCGv_i32 tmp2 = load_reg(s, a->rn);
9829 gen_add(tmp, tmp, tmp2);
9830 tcg_temp_free_i32(tmp2);
9831 }
9832 store_reg(s, a->rd, tmp);
9833 return true;
9834 }
9835
9836 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
9837 {
9838 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
9839 }
9840
9841 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
9842 {
9843 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
9844 }
9845
9846 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
9847 {
9848 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9849 return false;
9850 }
9851 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
9852 }
9853
9854 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
9855 {
9856 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
9857 }
9858
9859 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
9860 {
9861 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
9862 }
9863
9864 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
9865 {
9866 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9867 return false;
9868 }
9869 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
9870 }
9871
9872 static bool trans_SEL(DisasContext *s, arg_rrr *a)
9873 {
9874 TCGv_i32 t1, t2, t3;
9875
9876 if (s->thumb
9877 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9878 : !ENABLE_ARCH_6) {
9879 return false;
9880 }
9881
9882 t1 = load_reg(s, a->rn);
9883 t2 = load_reg(s, a->rm);
9884 t3 = tcg_temp_new_i32();
9885 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
9886 gen_helper_sel_flags(t1, t3, t1, t2);
9887 tcg_temp_free_i32(t3);
9888 tcg_temp_free_i32(t2);
9889 store_reg(s, a->rd, t1);
9890 return true;
9891 }
9892
9893 static bool op_rr(DisasContext *s, arg_rr *a,
9894 void (*gen)(TCGv_i32, TCGv_i32))
9895 {
9896 TCGv_i32 tmp;
9897
9898 tmp = load_reg(s, a->rm);
9899 gen(tmp, tmp);
9900 store_reg(s, a->rd, tmp);
9901 return true;
9902 }
9903
9904 static bool trans_REV(DisasContext *s, arg_rr *a)
9905 {
9906 if (!ENABLE_ARCH_6) {
9907 return false;
9908 }
9909 return op_rr(s, a, tcg_gen_bswap32_i32);
9910 }
9911
9912 static bool trans_REV16(DisasContext *s, arg_rr *a)
9913 {
9914 if (!ENABLE_ARCH_6) {
9915 return false;
9916 }
9917 return op_rr(s, a, gen_rev16);
9918 }
9919
9920 static bool trans_REVSH(DisasContext *s, arg_rr *a)
9921 {
9922 if (!ENABLE_ARCH_6) {
9923 return false;
9924 }
9925 return op_rr(s, a, gen_revsh);
9926 }
9927
9928 static bool trans_RBIT(DisasContext *s, arg_rr *a)
9929 {
9930 if (!ENABLE_ARCH_6T2) {
9931 return false;
9932 }
9933 return op_rr(s, a, gen_helper_rbit);
9934 }
9935
9936 /*
9937 * Signed multiply, signed and unsigned divide
9938 */
9939
9940 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9941 {
9942 TCGv_i32 t1, t2;
9943
9944 if (!ENABLE_ARCH_6) {
9945 return false;
9946 }
9947
9948 t1 = load_reg(s, a->rn);
9949 t2 = load_reg(s, a->rm);
9950 if (m_swap) {
9951 gen_swap_half(t2);
9952 }
9953 gen_smul_dual(t1, t2);
9954
9955 if (sub) {
9956 /* This subtraction cannot overflow. */
9957 tcg_gen_sub_i32(t1, t1, t2);
9958 } else {
9959 /*
9960 * This addition cannot overflow 32 bits; however it may
9961 * overflow when considered as a signed operation, in which case
9962 * we must set the Q flag.
9963 */
9964 gen_helper_add_setq(t1, cpu_env, t1, t2);
9965 }
9966 tcg_temp_free_i32(t2);
9967
9968 if (a->ra != 15) {
9969 t2 = load_reg(s, a->ra);
9970 gen_helper_add_setq(t1, cpu_env, t1, t2);
9971 tcg_temp_free_i32(t2);
9972 }
9973 store_reg(s, a->rd, t1);
9974 return true;
9975 }
9976
9977 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
9978 {
9979 return op_smlad(s, a, false, false);
9980 }
9981
9982 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
9983 {
9984 return op_smlad(s, a, true, false);
9985 }
9986
9987 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
9988 {
9989 return op_smlad(s, a, false, true);
9990 }
9991
9992 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
9993 {
9994 return op_smlad(s, a, true, true);
9995 }
9996
9997 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9998 {
9999 TCGv_i32 t1, t2;
10000 TCGv_i64 l1, l2;
10001
10002 if (!ENABLE_ARCH_6) {
10003 return false;
10004 }
10005
10006 t1 = load_reg(s, a->rn);
10007 t2 = load_reg(s, a->rm);
10008 if (m_swap) {
10009 gen_swap_half(t2);
10010 }
10011 gen_smul_dual(t1, t2);
10012
10013 l1 = tcg_temp_new_i64();
10014 l2 = tcg_temp_new_i64();
10015 tcg_gen_ext_i32_i64(l1, t1);
10016 tcg_gen_ext_i32_i64(l2, t2);
10017 tcg_temp_free_i32(t1);
10018 tcg_temp_free_i32(t2);
10019
10020 if (sub) {
10021 tcg_gen_sub_i64(l1, l1, l2);
10022 } else {
10023 tcg_gen_add_i64(l1, l1, l2);
10024 }
10025 tcg_temp_free_i64(l2);
10026
10027 gen_addq(s, l1, a->ra, a->rd);
10028 gen_storeq_reg(s, a->ra, a->rd, l1);
10029 tcg_temp_free_i64(l1);
10030 return true;
10031 }
10032
10033 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
10034 {
10035 return op_smlald(s, a, false, false);
10036 }
10037
10038 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
10039 {
10040 return op_smlald(s, a, true, false);
10041 }
10042
10043 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
10044 {
10045 return op_smlald(s, a, false, true);
10046 }
10047
10048 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
10049 {
10050 return op_smlald(s, a, true, true);
10051 }
10052
10053 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
10054 {
10055 TCGv_i32 t1, t2;
10056
10057 if (s->thumb
10058 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
10059 : !ENABLE_ARCH_6) {
10060 return false;
10061 }
10062
10063 t1 = load_reg(s, a->rn);
10064 t2 = load_reg(s, a->rm);
10065 tcg_gen_muls2_i32(t2, t1, t1, t2);
10066
10067 if (a->ra != 15) {
10068 TCGv_i32 t3 = load_reg(s, a->ra);
10069 if (sub) {
10070 /*
10071 * For SMMLS we need a full 64-bit subtract: it propagates the
10072 * borrow caused by a non-zero product lowpart into the high word,
10073 * and leaves the correct result lowpart for the rounding step below.
10074 */
10075 TCGv_i32 zero = tcg_const_i32(0);
10076 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
10077 tcg_temp_free_i32(zero);
10078 } else {
10079 tcg_gen_add_i32(t1, t1, t3);
10080 }
10081 tcg_temp_free_i32(t3);
10082 }
10083 if (round) {
10084 /*
10085 * Adding 0x80000000 to the 64-bit quantity means that we get
10086 * a carry into the high word exactly when the low word has its msb set.
10087 */
10088 tcg_gen_shri_i32(t2, t2, 31);
10089 tcg_gen_add_i32(t1, t1, t2);
10090 }
10091 tcg_temp_free_i32(t2);
10092 store_reg(s, a->rd, t1);
10093 return true;
10094 }
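/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the rounding step above: for any 64-bit x,
 *   hi32(x + 0x80000000) == hi32(x) + bit31(lo32(x))   (mod 2^32)
 * which is exactly what the shri/add pair computes.
 */
#include <assert.h>
#include <stdint.h>

static void smmlar_round_demo(uint64_t x)
{
    uint32_t hi = (uint32_t)(x >> 32);
    uint32_t lo = (uint32_t)x;
    uint32_t rounded_hi = (uint32_t)((x + 0x80000000u) >> 32);
    assert(rounded_hi == (uint32_t)(hi + (lo >> 31)));
}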
10095
10096 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
10097 {
10098 return op_smmla(s, a, false, false);
10099 }
10100
10101 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
10102 {
10103 return op_smmla(s, a, true, false);
10104 }
10105
10106 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
10107 {
10108 return op_smmla(s, a, false, true);
10109 }
10110
10111 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
10112 {
10113 return op_smmla(s, a, true, true);
10114 }
10115
10116 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
10117 {
10118 TCGv_i32 t1, t2;
10119
10120 if (s->thumb
10121 ? !dc_isar_feature(aa32_thumb_div, s)
10122 : !dc_isar_feature(aa32_arm_div, s)) {
10123 return false;
10124 }
10125
10126 t1 = load_reg(s, a->rn);
10127 t2 = load_reg(s, a->rm);
10128 if (u) {
10129 gen_helper_udiv(t1, t1, t2);
10130 } else {
10131 gen_helper_sdiv(t1, t1, t2);
10132 }
10133 tcg_temp_free_i32(t2);
10134 store_reg(s, a->rd, t1);
10135 return true;
10136 }
10137
10138 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
10139 {
10140 return op_div(s, a, false);
10141 }
10142
10143 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
10144 {
10145 return op_div(s, a, true);
10146 }
10147
10148 /*
10149 * Block data transfer
10150 */
10151
10152 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
10153 {
10154 TCGv_i32 addr = load_reg(s, a->rn);
10155
10156 if (a->b) {
10157 if (a->i) {
10158 /* pre increment */
10159 tcg_gen_addi_i32(addr, addr, 4);
10160 } else {
10161 /* pre decrement */
10162 tcg_gen_addi_i32(addr, addr, -(n * 4));
10163 }
10164 } else if (!a->i && n != 1) {
10165 /* post decrement */
10166 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
10167 }
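/*
 * Worked example (illustration): for a "decrement before" block
 * transfer of n == 3 registers, 'addr' becomes Rn - 12 and the
 * transfers then hit Rn - 12, Rn - 8 and Rn - 4; for "increment
 * after", 'addr' stays at Rn and the transfers hit Rn, Rn + 4 and
 * Rn + 8.
 */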
10168
10169 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
10170 /*
10171 * If the writeback is incrementing SP rather than
10172 * decrementing it, and the initial SP is below the
10173 * stack limit but the final written-back SP would
10174 * be above, then we must not perform any memory
10175 * accesses, but it is IMPDEF whether we generate
10176 * an exception. We choose to do so in this case.
10177 * At this point 'addr' is the lowest address: either the
10178 * original SP (if incrementing) or our final SP (if
10179 * decrementing), so that's what we check.
10180 */
10181 gen_helper_v8m_stackcheck(cpu_env, addr);
10182 }
10183
10184 return addr;
10185 }
10186
10187 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
10188 TCGv_i32 addr, int n)
10189 {
10190 if (a->w) {
10191 /* write back */
10192 if (!a->b) {
10193 if (a->i) {
10194 /* post increment */
10195 tcg_gen_addi_i32(addr, addr, 4);
10196 } else {
10197 /* post decrement */
10198 tcg_gen_addi_i32(addr, addr, -(n * 4));
10199 }
10200 } else if (!a->i && n != 1) {
10201 /* pre decrement */
10202 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
10203 }
10204 store_reg(s, a->rn, addr);
10205 } else {
10206 tcg_temp_free_i32(addr);
10207 }
10208 }
10209
10210 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
10211 {
10212 int i, j, n, list, mem_idx;
10213 bool user = a->u;
10214 TCGv_i32 addr, tmp, tmp2;
10215
10216 if (user) {
10217 /* STM (user) */
10218 if (IS_USER(s)) {
10219 /* Only usable in supervisor mode. */
10220 unallocated_encoding(s);
10221 return true;
10222 }
10223 }
10224
10225 list = a->list;
10226 n = ctpop16(list);
10227 if (n < min_n || a->rn == 15) {
10228 unallocated_encoding(s);
10229 return true;
10230 }
10231
10232 addr = op_addr_block_pre(s, a, n);
10233 mem_idx = get_mem_index(s);
10234
10235 for (i = j = 0; i < 16; i++) {
10236 if (!(list & (1 << i))) {
10237 continue;
10238 }
10239
10240 if (user && i != 15) {
10241 tmp = tcg_temp_new_i32();
10242 tmp2 = tcg_const_i32(i);
10243 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
10244 tcg_temp_free_i32(tmp2);
10245 } else {
10246 tmp = load_reg(s, i);
10247 }
10248 gen_aa32_st32(s, tmp, addr, mem_idx);
10249 tcg_temp_free_i32(tmp);
10250
10251 /* No need to add after the last transfer. */
10252 if (++j != n) {
10253 tcg_gen_addi_i32(addr, addr, 4);
10254 }
10255 }
10256
10257 op_addr_block_post(s, a, addr, n);
10258 return true;
10259 }
10260
10261 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
10262 {
10263 /* BitCount(list) < 1 is UNPREDICTABLE */
10264 return op_stm(s, a, 1);
10265 }
10266
10267 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
10268 {
10269 /* Writeback register in register list is UNPREDICTABLE for T32. */
10270 if (a->w && (a->list & (1 << a->rn))) {
10271 unallocated_encoding(s);
10272 return true;
10273 }
10274 /* BitCount(list) < 2 is UNPREDICTABLE */
10275 return op_stm(s, a, 2);
10276 }
10277
10278 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
10279 {
10280 int i, j, n, list, mem_idx;
10281 bool loaded_base;
10282 bool user = a->u;
10283 bool exc_return = false;
10284 TCGv_i32 addr, tmp, tmp2, loaded_var;
10285
10286 if (user) {
10287 /* LDM (user), LDM (exception return) */
10288 if (IS_USER(s)) {
10289 /* Only usable in supervisor mode. */
10290 unallocated_encoding(s);
10291 return true;
10292 }
10293 if (extract32(a->list, 15, 1)) {
10294 exc_return = true;
10295 user = false;
10296 } else {
10297 /* LDM (user) does not allow writeback. */
10298 if (a->w) {
10299 unallocated_encoding(s);
10300 return true;
10301 }
10302 }
10303 }
10304
10305 list = a->list;
10306 n = ctpop16(list);
10307 if (n < min_n || a->rn == 15) {
10308 unallocated_encoding(s);
10309 return true;
10310 }
10311
10312 addr = op_addr_block_pre(s, a, n);
10313 mem_idx = get_mem_index(s);
10314 loaded_base = false;
10315 loaded_var = NULL;
10316
10317 for (i = j = 0; i < 16; i++) {
10318 if (!(list & (1 << i))) {
10319 continue;
10320 }
10321
10322 tmp = tcg_temp_new_i32();
10323 gen_aa32_ld32u(s, tmp, addr, mem_idx);
10324 if (user) {
10325 tmp2 = tcg_const_i32(i);
10326 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
10327 tcg_temp_free_i32(tmp2);
10328 tcg_temp_free_i32(tmp);
10329 } else if (i == a->rn) {
10330 loaded_var = tmp;
10331 loaded_base = true;
10332 } else if (i == 15 && exc_return) {
10333 store_pc_exc_ret(s, tmp);
10334 } else {
10335 store_reg_from_load(s, i, tmp);
10336 }
10337
10338 /* No need to add after the last transfer. */
10339 if (++j != n) {
10340 tcg_gen_addi_i32(addr, addr, 4);
10341 }
10342 }
10343
10344 op_addr_block_post(s, a, addr, n);
10345
10346 if (loaded_base) {
10347 /* Note that we reject base == pc above. */
10348 store_reg(s, a->rn, loaded_var);
10349 }
10350
10351 if (exc_return) {
10352 /* Restore CPSR from SPSR. */
10353 tmp = load_cpu_field(spsr);
10354 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10355 gen_io_start();
10356 }
10357 gen_helper_cpsr_write_eret(cpu_env, tmp);
10358 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10359 gen_io_end();
10360 }
10361 tcg_temp_free_i32(tmp);
10362 /* Must exit loop to check un-masked IRQs */
10363 s->base.is_jmp = DISAS_EXIT;
10364 }
10365 return true;
10366 }
10367
10368 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
10369 {
10370 /*
10371 * Writeback register in register list is UNPREDICTABLE
10372 * for ArchVersion() >= 7. Prior to v7, A32 would write
10373 * an UNKNOWN value to the base register.
10374 */
10375 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
10376 unallocated_encoding(s);
10377 return true;
10378 }
10379 /* BitCount(list) < 1 is UNPREDICTABLE */
10380 return do_ldm(s, a, 1);
10381 }
10382
10383 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
10384 {
10385 /* Writeback register in register list is UNPREDICTABLE for T32. */
10386 if (a->w && (a->list & (1 << a->rn))) {
10387 unallocated_encoding(s);
10388 return true;
10389 }
10390 /* BitCount(list) < 2 is UNPREDICTABLE */
10391 return do_ldm(s, a, 2);
10392 }
10393
10394 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
10395 {
10396 /* Writeback is conditional on the base register not being loaded. */
10397 a->w = !(a->list & (1 << a->rn));
10398 /* BitCount(list) < 1 is UNPREDICTABLE */
10399 return do_ldm(s, a, 1);
10400 }
10401
10402 /*
10403 * Branch, branch with link
10404 */
10405
10406 static bool trans_B(DisasContext *s, arg_i *a)
10407 {
10408 gen_jmp(s, read_pc(s) + a->imm);
10409 return true;
10410 }
10411
10412 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
10413 {
10414 /* This insn has its cond from the encoding; it must be outside an IT block. */
10415 if (a->cond >= 0xe) {
10416 return false;
10417 }
10418 if (s->condexec_mask) {
10419 unallocated_encoding(s);
10420 return true;
10421 }
10422 arm_skip_unless(s, a->cond);
10423 gen_jmp(s, read_pc(s) + a->imm);
10424 return true;
10425 }
10426
10427 static bool trans_BL(DisasContext *s, arg_i *a)
10428 {
10429 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10430 gen_jmp(s, read_pc(s) + a->imm);
10431 return true;
10432 }
10433
10434 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
10435 {
10436 TCGv_i32 tmp;
10437
10438 /* For A32, ARCH(5) is checked near the start of the uncond block. */
10439 if (s->thumb && (a->imm & 2)) {
10440 return false;
10441 }
10442 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10443 tmp = tcg_const_i32(!s->thumb);
10444 store_cpu_field(tmp, thumb);
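/*
 * BLX (immediate) always switches instruction set, so the branch
 * base is the PC aligned down to a word boundary (a no-op when
 * coming from A32, required when coming from T32).
 */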
10445 gen_jmp(s, (read_pc(s) & ~3) + a->imm);
10446 return true;
10447 }
10448
10449 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
10450 {
10451 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10452 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
10453 return true;
10454 }
10455
10456 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
10457 {
10458 TCGv_i32 tmp = tcg_temp_new_i32();
10459
10460 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10461 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
10462 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10463 gen_bx(s, tmp);
10464 return true;
10465 }
10466
10467 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
10468 {
10469 TCGv_i32 tmp;
10470
10471 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10472 if (!ENABLE_ARCH_5) {
10473 return false;
10474 }
10475 tmp = tcg_temp_new_i32();
10476 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
10477 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
10478 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10479 gen_bx(s, tmp);
10480 return true;
10481 }
10482
10483 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
10484 {
10485 TCGv_i32 addr, tmp;
10486
10487 tmp = load_reg(s, a->rm);
10488 if (half) {
10489 tcg_gen_add_i32(tmp, tmp, tmp);
10490 }
10491 addr = load_reg(s, a->rn);
10492 tcg_gen_add_i32(addr, addr, tmp);
10493
10494 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
10495 half ? MO_UW | s->be_data : MO_UB);
10496 tcg_temp_free_i32(addr);
10497
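/*
 * The loaded table entry is a halfword offset: double it and add
 * the Thumb-mode PC (this insn's address + 4) to form the branch
 * target.
 */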
10498 tcg_gen_add_i32(tmp, tmp, tmp);
10499 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
10500 store_reg(s, 15, tmp);
10501 return true;
10502 }
10503
10504 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
10505 {
10506 return op_tbranch(s, a, false);
10507 }
10508
10509 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
10510 {
10511 return op_tbranch(s, a, true);
10512 }
10513
10514 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
10515 {
10516 TCGv_i32 tmp = load_reg(s, a->rn);
10517
10518 arm_gen_condlabel(s);
10519 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
10520 tmp, 0, s->condlabel);
10521 tcg_temp_free_i32(tmp);
10522 gen_jmp(s, read_pc(s) + a->imm);
10523 return true;
10524 }
10525
10526 /*
10527 * Supervisor call - both T32 & A32 come here so we need to check
10528 * which mode we are in when checking for semihosting.
10529 */
10530
10531 static bool trans_SVC(DisasContext *s, arg_SVC *a)
10532 {
10533 const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
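/*
 * 0xab and 0x123456 are the standard AArch32 semihosting call
 * immediates for Thumb and Arm state respectively.
 */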
10534
10535 if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
10536 #ifndef CONFIG_USER_ONLY
10537 !IS_USER(s) &&
10538 #endif
10539 (a->imm == semihost_imm)) {
10540 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
10541 } else {
10542 gen_set_pc_im(s, s->base.pc_next);
10543 s->svc_imm = a->imm;
10544 s->base.is_jmp = DISAS_SWI;
10545 }
10546 return true;
10547 }
10548
10549 /*
10550 * Unconditional system instructions
10551 */
10552
10553 static bool trans_RFE(DisasContext *s, arg_RFE *a)
10554 {
10555 static const int8_t pre_offset[4] = {
10556 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
10557 };
10558 static const int8_t post_offset[4] = {
10559 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
10560 };
10561 TCGv_i32 addr, t1, t2;
10562
10563 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10564 return false;
10565 }
10566 if (IS_USER(s)) {
10567 unallocated_encoding(s);
10568 return true;
10569 }
10570
10571 addr = load_reg(s, a->rn);
10572 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
10573
10574 /* Load PC into t1 and CPSR into t2. */
10575 t1 = tcg_temp_new_i32();
10576 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
10577 tcg_gen_addi_i32(addr, addr, 4);
10578 t2 = tcg_temp_new_i32();
10579 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
10580
10581 if (a->w) {
10582 /* Base writeback. */
10583 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
10584 store_reg(s, a->rn, addr);
10585 } else {
10586 tcg_temp_free_i32(addr);
10587 }
10588 gen_rfe(s, t1, t2);
10589 return true;
10590 }
10591
10592 static bool trans_SRS(DisasContext *s, arg_SRS *a)
10593 {
10594 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10595 return false;
10596 }
10597 gen_srs(s, a->mode, a->pu, a->w);
10598 return true;
10599 }
10600
10601 static bool trans_CPS(DisasContext *s, arg_CPS *a)
10602 {
10603 uint32_t mask, val;
10604
10605 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10606 return false;
10607 }
10608 if (IS_USER(s)) {
10609 /* Implemented as NOP in user mode. */
10610 return true;
10611 }
10612 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
10613
10614 mask = val = 0;
10615 if (a->imod & 2) {
10616 if (a->A) {
10617 mask |= CPSR_A;
10618 }
10619 if (a->I) {
10620 mask |= CPSR_I;
10621 }
10622 if (a->F) {
10623 mask |= CPSR_F;
10624 }
10625 if (a->imod & 1) {
10626 val |= mask;
10627 }
10628 }
10629 if (a->M) {
10630 mask |= CPSR_M;
10631 val |= a->mode;
10632 }
10633 if (mask) {
10634 gen_set_psr_im(s, mask, 0, val);
10635 }
10636 return true;
10637 }
10638
10639 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
10640 {
10641 TCGv_i32 tmp, addr, el;
10642
10643 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
10644 return false;
10645 }
10646 if (IS_USER(s)) {
10647 /* Implemented as NOP in user mode. */
10648 return true;
10649 }
10650
10651 tmp = tcg_const_i32(a->im);
10652 /* FAULTMASK */
10653 if (a->F) {
10654 addr = tcg_const_i32(19);
10655 gen_helper_v7m_msr(cpu_env, addr, tmp);
10656 tcg_temp_free_i32(addr);
10657 }
10658 /* PRIMASK */
10659 if (a->I) {
10660 addr = tcg_const_i32(16);
10661 gen_helper_v7m_msr(cpu_env, addr, tmp);
10662 tcg_temp_free_i32(addr);
10663 }
10664 el = tcg_const_i32(s->current_el);
10665 gen_helper_rebuild_hflags_m32(cpu_env, el);
10666 tcg_temp_free_i32(el);
10667 tcg_temp_free_i32(tmp);
10668 gen_lookup_tb(s);
10669 return true;
10670 }
10671
10672 /*
10673 * Clear-Exclusive, Barriers
10674 */
10675
10676 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
10677 {
10678 if (s->thumb
10679 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
10680 : !ENABLE_ARCH_6K) {
10681 return false;
10682 }
10683 gen_clrex(s);
10684 return true;
10685 }
10686
10687 static bool trans_DSB(DisasContext *s, arg_DSB *a)
10688 {
10689 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10690 return false;
10691 }
10692 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10693 return true;
10694 }
10695
10696 static bool trans_DMB(DisasContext *s, arg_DMB *a)
10697 {
10698 return trans_DSB(s, NULL);
10699 }
10700
10701 static bool trans_ISB(DisasContext *s, arg_ISB *a)
10702 {
10703 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10704 return false;
10705 }
10706 /*
10707 * We need to break the TB after this insn to execute
10708 * self-modifying code correctly and also to take
10709 * any pending interrupts immediately.
10710 */
10711 gen_goto_tb(s, 0, s->base.pc_next);
10712 return true;
10713 }
10714
10715 static bool trans_SB(DisasContext *s, arg_SB *a)
10716 {
10717 if (!dc_isar_feature(aa32_sb, s)) {
10718 return false;
10719 }
10720 /*
10721 * TODO: There is no speculation barrier opcode
10722 * for TCG; MB and end the TB instead.
10723 */
10724 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10725 gen_goto_tb(s, 0, s->base.pc_next);
10726 return true;
10727 }
10728
10729 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
10730 {
10731 if (!ENABLE_ARCH_6) {
10732 return false;
10733 }
10734 if (a->E != (s->be_data == MO_BE)) {
10735 gen_helper_setend(cpu_env);
10736 s->base.is_jmp = DISAS_UPDATE;
10737 }
10738 return true;
10739 }
10740
10741 /*
10742 * Preload instructions
10743 * All are nops, contingent on the appropriate arch level.
10744 */
10745
10746 static bool trans_PLD(DisasContext *s, arg_PLD *a)
10747 {
10748 return ENABLE_ARCH_5TE;
10749 }
10750
10751 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
10752 {
10753 return arm_dc_feature(s, ARM_FEATURE_V7MP);
10754 }
10755
10756 static bool trans_PLI(DisasContext *s, arg_PLD *a)
10757 {
10758 return ENABLE_ARCH_7;
10759 }
10760
10761 /*
10762 * If-then
10763 */
10764
10765 static bool trans_IT(DisasContext *s, arg_IT *a)
10766 {
10767 int cond_mask = a->cond_mask;
10768
10769 /*
10770 * No actual code generated for this insn, just setup state.
10771 *
10772 * Combinations of firstcond and mask which set up an 0b1111
10773 * condition are UNPREDICTABLE; we take the CONSTRAINED
10774 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
10775 * i.e. both meaning "execute always".
10776 */
10777 s->condexec_cond = (cond_mask >> 4) & 0xe;
10778 s->condexec_mask = cond_mask & 0x1f;
10779 return true;
10780 }
10781
10782 /*
10783 * Legacy decoder.
10784 */
10785
10786 static void disas_arm_insn(DisasContext *s, unsigned int insn)
10787 {
10788 unsigned int cond = insn >> 28;
10789
10790 /* M variants do not implement ARM mode; this must raise the INVSTATE
10791 * UsageFault exception.
10792 */
10793 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10794 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
10795 default_exception_el(s));
10796 return;
10797 }
10798
10799 if (cond == 0xf) {
10800 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
10801 * choose to UNDEF. In ARMv5 and above the space is used
10802 * for miscellaneous unconditional instructions.
10803 */
10804 ARCH(5);
10805
10806 /* Unconditional instructions. */
10807 /* TODO: Perhaps merge these into one decodetree output file. */
10808 if (disas_a32_uncond(s, insn) ||
10809 disas_vfp_uncond(s, insn) ||
10810 disas_neon_dp(s, insn) ||
10811 disas_neon_ls(s, insn) ||
10812 disas_neon_shared(s, insn)) {
10813 return;
10814 }
10815 /* fall back to legacy decoder */
10816
10817 if (((insn >> 25) & 7) == 1) {
10818 /* NEON Data processing. */
10819 if (disas_neon_data_insn(s, insn)) {
10820 goto illegal_op;
10821 }
10822 return;
10823 }
10824 if ((insn & 0x0e000f00) == 0x0c000100) {
10825 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
10826 /* iWMMXt register transfer. */
10827 if (extract32(s->c15_cpar, 1, 1)) {
10828 if (!disas_iwmmxt_insn(s, insn)) {
10829 return;
10830 }
10831 }
10832 }
10833 }
10834 goto illegal_op;
10835 }
10836 if (cond != 0xe) {
10837 /* If not always-execute, generate a conditional jump to the
10838 next instruction. */
10839 arm_skip_unless(s, cond);
10840 }
10841
10842 /* TODO: Perhaps merge these into one decodetree output file. */
10843 if (disas_a32(s, insn) ||
10844 disas_vfp(s, insn)) {
10845 return;
10846 }
10847 /* fall back to legacy decoder */
10848
10849 switch ((insn >> 24) & 0xf) {
10850 case 0xc:
10851 case 0xd:
10852 case 0xe:
10853 if (((insn >> 8) & 0xe) == 10) {
10854 /* VFP, but failed disas_vfp. */
10855 goto illegal_op;
10856 }
10857 if (disas_coproc_insn(s, insn)) {
10858 /* Coprocessor. */
10859 goto illegal_op;
10860 }
10861 break;
10862 default:
10863 illegal_op:
10864 unallocated_encoding(s);
10865 break;
10866 }
10867 }
10868
10869 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
10870 {
10871 /*
10872 * Return true if this is a 16-bit instruction. We must be precise
10873 * about this (matching the decode).
10874 */
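/*
 * For example: 0xb500 (push {lr}) has insn >> 11 == 0x16 and is a
 * complete 16-bit insn, while 0xe92d (the first half of a 32-bit
 * PUSH.W/STMDB) has insn >> 11 == 0x1d and falls through to the
 * checks below.
 */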
10875 if ((insn >> 11) < 0x1d) {
10876 /* Definitely a 16-bit instruction */
10877 return true;
10878 }
10879
10880 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
10881 * first half of a 32-bit Thumb insn. Thumb-1 cores might
10882 * end up actually treating this as two 16-bit insns, though,
10883 * if it's half of a bl/blx pair that might span a page boundary.
10884 */
10885 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
10886 arm_dc_feature(s, ARM_FEATURE_M)) {
10887 /* Thumb2 cores (including all M profile ones) always treat
10888 * 32-bit insns as 32-bit.
10889 */
10890 return false;
10891 }
10892
10893 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
10894 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
10895 * is not on the next page; we merge this into a 32-bit
10896 * insn.
10897 */
10898 return false;
10899 }
10900 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
10901 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
10902 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
10903 * -- handle as single 16 bit insn
10904 */
10905 return true;
10906 }
10907
10908 /* Translate a 32-bit thumb instruction. */
10909 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
10910 {
10911 /*
10912 * ARMv6-M supports a limited subset of Thumb2 instructions.
10913 * Other Thumb-1 architectures allow only the 32-bit
10914 * combined BL/BLX prefix and suffix.
10915 */
10916 if (arm_dc_feature(s, ARM_FEATURE_M) &&
10917 !arm_dc_feature(s, ARM_FEATURE_V7)) {
10918 int i;
10919 bool found = false;
10920 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
10921 0xf3b08040 /* dsb */,
10922 0xf3b08050 /* dmb */,
10923 0xf3b08060 /* isb */,
10924 0xf3e08000 /* mrs */,
10925 0xf000d000 /* bl */};
10926 static const uint32_t armv6m_mask[] = {0xffe0d000,
10927 0xfff0d0f0,
10928 0xfff0d0f0,
10929 0xfff0d0f0,
10930 0xffe0d000,
10931 0xf800d000};
10932
10933 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
10934 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
10935 found = true;
10936 break;
10937 }
10938 }
10939 if (!found) {
10940 goto illegal_op;
10941 }
10942 } else if ((insn & 0xf800e800) != 0xf000e800) {
10943 ARCH(6T2);
10944 }
10945
10946 if ((insn & 0xef000000) == 0xef000000) {
10947 /*
10948 * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10949 * transform into
10950 * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10951 */
10952 uint32_t a32_insn = (insn & 0xe2ffffff) |
10953 ((insn & (1 << 28)) >> 4) | (1 << 28);
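/*
 * Concretely (illustration): a T32 insn 0xefxxxxxx maps to the A32
 * encoding 0xf2xxxxxx, and 0xffxxxxxx (bit 28 set) maps to
 * 0xf3xxxxxx, i.e. the A32 "1111 001p" Neon data-processing space.
 */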
10954
10955 if (disas_neon_dp(s, a32_insn)) {
10956 return;
10957 }
10958 }
10959
10960 if ((insn & 0xff100000) == 0xf9000000) {
10961 /*
10962 * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10963 * transform into
10964 * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10965 */
10966 uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
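/* e.g. a T32 insn 0xf9xxxxxx becomes the A32 encoding 0xf4xxxxxx. */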
10967
10968 if (disas_neon_ls(s, a32_insn)) {
10969 return;
10970 }
10971 }
10972
10973 /*
10974 * TODO: Perhaps merge these into one decodetree output file.
10975 * Note disas_vfp is written for a32 with cond field in the
10976 * top nibble. The t32 encoding requires 0xe in the top nibble.
10977 */
10978 if (disas_t32(s, insn) ||
10979 disas_vfp_uncond(s, insn) ||
10980 disas_neon_shared(s, insn) ||
10981 ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
10982 return;
10983 }
10984 /* fall back to legacy decoder */
10985
10986 switch ((insn >> 25) & 0xf) {
10987 case 0: case 1: case 2: case 3:
10988 /* 16-bit instructions. Should never happen. */
10989 abort();
10990 case 6: case 7: case 14: case 15:
10991 /* Coprocessor. */
10992 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10993 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
10994 if (extract32(insn, 24, 2) == 3) {
10995 goto illegal_op; /* op0 = 0b11 : unallocated */
10996 }
10997
10998 if (((insn >> 8) & 0xe) == 10 &&
10999 dc_isar_feature(aa32_fpsp_v2, s)) {
11000 /* FP, and the CPU supports it */
11001 goto illegal_op;
11002 } else {
11003 /* All other insns: NOCP */
11004 gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
11005 syn_uncategorized(),
11006 default_exception_el(s));
11007 }
11008 break;
11009 }
11010 if (((insn >> 24) & 3) == 3) {
11011 /* Translate into the equivalent ARM encoding. */
11012 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
11013 if (disas_neon_data_insn(s, insn)) {
11014 goto illegal_op;
11015 }
11016 } else if (((insn >> 8) & 0xe) == 10) {
11017 /* VFP, but failed disas_vfp. */
11018 goto illegal_op;
11019 } else {
11020 if (insn & (1 << 28))
11021 goto illegal_op;
11022 if (disas_coproc_insn(s, insn)) {
11023 goto illegal_op;
11024 }
11025 }
11026 break;
11027 case 12:
11028 goto illegal_op;
11029 default:
11030 illegal_op:
11031 unallocated_encoding(s);
11032 }
11033 }
11034
11035 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
11036 {
11037 if (!disas_t16(s, insn)) {
11038 unallocated_encoding(s);
11039 }
11040 }
11041
11042 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
11043 {
11044 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
11045 * (False positives are OK, false negatives are not.)
11046 * We know this is a Thumb insn, and our caller ensures we are
11047 * only called if dc->base.pc_next is less than 4 bytes from the page
11048 * boundary, so we cross the page if the first 16 bits indicate
11049 * that this is a 32 bit insn.
11050 */
11051 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
11052
11053 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
11054 }
11055
11056 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
11057 {
11058 DisasContext *dc = container_of(dcbase, DisasContext, base);
11059 CPUARMState *env = cs->env_ptr;
11060 ARMCPU *cpu = env_archcpu(env);
11061 uint32_t tb_flags = dc->base.tb->flags;
11062 uint32_t condexec, core_mmu_idx;
11063
11064 dc->isar = &cpu->isar;
11065 dc->condjmp = 0;
11066
11067 dc->aarch64 = 0;
11068 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11069 * there is no secure EL1, so we route exceptions to EL3.
11070 */
11071 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11072 !arm_el_is_aa64(env, 3);
11073 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
11074 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
11075 condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
11076 dc->condexec_mask = (condexec & 0xf) << 1;
11077 dc->condexec_cond = condexec >> 4;
11078
11079 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
11080 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
11081 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11082 #if !defined(CONFIG_USER_ONLY)
11083 dc->user = (dc->current_el == 0);
11084 #endif
11085 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
11086
11087 if (arm_feature(env, ARM_FEATURE_M)) {
11088 dc->vfp_enabled = 1;
11089 dc->be_data = MO_TE;
11090 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
11091 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
11092 regime_is_secure(env, dc->mmu_idx);
11093 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
11094 dc->v8m_fpccr_s_wrong =
11095 FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
11096 dc->v7m_new_fp_ctxt_needed =
11097 FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
11098 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
11099 } else {
11100 dc->be_data =
11101 FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
11102 dc->debug_target_el =
11103 FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
11104 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
11105 dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
11106 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
11107 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
11108 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
11109 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
11110 } else {
11111 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
11112 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
11113 }
11114 }
11115 dc->cp_regs = cpu->cp_regs;
11116 dc->features = env->features;
11117
11118 /* Single step state. The code-generation logic here is:
11119 * SS_ACTIVE == 0:
11120 * generate code with no special handling for single-stepping (except
11121 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11122 * this happens anyway because those changes are all system register or
11123 * PSTATE writes).
11124 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11125 * emit code for one insn
11126 * emit code to clear PSTATE.SS
11127 * emit code to generate software step exception for completed step
11128 * end TB (as usual for having generated an exception)
11129 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11130 * emit code to generate a software step exception
11131 * end the TB
11132 */
11133 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
11134 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
11135 dc->is_ldex = false;
11136
11137 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
11138
11139 /* If architectural single step active, limit to 1. */
11140 if (is_singlestepping(dc)) {
11141 dc->base.max_insns = 1;
11142 }
11143
11144 /* ARM is a fixed-length ISA. Bound the number of insns to execute
11145 to those left on the page. */
11146 if (!dc->thumb) {
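/*
 * -(pc_first | TARGET_PAGE_MASK) is the number of bytes left on
 * the current page; e.g. with 4K pages and pc_first ending in
 * 0xff8 that is 8 bytes, i.e. a bound of 2 A32 insns.
 */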
11147 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
11148 dc->base.max_insns = MIN(dc->base.max_insns, bound);
11149 }
11150
11151 cpu_V0 = tcg_temp_new_i64();
11152 cpu_V1 = tcg_temp_new_i64();
11153 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
11154 cpu_M0 = tcg_temp_new_i64();
11155 }
11156
11157 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
11158 {
11159 DisasContext *dc = container_of(dcbase, DisasContext, base);
11160
11161 /* A note on handling of the condexec (IT) bits:
11162 *
11163 * We want to avoid the overhead of having to write the updated condexec
11164 * bits back to the CPUARMState for every instruction in an IT block. So:
11165 * (1) if the condexec bits are not already zero then we write
11166 * zero back into the CPUARMState now. This avoids complications trying
11167 * to do it at the end of the block. (For example if we don't do this
11168 * it's hard to identify whether we can safely skip writing condexec
11169 * at the end of the TB, which we definitely want to do for the case
11170 * where a TB doesn't do anything with the IT state at all.)
11171 * (2) if we are going to leave the TB then we call gen_set_condexec()
11172 * which will write the correct value into CPUARMState if zero is wrong.
11173 * This is done both for leaving the TB at the end, and for leaving
11174 * it because of an exception we know will happen, which is done in
11175 * gen_exception_insn(). The latter is necessary because we need to
11176 * leave the TB with the PC/IT state just prior to execution of the
11177 * instruction which caused the exception.
11178 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
11179 * then the CPUARMState will be wrong and we need to reset it.
11180 * This is handled in the same way as restoration of the
11181 * PC in these situations; we save the value of the condexec bits
11182 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
11183 * then uses this to restore them after an exception.
11184 *
11185 * Note that there are no instructions which can read the condexec
11186 * bits, and none which can write non-static values to them, so
11187 * we don't need to care about whether CPUARMState is correct in the
11188 * middle of a TB.
11189 */
11190
11191 /* Reset the conditional execution bits immediately. This avoids
11192 complications trying to do it at the end of the block. */
11193 if (dc->condexec_mask || dc->condexec_cond) {
11194 TCGv_i32 tmp = tcg_temp_new_i32();
11195 tcg_gen_movi_i32(tmp, 0);
11196 store_cpu_field(tmp, condexec_bits);
11197 }
11198 }
11199
11200 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
11201 {
11202 DisasContext *dc = container_of(dcbase, DisasContext, base);
11203
11204 tcg_gen_insn_start(dc->base.pc_next,
11205 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
11206 0);
11207 dc->insn_start = tcg_last_op();
11208 }
11209
11210 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
11211 const CPUBreakpoint *bp)
11212 {
11213 DisasContext *dc = container_of(dcbase, DisasContext, base);
11214
11215 if (bp->flags & BP_CPU) {
11216 gen_set_condexec(dc);
11217 gen_set_pc_im(dc, dc->base.pc_next);
11218 gen_helper_check_breakpoints(cpu_env);
11219 /* End the TB early; it's likely not going to be executed */
11220 dc->base.is_jmp = DISAS_TOO_MANY;
11221 } else {
11222 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
11223 /* The address covered by the breakpoint must be
11224 included in [tb->pc, tb->pc + tb->size) in order
10225 for it to be properly cleared -- thus we
11226 increment the PC here so that the logic setting
11227 tb->size below does the right thing. */
11228 /* TODO: Advance PC by correct instruction length to
11229 * avoid disassembler error messages */
11230 dc->base.pc_next += 2;
11231 dc->base.is_jmp = DISAS_NORETURN;
11232 }
11233
11234 return true;
11235 }
11236
11237 static bool arm_pre_translate_insn(DisasContext *dc)
11238 {
11239 #ifdef CONFIG_USER_ONLY
11240 /* Intercept jump to the magic kernel page. */
11241 if (dc->base.pc_next >= 0xffff0000) {
11242 /* We always get here via a jump, so we know we are not in a
11243 conditional execution block. */
11244 gen_exception_internal(EXCP_KERNEL_TRAP);
11245 dc->base.is_jmp = DISAS_NORETURN;
11246 return true;
11247 }
11248 #endif
11249
11250 if (dc->ss_active && !dc->pstate_ss) {
11251 /* Singlestep state is Active-pending.
11252 * If we're in this state at the start of a TB then either
11253 * a) we just took an exception to an EL which is being debugged
11254 * and this is the first insn in the exception handler
11255 * b) debug exceptions were masked and we just unmasked them
11256 * without changing EL (eg by clearing PSTATE.D)
11257 * In either case we're going to take a swstep exception in the
11258 * "did not step an insn" case, and so the syndrome ISV and EX
11259 * bits should be zero.
11260 */
11261 assert(dc->base.num_insns == 1);
11262 gen_swstep_exception(dc, 0, 0);
11263 dc->base.is_jmp = DISAS_NORETURN;
11264 return true;
11265 }
11266
11267 return false;
11268 }
11269
11270 static void arm_post_translate_insn(DisasContext *dc)
11271 {
11272 if (dc->condjmp && !dc->base.is_jmp) {
11273 gen_set_label(dc->condlabel);
11274 dc->condjmp = 0;
11275 }
11276 translator_loop_temp_check(&dc->base);
11277 }
11278
11279 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11280 {
11281 DisasContext *dc = container_of(dcbase, DisasContext, base);
11282 CPUARMState *env = cpu->env_ptr;
11283 unsigned int insn;
11284
11285 if (arm_pre_translate_insn(dc)) {
11286 return;
11287 }
11288
11289 dc->pc_curr = dc->base.pc_next;
11290 insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
11291 dc->insn = insn;
11292 dc->base.pc_next += 4;
11293 disas_arm_insn(dc, insn);
11294
11295 arm_post_translate_insn(dc);
11296
11297 /* ARM is a fixed-length ISA. We performed the cross-page check
11298 in init_disas_context by adjusting max_insns. */
11299 }
11300
11301 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
11302 {
11303 /* Return true if this Thumb insn is always unconditional,
11304 * even inside an IT block. This is true of only a very few
11305 * instructions: BKPT, HLT, and SG.
11306 *
11307 * A larger class of instructions are UNPREDICTABLE if used
11308 * inside an IT block; we do not need to detect those here, because
11309 * what we do by default (perform the cc check and update the IT
11310 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
11311 * choice for those situations.
11312 *
11313 * insn is either a 16-bit or a 32-bit instruction; the two are
11314 * distinguishable because for the 16-bit case the top 16 bits
11315 * are zeroes, and that isn't a valid 32-bit encoding.
11316 */
11317 if ((insn & 0xffffff00) == 0xbe00) {
11318 /* BKPT */
11319 return true;
11320 }
11321
11322 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
11323 !arm_dc_feature(s, ARM_FEATURE_M)) {
11324 /* HLT: v8A only. This is unconditional even when it is going to
11325 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
11326 * For v7 cores this was a plain old undefined encoding and so
11327 * honours its cc check. (We might be using the encoding as
11328 * a semihosting trap, but we don't change the cc check behaviour
11329 * on that account, because a debugger connected to a real v7A
11330 * core and emulating semihosting traps by catching the UNDEF
11331 * exception would also only see cases where the cc check passed.
11332 * No guest code should be trying to do a HLT semihosting trap
11333 * in an IT block anyway.
11334 */
11335 return true;
11336 }
11337
11338 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
11339 arm_dc_feature(s, ARM_FEATURE_M)) {
11340 /* SG: v8M only */
11341 return true;
11342 }
11343
11344 return false;
11345 }
11346
11347 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11348 {
11349 DisasContext *dc = container_of(dcbase, DisasContext, base);
11350 CPUARMState *env = cpu->env_ptr;
11351 uint32_t insn;
11352 bool is_16bit;
11353
11354 if (arm_pre_translate_insn(dc)) {
11355 return;
11356 }
11357
11358 dc->pc_curr = dc->base.pc_next;
11359 insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11360 is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
11361 dc->base.pc_next += 2;
11362 if (!is_16bit) {
11363 uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11364
11365 insn = insn << 16 | insn2;
11366 dc->base.pc_next += 2;
11367 }
11368 dc->insn = insn;
11369
11370 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
11371 uint32_t cond = dc->condexec_cond;
11372
11373 /*
11374 * Conditionally skip the insn. Note that both 0xe and 0xf mean
11375 * "always"; 0xf is not "never".
11376 */
11377 if (cond < 0x0e) {
11378 arm_skip_unless(dc, cond);
11379 }
11380 }
11381
11382 if (is_16bit) {
11383 disas_thumb_insn(dc, insn);
11384 } else {
11385 disas_thumb2_insn(dc, insn);
11386 }
11387
11388 /* Advance the Thumb condexec condition. */
11389 if (dc->condexec_mask) {
11390 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
11391 ((dc->condexec_mask >> 4) & 1));
11392 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
11393 if (dc->condexec_mask == 0) {
11394 dc->condexec_cond = 0;
11395 }
11396 }
11397
11398 arm_post_translate_insn(dc);
11399
11400 /* Thumb is a variable-length ISA. Stop translation when the next insn
11401 * will touch a new page. This ensures that prefetch aborts occur at
11402 * the right place.
11403 *
11404 * We want to stop the TB if the next insn starts in a new page,
11405 * or if it spans between this page and the next. This means that
11406 * if we're looking at the last halfword in the page we need to
11407 * see if it's a 16-bit Thumb insn (which will fit in this TB)
11408 * or a 32-bit Thumb insn (which won't).
11409 * This is to avoid generating a silly TB with a single 16-bit insn
11410 * in it at the end of this page (which would execute correctly
11411 * but isn't very efficient).
11412 */
11413 if (dc->base.is_jmp == DISAS_NEXT
11414 && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
11415 || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
11416 && insn_crosses_page(env, dc)))) {
11417 dc->base.is_jmp = DISAS_TOO_MANY;
11418 }
11419 }
11420
11421 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
11422 {
11423 DisasContext *dc = container_of(dcbase, DisasContext, base);
11424
11425 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
11426 /* FIXME: This can theoretically happen with self-modifying code. */
11427 cpu_abort(cpu, "IO on conditional branch instruction");
11428 }
11429
11430 /* At this stage dc->condjmp will only be set when the skipped
11431 instruction was a conditional branch or trap, and the PC has
11432 already been written. */
11433 gen_set_condexec(dc);
11434 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
11435 /* Exception return branches need some special case code at the
11436 * end of the TB, which is complex enough that it has to
11437 * handle the single-step vs not and the condition-failed
11438 * insn codepath itself.
11439 */
11440 gen_bx_excret_final_code(dc);
11441 } else if (unlikely(is_singlestepping(dc))) {
11442 /* Unconditional and "condition passed" instruction codepath. */
11443 switch (dc->base.is_jmp) {
11444 case DISAS_SWI:
11445 gen_ss_advance(dc);
11446 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11447 default_exception_el(dc));
11448 break;
11449 case DISAS_HVC:
11450 gen_ss_advance(dc);
11451 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11452 break;
11453 case DISAS_SMC:
11454 gen_ss_advance(dc);
11455 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11456 break;
11457 case DISAS_NEXT:
11458 case DISAS_TOO_MANY:
11459 case DISAS_UPDATE:
11460 gen_set_pc_im(dc, dc->base.pc_next);
11461 /* fall through */
11462 default:
11463 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
11464 gen_singlestep_exception(dc);
11465 break;
11466 case DISAS_NORETURN:
11467 break;
11468 }
11469 } else {
11470 /* While branches must always occur at the end of an IT block,
11471 there are a few other things that can cause us to terminate
11472 the TB in the middle of an IT block:
11473 - Exception generating instructions (bkpt, swi, undefined).
11474 - Page boundaries.
11475 - Hardware watchpoints.
11476 Hardware breakpoints have already been handled and skip this code.
11477 */
11478 switch (dc->base.is_jmp) {
11479 case DISAS_NEXT:
11480 case DISAS_TOO_MANY:
11481 gen_goto_tb(dc, 1, dc->base.pc_next);
11482 break;
11483 case DISAS_JUMP:
11484 gen_goto_ptr();
11485 break;
11486 case DISAS_UPDATE:
11487 gen_set_pc_im(dc, dc->base.pc_next);
11488 /* fall through */
11489 default:
11490 /* indicate that the hash table must be used to find the next TB */
11491 tcg_gen_exit_tb(NULL, 0);
11492 break;
11493 case DISAS_NORETURN:
11494 /* nothing more to generate */
11495 break;
11496 case DISAS_WFI:
11497 {
11498 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
11499 !(dc->insn & (1U << 31))) ? 2 : 4);
11500
11501 gen_helper_wfi(cpu_env, tmp);
11502 tcg_temp_free_i32(tmp);
11503 /* The helper doesn't necessarily throw an exception, but we
11504 * must go back to the main loop to check for interrupts anyway.
11505 */
11506 tcg_gen_exit_tb(NULL, 0);
11507 break;
11508 }
11509 case DISAS_WFE:
11510 gen_helper_wfe(cpu_env);
11511 break;
11512 case DISAS_YIELD:
11513 gen_helper_yield(cpu_env);
11514 break;
11515 case DISAS_SWI:
11516 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11517 default_exception_el(dc));
11518 break;
11519 case DISAS_HVC:
11520 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11521 break;
11522 case DISAS_SMC:
11523 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11524 break;
11525 }
11526 }
11527
11528 if (dc->condjmp) {
11529 /* "Condition failed" instruction codepath for the branch/trap insn */
11530 gen_set_label(dc->condlabel);
11531 gen_set_condexec(dc);
11532 if (unlikely(is_singlestepping(dc))) {
11533 gen_set_pc_im(dc, dc->base.pc_next);
11534 gen_singlestep_exception(dc);
11535 } else {
11536 gen_goto_tb(dc, 1, dc->base.pc_next);
11537 }
11538 }
11539 }
11540
11541 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
11542 {
11543 DisasContext *dc = container_of(dcbase, DisasContext, base);
11544
11545 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
11546 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
11547 }
11548
11549 static const TranslatorOps arm_translator_ops = {
11550 .init_disas_context = arm_tr_init_disas_context,
11551 .tb_start = arm_tr_tb_start,
11552 .insn_start = arm_tr_insn_start,
11553 .breakpoint_check = arm_tr_breakpoint_check,
11554 .translate_insn = arm_tr_translate_insn,
11555 .tb_stop = arm_tr_tb_stop,
11556 .disas_log = arm_tr_disas_log,
11557 };
11558
11559 static const TranslatorOps thumb_translator_ops = {
11560 .init_disas_context = arm_tr_init_disas_context,
11561 .tb_start = arm_tr_tb_start,
11562 .insn_start = arm_tr_insn_start,
11563 .breakpoint_check = arm_tr_breakpoint_check,
11564 .translate_insn = thumb_tr_translate_insn,
11565 .tb_stop = arm_tr_tb_stop,
11566 .disas_log = arm_tr_disas_log,
11567 };
11568
11569 /* generate intermediate code for basic block 'tb'. */
11570 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
11571 {
11572 DisasContext dc = { };
11573 const TranslatorOps *ops = &arm_translator_ops;
11574
11575 if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
11576 ops = &thumb_translator_ops;
11577 }
11578 #ifdef TARGET_AARCH64
11579 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
11580 ops = &aarch64_translator_ops;
11581 }
11582 #endif
11583
11584 translator_loop(ops, &dc.base, cpu, tb, max_insns);
11585 }
11586
11587 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
11588 target_ulong *data)
11589 {
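/*
 * data[] holds the values recorded by tcg_gen_insn_start() in
 * arm_tr_insn_start(): data[0] is the PC, data[1] the packed
 * condexec (IT) bits and data[2] the (shifted) syndrome
 * information.
 */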
11590 if (is_a64(env)) {
11591 env->pc = data[0];
11592 env->condexec_bits = 0;
11593 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11594 } else {
11595 env->regs[15] = data[0];
11596 env->condexec_bits = data[1];
11597 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11598 }
11599 }